# coreparse2.py  3/5/2002 http://www.gldialtone.com
# Syntax:  Coreparse FILENAME (COMPONENT)  where FILENAME is the name of 
#   your input file without extension, and COMPONENT is your optional top-level
#   element, if you only want an XSD schema of one component and its children.
# Example: "CoreParse CC4" will open input file CC4.XML, and generate CC4.XSD.
# Example: "CoreParse CC4 Ledger.Details" produces a schema with Ledger.Details
# and all of the components it uses.
# Download an MDB File of core components from http://www.arapxml.net
# Run the XML Export macro in the MDB file to generate your CC.XML text file.
# Download python from http://www.python.org/download/
# Read python howto: http://py-howto.sourceforge.net
# STEP 1:  Import all Core Components rows from text file into a 12-column array.
# STEP 2:  Sort and cleanup the big array, get rid of rows with empty UIDs.
# STEP 3:  Create additional lists (names and types), for each component.
# STEP 4:  Convert column 11 (child elements) strings into python lists.
# STEP 5:  Build "type images" -- XML scraps for Table 8-2 Content types.
# STEP 6:  Flag the Aggregate core components having 1 or 2 levels
# STEP 6a: some quick validation checks on Aggregate elements.
# STEP 6b: Analyze the Aggregate core components having 1 or 2 levels.
# STEP 6c: Determine the depth of Aggregate core components 3-5 levels deep.
# STEP 6d: Determine dependencies (identify elements to include in a custom XSD.)
# STEP 7a:  Write an XML Schema-- prolog and Basic core components:
# STEP 7b:  Write XML Schema Types for Aggregate components having no Aggregate children elements.
# STEP 7c:  Write XML Schema Types for Aggregate components having children that are Aggregates.
# STEP 8a:  Write HTML header/prolog to document the same core components in HTML.
# STEP 8b:  Write all the data elements/components into an HTML table.

import string, re, sys, copy

if len(sys.argv) < 2:
   print "\n"
   print 'Core Component parser starting. '
   print 'Syntax:  Coreparse FILENAME <COMPONENT>  where FILENAME is the'
   print 'input file and COMPONENT is the optional top-level element.'
   print '   Example "CoreParse CC4" will open input file CC4.XML,  '
   print '   and generate output file CC4.XSD.'

# STEP 1:  Import all Core Components rows into a huge 12-column array.

# Markers looked for while slicing a value out of one line of the input file.
xmlEmptyTag = '/>'   # a line containing this is treated as having no value.
xmlEndTag = '<'      # the value is taken to end at the next '<'.

# Opening-tag prefixes, in the order their values are stored as columns 0-11.
target = [
 '<UID',
 '<DictionaryEntryName',
 '<CCTused',
 '<BasicOrAggregate',
 '<definition',
 '<remarks',
 '<ObjectClass',
 '<PropertyTerm',
 '<RepresentationTerm',
 '<BusinessTerms',
 '<CoreComponentChildren',
 '<XMLdatatype',
]

# aStartPos[i] is the length of target[i] plus one (presumably to step over
# the closing '>' of the opening tag -- verify against the input format).
# The list carries one extra trailing slot that is never filled and stays 0.
aStartPos = [0] * (len(target) + 1)
for i in range(len(target)):
   aStartPos[i] = 1 + len(target[i])

dat = [[]]     # permanent 12-column array (list) is built, in memory.
               # Row 0 starts as an empty placeholder; STEP 2 deletes it.
aCCNames = []  # permanent list contains the DictionaryEntryName of each core component.
aCCTypes = []  # permanent list contains the XML Type of each core component.

ComponentNumber = 0
infile = open (sys.argv[1]+".XML")
try:
   line = infile.readline()
   while line:
      # Start a new row; its 12 string columns are appended one tag at a time.
      ComponentNumber = ComponentNumber + 1
      dat.append( [] )
      for i in range (0,12):
         # Read forward until a line mentions target[i].  At end of file
         # `line` becomes '' and the remaining columns receive ''.
         while line and string.find( line, target[i] ) == -1:
            line = string.lstrip(infile.readline())
         # found a line containing target[i].... now lets rip the value out of it!
         if string.find( line, xmlEmptyTag ) == -1:
            # The value runs from aStartPos[i] up to the next '<'.
            x = (string.find(line, xmlEndTag, aStartPos[i], 9999))
            if i < 3 or i>5:
               # Columns 0-2 and 6-11: every space is removed from the value.
               ss = re.sub(" ","",line[ aStartPos[i] : x ])
               dat[ComponentNumber].append( ss )
            else:
               # Columns 3-5 (BasicOrAggregate, definition, remarks) are kept verbatim.
               dat[ComponentNumber].append( line[ aStartPos[i] : x ])

         else:
            # An empty tag ("/>") is recorded as the literal string "n/a".
            dat[ComponentNumber].append( "n/a" )
      dat[ComponentNumber].append(0)   # thirteenth column dat[12] will hold levels count.
      dat[ComponentNumber].append(1)   # fourteenth column dat[13] will hold dependency flag.
      dat[ComponentNumber].append([])   # fifteenth column dat[14] will hold children parameters.
finally:
   # The original wrote `infile.close` without parentheses, which never
   # called the method; close the input file for real, even on an error.
   infile.close()

# STEP 2:  Cleanup the big array, get rid of rows with empty UIDs, and Sort.
#  we will start with the highest number when deleting elements of a list.

class ReverseList:
  """Read-only view that yields the items of a list from last to first.

  Indexing with 0 returns the last item of the wrapped list, 1 the one
  before it, and so on.  An index at or beyond the list length raises
  IndexError, which is what ends a `for` loop over the view.
  """

  def __init__(self,l):
    # `offset` is the index of the last item, captured once at construction.
    self.offset = len(l) - 1
    self._list = l

  def __getitem__(self,n):
    if n >= len(self._list):
      raise IndexError
    mirrored = self.offset - n
    return self._list[mirrored]

x = range(len(dat))
for i in ReverseList(x):
   if len(dat[i]) < 2 :
#      print i,dat[i]
      del dat[i]
      continue
   if len(dat[i][0]) < 2 :
#      print i,dat[i]
      del dat[i]
      continue

x = len(dat) + 1
print "Finished importing " + str(x) + " components into an array."


def mySort(a,b):
   """Old-style comparison function ordering two rows by column 1.

   Column 1 holds the DictionaryEntryName.  Returns whatever cmp() returns
   for the two names: negative, zero or positive.
   """
   nameA = a[1]
   nameB = b[1]
   return cmp(nameA, nameB)

# Sort the whole array in place by DictionaryEntryName.
dat.sort(mySort)



# STEP 3:  Create lists of DictionaryEntryName and Representation Term (Type),
# for each core component.   These will be used later.

outfile = open (sys.argv[1]+".err","w")

success = 1
for i in range(0,len(dat)):
   aCCNames.append(dat[i][1])
   ss = aCCNames[i]
   x = string.rfind((aCCNames[i]),".") + 1
   aCCTypes.append( ss[x:] )
   if aCCTypes[i] != dat[i][8] and dat[i][3] != "Content":
      outfile.write( "Naming violation in Representation Term. "+ aCCNames[i]+ '  '+dat[i][8]+'\n')
      #print dat[i][8]   #aCCTypes[i]
      success = 0


for i in range(0,len(dat)):
   outfile.write( "aCCNames[i]+aCCTypes[i]: "+ aCCNames[i]+ ' '+aCCTypes[i]+ ' '+dat[i][11] + '\n')

if success != 1:
   print "Recommend you exit CoreParse.py by hitting <crtl><break>. \nThere were case(s) where the DEN suffix didn't equal The representation term.\nI am using the DEN suffix. "
   zzzzzzzzzzzzzzzzzz = raw_input("zzzzzzzzzzzzzzzzzz press any key to continue.")

x = len(dat) + 1
print "Finished creating lookup list of Representation Terms for " + str(x) + " Dictionary Entry Names."


# STEP 4:  Find Components having children elements, and parse them into lists.
# The 11th column of the big array (dat) contains all the children, separated by
# a delimiter.  First we parse it into a list "children[]".  Then we replace the
# string which was originally found in column 11 with a list.

aCoreComponentChildren = []
delimiter = "&#13;&#10;"   # delimiter if XML is escaped.
delimiterLen = 10
ComponentNumber = len(dat)
for x in range (0,ComponentNumber):
   dat[x][10] = re.sub("n/a","",dat[x][10])
   if len( dat[x][10]) > 3:
      # we have found a core component having children elements.
      # we need to parse them out of the string into a list, in order to
      # manage them, and identify their data types.
      bigString = dat[x][10]
      bigLen = len(bigString)
      startPos = 0
      endPos = string.find(bigString, delimiter, startPos, bigLen)
      if endPos == -1:
         # evidently there is no delimiter! (no CCs have only one child)
         print "I found child elements in " + (sys.argv[1]+".XML") + " as follows: " + dat[x][1] + ' ' + bigString + "  --BUT they are not delimited by " + delimiter +"."
         sys.exit()
      children = []
      while endPos != -1:
         children.append( bigString[startPos:endPos] )
         startPos = endPos + delimiterLen
         endPos = string.find(bigString, delimiter, startPos, bigLen)
      children.append( bigString[startPos:bigLen] )

      # some people append UIDs to their core component children, in parentheses.
      # if so, we need to delete those.
      for i in range (0, len(children)):
         ss = children[i]
         startPos = string.find(ss, '(', 0 , 222)
         if startPos == -1:
            continue
         endPos = string.find(ss, ')', 0 , 222) + 1
         #print startPos
         #print children[i]
         children[i] = ss[0:startPos] + ss[endPos:]

      # some people append four parameters to their core component children, 
      # using four semicolons like (Child) ; minOccurs; maxOccurs; order; maxLength
      # We need to capture those, to create XML Schemas.
      childparms = []
      for i in range (0, len(children)):
         childparms.append(['0','0','0','0'])
         ss = children[i]
         semi1  = string.find(ss, ';', 0 , 222)
         if semi1 == -1:
            continue
         children[i] = ss[0:semi1]
         semi2 = string.count(ss, ';')
         if semi2 < 4:
            outfile.write(dat[x][1] + " ERROR: child does not have 4 semicolons " + children[i] + '\n')
            continue       

         semi1 = semi1 + 1
         semi2 = string.find(ss, ';', semi1 , 222)
         if len(ss[semi1:semi2])>0:
            #print i, " The 1st parameter of ",dat[x][1], children[i], " is ", ss[semi1:semi2]
            #print childparms
            childparms[i][0] = ss[semi1:semi2]

         semi2 = semi2 + 1
         semi3 = string.find(ss, ';', semi2 , 222)
         if len(ss[semi2:semi3])>0:
            childparms[i][1] = ss[semi2:semi3]

         semi3 = semi3 + 1
         semi4 = string.find(ss, ';', semi3 , 222)
         if len(ss[semi3:semi4])>0:
            childparms[i][2] = ss[semi3:semi4]

         if len(ss[semi4:])>0:
            childparms[i][3] = ss[semi4+1:]
            
         
         #print i, " The parameters of ", children[i], " are ", childparms[i]
         #zzzzzzzzzzzzzzzzzz = raw_input("zzzzzzzzzzzzzzzzzz press any key to continue.")
         
      dat[x][14] = copy.copy( childparms )
     
      

      # Find the Type of each child because when you declare
      # the child element in an XML Schema you have to know its type:
      childrenTypes = []
      for i in range (0, len(children)):
         try:
            # print "Here are the i, children[i] for aCCNames.index(children[i]: ", i, children[i]
            j = aCCNames.index(children[i])
         except:
            outfile.write(dat[x][1] + " has invalid child " + children[i] + '\n')

         childrenTypes.append( aCCTypes[j] )

         #print i, " The Type of ", children[i], " is ", childrenTypes[i]



      # It will be a miracle if the script ever gets this far.
      # aCoreComponentChildren.append( [ dat[x][0] , children ] ) #this was a dumb idea, using UID for a key.

      # Store the list in the big array.
      # print dat[x], "before copying:"
      dat[x][10] = copy.copy( children )

      # print dat[x], "after copying:"

      #zzzzzzzzzzzz = raw_input("zzzzzzzzzzzz press any key to continue")

x = ComponentNumber + 1
print "Finished parsing children of " + str(x) + " components into lists."

ComponentNumber = len(dat)
outfile.write("Here is the list of " + str(x) + " DENs in the array. \n")
for x in range (0,ComponentNumber):
   outfile.write( str(x) + ' ' + dat[x][1] + '\n' )

outfile.close

# STEP 5:  Build "type images" -- XML scraps for Table 8-2 Content types.
# The purpose of this is to identify the children elements of each of the
# 16 Representation Terms, and the XML Schema simple types that they use,
# so that these children elements can be directly inherited by each Core Component.
# In other words, if GL. Amount is of type "Amount. Type" then there would
# be no element named "Amount.Type" but rather, the following would appear:
#
#   <xs:element name="GL.Amount">
#     <xs:complexType>
#       <xs:all>
#         <xs:element name="Amount.Content" type="xs:decimal">
#           <xs:annotation>
#             <xs:documentation>A number of monetary units...</xs:documentation>
#           </xs:annotation>
#         </xs:element>
#           <xs:annotation>
#             <xs:documentation>The ISO 4217 currency code of the Amount.</xs:documentation>
#           </xs:annotation>
#         <xs:element name="AmountCurrency.Identification.Code" type="xs:string" minOccurs="0"/>
#       </xs:all>
#     </xs:complexType>
#   </xs:element>

aContentNames = []  # DictionaryEntryName of each Content element.
aContentTypes = []  # XML type of each Content element.
aTypesNames = []  # DictionaryEntryName of each Represention Term (Type) element.
aTypesXML = []  # lines of text to include in XML schemas whenever this Type is used in a CC, or BIE.

ComponentNumber = len(dat)
for x in range (0,ComponentNumber):
   if dat[x][3] == "Content":         # i.e., if the "BasicOrAggregate" element says "Content"
      aContentNames.append(dat[x][1])        # i.e., the DictionaryEntryName element
      aContentTypes.append(dat[x][11])       # i.e., the XMLdatatype field.


# for x in range (0,len(aContentNames)):
#    print aContentNames[x],aContentTypes[x]

print "Writing XSD file.  After execution, read coreparse.err error log:"
# zzzzzzzzzzzz = raw_input("zzzzzzzzzzzz press any key to continue")

# Find each of the children of each RepTerm and build XML scraps.
# (I have already created a Core Components in the source file for each Represention Term
# and entered "Type" into "BasicOrAggregate" field, and put its children in the 11th column.)

prefix = '             <xs:element name="'
midfix = '" type="'
suffix = '"/>'
suffixOptional = '" minOccurs="0" maxOccurs="1"/>'
ComponentNumber = len(dat)
TypeCount = -1
for x in range (0,ComponentNumber):
   if dat[x][3] == "Type":      # i.e., if the "BasicOrAggregate" element is "Type"
      TypeCount = TypeCount + 1
      y = len(dat[x][10])  # number of children elements, i.e. Content of the CCT Type.
      # print dat[x][1],y
      # print dat[x][10]
      aTypesNames.append(dat[x][1])
      z = aContentNames.index(dat[x][10][0])
      aTypesXML.append(prefix + aContentNames[z] + midfix + aContentTypes[z] + suffix +'\n')
      for i in range (1,y):
         #print dat[x][10][i]
         z = aContentNames.index(dat[x][10][i])
         if aCCTypes[x] == "Content":
            aTypesXML[TypeCount]= aTypesXML[TypeCount] + prefix + aContentNames[z] + midfix + aContentTypes[z] + suffix +'\n'
         else:
            aTypesXML[TypeCount]= aTypesXML[TypeCount] + prefix + aContentNames[z] + midfix + aContentTypes[z] + suffixOptional +'\n'


x = len(aTypesNames)
outfile.write("Here is the list of " + str(x) + " XML definitions of CCTs (CC Types). \n")
for i in range (0,x):
   outfile.write( str(i) + ' ' + aTypesNames[i] + '\n' +aTypesXML[i]+ '\n' )



# STEP 6:  Analyze the Aggregate core components having 1 or 2 levels
outfile.write("\nSTEP 6b:  Analyze the Aggregate core components having 1 or 2 levels.\n")

# STEP 6a: quick consistency checks between the "BasicOrAggregate" column (3)
# and the RepresentationTerm column (8).  Mismatches are only logged.
ComponentNumber = len(dat)
for x in range (0,ComponentNumber):
   isAggregate = (dat[x][3] == "Aggregate")
   saysDetails = (dat[x][8] == "Details")
   if isAggregate and not saysDetails:
      outfile.write(dat[x][1] + ' claims to be "Aggregate"!  RepTerm should be "Details".\n')
   if saysDetails and not isAggregate:
      outfile.write(dat[x][1] + ' is not an "Aggregate"!  But RepTerm says "Details".\n')

# STEP 6b: Analyze the Aggregate core components having 1 or 2 levels.
outfile.write("\nSTEP 6b: Analyze the Aggregate core components having 1 or 2 levels.\n")

# Parallel lists, one entry per Aggregate row, in row order.
AggParentIndex = []   # list of dat[] indexes of Aggregates having aggregate child(ren).
AggParentNames = []
AggChildIndex = []   # list of lists of dat[] indexes of Children which are aggregates.
AggChildNames = []
AggChildDepth = []
AggIndex = -1
target = "Details"
ComponentNumber = len(dat)
for x in range (0,ComponentNumber):
   if dat[x][3] != "Aggregate":         # i.e., the "BasicOrAggregate" element.
      continue
   dat[x][12] = 1                       # flag-- it has at least 1 level of Details.
   AggIndex = AggIndex + 1
   AggParentIndex.append(x)             # the row number of this Core Component.
   AggParentNames.append( dat[x][1] )   # the DEN.
   AggChildNames.append( [] )
   AggChildDepth.append(1)              # for the time being... it has 1 or more levels.

   # A child counts as an aggregate when its name contains "Details".
   aggKids = []
   for kidName in dat[x][10]:
      if string.find( kidName, target ) != -1:
         aggKids.append( kidName )

   if len(aggKids) > 0:
      dat[x][12] = 2
      AggChildDepth[AggIndex] = 2       # for the time being... it has 2 or more levels.
      for kidName in aggKids:
         outfile.write( dat[x][1] + ' is Aggregate and has aggregate child ' + kidName + '\n')
         AggChildNames[AggIndex].append( kidName )

# Log each Aggregate with its provisional depth and its aggregate children.
outfile.write("\nHere is the list of Aggregates and depths. \n")
x = len(AggParentNames)
for i in range (0,x):
   ss0 = '\n'+ str(AggParentIndex[i])+' '+  AggParentNames[i] + ' has '+str(AggChildDepth[i])+ ' or more levels. \n'
   outfile.write( ss0 )
   for kidName in AggChildNames[i]:
      outfile.write(kidName +'  ')

def fetchAggregateChildNames( componentNumber ):
   """Return the aggregate children of row `componentNumber` of dat.

   A child is treated as an aggregate when its name contains the text
   'Details'.  The result is a new list, in the same order as the row's
   child list (column 10); it is empty when there are no such children.
   """
   return [ kidName for kidName in dat[componentNumber][10]
            if string.find(kidName, 'Details') != -1 ]



# STEP 6c:  Count the depth of Aggregate core components more than 3 levels deep.
outfile.write("\nSTEP 6c:  Count the depth of Aggregate core components more than 3 levels deep.\n")

def _aggregateDepthBelow( componentNumber, level ):
   """Return the deepest level reached under row `componentNumber`.

   `level` is the level of the row itself.  If the row has no aggregate
   children the answer is `level`; otherwise it is at least `level + 1`.
   Descent stops once level 6 is reported, matching the fixed nesting of the
   original code; the cap also keeps a circular reference from looping forever.
   Raises ValueError if a child name is not present in aCCNames.
   """
   kids = fetchAggregateChildNames( componentNumber )
   if len(kids) == 0:
      return level
   deepest = level + 1
   if deepest < 6:
      for kidName in kids:
         found = _aggregateDepthBelow( aCCNames.index(kidName), level + 1 )
         if found > deepest:
            deepest = found
   return deepest

x = len(AggParentNames)
for i in range (0,x):                            # for each AggParent
   if AggChildDepth[i] == 2:
      # Take the MAXIMUM over all aggregate children.  The original assigned
      # the depth branch by branch, so a shallow branch visited after a deep
      # one overwrote the deeper value.
      deepest = 2
      for j in range (0, len(AggChildNames[i])):
         found = _aggregateDepthBelow( aCCNames.index(AggChildNames[i][j]), 2 )
         if found > deepest:
            deepest = found
      if deepest > 2:
         dat[AggParentIndex[i]][12] = deepest
         AggChildDepth[i] = deepest

# Log every Aggregate found to be deeper than two levels.
outfile.write("\nHere is the list of Aggregates having grandchildren that are aggregates. \n")
x = len(AggParentNames)
for i in range (0,x):
   if AggChildDepth[i] > 2:
      outfile.write('\n')
      ss0 = '\n'+ str(AggParentIndex[i])+' '+  AggParentNames[i] + ' has depth of '+str(AggChildDepth[i])+ '. \n'
      outfile.write( ss0 )
      for j in range (0, len(AggChildNames[i])):
         outfile.write(AggChildNames[i][j] +'  ')

outfile.write('\n\n')


# STEP 6d: Determine dependencies (identify elements to include in a custom XSD.)

bCustom = 0
if len( sys.argv ) > 2:
   if len( sys.argv[2] ) > 0:
      try:
         argv2row = aCCNames.index(sys.argv[2])
         bCustom = 1
         for i in range (0, len(dat)):
            dat[i][13] = 0    # flag every element as NOT to include in the XSD..
      except:
         print "Your parameter " + sys.argv[2] + " does not exist in " + sys.argv[1]+ ".xml - Cannot build custom XSD schema. \n" 
         outfile.write("Your parameter " + sys.argv[2] + " does not exist in " + sys.argv[1]+ ".xml - Cannot build custom XSD schema. \n" )
      
if bCustom == 1:
   outfile.write("\nSTEP 6d:  Determine dependencies--identify elements to include in a custom XSD. \n") 
   outfile.write( sys.argv[2] + " (element " + str(argv2row) + ") has depth of " + str(dat[argv2row][12]) +" \n")
 
   dat[argv2row][13] = 1  # to mark the root element for inclusion in the XSD schema. 

   for sChild in dat[argv2row][10]:
      i = aCCNames.index(sChild)
      outfile.write( sChild + " is a child. \n")
      dat[i][13] = 1  #  flags the children of the root element for inclusion in the XSD. 
      for sGrandchild in dat[i][10]:
         j = aCCNames.index(sGrandchild)
         outfile.write( sGrandchild + " is a grandchild within " + sChild + "\n")
         dat[j][13] = 1  #  flags the grandchildren of the root element for inclusion in the XSD. 
         for sGrtGrdChild in dat[j][10]:
            k = aCCNames.index(sGrtGrdChild)
            outfile.write( sGrtGrdChild + " is a greatgrandchild within " + sGrandchild + "\n")
            dat[k][13] = 1  #  flags the greatGrandchildren of the root element. 
            for sGrtGrtGrdChild in dat[k][10]:
               l = aCCNames.index(sGrtGrtGrdChild)
               outfile.write( sGrtGrtGrdChild + " is a GrGreatGrandchild within " + sGrtGrdChild + "\n")
               dat[l][13] = 1  #  flags the greatGreatGrandchildren of the root element. 
               for sGrtGrtGrtGrdChild in dat[l][10]:
                  m = aCCNames.index(sGrtGrtGrtGrdChild)
                  outfile.write( sGrtGrtGrtGrdChild + " is a GrGrGreatGrandchild within " + sGrtGrtGrdChild + "\n")
                  dat[m][13] = 1  #  flags the greatGreatGreatGrandchildren of the root element. 


#   for yy in range (0,xx):
#      i = aCCNames.index(dat[argv2row][10][yy])
#      dat[i][13] = 1  #  marks the children of the root element for inclusion in the XSD. 

ComponentCount = len(dat)
outfile.write("Here is the list of " + str(ComponentCount) + " components, and their depth, and dependency. \n")
for x in range (0,ComponentCount):
   outfile.write( str(x)+' ' + str(dat[x][12]) +' ' + str(dat[x][13]) +' '+ dat[x][1] + '\n' )


outfile.close


# STEP 7a:  Write an XML Schema-- prolog and Basic core components:

# Text fragments wrapped around each Basic component's complexType.
prefix = '  <xs:complexType name="'
midfix1 = '">\n    <xs:annotation><xs:documentation>\n'
midfix2 = '\n    </xs:documentation></xs:annotation>\n       <xs:sequence>\n'
suffix = '       </xs:sequence>\n   </xs:complexType>\n'


outfile = open (sys.argv[1]+".xsd","w")
outfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
outfile.write('<!-- generated from Core Component dictionary by coreparse.py python script -->\n')
outfile.write('<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" attributeFormDefault="unqualified">\n')

# First we will do the Basic components (having no children elements) (Text.Type, etc.)
outfile.write('<!-- Basic components having no children elements -->\n')

# Start at row 0.  The empty placeholder row was deleted in STEP 2, so row 0
# is a real component; the original loop began at 1 and silently skipped it.
for x in range (0,ComponentCount):
   if dat[x][13] == 1 and dat[x][3] == "Basic":         # i.e., if the "BasicOrAggregate" element says "Basic", it has no children.
      # Look up the XML scrap built in STEP 5 for "<RepresentationTerm>.Type".
      z = aTypesNames.index((dat[x][8] + '.Type') )
      ss0 = prefix + dat[x][1] + midfix1 + dat[x][4] + dat[x][5]+ midfix2    + aTypesXML[z] + suffix
      outfile.write( ss0 )


# STEP 7b:  Write XML Schema Types for Aggregate core components with no Aggregate children elements.
# These are the easy Aggregate components (having no Aggregate children elements)

# Text fragments wrapped around each Aggregate's complexType ...
prefix = '  <xs:complexType name="'
midfix1 = '">\n        <xs:annotation><xs:documentation>\n'
midfix2 = '\n        </xs:documentation></xs:annotation>\n            <xs:sequence>\n'
suffix = '            </xs:sequence>\n        </xs:complexType>\n'

# ... and around each child element declared inside it.
prefixB = '             <xs:element name="'
midfixB = '" type="'
suffixB = '" minOccurs="'
suffixC = '" maxOccurs="'
suffixD = '"/>\n'

outfile.write('<!-- Easy aggregate components (having no children that are aggregates) -->\n')

# Start at row 0.  The empty placeholder row was deleted in STEP 2, so row 0
# is a real component; the original loop began at 1 and silently skipped it.
for x in range (0,ComponentCount):
   if dat[x][13] == 1 and dat[x][12] == 1:         # i.e., Aggregate tested above to have no detailed children.
      ss0 = prefix + dat[x][1] + midfix1 + dat[x][4] +' '+ dat[x][5]+ midfix2
      outfile.write( ss0 )
      k = len(dat[x][10])

      for i in range (0,k):
         # Column 14 holds the per-child parameters captured in STEP 4:
         # [0] is written as minOccurs, [1] as maxOccurs ('*' = unbounded).
         if dat[x][14][i][1] == '*':
            maxOcc = 'unbounded'
         else:
            maxOcc = dat[x][14][i][1]
         # The child's name doubles as the name of its type.
         ss0 = prefixB + dat[x][10][i] + midfixB + dat[x][10][i]  +suffixB + dat[x][14][i][0] + suffixC + maxOcc + suffixD
         outfile.write( ss0 )

      outfile.write('            </xs:sequence>\n       </xs:complexType>\n')

# STEP 7c:  Write XML Schema Types for Aggregate components having children that are Aggregates.

outfile.write('<!-- Multi-level aggregate components (having children that are aggregates) -->\n')

# Start at row 0.  The empty placeholder row was deleted in STEP 2, so row 0
# is a real component; the original loop began at 1 and silently skipped it.
for x in range (0,ComponentCount):
   if dat[x][13] == 1 and dat[x][12] > 1:
      ss0 = prefix + dat[x][1] + midfix1 + dat[x][4] +' '+ dat[x][5]+ midfix2
      outfile.write( ss0 )
      k = len(dat[x][10])
      for i in range (0,k):
         # Column 14 holds the per-child parameters captured in STEP 4:
         # [0] is written as minOccurs, [1] as maxOccurs ('*' = unbounded).
         if dat[x][14][i][1] == '*':
            maxOcc = 'unbounded'
         else:
            maxOcc = dat[x][14][i][1]

         # The child's name doubles as the name of its type.
         ss0 = prefixB + dat[x][10][i] + midfixB + dat[x][10][i]  +suffixB+ dat[x][14][i][0] + suffixC + maxOcc + suffixD
         outfile.write( ss0 )

      outfile.write('            </xs:sequence>\n       </xs:complexType>\n')

outfile.write('</xs:schema>\n')
# Finish the .xsd file for real.  (The original wrote `outfile.close`
# without parentheses, which never called the method.)
outfile.close()


# STEP 8a:  Write HTML header/prolog to document the same core components in HTML.

# Opening of the HTML report: document head plus the title banner.  It is
# written first; a one-line subtitle is written right after it.
htmlStart =  """
<html>
<head>
<title>General Ledger Information Entities (GLIEs) - - - dump</title>
</head>
<body style="font-family: Verdana; font-size: 10pt"><center>
<font face="Times New Roman" size="2">The <a href="http://www.arapxml.net">AR/AP Project</a> proudly presents</font><br>
<font size="4"><b>General Ledger Information Entities (GLIEs) </font><font size="3"><br>
"""


# Closes the banner, then opens the table and writes its header row with one
# heading cell per column of the report (eleven in all).
htmlTablehead = """
</b></font><br><font size="2"><center>See <a href="coreParse.htm">coreParse.htm</a> for information about these entities. 
The following HTML table was <br>generated from the registry by coreparse2.py script. 
Basic Core Components are listed first, then Aggregates.  </font></center><br>

<TABLE DIR=LTR BORDER="1" cellspacing="0" width=987>
<tr> 
 <td width=58 valign=bottom><p><b><font face="Times New Roman" size="2"> UID</font></b></td>
 <td width=90 valign=bottom><p><b><font face="Times New Roman" size="2"> Dictionary Entry Name</font></b></td>
 <td width=51 valign=bottom><p><b><font face="Times New Roman" size="2"> CCT Used</font></b></td>
 <td width=60 valign=bottom><p><b><font face="Times New Roman" size="2"> Basic or Aggregt</font></b></td>
 <td width=153 valign=bottom><p><b><font face="Times New Roman" size="2"> definition</font></b></td>
 <td width=163 valign=bottom><p><b><font face="Times New Roman" size="2"> remarks</font></b></td>
 <td width=30 valign=bottom><p><b><font face="Times New Roman" size="1"> Object Class</font></b></td>
 <td width=30 valign=bottom><p><b><font face="Times New Roman" size="1"> Property Term</font></b></td>
 <td width=30 valign=bottom ><p><b><font face="Times New Roman" size="1">Represent<br> ationTerm</font></b></td>
 <td width=20 valign=bottom><p><b><font face="Times New Roman" size="1"> Busns Terms</font></b></td>
 <td width=168 valign=bottom><p><b><font face="Times New Roman" size="2"> Core Component Children</font></b></td>
 </tr> 

"""

# cellBegin[i] is the opening markup of the i-th cell of a table row.
# The first cell also opens the row and, unlike the others, has no <p>.
cellBegin = ['<tr> <td width=58 valign="top"><font face="Times New Roman" size="2">']
# (width, font size) of the remaining ten cells, left to right.
for cellWidth, fontSize in [ (90, 2), (51, 2), (60, 2), (153, 2), (163, 2),
                             (30, 1), (30, 1), (30, 1), (20, 1), (168, 2) ]:
   cellBegin.append(' <td width=%d valign="top"><p><font face="Times New Roman" size="%d">' % (cellWidth, fontSize))

# Closing markup for a cell, for a row, and for the whole document.
rowEnd = '</tr>\n'
cellEnd = '</font></td>\n'

htmlEnd = '</TABLE>\n</body>\n</html>\n'


outfile = open (sys.argv[1]+".htm","w")
outfile.write(htmlStart)
if bCustom == 1:
   outfile.write( sys.argv[2] + " and its components. \n")
else:
   outfile.write("(complete listing)")

outfile.write(htmlTablehead)
# STEP 8b:  Write all the data elements/components into an HTML table.

# First, the strings are so long the table is hard to read or print.  Insert Spaces so they will wrap in the browser.
for x in range (1,ComponentCount):
   dat[x][1] =string.replace(dat[x][1], '.', '. ')
   dat[x][2] =string.replace(dat[x][2], '.', '. ')
   ss = dat[x][6]
   dat[x][6] = ss[0:12] + '<br>' + ss [12:]
   ss = dat[x][7]
   dat[x][7] = ss[0:12] + '<br>' + ss [12:]
   dat[x][9] = '&nbsp;'  #business terms.


outfile.write('<!-- Basic components having no children elements -->\n')
for x in range (1,ComponentCount):
   if dat[x][13] == 1 and dat[x][3] == "Basic":  
      for i in range (0,10):
         if len(dat[x][i]) == 0: 
            outfile.write( cellBegin[i] + '&nbsp;' + cellEnd )
         else:
            outfile.write( cellBegin[i] + dat[x][i] + cellEnd )
      ss0 = ''
      for eachChild in dat[x][10]:
         ss0 = ss0 + eachChild + '<br>'
      n = len(ss0) - 4 # get rid of the last <br>
      outfile.write( cellBegin[10] + ss0[0:n] +'&nbsp;' + cellEnd + rowEnd)

outfile.write('<!-- Easy aggregate components having no children that are aggregates -->\n')
for x in range (1,ComponentCount):
   if dat[x][13] == 1 and dat[x][12] == 1: 
      for i in range (0,10):
         if len(dat[x][i]) == 0: 
            outfile.write( cellBegin[i] + '&nbsp;' + cellEnd )
         else:
            outfile.write( cellBegin[i] + dat[x][i] + cellEnd )
      ss0 = ''
      for eachChild in dat[x][10]:
         ss0 = ss0 + eachChild + '<br>'
      n = len(ss0) - 4 # get rid of the last <br>
      outfile.write( cellBegin[10] + ss0[0:n] +'&nbsp;' + cellEnd + rowEnd)

outfile.write('<!-- Aggregate components having children that are aggregates -->\n')
for x in range (1,ComponentCount):
   if dat[x][13] == 1 and dat[x][12] > 1:
      for i in range (0,10):
         if len(dat[x][i]) == 0: 
            outfile.write( cellBegin[i] + '&nbsp;' + cellEnd )
         else:
            outfile.write( cellBegin[i] + dat[x][i] + cellEnd )
      ss0 = ''
      for eachChild in dat[x][10]:
         ss0 = ss0 + eachChild + '<br>'
      n = len(ss0) - 4 # get rid of the last <br>
      outfile.write( cellBegin[10] + ss0[0:n] +'&nbsp;' + cellEnd + rowEnd)
      
outfile.write('<!-- Core Component Types (CCTs) -->\n')
for x in range (1,ComponentCount):
   if dat[x][13] == 1 and dat[x][3] == "Type":
      for i in range (0,10):
         if len(dat[x][i]) == 0: 
            outfile.write( cellBegin[i] + '&nbsp;' + cellEnd )
         else:
            outfile.write( cellBegin[i] + dat[x][i] + cellEnd )
      ss0 = ''
      for eachChild in dat[x][10]:
         ss0 = ss0 + eachChild + '<br>'
      n = len(ss0) - 4 # get rid of the last <br>
      outfile.write( cellBegin[10] + ss0[0:n] +'&nbsp;' + cellEnd + rowEnd)

outfile.write('<!-- Content/Supplemental components -->\n')
for x in range (1,ComponentCount):
   if dat[x][13] == 1 and dat[x][3] == "Content":
      for i in range (0,10):
         if len(dat[x][i]) == 0: 
            outfile.write( cellBegin[i] + '&nbsp;' + cellEnd )
         else:
            outfile.write( cellBegin[i] + dat[x][i] + cellEnd )
      ss0 = ''
      for eachChild in dat[x][10]:
         ss0 = ss0 + eachChild + '<br>'
      n = len(ss0) - 4 # get rid of the last <br>
      outfile.write( cellBegin[10] + ss0[0:n] +'&nbsp;' + cellEnd + rowEnd)


outfile.write(htmlEnd)
outfile.close


print "Yee haa!  finished."
#sys.exit()


