# IPP XTD | Python Fiddle

#!/usr/bin/env python3
#IPP - Projekt c.2 - Prevod XML na DDL
#XTD:xsimek25

import sys
from xml.dom.minidom import parse
import argparse

# dictionary recording how many times each child element name occurs
appearances={}

# Object describing one XML element (one generated table)
class Element:
    """Everything collected about one element name, i.e. one ``CREATE TABLE``.

    Attributes:
        elementName: tag name, used as the table name.
        elementNode: DOM node the object was created from.
        children: child name -> DOM node; the value ``0`` suppresses the
            single ``<child>_ID`` column of that child.
        childrenappear: lower-cased child name -> number of occurrences.
        attributes: attribute name -> SQL type of the attribute column.
        value: lower-cased text value -> text value as found in the input.
        etc: limit given by ``--etc``; only consulted when ``useetc`` is set.
        useetc: True when ``--etc`` was given.
        barg: True when ``-b`` was given.
    """

    # Column types ordered from the narrowest to the widest one.
    _TYPE_ORDER = ("BIT", "INT", "FLOAT", "NVARCHAR", "NTEXT")

    def __init__(self,name,node):
        self.elementName = name
        self.elementNode = node
        self.children = {}
        self.childrenappear = {}
        self.attributes = {}
        self.value = {}
        self.etc=8888
        self.useetc=False
        self.barg=False

    # Add one text value of the element
    def addvalue(self,value):
        """Remember *value*; values differing only in letter case are merged."""
        self.value[value.lower()] = value

    # Final output of the table definition
    def printout(self,outputfile,elementlist):
        """Write the ``CREATE TABLE`` statement of this element.

        Args:
            outputfile: object with a ``write`` method receiving the DDL.
            elementlist: unused; kept so existing callers keep working.
        """
        outputfile.write("CREATE TABLE " + self.elementName + "( \n\tPRK_" + self.elementName + "_ID INT PRIMARY KEY")
        for child in self.children:
            if child not in self.childrenappear:
                continue
            maxcount = self.childrenappear[child.lower()]
            # Numbered columns are used only for a repeated child, without -b,
            # and (when --etc is given) only while the count fits the limit.
            numbered = (
                maxcount > 1
                and not self.barg
                and (not self.useetc or maxcount <= int(self.etc))
            )
            if numbered:
                for item in range(1, maxcount + 1):
                    outputfile.write(",\n\t" + child + str(item) + "_ID INT")
            elif self.children[child.lower()] != 0:
                # equality, not identity: "is not 0" only worked thanks to
                # CPython's small-integer cache
                outputfile.write(",\n\t" + child + "_ID INT")
        # attribute columns
        for attr, sqltype in self.attributes.items():
            outputfile.write(",\n\t" + attr + " " + sqltype)
        # value column
        if self.value:
            outputfile.write(",\n\t" + "value " + self.gettype())
        outputfile.write("\n);\n\n")

    @staticmethod
    def _classify(text, texttype):
        """Return the SQL type of one textual value; *texttype* is the fallback."""
        stripped = text.strip().lower()
        if stripped in ("", "0", "1", "true", "false"):
            return "BIT"
        try:
            int(stripped)
            return "INT"
        except ValueError:
            pass
        try:
            number = float(stripped)
        except ValueError:
            return texttype
        # a float literal with an integral value (e.g. "3.0") is still INT
        return "INT" if number.is_integer() else "FLOAT"

    # Detect the data type of the element value
    def gettype(self):
        """Return the widest SQL type needed by all stored text values.

        Falls back to ``"NTEXT"`` when no value was stored.
        """
        return max(
            (self._classify(value, "NTEXT") for value in self.value.values()),
            key=self._TYPE_ORDER.index,
            default="NTEXT",
        )

    # Detect the data type of an attribute
    def gettypeattribute(self,attribute):
        """Return the SQL type for *attribute*, an object with a string ``value``."""
        return self._classify(attribute.value, "NVARCHAR")

# Main function for processing the command-line arguments
def argumentsparse():
    """Parse ``sys.argv`` and return the validated argparse namespace.

    Value options (``input``, ``output``, ``header``, ``isvalid``, ``etc``)
    are returned as lists holding at most one string; ``a``, ``b`` and ``g``
    are returned as booleans.

    Exits with status 1 on any invalid combination: unknown or malformed
    arguments, an option or switch given more than once, ``--help`` combined
    with anything else, ``-b`` together with ``--etc``, or a non-integer
    ``--etc`` value.  Exits with status 0 after printing the help.
    """
    argumentparser = argparse.ArgumentParser(add_help=False)
    argumentparser.add_argument('-h', '--help', action='count', default=0, help='vypis napovedy')
    argumentparser.add_argument("--input", action='append', dest='input', default=[], help='zadany vstupni soubor ve formatu XML')
    argumentparser.add_argument('--output', action='append', dest='output', default=[], help='zadany vystupni soubor')
    argumentparser.add_argument('--header', action='append', dest='header', default=[],
                                help='na zacatek vstupniho souboru se vlozi zakomentovana hlavicka')
    argumentparser.add_argument('--isvalid', action='append', dest='isvalid', default=[],
                                help='kontrola vstupniho souboru (rozsireni)')
    argumentparser.add_argument('--etc', action='append', dest='etc', default=[],
                                help='n urcuje maximalni pocet sloupcu vzniklych se stejnojm. podelementu')
    # The switches are counted (not store_true) so that a repeated switch can
    # be detected; they are converted back to booleans before returning.
    argumentparser.add_argument('-a', action='count', default=0,
                                help='nebudou se generovat sloupce z atributu ve vstupnim XML souboru')
    argumentparser.add_argument('-b', action='count', default=0,
                                help='pokud bude element obsahovat vice podelementu stejneho nazvu, bude se uvazovat jako jediny, nelze kombinovat s --etc')
    argumentparser.add_argument('-g', action='count', default=0,
                                help='lze jej uplatnit se vsemi prepinaci krome --help, vystupnim souborem bude xml daneho tvaru')
    try:
        arguments = argumentparser.parse_args()
    except SystemExit:
        # argparse reports errors by exiting with status 2; this script uses 1
        sys.exit(1)
    if arguments.help == 1:
        # --help is only valid as the sole argument
        if len(sys.argv) == 2:
            argumentparser.print_help()
            sys.exit(0)
        sys.exit(1)
    if arguments.help > 1:
        sys.exit(1)
    # every value option may be given at most once
    for values in (arguments.input, arguments.output, arguments.header,
                   arguments.isvalid, arguments.etc):
        if len(values) > 1:
            sys.exit(1)
    # every switch may be given at most once
    for count in (arguments.a, arguments.b, arguments.g):
        if count > 1:
            sys.exit(1)
    # the -b and --etc parameters cannot be combined
    if arguments.b and arguments.etc:
        sys.exit(1)
    # callers convert the --etc value with int(); reject bad input here
    if arguments.etc:
        try:
            int(arguments.etc[0])
        except ValueError:
            sys.exit(1)
    arguments.a = bool(arguments.a)
    arguments.b = bool(arguments.b)
    arguments.g = bool(arguments.g)
    return arguments

#funkce pro pripravu struktury se zadanou hodnotou parametru --etc
def testEtc(elementlist,etc):
    for element in elementlist.values():
        for child in element.children.keys():
            if child in element.childrenappear.keys():
                maxcount=element.childrenappear[child.lower()]
                if maxcount > etc:
                    related=elementlist[child.lower()]
                    related.children[element.elementName.lower()]=element.elementNode
                    related.childrenappear[element.elementName.lower()]=1
                    element.children[child.lower()]=0
				if etc == 0:
                    related=elementlist[child.lower()]
                    related.children[element.elementName.lower()]=element.elementNode
                    related.childrenappear[element.elementName.lower()]=0
                    element.children[child.lower()]=0
#funkce pro detekci konfliktu                    
def testConflicts(elementlist):
    for element in elementlist.values():
        for child in element.children.keys():
            #konflikt elementu a potomka
            if "PRK_"+element.elementName+"ID" == child+"_ID":
                sys.exit(90)
            for attribute in element.attributes.keys():
                #konflikt elementu a atributu
                if "PRK_"+element.elementName+"ID" == attribute:
                    sys.exit(90)
                #konflikt potomka a atributu
                if child+"_ID"==attribute:
                    sys.exit(90)

# Evaluate the relations between tables for the -g parameter
def relationshandle(elementlist):
    """Build the table-to-table relation map.

    Args:
        elementlist: dict of name -> object whose ``children`` mapping is
            keyed by child names.

    Returns:
        dict of name -> {other name -> "N:1" | "1:N" | "N:M"}, containing the
        direct parent/child relations plus the relations derived
        transitively from them.
    """
    relations = {}
    # direct relations taken from the parent -> child links
    for name, elem in elementlist.items():
        relations.setdefault(name, {})
        for kid in elem.children:
            if kid == name:
                continue
            if kid not in relations:
                relations[name][kid] = "N:1"
                relations[kid] = {name: "1:N"}
            elif name in relations[kid]:
                # the link exists in both directions
                relations[name][kid] = "N:M"
                relations[kid][name] = "N:M"
            else:
                relations[name][kid] = "N:1"
                relations[kid][name] = "1:N"
    # keep deriving relations over one intermediate table until a whole pass
    # finds nothing new
    changed = True
    while changed:
        changed = False
        # relations found in this pass; merged only after the pass is over
        pending = {}
        for source, direct in relations.items():
            for middle, firsthop in direct.items():
                for target, secondhop in relations[middle].items():
                    if target in direct:
                        continue
                    changed = True
                    derived = firsthop if firsthop == secondhop else "N:M"
                    pending.setdefault(source, {})[target] = derived
        for source, additions in pending.items():
            relations.setdefault(source, {}).update(additions)
    return relations
# Validate the file given by --isvalid
def validate(arguments,elementlist):
    """Check whether the ``--isvalid`` file fits the already parsed structure.

    The file is parsed the same way as the main input.  It fits when every
    table it produces exists in *elementlist* under the same name and uses
    only children and attributes that the existing table already has.

    Args:
        arguments: parsed arguments; ``arguments.isvalid[0]`` is the path.
        elementlist: dict of lower-cased table name -> element object built
            from the main input.

    Returns:
        True when the file fits, False otherwise.

    Exits with status 2 when the file cannot be opened; ``testConflicts``
    exits with 90 on a column-name conflict.
    """
    try:
        inputfile = open(arguments.isvalid[0])
    except OSError:
        sys.exit(2)
    with inputfile:
        xmldom = parse(inputfile)
    # documentElement, not firstChild: the first node of a document may be a
    # comment or a doctype
    catalog = xmldom.documentElement
    validate_elementlist = {}
    for node in catalog.childNodes:
        xmlparse(node,node,validate_elementlist,arguments)
    # test for name conflicts
    testConflicts(validate_elementlist)
    # can the structure be inserted into the existing one?
    for name, table in validate_elementlist.items():
        # tables are matched by name; the objects themselves always differ
        existing = elementlist.get(name)
        if existing is None:
            return False
        knownchildren = {child.lower() for child in existing.children}
        if any(child.lower() not in knownchildren for child in table.children):
            return False
        if any(attribute not in existing.attributes for attribute in table.attributes):
            return False
    return True

# Generate the resulting XML file with the relations
def relationstoout(relations,arguments,outputfile):
    """Write the relation map as XML.

    Args:
        relations: dict of table name -> {other table name -> relation type}.
        arguments: parsed arguments; when ``arguments.header`` holds exactly
            one item it is written first as a ``--`` comment line.
        outputfile: object with a ``write`` method.
    """
    emit = outputfile.write
    if len(arguments.header) == 1:
        emit("--" + arguments.header[0] + "\n\n")
    emit('<?xml version="1.0" encoding="UTF-8"?>\n')
    emit("<tables>\n")
    for table, links in relations.items():
        emit("".join(('\t<table name="', table, '">\n')))
        # every table is related 1:1 to itself
        emit("".join(('\t\t<relation to="', table, '" relation_type="1:1" />\n')))
        for target, kind in links.items():
            if target == table:
                continue
            emit("".join(('\t\t<relation to="', target, '" relation_type="', kind, '" />\n')))
        emit('\t</table>\n')
    emit("</tables>\n")

# Main function for parsing the input XML
def xmlparse(root,temproot,elementlist,arguments):
    """Record *root* and, recursively, all its descendants in *elementlist*.

    For an element node the matching ``Element`` (keyed by the lower-cased
    tag name) is created or reused and extended with the node's direct child
    elements, their occurrence counts, its attributes (unless ``-a``) and its
    leading text value.  The module-level ``appearances`` dictionary is
    updated with the highest count seen per child name.

    Args:
        root: DOM node to process.
        temproot: unused; kept so existing callers keep working.
        elementlist: dict of lower-cased tag name -> ``Element``, filled in
            place.
        arguments: parsed arguments; ``etc``, ``a`` and ``b`` are read.
    """
    if root.nodeType == 1:
        # the node is an element
        key = root.nodeName.lower()
        newelement = elementlist.get(key)
        if newelement is None:
            newelement = Element(root.nodeName,root)
            elementlist[key] = newelement
        if len(arguments.etc) == 1:
            newelement.etc = int(arguments.etc[0])
            newelement.useetc = True
        if arguments.b:
            newelement.barg = True

        # Count direct children only; getElementsByTagName would also count
        # deeper descendants with the same name.
        counts = {}
        for child in root.childNodes:
            if child.nodeType == 1:
                childkey = child.nodeName.lower()
                counts[childkey] = counts.get(childkey, 0) + 1
                # keyed like childrenappear so both dictionaries agree
                newelement.children[childkey] = child
        for childkey, count in counts.items():
            if appearances.get(childkey, 0) < count:
                appearances[childkey] = count
            # keep the maximum over all occurrences of this element
            if newelement.childrenappear.get(childkey, 0) < count:
                newelement.childrenappear[childkey] = count

        if not arguments.a:
            for i in range(root.attributes.length):
                attribute = root.attributes.item(i)
                # NOTE(review): the type is overwritten by the latest
                # occurrence of the attribute, not widened across occurrences
                newelement.attributes[attribute.name.lower()] = newelement.gettypeattribute(attribute)

        first = root.firstChild
        if first is not None and first.nodeType == 3:
            # the first child is a text node (the value)
            if len(first.data.strip()) > 0:
                newelement.addvalue(first.data)
    for child in root.childNodes:
        xmlparse(child,temproot,elementlist,arguments)

# Main function of the script
def main():
    """Run the conversion: read the XML input and write DDL or, with -g, XML.

    Exit statuses set here: 2 when the input file cannot be opened, 3 when
    the output file cannot be opened, 91 when ``--isvalid`` validation fails,
    0 after the ``-g`` output.  Called functions may exit with other codes.
    """
    arguments = argumentsparse()
    # input file, or stdin when none was given
    if len(arguments.input) == 1:
        try:
            inputfile = open(arguments.input[0], 'r')
        except OSError:
            sys.exit(2)
    else:
        inputfile = sys.stdin
    # output file, or stdout when none was given
    if len(arguments.output) == 1:
        try:
            outputfile = open(arguments.output[0], 'w')
        except OSError:
            sys.exit(3)
    else:
        outputfile = sys.stdout
    try:
        wantrelations = bool(arguments.g)
        # with -g the header is written by relationstoout; writing it here as
        # well would emit it twice
        if len(arguments.header) == 1 and not wantrelations:
            outputfile.write("--"+arguments.header[0]+"\n\n")
        # main dictionary of nodes (key = node name, value = element object)
        elementlist = {}
        xmldom = parse(inputfile)
        # documentElement, not firstChild: the first node of a document may
        # be a comment or a doctype
        catalog = xmldom.documentElement
        # parse every node below the root element
        for node in catalog.childNodes:
            xmlparse(node,node,elementlist,arguments)
        # test for name conflicts
        testConflicts(elementlist)
        # --etc was given
        if len(arguments.etc) == 1:
            testEtc(elementlist,int(arguments.etc[0]))
        # -g was given => the output is XML describing the table relations
        if wantrelations:
            relations = relationshandle(elementlist)
            relationstoout(relations,arguments,outputfile)
            sys.exit(0)
        if len(arguments.isvalid) == 1 and not validate(arguments,elementlist):
            sys.exit(91)
        # final output of the table structure
        for element in elementlist.values():
            element.printout(outputfile,elementlist)
    finally:
        # close only what was opened here, never the standard streams
        if inputfile is not sys.stdin:
            inputfile.close()
        if outputfile is not sys.stdout:
            outputfile.close()
# call the main function when the file is run as a script
if __name__ == "__main__":
    main()

# Python Fiddle

# Python Cloud IDE