#!/usr/bin/env python3 #IPP - Projekt c.2 - Prevod XML na DDL #XTD:xsimek25 import sys from xml.dom.minidom import parse import argparse #slovnik pro zaznamenani vyskytu potomku appearances={} #objekt pro element class Element: def __init__(self,name,node): self.elementName = name self.elementNode = node self.children = {} self.childrenappear = {} self.attributes = {} self.value = {} self.etc=8888 self.useetc=False self.barg=False #funkce pro pridani hodnoty def addvalue(self,value): self.value[value.lower()] = value #funkce pro finalni vypis na vystup def printout(self,outputfile,elementlist): outputfile.write("CREATE TABLE " + self.elementName + "( \n\tPRK_" + self.elementName + "_ID INT PRIMARY KEY") for child in self.children.keys(): if child in self.childrenappear.keys(): maxcount=self.childrenappear[child.lower()] #pokud je pocet vyskytu potomka vetsi nez 1 if maxcount > 1: #je spusten parametr -b if self.barg is False: #je spusten parametr --etc if self.useetc is True: if maxcount <= int(self.etc): #cyklus pro vypis jednotlivych potomku s prislusnym indexem for item in range(1,maxcount+1): outputfile.write(",\n\t" + child + str(item) + "_ID INT") else: if self.children[child.lower()] is not 0: outputfile.write(",\n\t" + child + "_ID INT") else: for item in range(1,maxcount+1): outputfile.write(",\n\t" + child + str(item) + "_ID INT") else: if self.children[child.lower()] is not 0: outputfile.write(",\n\t" + child + "_ID INT") else: if self.children[child.lower()] is not 0: outputfile.write(",\n\t" + child + "_ID INT") #vypis atributu for attr in self.attributes.keys(): outputfile.write(",\n\t" + attr + " " + self.attributes[attr]) #vypis hodnoty if len(self.value)>0: outputfile.write(",\n\t" + "value " + self.gettype()) outputfile.write("\n);\n\n") #detekce datoveho typu hodnoty def gettype(self): for value in self.value.values(): newvalue = value.lower() type = "NTEXT" try: int(newvalue) type = "INT" except: pass try: if float(newvalue): type = "FLOAT" except: pass if (newvalue.strip()=='true') or (newvalue.strip()=='false') or (newvalue.strip()=='1') or (newvalue.strip()=='0') or (newvalue.strip()==''): type = "BIT" return type #detekce datoveho typu atributu def gettypeattribute(self,attribute): attributevalue=attribute.value.lower() type = "NVARCHAR" try: int(attributevalue) type = "INT" except: pass try: float(attributevalue) if float(attributevalue).is_integer(): type = "INT" else: type = "FLOAT" except: pass if (attributevalue.strip()=='true') or (attributevalue.strip()=='false') or (attributevalue.strip()=='1') or (attributevalue.strip()=='0') or (attributevalue.strip()==''): type = "BIT" return type #hlavni funkce pro zpracovani argumentu v prikazovem radku def argumentsparse(): argumentparser = argparse.ArgumentParser(add_help=False) argumentparser.add_argument('-h', '--help', action='count', default=0, help='vypis napovedy') argumentparser.add_argument("--input", action='append', dest='input', default=[], help='zadany vstupni soubor ve formatu XML') argumentparser.add_argument('--output', action='append', dest='output', default=[], help='zadany vystupni soubor') argumentparser.add_argument('--header', action='append', dest='header', default=[], help='na zacatek vstupniho souboru se vlozi zakomentovana hlavicka') argumentparser.add_argument('--isvalid', action='append', dest='isvalid', default=[], help='kontrola vstupniho souboru (rozsireni)') argumentparser.add_argument('--etc', action='append', dest='etc', default=[], help='n urcuje maximalni pocet sloupcu vzniklych se stejnojm. podelementu') argumentparser.add_argument('-a', action='store_true', default=False, help='nebudou se generovat sloupce z atributu ve vstupnim XML souboru') argumentparser.add_argument('-b', action='store_true', default=False, help='pokud bude element obsahovat vice podelementu stejneho nazvu, bude se uvazovat jako jediny, nelze kombinovat s --etc') argumentparser.add_argument('-g', action='store_true', default=False, help='lze jej uplatnit se vsemi prepinaci krome --help, vystupnim souborem bude xml daneho tvaru') try: arguments = argumentparser.parse_args() except: sys.exit(1) if arguments.help == 1: if len(sys.argv) == 2: argumentparser.print_help() sys.exit(0) else: sys.exit(1) if arguments.help > 1: sys.exit(1) if len(arguments.input) > 1: sys.exit(1) if len(arguments.output) > 1: sys.exit(1) if len(arguments.header) > 1: sys.exit(1) if len(arguments.etc) > 1: sys.exit(1) if arguments.a > 1: sys.exit(1) if arguments.b > 1: sys.exit(1) if arguments.g > 1: sys.exit(1) #parametry -b a --etc nelze kombinovat if (arguments.b > 0) and (len(arguments.etc) > 0): sys.exit(1) return arguments #funkce pro pripravu struktury se zadanou hodnotou parametru --etc def testEtc(elementlist,etc): for element in elementlist.values(): for child in element.children.keys(): if child in element.childrenappear.keys(): maxcount=element.childrenappear[child.lower()] if maxcount > etc: related=elementlist[child.lower()] related.children[element.elementName.lower()]=element.elementNode related.childrenappear[element.elementName.lower()]=1 element.children[child.lower()]=0 if etc == 0: related=elementlist[child.lower()] related.children[element.elementName.lower()]=element.elementNode related.childrenappear[element.elementName.lower()]=0 element.children[child.lower()]=0 #funkce pro detekci konfliktu def testConflicts(elementlist): for element in elementlist.values(): for child in element.children.keys(): #konflikt elementu a potomka if "PRK_"+element.elementName+"ID" == child+"_ID": sys.exit(90) for attribute in element.attributes.keys(): #konflikt elementu a atributu if "PRK_"+element.elementName+"ID" == attribute: sys.exit(90) #konflikt potomka a atributu if child+"_ID"==attribute: sys.exit(90) #funkce pro vyhodnoceni vztahu pro parametr -g def relationshandle(elementlist): #prepinac pro udrzeni vyhledavani keepsearch = True #slovnik pro zaznamenani vztahu relations = {} for element in elementlist.keys(): tempelem = elementlist[element] if element not in relations.keys(): relations[element] = {} for child in tempelem.children.keys(): if element != child: if child in relations.keys(): if element not in relations[child].keys(): relations[element][child]="N:1" relations[child][element]="1:N" else: relations[element][child]="N:M" relations[child][element]="N:M" else: relations[element][child]="N:1" relations[child]={} relations[child][element]="1:N" #cyklus pro zaznamenani neanalyzovanych vztahu while keepsearch: keepsearch = False #temp slovnik pro vztah tempdict = {} for element in relations.keys(): for child in relations[element].keys(): for rel in relations[child].keys(): if rel not in relations[element].keys(): keepsearch = True if relations[element][child] == relations[child][rel]: relation = relations[element][child] else: relation = "N:M" if not element in tempdict: tempdict[element] = {} tempdict[element][rel] = relation for tempmember in tempdict.keys(): #ulozeni vztahu do hlavniho slovniku relations for item in tempdict[tempmember]: if tempmember in relations: relations[tempmember][item] = tempdict[tempmember][item] else: relations[tempmember] = {} relations[tempmember][item] = tempdict[tempmember][item] return relations #funkce pro validaci vstupniho souboru def validate(arguments,elementlist): try: inputfile = open(arguments.isvalid[0]) except: sys.exit(2) xmldom=parse(inputfile) catalog = xmldom.firstChild validate_elementlist = {} for node in catalog.childNodes: rootnames.append(node.nodeName) temproot = node xmlparse(node,temproot,validate_elementlist,arguments) #test na konflikty v nazvech testConflicts(validate_elementlist) #zda lze vlozit strukturu do stavajici struktury for table in validate_elementlist.values(): if table not in elementlist.values(): return False for temptable in elementlist.values(): for child in table.children.keys(): if child not in temptable.children.keys(): return False for attribute in table.attributes.keys(): if attribute not in temptable.attributes.keys(): return False return True #funkce pro generovani vysledneho xml souboru na vystup def relationstoout(relations,arguments,outputfile): if len(arguments.header)==1: outputfile.write("--"+arguments.header[0]+"\n\n") outputfile.write('<?xml version="1.0" encoding="UTF-8"?>\n') outputfile.write("<tables>\n") for item in relations: outputfile.write('\t<table name="'+item+'">\n') outputfile.write('\t\t<relation to="'+item+'" relation_type="1:1" />\n') for deepitem in relations[item]: if deepitem != item: outputfile.write('\t\t<relation to="'+deepitem+'" relation_type="'+relations[item][deepitem]+'" />\n') outputfile.write('\t</table>\n') outputfile.write("</tables>\n") #hlavni funkce pro parsovani vstupniho xml souboru def xmlparse(root,temproot,elementlist,arguments): if root.nodeType == 1: #korenovy uzel je typu uzel if root.nodeName not in elementlist.keys(): newelement = Element(root.nodeName,root) elementlist[root.nodeName.lower()]=newelement else: newelement=elementlist[root.nodeName.lower()] if len(arguments.etc)==1: newelement.etc=int(arguments.etc[0]) newelement.useetc = True if arguments.b is True: newelement.barg=True if root.nodeType == 1: for child in root.childNodes: if child.nodeType == 1: tempcount=0 for node in child.parentNode.getElementsByTagName(child.nodeName): tempcount += 1 if child.nodeName.lower() not in appearances.keys(): appearances[child.nodeName.lower()] = tempcount else: if appearances[child.nodeName.lower()] < tempcount: appearances[child.nodeName.lower()] = tempcount newelement.children[child.nodeName]=child newelement.childrenappear[child.nodeName.lower()]=tempcount if (arguments.a)==False: for i in range(len(root.attributes)): atname=root.attributes.item(i).name.lower() newelement.attributes[atname]=newelement.gettypeattribute(root.attributes.item(i)) if root.hasChildNodes(): if root.firstChild.nodeType==3: #uzel je typu text(hodnota) if len(root.firstChild.data.strip())>0: newelement.addvalue(root.firstChild.data) for child in root.childNodes: xmlparse(child,temproot,elementlist,arguments) #hlavni funkce skriptu def main(): arguments = argumentsparse() #analyza zadaneho vstupniho souboru if len(arguments.input) == 1: try: inputfile = open(arguments.input[0], 'r') except: sys.exit(2) else: #prepnuti vstupu na stdin inputfile = sys.stdin #analyza zadaneho vystupniho souboru if len(arguments.output) == 1: try: outputfile = open(arguments.output[0], 'w') except: sys.exit(3) else: #prepnuti vystupu na prikazovy radek outputfile = sys.stdout if len(arguments.header) == 1: outputfile.write("--"+arguments.header[0]+"\n\n") #hlavni slovnik pro uchovani uzlu (klic=nazev uzlu, hodnota=objekt elementu) elementlist = {} siblings = {} rootnames = [] xmldom = parse(inputfile) catalog = xmldom.firstChild #cyklus pro parsovani vsech uzlu v souboru for node in catalog.childNodes: rootnames.append(node.nodeName) temproot = node xmlparse(node,temproot,elementlist,arguments) #test na konflikty v nazvech testConflicts(elementlist) #je spusten parametr -g => vystupem je xml soubor obsahujici vztahy mezi jednotlivymi tabulkami if arguments.g is True: if len(arguments.etc) == 1: testEtc(elementlist,int(arguments.etc[0])) relations=relationshandle(elementlist) relationstoout(relations,arguments,outputfile) sys.exit(0) #je spusten parametr --etc if len(arguments.etc) == 1: testEtc(elementlist,int(arguments.etc[0])) #finalni vypis tabulkove struktury na vystup if len(arguments.isvalid)==1: if validate(arguments,elementlist): for element in elementlist.values(): element.printout(outputfile,elementlist) else: sys.exit(91) else: for element in elementlist.values(): element.printout(outputfile,elementlist) #zavolani hlavni funkce main pri spusteni skriptu if __name__ == "__main__": main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question