'''
XML file parser

Provides the data as a dictionary of the form
{'root.child1.child2.node': 'value'}

@author: olivier.massot, Sept. 2017
'''

import re

from lxml import etree  # @UnresolvedImport
from path import Path


# Matches a "{http://...}" or "{https://...}" namespace group followed by the
# rest of the string; the rest (the bare tag name) is captured in group 1.
_regex = re.compile(r"(?:\{https?:\/\/.*\})(.*)")

# Parsed trees, keyed by the file path they were loaded from (filled by _parse).
CACHE = {}

class XmlError(Exception):
    """Error raised when an XML file cannot be parsed."""

def _clean_tag(tag):
    """Remove a ``{http://...}`` / ``{https://...}`` namespace from a tag.

    :param tag: tag to clean, normally a string such as ``"{http://ns}name"``.
    :return: the part of ``tag`` following the namespace group, or ``tag``
        itself, unchanged, when it holds no such group or is not a string.
    """
    try:
        match = _regex.search(tag)
    except TypeError:
        # Not a string (lxml exposes non-string tags on some nodes, e.g.
        # comments): hand the value back untouched.
        return tag
    return match.group(1) if match else tag

def _parse(filepath):
    """Return the parsed tree of ``filepath``, loading it on first use.

    Trees are stored in the module-level ``CACHE`` dictionary, so a given
    path is read and parsed only once.

    :param filepath: path of the XML file; used as the cache key and handed
        to ``etree.parse``.
    :return: the tree returned by ``etree.parse`` for this path.
    :raises XmlError: if lxml reports a syntax error in the file.
    """
    if filepath not in CACHE:
        try:
            CACHE[filepath] = etree.parse(filepath)
        except etree.XMLSyntaxError:
            # A plain string path has no ``name`` attribute: fall back to the
            # path itself so the XmlError is not masked by an AttributeError.
            name = getattr(filepath, "name", filepath)
            raise XmlError("Le fichier XML contient des erreurs! ('{}')".format(name))
    return CACHE[filepath]

def getroottag(filepath):
    """Return the tag of the file's root element, without its namespace."""
    root = _parse(filepath).getroot()
    return _clean_tag(root.tag)

def first_children_tags(filepath):
    """Return the cleaned tags of the root element's direct children."""
    root = _parse(filepath).getroot()
    tags = []
    for child in root:
        tags.append(_clean_tag(child.tag))
    return tags

def parse(filepath, start=""):
    """Parse an XML file and return its leaf values as a flat dictionary.

    Keys are the dot-joined, namespace-stripped tags leading to each leaf
    element, e.g. ``{"node1.node2.tag": value}``. The tag of the root element
    (or of the ``start`` element, when given) is not part of the keys.

    :param filepath: path of the XML file, handed to ``_parse``.
    :param start: optional tag name; when non-empty, only leaves located
        below an element carrying this tag are collected.
    :return: dict mapping each dotted key to the ``text`` of the matching
        leaf element. When a key occurs several times, the last one wins.
    :raises XmlError: propagated from ``_parse`` on malformed XML.
    """
    root = _parse(filepath).getroot()
    data = {}

    def _iter(elt, in_start_node=False, breadcrumb=()):
        tag = _clean_tag(elt.tag)

        # Extend the path only for elements strictly below the start node:
        # the flag is tested *before* being updated for the current element.
        if in_start_node:
            breadcrumb += (tag,)

        if not start or tag == start:
            in_start_node = True

        for child in elt:
            if not isinstance(child.tag, str):
                # Comments and processing instructions have a non-string tag
                # and no children; treating them as leaves would make the
                # key join below fail with a TypeError.
                continue
            # len() is used rather than truthiness to test for children.
            if in_start_node and len(child) == 0:
                key = ".".join(breadcrumb + (_clean_tag(child.tag),))
                data[key] = child.text
            else:
                _iter(child, in_start_node, breadcrumb)

    _iter(root)

    return data


if __name__ == "__main__":
    # Command-line use: print every key found in the XML file whose path is
    # given as first argument, one key per line.
    import sys
    try:
        filepath = Path(sys.argv[1])
    except IndexError:
        # The backslash is doubled because "\d" is not a valid escape
        # sequence; the printed text is unchanged.
        print("Erreur: vous devez passer le chemin d'un fichier en parametre\nExemple:\n  python xmlparser.py c:\\dict.xml")
        sys.exit(1)

    # Iterating over a dict yields its keys.
    print("\n".join(parse(filepath)))