'''
Python 3.7+

Checks a folder (or a zip archive) of shapefiles against the rules
described in a YAML "checker" file and builds a report of the errors found.

@author: olivier.massot, sept 2018
'''
from datetime import date, datetime
import json
import logging
import subprocess
import tempfile
import zipfile

from jinja2.environment import Template
from path import Path, TempDir
import pypyodbc
import shapefile
import yaml

from core import logconf
from core.constants import MAIN

logger = logging.getLogger("datachecker")
logconf.start("datachecker", logging.INFO)

pypyodbc.lowercase = False

logger.disabled = True

# TODO: Check the projection (requires GDAL/OGR)
# TODO: special check functions
# TODO: HTML rendering

# Human-readable names of the shape types that can be required by a model.
_SHAPE_NAMES = {1: "Point", 3: "Polyligne", 5: "Polygone"}


class ReportField():
    """Validation outcome for one attribute value of a record.

    Assigning to `error` marks the field invalid and logs the message.
    """

    def __init__(self, name, value="", error=""):
        self.name = name
        self.value = value
        self._error = error
        # A field created with an error message is invalid from the start.
        self._valid = not error

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, error):
        self._valid = False
        logger.error("%s - %s ('%s')", self.name, error, self.value)
        self._error = error

    @property
    def valid(self):
        return self._valid


class ReportRecord():
    """Validation outcome for one record: its own errors plus its fields.

    The record becomes invalid as soon as an error is added or an invalid
    field is attached.
    """

    def __init__(self, index):
        self.index = index
        self._valid = True
        self._errors = []
        self._fields = []

    @property
    def errors(self):
        return self._errors

    def add_error(self, error):
        """Register a record-level error and log it."""
        self._valid = False
        logger.error("Ligne %s - %s", self.index, error)
        self._errors.append(error)

    @property
    def valid(self):
        return self._valid

    @property
    def fields(self):
        return self._fields

    def add_field(self, field):
        """Attach a checked field; an invalid field invalidates the record."""
        if not field.valid:
            self._valid = False
        self._fields.append(field)


class ReportFile():
    """Validation outcome for one file: its own errors plus its records.

    The file becomes invalid as soon as an error is added or an invalid
    record is attached.
    """

    def __init__(self, file):
        self.file = file
        self.headers = []
        self._valid = True
        self._errors = []
        self._records = []

    @property
    def valid(self):
        return self._valid

    @property
    def records(self):
        return self._records

    def add_record(self, row):
        """Attach a checked record; an invalid record invalidates the file."""
        if not row.valid:
            self._valid = False
        self._records.append(row)

    @property
    def errors(self):
        return self._errors

    def add_error(self, error):
        """Register a file-level error and log it."""
        self._valid = False
        logger.error("Fichier %s - %s", self.file, error)
        self._errors.append(error)


def _json_default(obj):
    """Fallback for json.dumps: encode an object as a dict of its attributes.

    Leading underscores are stripped from attribute names so the keys match
    the public property names. Objects without a `__dict__` are stringified.
    """
    try:
        attributes = vars(obj)
    except TypeError:
        return str(obj)
    return {key.lstrip("_"): value for key, value in attributes.items()}


class Report():
    """Top-level report: a title and the list of checked files."""

    def __init__(self, title, report_files=None):
        self.title = title
        # A fresh list per instance: a mutable default argument would be
        # shared by every Report created without an explicit list.
        self.report_files = [] if report_files is None else report_files

    @property
    def valid(self):
        """True when every file of the report is valid."""
        return all(r.valid for r in self.report_files)

    def to_json(self):
        """Return the report serialized as a JSON string."""
        # The report classes are not natively serializable, so their
        # attributes are exported through _json_default.
        return json.dumps(self, default=_json_default)


def check(subject, checker):
    """Check a whole folder or zip archive against a checker file.

    :param subject: path of a directory, or of a zip archive that is
        extracted to a temporary directory for the duration of the check
    :param checker: path of the YAML file describing the rules
    :return: the resulting Report
    :raises IOError: if `subject` is neither a file nor a directory
    """
    subject, checker = Path(subject), Path(checker)

    if subject.isfile():
        with TempDir() as dirname:
            with zipfile.ZipFile(subject, 'r') as archive:
                archive.extractall(dirname)
            root = dirname
            if (dirname / subject.stem).isdir():
                # the archive contains a folder which itself contains the files
                root = dirname / subject.stem
            # Checked inside the `with` so the extracted files still exist.
            return check_folder(root, checker)

    if subject.isdir():
        return check_folder(subject, checker)

    raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")


def _bound(config, key, default):
    """Return config[key] as an int, or `default` when the key is absent.

    The default is returned untouched because an infinite float cannot be
    converted with int() (OverflowError).
    """
    if key not in config:
        return default
    return int(config[key])


def _value_error(val, fieldmodel):
    """Return the first rule of `fieldmodel` broken by `val`, or None."""
    type_ = fieldmodel.get("type", "str")

    if type_ == "int":
        try:
            int(val)
        except (TypeError, ValueError):
            return "Valeur Invalide, un nombre entier est attendu"
    elif type_ == "float":
        try:
            float(val)
        except (TypeError, ValueError):
            return "Valeur Invalide, un nombre décimal est attendu"
    elif type_ == "datetime":
        # A value that is already a date object is valid as is; strptime
        # only accepts strings and raises TypeError for anything else.
        if not isinstance(val, date):
            try:
                datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
            except (TypeError, ValueError):
                return "Valeur Invalide, une date est attendue"
    elif not fieldmodel.get("allow_empty", False) and not val:
        return "Ce champs ne peut pas être vide"

    if type_ == "str" and "max_len" in fieldmodel:
        if len(str(val)) > fieldmodel["max_len"]:
            return "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])

    if "in_list" in fieldmodel and val not in fieldmodel["in_list"]:
        return "Valeur invalide, pas dans la liste"

    # TODO: "in_table" rule (not implemented): the value should belong to
    # the distinct values of fieldmodel["in_table"]["field"] in table
    # fieldmodel["in_table"]["table"] of database fieldmodel["in_table"]["db"].
    return None


def _check_field(fieldname, fieldmodel, record_data):
    """Build the ReportField of `fieldname` for one record."""
    report_field = ReportField(fieldname)

    try:
        val = record_data[fieldname]
    except KeyError:
        # Fields are required unless the model says otherwise.
        if fieldmodel.get("required", True):
            report_field.error = "Champs manquant"
        return report_field

    # The value is set before the error so that it appears in the log.
    report_field.value = val
    error = _value_error(val, fieldmodel)
    if error:
        report_field.error = error
    return report_field


def _check_location(shape, bounds, report_record):
    """Add an error to `report_record` if `shape` is outside `bounds`."""
    xmin, xmax, ymin, ymax = bounds

    try:
        x1, y1, x2, y2 = shape.bbox
    except AttributeError:
        # No bounding box on this shape: fall back on its first point.
        if not shape.points:
            report_record.add_error("L'élément ne possède pas de géométrie")
            return
        x1, y1 = shape.points[0][:2]
        x2, y2 = x1, y1

    inside = (xmin <= x1 <= xmax and xmin <= x2 <= xmax
              and ymin <= y1 <= ymax and ymin <= y2 <= ymax)
    if not inside:
        report_record.add_error("L'élément est situé hors de la zone géographique autorisée")


def _check_file(path_, model, bounds):
    """Check one shapefile against its model and return its ReportFile."""
    logger.info("* Traitement de %s", path_.name)
    report_file = ReportFile(path_.name)

    if not path_.isfile():
        report_file.add_error("Fichier introuvable")
        return report_file

    try:
        sf = shapefile.Reader(path_)
    except shapefile.ShapefileException:
        report_file.add_error("Fichier illisible")
        return report_file

    # TODO: the "srid" key of the config is not checked yet (see the
    # projection TODO at the top of the module).

    if "shape_type" in model and sf.shapeType != model["shape_type"]:
        expected = model["shape_type"]
        report_file.add_error("Le fichier shapefile n'est pas de type {}".format(
            _SHAPE_NAMES.get(expected, expected)))
        return report_file

    records = sf.shapeRecords()
    # A model that does not mention "can_be_empty" does not allow empty files.
    if not records and not model.get("can_be_empty", False):
        report_file.add_error("Le fichier shapefile ne contient aucune donnees")
        return report_file

    if "fields" not in model:
        return report_file

    fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
    report_file.headers = list(model["fields"].keys())

    # walk through the records and check each of them
    for i, record in enumerate(records):
        logger.info("\n> Enregistrement n°%s\n", i)
        report_record = ReportRecord(i)
        record_data = dict(zip(fields, record.record))

        # record.shape is the geometry read along with the record; no need
        # to reload every shape of the file for each record.
        _check_location(record.shape, bounds, report_record)

        for fieldname, fieldmodel in model["fields"].items():
            report_record.add_field(_check_field(fieldname, fieldmodel, record_data))

        report_file.add_record(report_record)

    return report_file


def check_folder(folder, checker):
    """Check the files of `folder` listed in the `checker` YAML file.

    :param folder: Path of the directory containing the shapefiles
    :param checker: Path of the YAML file; its "files" mapping gives the
        model of each expected file, and the optional "databases", "xmin",
        "xmax", "ymin" and "ymax" keys are read as well
    :return: a Report holding one ReportFile per entry of "files"
    """
    logger.info("***** Traitement de '%s' *****", folder.name)
    logger.info("> Controlleur: '%s'", checker.name)

    report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem))

    with open(checker, "r") as cf:
        # safe_load: yaml.load() without an explicit Loader can build
        # arbitrary objects and is rejected by recent PyYAML versions.
        config = yaml.safe_load(cf)

    bounds = (_bound(config, "xmin", 0),
              _bound(config, "xmax", float("inf")),
              _bound(config, "ymin", 0),
              _bound(config, "ymax", float("inf")))

    databases = {}
    try:
        for dbname, dsn in (config.get("databases") or {}).items():
            databases[dbname] = pypyodbc.connect(dsn)

        for filename, model in config["files"].items():
            report.report_files.append(_check_file(folder / filename, model, bounds))
    finally:
        # The connections are not used once the check is over.
        for cnn in databases.values():
            cnn.close()

    return report


def render_to_html(report):
    """Render `report` with the report.j2 template and return the HTML."""
    with open(MAIN / "website" / "templates" / "report.j2", "r", encoding="utf-8") as f:
        template = Template(f.read())

    return template.render(report=report)


if __name__ == "__main__":

    subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
    checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"

    report = check(subject, checker)

    result = render_to_html(report)

    # The file is created atomically (mktemp only returns a name, which is
    # race-prone) and written as UTF-8, the encoding the template is read in.
    with tempfile.NamedTemporaryFile("w", suffix=".html", dir=MAIN / "website",
                                     delete=False, encoding="utf-8") as f:
        f.write(result)
        result_filename = f.name

    # `start` is a shell built-in, hence shell=True; the path is generated
    # locally just above.
    subprocess.run("start {}".format(result_filename), shell=True)

    logger.info("-- Fin --")