|
@@ -4,10 +4,15 @@ Python 3.7+
|
|
|
@author: olivier.massot, sept 2018
|
|
@author: olivier.massot, sept 2018
|
|
|
'''
|
|
'''
|
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
|
|
+import json
|
|
|
import logging
|
|
import logging
|
|
|
|
|
+import subprocess
|
|
|
|
|
+import tempfile
|
|
|
import zipfile
|
|
import zipfile
|
|
|
|
|
|
|
|
|
|
+from jinja2.environment import Template
|
|
|
from path import Path, TempDir
|
|
from path import Path, TempDir
|
|
|
|
|
+import pypyodbc
|
|
|
import shapefile
|
|
import shapefile
|
|
|
import yaml
|
|
import yaml
|
|
|
|
|
|
|
@@ -17,11 +22,106 @@ from core.constants import MAIN
|
|
|
|
|
|
|
|
logger = logging.getLogger("datachecker")
|
|
logger = logging.getLogger("datachecker")
|
|
|
logconf.start("datachecker", logging.INFO)
|
|
logconf.start("datachecker", logging.INFO)
|
|
|
|
|
+pypyodbc.lowercase = False
|
|
|
|
|
+logger.disabled = True
|
|
|
|
|
|
|
|
# TODO: Vérifier la projection (besoin de GDAL/OGR)
|
|
# TODO: Vérifier la projection (besoin de GDAL/OGR)
|
|
|
# TODO: fonctions de controle spéciales
|
|
# TODO: fonctions de controle spéciales
|
|
|
# TODO: Rendu HTML
|
|
# TODO: Rendu HTML
|
|
|
|
|
|
|
|
|
|
class ReportField():
    """Check result for one attribute of a record.

    Attributes:
        name: name of the checked field.
        value: value read for that field ("" until one is assigned).

    The ``error`` property holds the error message ("" when none was set)
    and ``valid`` tells whether the field passed its checks.
    """

    def __init__(self, name, value="", error=""):
        """Create the result for field *name*.

        Args:
            name: name of the checked field.
            value: value found for the field.
            error: optional error message known at construction time.
        """
        self.name = name
        self.value = value
        self._error = error
        # A field built with an error message must not report itself as
        # valid; only the setter used to clear the flag.
        self._valid = not error

    @property
    def error(self):
        """Error message attached to the field ("" when none)."""
        return self._error

    @error.setter
    def error(self, error):
        # Assigning an error always invalidates the field and is logged
        # together with the field name and the offending value.
        self._valid = False
        logger.error("%s - %s ('%s')", self.name, error, self.value)
        self._error = error

    @property
    def valid(self):
        """True as long as no error has been attached to the field."""
        return self._valid
|
|
|
|
|
+
|
|
|
|
|
class ReportRecord():
    """Check results gathered for one record of a file.

    A record starts out valid and becomes invalid as soon as an error is
    added to it or an invalid field is attached to it.
    """

    def __init__(self, index):
        """Create an empty, valid result for the record identified by *index*."""
        self.index = index
        self._fields = []
        self._errors = []
        self._valid = True

    @property
    def valid(self):
        """False once an error or an invalid field has been recorded."""
        return self._valid

    @property
    def errors(self):
        """Record-level error messages, in insertion order."""
        return self._errors

    @property
    def fields(self):
        """Field results attached to this record, in insertion order."""
        return self._fields

    def add_error(self, error):
        """Register a record-level error, log it and invalidate the record."""
        self._valid = False
        logger.error("Ligne %s - %s", self.index, error)
        self._errors.append(error)

    def add_field(self, field):
        """Attach a field result; an invalid field invalidates the record."""
        field_ok = bool(field.valid)
        self._valid = self._valid and field_ok
        self._fields.append(field)
|
|
|
|
|
+
|
|
|
|
|
class ReportFile():
    """Check results gathered for one file.

    Attributes:
        file: identifier of the checked file, as given by the caller.
        headers: column names to display for the records (empty by default).

    The file starts out valid and becomes invalid when a file-level error
    is added or when an invalid record is attached.
    """

    def __init__(self, file):
        """Create an empty, valid result for *file*."""
        self.file = file
        self.headers = []
        self._records = []
        self._errors = []
        self._valid = True

    @property
    def valid(self):
        """False once an error or an invalid record has been recorded."""
        return self._valid

    @property
    def errors(self):
        """File-level error messages, in insertion order."""
        return self._errors

    @property
    def records(self):
        """Record results attached to this file, in insertion order."""
        return self._records

    def add_error(self, error):
        """Register a file-level error, log it and invalidate the file."""
        self._valid = False
        logger.error("Fichier %s - %s", self.file, error)
        self._errors.append(error)

    def add_record(self, row):
        """Attach a record result; an invalid record invalidates the file."""
        self._valid &= bool(row.valid)
        self._records.append(row)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class Report():
    """Top-level result of a check: a title plus one entry per checked file.

    Attributes:
        title: human-readable title of the report.
        report_files: list of per-file results; each item must expose a
            ``valid`` attribute.
    """

    def __init__(self, title, report_files=None):
        """Create a report.

        Args:
            title: title of the report.
            report_files: optional list of per-file results. The list is
                used as-is (not copied) when provided.
        """
        self.title = title
        # Build a fresh list per instance: a `[]` default is created once
        # and would be shared, so files appended to one report would show
        # up in every report created afterwards.
        self.report_files = [] if report_files is None else report_files

    @property
    def valid(self):
        """True when every file of the report is valid (or there is none)."""
        return all(r.valid for r in self.report_files)

    @staticmethod
    def _jsonable(obj):
        """Map an object json cannot encode natively to a dict of its attributes.

        Leading underscores are stripped from attribute names so private
        storage (``_errors``) is exported under its public name (``errors``).

        Raises:
            TypeError: if *obj* has no ``__dict__`` (what ``json`` expects
                from a ``default`` hook for unsupported values).
        """
        try:
            attributes = vars(obj)
        except TypeError:
            raise TypeError(
                f"Object of type {type(obj).__name__} is not JSON serializable"
            ) from None
        return {key.lstrip("_"): value for key, value in attributes.items()}

    def to_json(self):
        """Serialize the report, and the objects it contains, to a JSON string.

        ``json.dumps(self)`` alone always failed because ``Report`` is not a
        JSON type; nested objects are now exported through their attributes.

        Raises:
            TypeError: if a contained value is neither a JSON type nor an
                object with attributes.
        """
        return json.dumps(self, default=self._jsonable)
|
|
|
|
|
+
|
|
|
def check(subject, checker):
|
|
def check(subject, checker):
|
|
|
""" prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
|
|
""" prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
|
|
|
subject, checker = Path(subject), Path(checker)
|
|
subject, checker = Path(subject), Path(checker)
|
|
@@ -31,36 +131,45 @@ def check(subject, checker):
|
|
|
zip_ref = zipfile.ZipFile(subject, 'r')
|
|
zip_ref = zipfile.ZipFile(subject, 'r')
|
|
|
zip_ref.extractall(dirname)
|
|
zip_ref.extractall(dirname)
|
|
|
zip_ref.close()
|
|
zip_ref.close()
|
|
|
- print()
|
|
|
|
|
if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
|
|
if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
|
|
|
dirname /= subject.stem
|
|
dirname /= subject.stem
|
|
|
- check_folder(dirname, checker)
|
|
|
|
|
|
|
+ return check_folder(dirname, checker)
|
|
|
|
|
|
|
|
elif subject.isdir():
|
|
elif subject.isdir():
|
|
|
- check_folder(subject, checker)
|
|
|
|
|
|
|
+ return check_folder(subject, checker)
|
|
|
else:
|
|
else:
|
|
|
raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
|
|
raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
|
|
|
|
|
|
|
|
def check_folder(folder, checker):
|
|
def check_folder(folder, checker):
|
|
|
- logging.info("***** Traitement de '%s' *****", folder.name)
|
|
|
|
|
|
|
+ logger.info("***** Traitement de '%s' *****", folder.name)
|
|
|
|
|
+
|
|
|
|
|
+ logger.info("> Controlleur: '%s'", checker.name)
|
|
|
|
|
|
|
|
- logging.info("> Controlleur: '%s'", checker.name)
|
|
|
|
|
|
|
+ report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem))
|
|
|
|
|
|
|
|
with open(checker, "r") as cf:
|
|
with open(checker, "r") as cf:
|
|
|
config = yaml.load(cf)
|
|
config = yaml.load(cf)
|
|
|
|
|
|
|
|
|
|
+ databases = {}
|
|
|
|
|
+ for dbname, dsn in config.get("databases", {}).items():
|
|
|
|
|
+ cnn = pypyodbc.connect(dsn)
|
|
|
|
|
+ databases[dbname] = cnn
|
|
|
|
|
+
|
|
|
for filename, model in config["files"].items():
|
|
for filename, model in config["files"].items():
|
|
|
path_ = folder / filename
|
|
path_ = folder / filename
|
|
|
- logging.info("* Traitement de %s", path_.name)
|
|
|
|
|
|
|
+ logger.info("* Traitement de %s", path_.name)
|
|
|
|
|
+
|
|
|
|
|
+ report_file = ReportFile(path_.name)
|
|
|
|
|
+ report.report_files.append(report_file)
|
|
|
|
|
|
|
|
if not path_.isfile():
|
|
if not path_.isfile():
|
|
|
- logger.error("Fichier introuvable")
|
|
|
|
|
|
|
+ report_file.add_error("Fichier introuvable")
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
sf = shapefile.Reader(path_)
|
|
sf = shapefile.Reader(path_)
|
|
|
except shapefile.ShapefileException:
|
|
except shapefile.ShapefileException:
|
|
|
- logger.error("Fichier SHAPE illisible")
|
|
|
|
|
|
|
+ report_file.add_error("Fichier illisible")
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
if "srid" in config:
|
|
if "srid" in config:
|
|
@@ -75,86 +184,122 @@ def check_folder(folder, checker):
|
|
|
if "shape_type" in model:
|
|
if "shape_type" in model:
|
|
|
shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
|
|
shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
|
|
|
if sf.shapeType != model["shape_type"]:
|
|
if sf.shapeType != model["shape_type"]:
|
|
|
- logger.error("Le fichier shapefile n'est pas de type %s", shape_names[model["shape_type"]])
|
|
|
|
|
|
|
+ report_file.add_error("Le fichier shapefile n'est pas de type {}".format(shape_names[model["shape_type"]]))
|
|
|
del sf
|
|
del sf
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
records = sf.shapeRecords()
|
|
records = sf.shapeRecords()
|
|
|
- if not records:
|
|
|
|
|
- if not model["can_be_empty"]:
|
|
|
|
|
- logger.error("Le fichier shapefile ne contient aucune donnees")
|
|
|
|
|
- del sf, records
|
|
|
|
|
- continue
|
|
|
|
|
- else:
|
|
|
|
|
- logger.warning("Le fichier shapefile ne contient aucune donnees")
|
|
|
|
|
|
|
+ if not records and not model["can_be_empty"]:
|
|
|
|
|
+ report_file.add_error("Le fichier shapefile ne contient aucune donnees")
|
|
|
|
|
+ del sf, records
|
|
|
|
|
+ continue
|
|
|
|
|
|
|
|
if not "fields" in model:
|
|
if not "fields" in model:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
|
|
fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
|
|
|
|
|
|
|
|
- # controle d'éventuels champs inconnus
|
|
|
|
|
- for f in fields:
|
|
|
|
|
- if f not in model["fields"]:
|
|
|
|
|
- logger.warning("Champs inconnu: %s", f)
|
|
|
|
|
|
|
+ report_file.headers = list(model["fields"].keys())
|
|
|
|
|
|
|
|
# parcours et controle des enregistrements
|
|
# parcours et controle des enregistrements
|
|
|
for i, record in enumerate(records):
|
|
for i, record in enumerate(records):
|
|
|
|
|
|
|
|
- logging.info("\n> Enregistrement n°%s\n", i)
|
|
|
|
|
|
|
+ logger.info("\n> Enregistrement n°%s\n", i)
|
|
|
|
|
+ report_record = ReportRecord(i)
|
|
|
|
|
+
|
|
|
record_data = {field: record.record[i] for i, field in enumerate(fields)}
|
|
record_data = {field: record.record[i] for i, field in enumerate(fields)}
|
|
|
|
|
|
|
|
- x1, y1, x2, y2 = sf.shapes()[i].bbox
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ x1, y1, x2, y2 = sf.shapes()[i].bbox
|
|
|
|
|
+ except AttributeError:
|
|
|
|
|
+ x1, y1 = sf.shapes()[i].points[0]
|
|
|
|
|
+ x2, y2 = x1, y1
|
|
|
|
|
+
|
|
|
if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
|
|
if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
|
|
|
not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
|
|
not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
|
|
|
- logger.error("L'élément est situé hors de la zone autorisée")
|
|
|
|
|
-
|
|
|
|
|
|
|
+ report_record.add_error("L'élément est situé hors de la zone géographique autorisée")
|
|
|
|
|
+
|
|
|
for fieldname, fieldmodel in model["fields"].items():
|
|
for fieldname, fieldmodel in model["fields"].items():
|
|
|
|
|
|
|
|
|
|
+ report_field = ReportField(fieldname)
|
|
|
|
|
+
|
|
|
try:
|
|
try:
|
|
|
val = record_data[fieldname]
|
|
val = record_data[fieldname]
|
|
|
except KeyError:
|
|
except KeyError:
|
|
|
if fieldmodel.get("required", True):
|
|
if fieldmodel.get("required", True):
|
|
|
- logger.error("%s - Champs manquant", fieldname)
|
|
|
|
|
|
|
+ report_field.error = "Champs manquant"
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
|
|
+ report_field.value = val
|
|
|
|
|
+
|
|
|
type_ = fieldmodel.get("type", "str")
|
|
type_ = fieldmodel.get("type", "str")
|
|
|
if type_ == "int":
|
|
if type_ == "int":
|
|
|
try:
|
|
try:
|
|
|
_ = int(val)
|
|
_ = int(val)
|
|
|
except (TypeError, ValueError):
|
|
except (TypeError, ValueError):
|
|
|
- logger.error("%s - Valeur invalide, un entier est attendu ('%s')", fieldname, val)
|
|
|
|
|
|
|
+ report_field.error = "Valeur Invalide, un nombre entier est attendu"
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
continue
|
|
continue
|
|
|
elif type_ == "float":
|
|
elif type_ == "float":
|
|
|
try:
|
|
try:
|
|
|
_ = float(val)
|
|
_ = float(val)
|
|
|
except (TypeError, ValueError):
|
|
except (TypeError, ValueError):
|
|
|
- logger.error("%s - Valeur invalide, un flottant est attendu ('%s')", fieldname, val)
|
|
|
|
|
|
|
+ report_field.error = "Valeur Invalide, un nombre décimal est attendu"
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
continue
|
|
continue
|
|
|
elif type_ == "datetime":
|
|
elif type_ == "datetime":
|
|
|
try:
|
|
try:
|
|
|
_ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
|
|
_ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
|
|
|
except ValueError:
|
|
except ValueError:
|
|
|
- logger.error("%s - Valeur invalide, une date est attendu ('%s')", fieldname, val)
|
|
|
|
|
|
|
+ report_field.error = "Valeur Invalide, une date est attendue"
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
continue
|
|
continue
|
|
|
else:
|
|
else:
|
|
|
if not fieldmodel.get("allow_empty", False) and not val:
|
|
if not fieldmodel.get("allow_empty", False) and not val:
|
|
|
- logger.error("%s - Champs vide", fieldname)
|
|
|
|
|
|
|
+ report_field.error = "Ce champs ne peut pas être vide"
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
if type_ == "str" and "max_len" in fieldmodel:
|
|
if type_ == "str" and "max_len" in fieldmodel:
|
|
|
if len(str(val)) > fieldmodel["max_len"]:
|
|
if len(str(val)) > fieldmodel["max_len"]:
|
|
|
- logger.error("%s - Trop long, la longueur max. est de %s ('%s')", fieldname, fieldmodel["max_len"], val)
|
|
|
|
|
-
|
|
|
|
|
|
|
+ report_field.error = "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
|
|
+ continue
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
if not val in fieldmodel["in_list"]:
|
|
if not val in fieldmodel["in_list"]:
|
|
|
- logger.error("%s - Valeur invalide, pas dans la liste ('%s')", fieldname, val)
|
|
|
|
|
|
|
+ report_field.error = "Valeur invalide, pas dans la liste"
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
continue
|
|
continue
|
|
|
except KeyError:
|
|
except KeyError:
|
|
|
pass
|
|
pass
|
|
|
|
|
|
|
|
|
|
+# if "in_table" in fieldmodel:
|
|
|
|
|
+#
|
|
|
|
|
+# db = databases[fieldmodel["in_table"]["db"]]
|
|
|
|
|
+# cursor = db.cursor()
|
|
|
|
|
+# cursor.execute("SELECT DISTINCT {} FROM {};".format(fieldmodel["in_table"]["field"], fieldmodel["in_table"]["table"]))
|
|
|
|
|
+# rows = [val[0] for val in cursor.fetchall()]
|
|
|
|
|
+#
|
|
|
|
|
+# if not val in rows:
|
|
|
|
|
+# report_field.error = "Valeur invalide, pas dans la liste"
|
|
|
|
|
+# report_record.add_field(report_field)
|
|
|
|
|
+# continue
|
|
|
|
|
+
|
|
|
|
|
+ report_record.add_field(report_field)
|
|
|
|
|
+ report_file.add_record(report_record)
|
|
|
|
|
+
|
|
|
del sf, records
|
|
del sf, records
|
|
|
|
|
+
|
|
|
|
|
+ return report
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def render_to_html(report):
    """Render *report* with the ``report.j2`` Jinja2 template.

    The template is read as UTF-8 from ``MAIN / "website" / "templates"``
    and receives the report under the name ``report``.

    Returns:
        The output of ``Template.render`` for that template.
    """
    template_path = MAIN / "website" / "templates" / "report.j2"
    with open(template_path, "r", encoding="utf-8") as template_file:
        source = template_file.read()

    # NOTE(review): the template is built without autoescaping; confirm the
    # template escapes values coming from the checked files.
    template = Template(source)
    return template.render(report=report)
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
@@ -162,6 +307,15 @@ if __name__ == "__main__":
|
|
|
subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
|
|
subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
|
|
|
checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
|
|
checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
|
|
|
|
|
|
|
|
- check(subject, checker)
|
|
|
|
|
|
|
+ report = check(subject, checker)
|
|
|
|
|
+
|
|
|
|
|
+ result = render_to_html(report)
|
|
|
|
|
+
|
|
|
|
|
+ result_filename = tempfile.mktemp(".html", dir=MAIN / "website")
|
|
|
|
|
+
|
|
|
|
|
+ with open(result_filename, "w+") as f:
|
|
|
|
|
+ f.write(result)
|
|
|
|
|
+
|
|
|
|
|
+ subprocess.run("start {}".format(result_filename), shell=True)
|
|
|
|
|
|
|
|
logger.info("-- Fin --")
|
|
logger.info("-- Fin --")
|