| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321 |
- '''
- Python 3.7+
- @author: olivier.massot, sept 2018
- '''
- from datetime import datetime
- import json
- import logging
- import subprocess
- import tempfile
- import zipfile
- from jinja2.environment import Template
- from path import Path, TempDir
- import pypyodbc
- import shapefile
- import yaml
- from core import logconf
- from core.constants import MAIN
- logger = logging.getLogger("datachecker")  # module-wide logger used by the report classes and the check functions
- logconf.start("datachecker", logging.INFO)  # project helper; presumably configures handlers for this logger at INFO level - confirm in core.logconf
- pypyodbc.lowercase = False  # pypyodbc module-level switch; NOTE(review): presumably keeps column names in their original case - confirm
- logger.disabled = True  # NOTE(review): a disabled logger drops every record, so the errors logged by the report classes are currently silenced
- # TODO: Check the projection (requires GDAL/OGR)
- # TODO: special-purpose check functions
- # TODO: HTML rendering
class ReportField():
    """Validation outcome for a single field (column value) of a record.

    Attributes:
        name: name of the checked field.
        value: value read for this field ("" until one is assigned).

    The field is valid until an error is attached to it, either through the
    constructor or by assigning the ``error`` property.
    """

    def __init__(self, name, value="", error=""):
        self.name = name
        self.value = value
        self._error = error
        # A field created with an error message is invalid from the start;
        # previously it was reported as valid despite carrying an error.
        self._valid = not error

    @property
    def error(self):
        """Error message attached to the field ("" when there is none)."""
        return self._error

    @error.setter
    def error(self, error):
        # Assigning an error always invalidates the field and logs it,
        # together with the offending value.
        self._valid = False
        logger.error("%s - %s ('%s')", self.name, error, self.value)
        self._error = error

    @property
    def valid(self):
        """True as long as no error has been attached to the field."""
        return self._valid
class ReportRecord():
    """Validation outcome for one record (row) of a checked file.

    A record starts valid and becomes invalid as soon as a record-level error
    is added or an invalid field is attached to it.
    """

    def __init__(self, index):
        self._fields = []
        self._errors = []
        self._valid = True
        self.index = index

    @property
    def valid(self):
        """True while no error and no invalid field has been registered."""
        return self._valid

    @property
    def errors(self):
        """Record-level error messages, in insertion order."""
        return self._errors

    @property
    def fields(self):
        """Field reports attached to this record, in insertion order."""
        return self._fields

    def add_error(self, error):
        """Register a record-level error, log it and invalidate the record."""
        self._valid = False
        logger.error("Ligne %s - %s", self.index, error)
        self._errors.append(error)

    def add_field(self, field):
        """Attach a field report; an invalid field invalidates the record."""
        field_is_valid = field.valid
        if not field_is_valid:
            self._valid = False
        self._fields.append(field)
class ReportFile():
    """Validation outcome for one checked file.

    Attributes:
        file: name of the file as given to the constructor.
        headers: column headers to display for the records (empty by default).

    A file report starts valid and becomes invalid as soon as a file-level
    error is added or an invalid record is attached to it.
    """

    def __init__(self, file):
        self._records = []
        self._errors = []
        self._valid = True
        self.headers = []
        self.file = file

    @property
    def valid(self):
        """True while no error and no invalid record has been registered."""
        return self._valid

    @property
    def errors(self):
        """File-level error messages, in insertion order."""
        return self._errors

    @property
    def records(self):
        """Record reports attached to this file, in insertion order."""
        return self._records

    def add_error(self, error):
        """Register a file-level error, log it and invalidate the file."""
        self._valid = False
        logger.error("Fichier %s - %s", self.file, error)
        self._errors.append(error)

    def add_record(self, row):
        """Attach a record report; an invalid record invalidates the file."""
        row_is_valid = row.valid
        if not row_is_valid:
            self._valid = False
        self._records.append(row)
class Report():
    """Top-level report: a title plus one report per checked file.

    Attributes:
        title: human-readable title of the report.
        report_files: list of per-file reports (objects exposing ``valid``).
    """

    def __init__(self, title, report_files=None):
        self.title = title
        # A fresh list per instance: the former mutable default ([]) was
        # shared by every Report created without an explicit list, so files
        # appended to one report leaked into the next ones.
        self.report_files = [] if report_files is None else report_files

    @property
    def valid(self):
        """True when every file report is valid (also True when empty)."""
        return all(r.valid for r in self.report_files)

    def to_json(self):
        """Return the report serialised as a JSON string.

        The document holds ``title``, ``valid`` and ``report_files``. Nested
        report objects are dumped from their instance attributes, with the
        leading underscore of private attributes removed (``_errors`` becomes
        ``errors``). Values JSON cannot represent and that carry no instance
        attributes (dates for instance) are converted with ``str``.
        """
        def encode(obj):
            try:
                attributes = vars(obj)
            except TypeError:
                # No __dict__: fall back to the textual representation.
                return str(obj)
            return {name.lstrip("_"): value for name, value in attributes.items()}

        payload = {
            "title": self.title,
            "valid": self.valid,
            "report_files": self.report_files,
        }
        return json.dumps(payload, default=encode)
def check(subject, checker):
    """Check a whole folder, or a zip archive of it, against a checker file.

    Args:
        subject: path of a directory, or of a zip archive containing the files
            to check.
        checker: path of the YAML configuration file holding the rules.

    Returns:
        The report built by ``check_folder``.

    Raises:
        IOError: if ``subject`` is neither an existing file nor a directory.
    """
    subject, checker = Path(subject), Path(checker)

    if subject.isfile():
        with TempDir() as dirname:
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(subject, 'r') as archive:
                archive.extractall(dirname)

            # Case where the archive holds a single folder (named after the
            # archive) which itself contains the files.
            folder = dirname
            nested = Path(dirname / subject.stem)
            if nested.isdir():
                folder = nested

            # The check must run before the temporary directory is left.
            return check_folder(folder, checker)

    if subject.isdir():
        return check_folder(subject, checker)

    raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
def check_folder(folder, checker):
    """Check the shapefiles of a folder against the rules of a checker file.

    Args:
        folder: path object of the directory holding the files to check.
        checker: path object of the YAML configuration. The keys read are:
            ``databases`` (optional mapping name -> ODBC connection string),
            ``xmin``/``xmax``/``ymin``/``ymax`` (optional bounds of the allowed
            area) and ``files`` (mapping file name -> model). A model may hold
            ``shape_type``, ``can_be_empty`` and ``fields`` (mapping field
            name -> constraints: ``required``, ``type``, ``date_format``,
            ``allow_empty``, ``max_len``, ``in_list``).

    Returns:
        A Report holding one ReportFile per entry of ``files``.
    """
    logger.info("***** Traitement de '%s' *****", folder.name)
    logger.info("> Controlleur: '%s'", checker.name)

    # An explicit list is passed so that this report never shares its file
    # list with another report.
    report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem), [])

    with open(checker, "r") as cf:
        # NOTE(review): yaml.load() without a Loader can build arbitrary Python
        # objects and is rejected by PyYAML >= 6; safe_load only accepts plain
        # YAML. Confirm that no checker file relies on python-specific tags.
        config = yaml.safe_load(cf)

    # TODO: check the projection when "srid" is set (requires GDAL/OGR)
    bounds = _read_bounds(config)

    databases = {}
    try:
        for dbname, dsn in config.get("databases", {}).items():
            databases[dbname] = pypyodbc.connect(dsn)

        for filename, model in config["files"].items():
            path_ = folder / filename
            logger.info("* Traitement de %s", path_.name)
            report.report_files.append(_check_file(path_, model, bounds))
    finally:
        # The connections were previously never released.
        for cnn in databases.values():
            cnn.close()

    return report


def _read_bounds(config):
    """Return (xmin, xmax, ymin, ymax) read from the configuration."""
    def bound(key, default):
        # int(float("inf")) raises OverflowError, so the infinite default is
        # only used as-is and never converted.
        return int(config[key]) if key in config else default

    infinity = float("inf")
    return bound("xmin", 0), bound("xmax", infinity), bound("ymin", 0), bound("ymax", infinity)


def _check_file(path_, model, bounds):
    """Check one shapefile against its model and return its ReportFile."""
    report_file = ReportFile(path_.name)

    if not path_.isfile():
        report_file.add_error("Fichier introuvable")
        return report_file

    try:
        # The reader is local to this helper, so it is released on return.
        sf = shapefile.Reader(path_)
    except shapefile.ShapefileException:
        report_file.add_error("Fichier illisible")
        return report_file

    if "shape_type" in model and sf.shapeType != model["shape_type"]:
        shape_names = {1: "Point", 3: "Polyligne", 5: "Polygone"}
        expected = model["shape_type"]
        # Fall back on the raw code for shape types without a display name.
        report_file.add_error(
            "Le fichier shapefile n'est pas de type {}".format(shape_names.get(expected, expected)))
        return report_file

    records = sf.shapeRecords()
    if not records and not model["can_be_empty"]:
        report_file.add_error("Le fichier shapefile ne contient aucune donnees")
        return report_file

    if "fields" not in model:
        return report_file

    fieldnames = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
    report_file.headers = list(model["fields"].keys())

    # Walk through the records and check each of them.
    for i, record in enumerate(records):
        logger.info("\n> Enregistrement n°%s\n", i)
        report_file.add_record(_check_record(i, record, fieldnames, model["fields"], bounds))

    return report_file


def _check_record(index, record, fieldnames, fieldmodels, bounds):
    """Check the location and the fields of one record; return its ReportRecord."""
    report_record = ReportRecord(index)
    record_data = dict(zip(fieldnames, record.record))

    # Use the geometry loaded together with the record instead of reading
    # every shape of the file again for each record.
    shape = record.shape
    try:
        x1, y1, x2, y2 = shape.bbox
    except AttributeError:
        # Shapes without a bounding box: use their first point.
        x1, y1 = shape.points[0]
        x2, y2 = x1, y1

    xmin, xmax, ymin, ymax = bounds
    inside = (xmin <= x1 <= xmax and xmin <= x2 <= xmax
              and ymin <= y1 <= ymax and ymin <= y2 <= ymax)
    if not inside:
        report_record.add_error("L'élément est situé hors de la zone géographique autorisée")

    for fieldname, fieldmodel in fieldmodels.items():
        # The field is attached once fully checked, so that its final
        # validity is the one taken into account by the record.
        report_record.add_field(_check_field(fieldname, fieldmodel, record_data))

    return report_record


def _check_field(fieldname, fieldmodel, record_data):
    """Check one field value against its constraints; return its ReportField.

    Checking stops at the first broken constraint.
    """
    from datetime import date

    fieldmodel = fieldmodel or {}  # a field declared without any constraint
    report_field = ReportField(fieldname)

    if fieldname not in record_data:
        # A missing field is an error only when required (the default);
        # either way there is no value left to check.
        if fieldmodel.get("required", True):
            report_field.error = "Champs manquant"
        return report_field

    val = record_data[fieldname]
    report_field.value = val

    type_ = fieldmodel.get("type", "str")
    if type_ == "int":
        try:
            int(val)
        except (TypeError, ValueError):
            report_field.error = "Valeur Invalide, un nombre entier est attendu"
            return report_field
    elif type_ == "float":
        try:
            float(val)
        except (TypeError, ValueError):
            report_field.error = "Valeur Invalide, un nombre décimal est attendu"
            return report_field
    elif type_ == "datetime":
        # NOTE(review): the shapefile reader presumably returns date objects
        # for date-typed columns; those are accepted as they are - confirm.
        if not isinstance(val, date):
            try:
                datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
            except (TypeError, ValueError):
                # TypeError: the value is not a string (None for instance).
                report_field.error = "Valeur Invalide, une date est attendue"
                return report_field
    else:
        if not fieldmodel.get("allow_empty", False) and not val:
            report_field.error = "Ce champs ne peut pas être vide"
            return report_field

    if type_ == "str" and "max_len" in fieldmodel:
        if len(str(val)) > fieldmodel["max_len"]:
            report_field.error = "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])
            return report_field

    if "in_list" in fieldmodel and val not in fieldmodel["in_list"]:
        report_field.error = "Valeur invalide, pas dans la liste"
        return report_field

    # TODO: "in_table" constraint (value looked up in a table of one of the
    # configured databases) is not implemented yet.
    return report_field
-
def render_to_html(report):
    """Render *report* with the ``report.j2`` Jinja2 template and return the text.

    The template is read (as UTF-8) from ``website/templates`` under ``MAIN``
    and receives the report under the name ``report``.
    """
    template_path = MAIN / "website" / "templates" / "report.j2"
    with open(template_path, "r", encoding="utf-8") as template_file:
        template_source = template_file.read()
    return Template(template_source).render(report=report)
if __name__ == "__main__":
    # Manual run: check a sample archive, render the report as HTML in the
    # website folder and open it.

    subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
    checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"

    report = check(subject, checker)

    result = render_to_html(report)

    # NamedTemporaryFile creates the file atomically (tempfile.mktemp is
    # deprecated and racy); delete=False keeps it so it can be opened below.
    # The encoding is explicit so that accented characters do not depend on
    # the platform default (the template itself is read as UTF-8).
    with tempfile.NamedTemporaryFile("w", suffix=".html", dir=MAIN / "website",
                                     delete=False, encoding="utf-8") as f:
        f.write(result)
        result_filename = f.name

    # Windows "start": the first quoted argument is the window title, the
    # second the file to open; quoting keeps paths with spaces in one piece.
    subprocess.run('start "" "{}"'.format(result_filename), shell=True)

    logger.info("-- Fin --")
|