olivier.massot 7 years ago
Parent commit 0f42e9e5bc
3 changed files with 203 additions and 34 deletions
  1. .gitignore (+1 -0)
  2. main.py (+187 -33)
  3. resources/netgeo_v2-2_doe.yaml (+15 -1)

+ 1 - 0
.gitignore

@@ -7,3 +7,4 @@ htmlcov/
 *.log
 *.log.1
 /work/*
+tmp*

+ 187 - 33
main.py

@@ -4,10 +4,15 @@ Python 3.7+
 @author: olivier.massot, sept 2018
 '''
 from datetime import datetime
+import json
 import logging
+import subprocess
+import tempfile
 import zipfile
 
+from jinja2.environment import Template
 from path import Path, TempDir
+import pypyodbc
 import shapefile
 import yaml
 
@@ -17,11 +22,106 @@ from core.constants import MAIN
 
 logger = logging.getLogger("datachecker")
 logconf.start("datachecker", logging.INFO)
+pypyodbc.lowercase = False  # keep column names exactly as returned by the database
+logger.disabled = True  # console logging is silenced; errors are collected in the report instead
 
 # TODO: Vérifier la projection (besoin de GDAL/OGR)
 # TODO: fonctions de controle spéciales
 # TODO: Rendu HTML 
 
+class ReportField():
+    def __init__(self, name, value="", error=""):
+        self.name = name
+        self.value = value
+        self._error = error
+        self._valid = True
+
+    @property
+    def error(self):
+        return self._error
+    
+    @error.setter
+    def error(self, error):
+        self._valid = False
+        logger.error("%s - %s ('%s')", self.name, error, self.value)
+        self._error = error
+
+    @property
+    def valid(self):
+        return self._valid
+
+class ReportRecord():
+    def __init__(self, index):
+        self.index = index
+        self._valid = True
+        self._errors = []
+        self._fields = []
+
+    @property
+    def errors(self):
+        return self._errors
+    
+    def add_error(self, error):
+        self._valid = False
+        logger.error("Ligne %s - %s", self.index, error)
+        self._errors.append(error)
+
+    @property
+    def valid(self):
+        return self._valid
+
+    @property
+    def fields(self):
+        return self._fields
+
+    def add_field(self, field):
+        if not field.valid:
+            self._valid = False
+        self._fields.append(field)
+
+class ReportFile():
+    def __init__(self, file):
+        self.file = file
+        self.headers = []
+        self._valid = True
+        self._errors = []
+        self._records = []
+
+    @property
+    def valid(self):
+        return self._valid   
+
+    @property
+    def records(self):
+        return self._records
+
+    def add_record(self, row):
+        if not row.valid:
+            self._valid = False
+        self._records.append(row)
+
+    @property
+    def errors(self):
+        return self._errors
+    
+    def add_error(self, error):
+        self._valid = False
+        logger.error("Fichier %s - %s", self.file, error)
+        self._errors.append(error)
+
+
+class Report():
+    def __init__(self, title, report_files=None):
+        self.title = title
+        # avoid a shared mutable default list between Report instances
+        self.report_files = report_files if report_files is not None else []
+
+    @property
+    def valid(self):
+        return all([r.valid for r in self.report_files])
+
+    def to_json(self):
+        # Report objects are plain containers: serialize them through __dict__
+        return json.dumps(self, default=lambda o: getattr(o, "__dict__", str(o)), ensure_ascii=False)
+
 def check(subject, checker):    
     """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
     subject, checker = Path(subject), Path(checker)
@@ -31,36 +131,45 @@ def check(subject, checker):
             zip_ref = zipfile.ZipFile(subject, 'r')
             zip_ref.extractall(dirname)
             zip_ref.close()
-            print()
             if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
                 dirname /= subject.stem
-            check_folder(dirname, checker)
+            return check_folder(dirname, checker)
         
     elif subject.isdir():
-        check_folder(subject, checker)
+        return check_folder(subject, checker)
     else:
         raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
 
 def check_folder(folder, checker):
-    logging.info("*****   Traitement de '%s'   *****", folder.name)
+    logger.info("*****   Traitement de '%s'   *****", folder.name)
+    
+    logger.info("> Controlleur: '%s'", checker.name)
     
-    logging.info("> Controlleur: '%s'", checker.name)
+    report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem))
     
     with open(checker, "r") as cf:
         config = yaml.load(cf)
     
+    databases = {}
+    for dbname, dsn in config.get("databases", {}).items():
+        cnn = pypyodbc.connect(dsn)
+        databases[dbname] = cnn
+        
     for filename, model in config["files"].items():
         path_ = folder / filename
-        logging.info("* Traitement de %s", path_.name)
+        logger.info("* Traitement de %s", path_.name)
+        
+        report_file = ReportFile(path_.name)
+        report.report_files.append(report_file)
         
         if not path_.isfile():
-            logger.error("Fichier introuvable")
+            report_file.add_error("Fichier introuvable")
             continue
         
         try:
             sf = shapefile.Reader(path_)
         except shapefile.ShapefileException:
-            logger.error("Fichier SHAPE illisible")
+            report_file.add_error("Fichier illisible")
             continue
         
         if "srid" in config:
@@ -75,86 +184,122 @@ def check_folder(folder, checker):
         if "shape_type" in model:
             shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
             if sf.shapeType != model["shape_type"]:
-                logger.error("Le fichier shapefile n'est pas de type %s", shape_names[model["shape_type"]])
+                report_file.add_error("Le fichier shapefile n'est pas de type {}".format(shape_names[model["shape_type"]]))
                 del sf
                 continue
 
         records = sf.shapeRecords()
-        if not records:
-            if not model["can_be_empty"]:
-                logger.error("Le fichier shapefile ne contient aucune donnees")
-                del sf, records
-                continue
-            else:
-                logger.warning("Le fichier shapefile ne contient aucune donnees")
+        if not records and not model["can_be_empty"]:
+            report_file.add_error("Le fichier shapefile ne contient aucune donnees")
+            del sf, records
+            continue
         
         if not "fields" in model:
             continue
         
         fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
         
-        # controle d'éventuels champs inconnus
-        for f in fields:
-            if f not in model["fields"]:
-                logger.warning("Champs inconnu: %s", f)
+        report_file.headers = list(model["fields"].keys())
         
         # parcours et controle des enregistrements
         for i, record in enumerate(records):
             
-            logging.info("\n> Enregistrement n°%s\n", i)
+            logger.info("\n> Enregistrement n°%s\n", i)
+            report_record = ReportRecord(i)
+            
             record_data = {field: record.record[i] for i, field in enumerate(fields)}
             
-            x1, y1, x2, y2 = sf.shapes()[i].bbox
+            try:
+                x1, y1, x2, y2 = sf.shapes()[i].bbox
+            except AttributeError:
+                x1, y1 = sf.shapes()[i].points[0]
+                x2, y2 = x1, y1
+            
             if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
                not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
-                logger.error("L'élément est situé hors de la zone autorisée")
-            
+                report_record.add_error("L'élément est situé hors de la zone géographique autorisée")
+                
             for fieldname, fieldmodel in model["fields"].items():
                 
+                report_field = ReportField(fieldname)
+                
                 try:
                     val = record_data[fieldname]
                 except KeyError:
                     if fieldmodel.get("required", True):
-                        logger.error("%s - Champs manquant", fieldname)
+                        report_field.error = "Champs manquant"
+                    report_record.add_field(report_field)
                     continue
                 
+                report_field.value = val
+                
                 type_ = fieldmodel.get("type", "str")
                 if type_ == "int":
                     try:
                         _ = int(val)
                     except (TypeError, ValueError):
-                        logger.error("%s - Valeur invalide, un entier est attendu ('%s')", fieldname, val)
+                        report_field.error = "Valeur invalide, un nombre entier est attendu"
+                        report_record.add_field(report_field)
                         continue
                 elif type_ == "float":
                     try:
                         _ = float(val)
                     except (TypeError, ValueError):
-                        logger.error("%s - Valeur invalide, un flottant est attendu ('%s')", fieldname, val)
+                        report_field.error = "Valeur invalide, un nombre décimal est attendu"
+                        report_record.add_field(report_field)
                         continue
                 elif type_ == "datetime":
                     try:
                         _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
                     except ValueError:
-                        logger.error("%s - Valeur invalide, une date est attendu ('%s')", fieldname, val)
+                        report_field.error = "Valeur invalide, une date est attendue"
+                        report_record.add_field(report_field)
                         continue
                 else: 
                     if not fieldmodel.get("allow_empty", False) and not val:
-                        logger.error("%s - Champs vide", fieldname)
+                        report_field.error = "Ce champs ne peut pas être vide"
+                        report_record.add_field(report_field)
                         continue
                 
                 if type_ == "str" and "max_len" in fieldmodel:
                     if len(str(val)) > fieldmodel["max_len"]:
-                        logger.error("%s - Trop long, la longueur max. est de %s ('%s')", fieldname, fieldmodel["max_len"], val)
-                    
+                        report_field.error = "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])
+                        report_record.add_field(report_field)
+                        continue
                 
                 try:
                     if not val in fieldmodel["in_list"]:
-                        logger.error("%s - Valeur invalide, pas dans la liste ('%s')", fieldname, val)
+                        report_field.error = "Valeur invalide, pas dans la liste"
+                        report_record.add_field(report_field)
                         continue
                 except KeyError:
                     pass
         
+#                 if "in_table" in fieldmodel:
+#                     
+#                     db = databases[fieldmodel["in_table"]["db"]]
+#                     cursor = db.cursor()
+#                     cursor.execute("SELECT DISTINCT {} FROM {};".format(fieldmodel["in_table"]["field"], fieldmodel["in_table"]["table"]))
+#                     rows = [val[0] for val in cursor.fetchall()]
+#                     
+#                     if not val in rows:
+#                         report_field.error = "Valeur invalide, pas dans la liste"
+#                         report_record.add_field(report_field)
+#                         continue
+                
+                report_record.add_field(report_field)
+            report_file.add_record(report_record)
+            
         del sf, records
+        
+    return report
+        
+
+def render_to_html(report):
+    with open(MAIN / "website" / "templates" / "report.j2", "r", encoding="utf-8") as f:
+        template = Template(f.read())
+
+    return template.render(report=report)
 
 
 if __name__ == "__main__":
@@ -162,6 +307,15 @@ if __name__ == "__main__":
     subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
     checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
     
-    check(subject, checker)
+    report = check(subject, checker)
+    
+    result = render_to_html(report)
+    
+    result_filename = tempfile.mktemp(".html", dir=MAIN / "website")
+    
+    with open(result_filename, "w", encoding="utf-8") as f:
+        f.write(result)
+    
+    subprocess.run("start {}".format(result_filename), shell=True)
     
     logger.info("-- Fin --")
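
Note on the commented-out "in_table" block above: as written it would run a SELECT DISTINCT query for every record and every field. Below is a minimal sketch of how that check could be enabled with a small cache, assuming the pypyodbc connections opened in check_folder and the fieldmodel structure used above; the helper name fetch_allowed_values and the module-level cache are hypothetical, not part of this commit:

    _allowed_values_cache = {}

    def fetch_allowed_values(databases, rule):
        """Return the set of allowed values for an in_table rule, querying each (db, table, field) only once."""
        key = (rule["db"], rule["table"], rule["field"])
        if key not in _allowed_values_cache:
            cursor = databases[rule["db"]].cursor()
            cursor.execute("SELECT DISTINCT {} FROM {};".format(rule["field"], rule["table"]))
            _allowed_values_cache[key] = {row[0] for row in cursor.fetchall()}
            cursor.close()
        return _allowed_values_cache[key]

Inside the field loop, the commented block would then reduce to testing val against fetch_allowed_values(databases, fieldmodel["in_table"]) before setting report_field.error, exactly like the existing in_list check.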

+ 15 - 1
resources/netgeo_v2-2_doe.yaml

@@ -4,6 +4,9 @@ ymin: 8147750.0839389534667134
 ymax: 8294000.0620922148227692
 srid: 3949
 
+databases:
+  sig_postgres: "DRIVER={PostgreSQL Unicode};DATABASE=sig50;SERVER=clusterpg.linux.infra.cloud.local;PORT=5432;UID=sigr;PWD=T38Msh2R4q"
+  
 files:
   "artere_geo.shp":
     shape_type: 3
@@ -17,6 +20,10 @@ files:
       AR_ID_INSE:
         type: str
         max_len: 5
+        in_table:
+          db: sig_postgres
+          table: sig_referentiel.admn_cd50_com
+          field: code_insee
       AR_LONG:
         type: float
       AR_ETAT:
@@ -263,6 +270,10 @@ files:
       NO_ID_INSE:
         type: str
         max_len: 5
+        in_table:
+          db: sig_postgres
+          table: sig_referentiel.admn_cd50_com
+          field: code_insee
       NO_NOM:
         type: str
         max_len: 20
@@ -367,7 +378,10 @@ files:
       TR_ID_INSE:
         type: str
         max_len: 5
-        in_list: []
+        in_table:
+          db: sig_postgres
+          table: sig_referentiel.admn_cd50_com
+          field: code_insee
       TR_VOIE:
         type: str
         max_len: 200
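
For reference, a minimal sketch of how the new databases and in_table entries come out once this checker file is loaded, assuming it is read from the repository root; yaml.safe_load is used here rather than the bare yaml.load call in main.py, and the printed keys are only illustrative:

    import yaml

    with open("resources/netgeo_v2-2_doe.yaml", "r", encoding="utf-8") as cf:
        config = yaml.safe_load(cf)

    # DSN strings, keyed by the name that the field rules reference
    print(config["databases"]["sig_postgres"])

    # the in_table rule attached to AR_ID_INSE in artere_geo.shp
    rule = config["files"]["artere_geo.shp"]["fields"]["AR_ID_INSE"]["in_table"]
    print(rule["db"], rule["table"], rule["field"])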