
Corrections

olivier.massot, 7 years ago
commit 0f42e9e5bc

3 changed files with 203 additions and 34 deletions:
  1. .gitignore (+1 / -0)
  2. main.py (+187 / -33)
  3. resources/netgeo_v2-2_doe.yaml (+15 / -1)

.gitignore  (+1 / -0)

@@ -7,3 +7,4 @@ htmlcov/
 *.log
 *.log.1
 /work/*
+tmp*

main.py  (+187 / -33)

@@ -4,10 +4,15 @@ Python 3.7+
 @author: olivier.massot, sept 2018
 '''
 from datetime import datetime
+import json
 import logging
+import subprocess
+import tempfile
 import zipfile
 
+from jinja2.environment import Template
 from path import Path, TempDir
+import pypyodbc
 import shapefile
 import yaml
 
@@ -17,11 +22,106 @@ from core.constants import MAIN
 
 logger = logging.getLogger("datachecker")
 logconf.start("datachecker", logging.INFO)
+pypyodbc.lowercase = False
+logger.disabled = True
 
 # TODO: Vérifier la projection (besoin de GDAL/OGR)
 # TODO: fonctions de controle spéciales
 # TODO: Rendu HTML 
 
+class ReportField():
+    def __init__(self, name, value = "", error=""):
+        self.name = name
+        self.value = value
+        self._error = error
+        self._valid = True
+
+    @property
+    def error(self):
+        return self._error
+    
+    @error.setter
+    def error(self, error):
+        self._valid = False
+        logger.error("%s - %s ('%s')", self.name, error, self.value)
+        self._error = error
+
+    @property
+    def valid(self):
+        return self._valid
+
+class ReportRecord():
+    def __init__(self, index):
+        self.index = index
+        self._valid = True
+        self._errors = []
+        self._fields = []
+
+    @property
+    def errors(self):
+        return self._errors
+    
+    def add_error(self, error):
+        self._valid = False
+        logger.error("Ligne %s - %s", self.index, error)
+        self._errors.append(error)
+
+    @property
+    def valid(self):
+        return self._valid
+
+    @property
+    def fields(self):
+        return self._fields
+
+    def add_field(self, field):
+        if not field.valid:
+            self._valid=False
+        self._fields.append(field)
+
+class ReportFile():
+    def __init__(self, file):
+        self.file = file
+        self.headers = []
+        self._valid = True
+        self._errors = []
+        self._records = []
+
+    @property
+    def valid(self):
+        return self._valid   
+
+    @property
+    def records(self):
+        return self._records
+
+    def add_record(self, row):
+        if not row.valid:
+            self._valid=False
+        self._records.append(row)
+
+    @property
+    def errors(self):
+        return self._errors
+    
+    def add_error(self, error):
+        self._valid=False
+        logger.error("Fichier %s - %s", self.file, error)
+        self._errors.append(error)
+
+
+class Report():
+    def __init__(self, title, report_files=None):
+        self.title = title
+        self.report_files = report_files if report_files is not None else []
+
+    @property
+    def valid(self):
+        return all([r.valid for r in self.report_files])
+
+    def to_json(self):
+        return json.dumps(self, default=lambda o: getattr(o, "__dict__", str(o)))
+
 def check(subject, checker):    
     """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
     subject, checker = Path(subject), Path(checker)
@@ -31,36 +131,45 @@ def check(subject, checker):
             zip_ref = zipfile.ZipFile(subject, 'r')
             zip_ref.extractall(dirname)
             zip_ref.close()
-            print()
             if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
                 dirname /= subject.stem
-            check_folder(dirname, checker)
+            return check_folder(dirname, checker)
         
     elif subject.isdir():
-        check_folder(subject, checker)
+        return check_folder(subject, checker)
     else:
         raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
 
 def check_folder(folder, checker):
-    logging.info("*****   Traitement de '%s'   *****", folder.name)
+    logger.info("*****   Traitement de '%s'   *****", folder.name)
+    
+    logger.info("> Controlleur: '%s'", checker.name)
     
-    logging.info("> Controlleur: '%s'", checker.name)
+    report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem))
     
     with open(checker, "r") as cf:
         config = yaml.load(cf)
     
+    databases = {}
+    for dbname, dsn in config.get("databases", {}).items():
+        cnn = pypyodbc.connect(dsn)
+        databases[dbname] = cnn
+        
     for filename, model in config["files"].items():
         path_ = folder / filename
-        logging.info("* Traitement de %s", path_.name)
+        logger.info("* Traitement de %s", path_.name)
+        
+        report_file = ReportFile(path_.name)
+        report.report_files.append(report_file)
         
         if not path_.isfile():
-            logger.error("Fichier introuvable")
+            report_file.add_error("Fichier introuvable")
             continue
         
         try:
             sf = shapefile.Reader(path_)
         except shapefile.ShapefileException:
-            logger.error("Fichier SHAPE illisible")
+            report_file.add_error("Fichier illisible")
             continue
         
         if "srid" in config:
@@ -75,86 +184,122 @@ def check_folder(folder, checker):
         if "shape_type" in model:
         if "shape_type" in model:
             shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
             shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
             if sf.shapeType != model["shape_type"]:
             if sf.shapeType != model["shape_type"]:
-                logger.error("Le fichier shapefile n'est pas de type %s", shape_names[model["shape_type"]])
+                report_file.add_error("Le fichier shapefile n'est pas de type {}".format(shape_names[model["shape_type"]]))
                 del sf
                 del sf
                 continue
                 continue
 
 
         records = sf.shapeRecords()
         records = sf.shapeRecords()
-        if not records:
-            if not model["can_be_empty"]:
-                logger.error("Le fichier shapefile ne contient aucune donnees")
-                del sf, records
-                continue
-            else:
-                logger.warning("Le fichier shapefile ne contient aucune donnees")
+        if not records and not model["can_be_empty"]:
+            report_file.add_error("Le fichier shapefile ne contient aucune donnees")
+            del sf, records
+            continue
         
         if not "fields" in model:
             continue
         
         fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
         
-        # controle d'éventuels champs inconnus
-        for f in fields:
-            if f not in model["fields"]:
-                logger.warning("Champs inconnu: %s", f)
+        report_file.headers = list(model["fields"].keys())
         
         # parcours et controle des enregistrements
         for i, record in enumerate(records):
             
-            logging.info("\n> Enregistrement n°%s\n", i)
+            logger.info("\n> Enregistrement n°%s\n", i)
+            report_record = ReportRecord(i)
+            
             record_data = {field: record.record[i] for i, field in enumerate(fields)}
             
-            x1, y1, x2, y2 = sf.shapes()[i].bbox
+            try:
+                x1, y1, x2, y2 = sf.shapes()[i].bbox
+            except AttributeError:
+                x1, y1 = sf.shapes()[i].points[0]
+                x2, y2 = x1, y1
+            
             if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
                not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
-                logger.error("L'élément est situé hors de la zone autorisée")
-            
+                report_record.add_error("L'élément est situé hors de la zone géographique autorisée")
+                
             for fieldname, fieldmodel in model["fields"].items():
                 
+                report_field = ReportField(fieldname)
+                
                 try:
                     val = record_data[fieldname]
                 except KeyError:
                     if fieldmodel.get("required", True):
-                        logger.error("%s - Champs manquant", fieldname)
+                        report_field.error = "Champs manquant"
+                    report_record.add_field(report_field)
                     continue
                 
+                report_field.value = val
+                
                 type_ = fieldmodel.get("type", "str")
                 if type_ == "int":
                     try:
                         _ = int(val)
                     except (TypeError, ValueError):
-                        logger.error("%s - Valeur invalide, un entier est attendu ('%s')", fieldname, val)
+                        report_field.error = "Valeur Invalide, un nombre entier est attendu"
+                        report_record.add_field(report_field)
                         continue
                 elif type_ == "float":
                     try:
                         _ = float(val)
                     except (TypeError, ValueError):
-                        logger.error("%s - Valeur invalide, un flottant est attendu ('%s')", fieldname, val)
+                        report_field.error = "Valeur Invalide, un nombre décimal est attendu"
+                        report_record.add_field(report_field)
                         continue
                 elif type_ == "datetime":
                     try:
                         _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
                     except ValueError:
-                        logger.error("%s - Valeur invalide, une date est attendu ('%s')", fieldname, val)
+                        report_field.error = "Valeur Invalide, une date est attendue"
+                        report_record.add_field(report_field)
                         continue
                 else: 
                     if not fieldmodel.get("allow_empty", False) and not val:
-                        logger.error("%s - Champs vide", fieldname)
+                        report_field.error = "Ce champs ne peut pas être vide"
+                        report_record.add_field(report_field)
                         continue
                 
                 if type_ == "str" and "max_len" in fieldmodel:
                     if len(str(val)) > fieldmodel["max_len"]:
-                        logger.error("%s - Trop long, la longueur max. est de %s ('%s')", fieldname, fieldmodel["max_len"], val)
-                    
+                        report_field.error = "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])
+                        report_record.add_field(report_field)
+                        continue
                 
                 try:
                     if not val in fieldmodel["in_list"]:
-                        logger.error("%s - Valeur invalide, pas dans la liste ('%s')", fieldname, val)
+                        report_field.error = "Valeur invalide, pas dans la liste"
+                        report_record.add_field(report_field)
                         continue
                 except KeyError:
                     pass
         
+#                 if "in_table" in fieldmodel:
+#                     
+#                     db = databases[fieldmodel["in_table"]["db"]]
+#                     cursor = db.cursor()
+#                     cursor.execute("SELECT DISTINCT {} FROM {};".format(fieldmodel["in_table"]["field"], fieldmodel["in_table"]["table"]))
+#                     rows = [val[0] for val in cursor.fetchall()]
+#                     
+#                     if not val in rows:
+#                         report_field.error = "Valeur invalide, pas dans la liste"
+#                         report_record.add_field(report_field)
+#                         continue
+                
+                report_record.add_field(report_field)
+            report_file.add_record(report_record)
+            
         del sf, records
+        
+    return report
+        
+
+def render_to_html(report):
+    with open(MAIN / "website" / "templates" / "report.j2", "r", encoding="utf-8") as f:
+        template = Template(f.read())
+
+    return template.render(report=report)
 
 
 if __name__ == "__main__":
@@ -162,6 +307,15 @@ if __name__ == "__main__":
     subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
     subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
     checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
     checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
     
     
-    check(subject, checker)
+    report = check(subject, checker)
+    
+    result = render_to_html(report)
+    
+    result_filename = tempfile.mktemp(".html", dir=MAIN / "website")
+    
+    with open(result_filename, "w+") as f:
+        f.write(result)
+    
+    subprocess.run("start {}".format(result_filename), shell=True)
     
     logger.info("-- Fin --")

resources/netgeo_v2-2_doe.yaml  (+15 / -1)

@@ -4,6 +4,9 @@ ymin: 8147750.0839389534667134
 ymax: 8294000.0620922148227692
 srid: 3949
 
+databases:
+  sig_postgres: "DRIVER={PostgreSQL Unicode};DATABASE=sig50;SERVER=clusterpg.linux.infra.cloud.local;PORT=5432;UID=sigr;PWD=T38Msh2R4q"
+  
 files:
   "artere_geo.shp":
     shape_type: 3
@@ -17,6 +20,10 @@ files:
       AR_ID_INSE:
         type: str
         max_len: 5
+        in_table:
+          db: sig_postgres
+          table: sig_referentiel.admn_cd50_com
+          field: code_insee
       AR_LONG:
         type: float
       AR_ETAT:
@@ -263,6 +270,10 @@ files:
       NO_ID_INSE:
         type: str
         max_len: 5
+        in_table:
+          db: sig_postgres
+          table: sig_referentiel.admn_cd50_com
+          field: code_insee
       NO_NOM:
         type: str
         max_len: 20
@@ -367,7 +378,10 @@ files:
       TR_ID_INSE:
         type: str
         max_len: 5
-        in_list: []
+        in_table:
+          db: sig_postgres
+          table: sig_referentiel.admn_cd50_com
+          field: code_insee
       TR_VOIE:
         type: str
         max_len: 200
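
The new in_table entries in the YAML point at a PostgreSQL reference table, but the corresponding check in main.py is only sketched in a commented-out block. One possible way to wire it up, assuming the databases dict of pypyodbc connections built in check_folder(); the in_table_values() helper and the _in_table_cache dict are illustrative names, not part of the commit, and the cache keeps the SELECT DISTINCT from running once per record:

    # Illustrative sketch: fetch the reference values once per (db, table, field).
    _in_table_cache = {}

    def in_table_values(databases, rule):
        """Return the set of allowed values for an in_table rule from the YAML."""
        key = (rule["db"], rule["table"], rule["field"])
        if key not in _in_table_cache:
            cursor = databases[rule["db"]].cursor()
            cursor.execute("SELECT DISTINCT {} FROM {};".format(rule["field"], rule["table"]))
            _in_table_cache[key] = {row[0] for row in cursor.fetchall()}
        return _in_table_cache[key]

    # Inside the field loop, mirroring the commented-out block in main.py:
    #     if "in_table" in fieldmodel:
    #         if val not in in_table_values(databases, fieldmodel["in_table"]):
    #             report_field.error = "Valeur invalide, pas dans la liste"
    #             report_record.add_field(report_field)
    #             continue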