datachecker.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
'''
Python 3.7+
@author: olivier.massot, sept 2018
'''
from datetime import datetime
import json
import logging
import subprocess #@UnusedImport
import tempfile #@UnusedImport
import zipfile
from jinja2.environment import Template
from path import Path, TempDir
import pypyodbc
import shapefile
import yaml
from core import logconf
from core.constants import MAIN

# Module-level logger, configured through the project's logconf helper.
logger = logging.getLogger("datachecker")
logconf.start("datachecker", logging.INFO)
# Keep the original case of ODBC column names.
pypyodbc.lowercase = False
# NOTE(review): logging is switched off here right after being configured —
# presumably toggled back on elsewhere; confirm this is intentional.
logger.disabled = True
# TODO: check the projection (requires GDAL/OGR)
# TODO: special-purpose control functions
  24. class ReportField():
  25. def __init__(self, name, value = "", error=""):
  26. self.name = name
  27. self.value = value
  28. self._error = error
  29. self._valid = True
  30. @property
  31. def error(self):
  32. return self._error
  33. @error.setter
  34. def error(self, error):
  35. self._valid = False
  36. logger.error("%s - %s ('%s')", self.name, error, self.value)
  37. self._error = error
  38. @property
  39. def valid(self):
  40. return self._valid
  41. class ReportRecord():
  42. def __init__(self, index):
  43. self.index = index
  44. self._valid = True
  45. self._errors = []
  46. self._fields = []
  47. @property
  48. def errors(self):
  49. return self._errors
  50. def add_error(self, error):
  51. self._valid = False
  52. logger.error("Ligne %s - %s", self.index, error)
  53. self._errors.append(error)
  54. @property
  55. def valid(self):
  56. return self._valid
  57. @property
  58. def fields(self):
  59. return self._fields
  60. def add_field(self, field):
  61. if not field.valid:
  62. self._valid=False
  63. self._fields.append(field)
  64. class ReportFile():
  65. def __init__(self, file):
  66. self.file = file
  67. self.headers = []
  68. self._valid = True
  69. self._errors = []
  70. self._records = []
  71. @property
  72. def valid(self):
  73. return self._valid
  74. @property
  75. def records(self):
  76. return self._records
  77. def add_record(self, row):
  78. if not row.valid:
  79. self._valid=False
  80. self._records.append(row)
  81. @property
  82. def errors(self):
  83. return self._errors
  84. def add_error(self, error):
  85. self._valid=False
  86. logger.error("Fichier %s - %s", self.file, error)
  87. self._errors.append(error)
  88. class Report():
  89. def __init__(self, title, report_files=[]):
  90. self.title = title
  91. self.report_files = report_files
  92. @property
  93. def valid(self):
  94. return all([r.valid for r in self.report_files])
  95. def to_json(self):
  96. return json.dumps(self)
  97. def check(subject, checker):
  98. """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
  99. subject, checker = Path(subject), Path(checker)
  100. if subject.isfile():
  101. with TempDir() as dirname:
  102. zip_ref = zipfile.ZipFile(subject, 'r')
  103. zip_ref.extractall(dirname)
  104. zip_ref.close()
  105. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  106. dirname /= subject.stem
  107. return check_folder(dirname, checker)
  108. elif subject.isdir():
  109. return check_folder(subject, checker)
  110. else:
  111. raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  112. def check_folder(folder, checker):
  113. logger.info("***** Traitement de '%s' *****", folder.name)
  114. logger.info("> Controlleur: '%s'", checker.name)
  115. report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem))
  116. with open(checker, "r") as cf:
  117. config = yaml.load(cf)
  118. databases = {}
  119. for dbname, dsn in config.get("databases", {}).items():
  120. cnn = pypyodbc.connect(dsn)
  121. databases[dbname] = cnn
  122. for filename, model in config["files"].items():
  123. path_ = folder / filename
  124. logger.info("* Traitement de %s", path_.name)
  125. report_file = ReportFile(path_.name)
  126. report.report_files.append(report_file)
  127. if not path_.isfile():
  128. report_file.add_error("Fichier introuvable")
  129. continue
  130. try:
  131. sf = shapefile.Reader(path_)
  132. except shapefile.ShapefileException:
  133. report_file.add_error("Fichier illisible")
  134. continue
  135. if "srid" in config:
  136. pass
  137. xmin, xmax, ymin, ymax = (int(config.get("xmin", 0)),
  138. int(config.get("xmax", float("inf"))),
  139. int(config.get("ymin", 0)),
  140. int(config.get("ymax", float("inf")))
  141. )
  142. if "shape_type" in model:
  143. shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
  144. if sf.shapeType != model["shape_type"]:
  145. report_file.add_error("Le fichier shapefile n'est pas de type {}".format(shape_names[model["shape_type"]]))
  146. del sf
  147. continue
  148. records = sf.shapeRecords()
  149. if not records and not model["can_be_empty"]:
  150. report_file.add_error("Le fichier shapefile ne contient aucune donnees")
  151. del sf, records
  152. continue
  153. if not "fields" in model:
  154. continue
  155. fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
  156. report_file.headers = list(model["fields"].keys())
  157. # parcours et controle des enregistrements
  158. for i, record in enumerate(records):
  159. logger.info("\n> Enregistrement n°%s\n", i)
  160. report_record = ReportRecord(i)
  161. record_data = {field: record.record[i] for i, field in enumerate(fields)}
  162. try:
  163. x1, y1, x2, y2 = sf.shapes()[i].bbox
  164. except AttributeError:
  165. x1, y1 = sf.shapes()[i].points[0]
  166. x2, y2 = x1, y1
  167. if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
  168. not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
  169. report_record.add_error("L'élément est situé hors de la zone géographique autorisée")
  170. for fieldname, fieldmodel in model["fields"].items():
  171. report_field = ReportField(fieldname)
  172. try:
  173. val = record_data[fieldname]
  174. except KeyError:
  175. if fieldmodel.get("required", True):
  176. report_field.error = "Champs manquant"
  177. report_record.add_field(report_field)
  178. continue
  179. report_field.value = val
  180. type_ = fieldmodel.get("type", "str")
  181. if type_ == "int":
  182. try:
  183. _ = int(val)
  184. except (TypeError, ValueError):
  185. report_field.error = "Valeur Invalide, un nombre entier est attendu"
  186. report_record.add_field(report_field)
  187. continue
  188. elif type_ == "float":
  189. try:
  190. _ = float(val)
  191. except (TypeError, ValueError):
  192. report_field.error = "Valeur Invalide, un nombre décimal est attendu"
  193. report_record.add_field(report_field)
  194. continue
  195. elif type_ == "datetime":
  196. try:
  197. _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
  198. except ValueError:
  199. report_field.error = "Valeur Invalide, une date est attendue"
  200. report_record.add_field(report_field)
  201. continue
  202. else:
  203. if not fieldmodel.get("allow_empty", False) and not val:
  204. report_field.error = "Ce champs ne peut pas être vide"
  205. report_record.add_field(report_field)
  206. continue
  207. if type_ == "str" and "max_len" in fieldmodel:
  208. if len(str(val)) > fieldmodel["max_len"]:
  209. report_field.error = "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])
  210. report_record.add_field(report_field)
  211. continue
  212. try:
  213. if not val in fieldmodel["in_list"]:
  214. report_field.error = "Valeur invalide, pas dans la liste"
  215. report_record.add_field(report_field)
  216. continue
  217. except KeyError:
  218. pass
  219. # if "in_table" in fieldmodel:
  220. #
  221. # db = databases[fieldmodel["in_table"]["db"]]
  222. # cursor = db.cursor()
  223. # cursor.execute("SELECT DISTINCT {} FROM {};".format(fieldmodel["in_table"]["field"], fieldmodel["in_table"]["table"]))
  224. # rows = [val[0] for val in cursor.fetchall()]
  225. #
  226. # if not val in rows:
  227. # report_field.error = "Valeur invalide, pas dans la liste"
  228. # report_record.add_field(report_field)
  229. # continue
  230. report_record.add_field(report_field)
  231. report_file.add_record(report_record)
  232. del sf, records
  233. return report
  234. def render_to_html(report):
  235. with open(MAIN / "templates" / "report.j2", "r") as f:
  236. template = Template(f.read())
  237. return template.render(report=report)
  238. # if __name__ == "__main__":
  239. #
  240. # subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
  241. # checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
  242. #
  243. # report = check(subject, checker)
  244. #
  245. # result = render_to_html(report)
  246. #
  247. # result_filename = tempfile.mktemp(".html", dir=MAIN / "website")
  248. #
  249. # with open(result_filename, "w+") as f:
  250. # f.write(result)
  251. #
  252. # subprocess.run("start {}".format(result_filename), shell=True)
  253. #
  254. # logger.info("-- Fin --")