main.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. '''
  2. Python 3.7+
  3. @author: olivier.massot, sept 2018
  4. '''
  5. from datetime import datetime
  6. import json
  7. import logging
  8. import subprocess
  9. import tempfile
  10. import zipfile
  11. from jinja2.environment import Template
  12. from path import Path, TempDir
  13. import pypyodbc
  14. import shapefile
  15. import yaml
  16. from core import logconf
  17. from core.constants import MAIN
  18. logger = logging.getLogger("datachecker")
  19. logconf.start("datachecker", logging.INFO)
  20. pypyodbc.lowercase = False
  21. logger.disabled = True
# TODO: Check the projection (requires GDAL/OGR)
# TODO: Special-purpose check functions
# TODO: HTML rendering
  25. class ReportField():
  26. def __init__(self, name, value = "", error=""):
  27. self.name = name
  28. self.value = value
  29. self._error = error
  30. self._valid = True
  31. @property
  32. def error(self):
  33. return self._error
  34. @error.setter
  35. def error(self, error):
  36. self._valid = False
  37. logger.error("%s - %s ('%s')", self.name, error, self.value)
  38. self._error = error
  39. @property
  40. def valid(self):
  41. return self._valid
  42. class ReportRecord():
  43. def __init__(self, index):
  44. self.index = index
  45. self._valid = True
  46. self._errors = []
  47. self._fields = []
  48. @property
  49. def errors(self):
  50. return self._errors
  51. def add_error(self, error):
  52. self._valid = False
  53. logger.error("Ligne %s - %s", self.index, error)
  54. self._errors.append(error)
  55. @property
  56. def valid(self):
  57. return self._valid
  58. @property
  59. def fields(self):
  60. return self._fields
  61. def add_field(self, field):
  62. if not field.valid:
  63. self._valid=False
  64. self._fields.append(field)
  65. class ReportFile():
  66. def __init__(self, file):
  67. self.file = file
  68. self.headers = []
  69. self._valid = True
  70. self._errors = []
  71. self._records = []
  72. @property
  73. def valid(self):
  74. return self._valid
  75. @property
  76. def records(self):
  77. return self._records
  78. def add_record(self, row):
  79. if not row.valid:
  80. self._valid=False
  81. self._records.append(row)
  82. @property
  83. def errors(self):
  84. return self._errors
  85. def add_error(self, error):
  86. self._valid=False
  87. logger.error("Fichier %s - %s", self.file, error)
  88. self._errors.append(error)
  89. class Report():
  90. def __init__(self, title, report_files=[]):
  91. self.title = title
  92. self.report_files = report_files
  93. @property
  94. def valid(self):
  95. return all([r.valid for r in self.report_files])
  96. def to_json(self):
  97. return json.dumps(self)
  98. def check(subject, checker):
  99. """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
  100. subject, checker = Path(subject), Path(checker)
  101. if subject.isfile():
  102. with TempDir() as dirname:
  103. zip_ref = zipfile.ZipFile(subject, 'r')
  104. zip_ref.extractall(dirname)
  105. zip_ref.close()
  106. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  107. dirname /= subject.stem
  108. return check_folder(dirname, checker)
  109. elif subject.isdir():
  110. return check_folder(subject, checker)
  111. else:
  112. raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  113. def check_folder(folder, checker):
  114. logger.info("***** Traitement de '%s' *****", folder.name)
  115. logger.info("> Controlleur: '%s'", checker.name)
  116. report = Report("Contrôle des données de {} au format {}".format(folder.name, checker.stem))
  117. with open(checker, "r") as cf:
  118. config = yaml.load(cf)
  119. databases = {}
  120. for dbname, dsn in config.get("databases", {}).items():
  121. cnn = pypyodbc.connect(dsn)
  122. databases[dbname] = cnn
  123. for filename, model in config["files"].items():
  124. path_ = folder / filename
  125. logger.info("* Traitement de %s", path_.name)
  126. report_file = ReportFile(path_.name)
  127. report.report_files.append(report_file)
  128. if not path_.isfile():
  129. report_file.add_error("Fichier introuvable")
  130. continue
  131. try:
  132. sf = shapefile.Reader(path_)
  133. except shapefile.ShapefileException:
  134. report_file.add_error("Fichier illisible")
  135. continue
  136. if "srid" in config:
  137. pass
  138. xmin, xmax, ymin, ymax = (int(config.get("xmin", 0)),
  139. int(config.get("xmax", float("inf"))),
  140. int(config.get("ymin", 0)),
  141. int(config.get("ymax", float("inf")))
  142. )
  143. if "shape_type" in model:
  144. shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
  145. if sf.shapeType != model["shape_type"]:
  146. report_file.add_error("Le fichier shapefile n'est pas de type {}".format(shape_names[model["shape_type"]]))
  147. del sf
  148. continue
  149. records = sf.shapeRecords()
  150. if not records and not model["can_be_empty"]:
  151. report_file.add_error("Le fichier shapefile ne contient aucune donnees")
  152. del sf, records
  153. continue
  154. if not "fields" in model:
  155. continue
  156. fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
  157. report_file.headers = list(model["fields"].keys())
  158. # parcours et controle des enregistrements
  159. for i, record in enumerate(records):
  160. logger.info("\n> Enregistrement n°%s\n", i)
  161. report_record = ReportRecord(i)
  162. record_data = {field: record.record[i] for i, field in enumerate(fields)}
  163. try:
  164. x1, y1, x2, y2 = sf.shapes()[i].bbox
  165. except AttributeError:
  166. x1, y1 = sf.shapes()[i].points[0]
  167. x2, y2 = x1, y1
  168. if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
  169. not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
  170. report_record.add_error("L'élément est situé hors de la zone géographique autorisée")
  171. for fieldname, fieldmodel in model["fields"].items():
  172. report_field = ReportField(fieldname)
  173. try:
  174. val = record_data[fieldname]
  175. except KeyError:
  176. if fieldmodel.get("required", True):
  177. report_field.error = "Champs manquant"
  178. report_record.add_field(report_field)
  179. continue
  180. report_field.value = val
  181. type_ = fieldmodel.get("type", "str")
  182. if type_ == "int":
  183. try:
  184. _ = int(val)
  185. except (TypeError, ValueError):
  186. report_field.error = "Valeur Invalide, un nombre entier est attendu"
  187. report_record.add_field(report_field)
  188. continue
  189. elif type_ == "float":
  190. try:
  191. _ = float(val)
  192. except (TypeError, ValueError):
  193. report_field.error = "Valeur Invalide, un nombre décimal est attendu"
  194. report_record.add_field(report_field)
  195. continue
  196. elif type_ == "datetime":
  197. try:
  198. _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
  199. except ValueError:
  200. report_field.error = "Valeur Invalide, une date est attendue"
  201. report_record.add_field(report_field)
  202. continue
  203. else:
  204. if not fieldmodel.get("allow_empty", False) and not val:
  205. report_field.error = "Ce champs ne peut pas être vide"
  206. report_record.add_field(report_field)
  207. continue
  208. if type_ == "str" and "max_len" in fieldmodel:
  209. if len(str(val)) > fieldmodel["max_len"]:
  210. report_field.error = "Trop long, la longueur max. est de {}".format(fieldmodel["max_len"])
  211. report_record.add_field(report_field)
  212. continue
  213. try:
  214. if not val in fieldmodel["in_list"]:
  215. report_field.error = "Valeur invalide, pas dans la liste"
  216. report_record.add_field(report_field)
  217. continue
  218. except KeyError:
  219. pass
  220. # if "in_table" in fieldmodel:
  221. #
  222. # db = databases[fieldmodel["in_table"]["db"]]
  223. # cursor = db.cursor()
  224. # cursor.execute("SELECT DISTINCT {} FROM {};".format(fieldmodel["in_table"]["field"], fieldmodel["in_table"]["table"]))
  225. # rows = [val[0] for val in cursor.fetchall()]
  226. #
  227. # if not val in rows:
  228. # report_field.error = "Valeur invalide, pas dans la liste"
  229. # report_record.add_field(report_field)
  230. # continue
  231. report_record.add_field(report_field)
  232. report_file.add_record(report_record)
  233. del sf, records
  234. return report
  235. def render_to_html(report):
  236. with open(MAIN / "website" / "templates" / "report.j2", "r", encoding="utf-8") as f:
  237. template = Template(f.read())
  238. return template.render(report=report)
  239. if __name__ == "__main__":
  240. subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
  241. checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
  242. report = check(subject, checker)
  243. result = render_to_html(report)
  244. result_filename = tempfile.mktemp(".html", dir=MAIN / "website")
  245. with open(result_filename, "w+") as f:
  246. f.write(result)
  247. subprocess.run("start {}".format(result_filename), shell=True)
  248. logger.info("-- Fin --")