# clonedb.py
"""
Script de clonage des bases de données MySql
(requiert python 3.6+)

> Configuration: settings.yml

Usage:
  clonedb.py [-v] [-y] [<opname>...]
  clonedb.py (-h | --help)
  clonedb.py --version

Options:
  -y, --yes     Do not ask for confirmation
  -h --help     Show this screen.
  --version     Show version.

@author: olivier.massot, 05-2020
"""
  15. import logging
  16. import re
  17. from subprocess import Popen, PIPE, CalledProcessError
  18. import sys
  19. import pymysql
  20. import yaml
  21. from docopt import docopt
  22. from path import Path
  23. from core import logging_
  24. from core.docker import resolve_docker_ip
  25. from core.locker import Lockfile
  26. from core.pipe_handler import PipeHandler
  27. from core.ssh import SshTunnel
  28. from core.prompt import ask_confirmation
# Version string reported by `clonedb.py --version` (handed to docopt in the __main__ guard)
__VERSION__ = "0.2"

# Directory containing this script; settings, logs and the lock file are resolved against it
HERE = Path(__file__).parent

# Start logger
# The log directory must exist before the logger is started, since the log file lives in it
LOG_DIR = HERE / 'log'
LOG_DIR.mkdir_p()
logger = logging.getLogger('clonedb')
logging_.start("clonedb", filename=LOG_DIR / 'clonedb.log', replace=True)

# FIX the default ascii encoding on some linux dockers...
# stdout is re-opened on the same file descriptor as utf8 text, line-buffered (buffering=1)
sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)

# Options
# Display a per-table progression on stdout (also adds --verbose to the mysqldump command)
SHOW_PROGRESSION = True
# Forward every line read from the mysqldump / mysql pipes to the debug log
LOG_PIPES_OUTPUT = True
# Write each SQL query run through MySqlServer.exec_query to the debug log
LOG_MYSQL_QUERIES = True
# 1 GiB, given as max_allowed_packet to pymysql, mysqldump and the mysql restore session
MAX_ALLOWED_PACKET = 1073741824

# Utilities
  44. def load_settings():
  45. """ Load the settings from the 'settings.yml' file
  46. If there is no such file, the base settings.yml file is created
  47. """
  48. settings_file = HERE / 'settings.yml'
  49. if not settings_file.exists():
  50. Path(HERE / 'settings.yml.dist').copy(HERE / 'settings.yml')
  51. with open(settings_file, 'r') as f:
  52. return yaml.load(f, Loader=yaml.FullLoader)
  53. def _print(msg, end=False):
  54. msg = msg.ljust(80)
  55. print(f'\r{msg}', end='' if not end else '\n', flush=True)
  56. class MysqldumpHandler(PipeHandler):
  57. """ Handle and process the stdout / stderr output from a mysqldump process
  58. """
  59. _rx_prog = re.compile(r'Retrieving table structure for table (\w+)')
  60. _log_all = LOG_PIPES_OUTPUT
  61. def __init__(self, logger_name, level, total_prog):
  62. super().__init__(logger_name, level)
  63. self.total_prog = total_prog
  64. self.prog = 0
  65. self._last_logged = ""
  66. def process(self, line):
  67. """ Process the last line that was read
  68. """
  69. line = line.strip('\n')
  70. if SHOW_PROGRESSION:
  71. match = self._rx_prog.search(line)
  72. if match:
  73. self.log_new_table(match.group(1), "dumping")
  74. if self._log_all:
  75. logger.debug(line)
  76. def log_new_table(self, tname, action_name=""):
  77. if tname == self._last_logged:
  78. return
  79. self.prog += 1
  80. logger.debug('... %s %s', action_name, tname)
  81. _print(f'{action_name} `{tname}` [{self.prog} / {self.total_prog}]')
  82. self._last_logged = tname
  83. def log_end(self):
  84. _print(f'\r-- done --', end=True)
  85. def close(self):
  86. """ Close the write end of the pipe.
  87. """
  88. super().close()
  89. class MysqlHandler(MysqldumpHandler):
  90. """ Handle and process the stdout / stderr output from a mysql process
  91. """
  92. _rx_prog = re.compile(r'^((?:CREATE TABLE )|(?:INSERT INTO ))`(\w+)`')
  93. _log_all = LOG_PIPES_OUTPUT
  94. _action_name = "restoring"
  95. def process(self, line):
  96. """ Process the last line that was read
  97. """
  98. line = line.strip('\n')
  99. if SHOW_PROGRESSION:
  100. match = self._rx_prog.search(line)
  101. if match:
  102. action_name = "restoring {}".format('structure of'
  103. if 'CREATE' in match.group(1)
  104. else 'data of')
  105. self.log_new_table(match.group(2), action_name)
  106. if self._log_all:
  107. logger.debug(line)
  108. class MySqlServer:
  109. """ A server hosting a Mysql instance
  110. """
  111. def __init__(self, host, port, username, password, description="", ssh_tunnel=None):
  112. self.host = host
  113. self.port = port
  114. self.username = username
  115. self.password = password
  116. self.description = description[:30]
  117. self.ssh_tunnel = ssh_tunnel
  118. self.cnn = None
  119. self.active_db = ""
  120. def __repr__(self):
  121. s = f"{self.host}:{self.port} as {self.username}"
  122. if self.description:
  123. s = f"{self.description} ({s})"
  124. return s
  125. def connect(self, autocommit=True):
  126. """ Establish the connection to the Mysql server
  127. @see https://pymysql.readthedocs.io/en/latest/modules/connections.html
  128. """
  129. if self.ssh_tunnel:
  130. self.ssh_tunnel.start()
  131. host, port = self.ssh_tunnel.LOCAL_ADRESS
  132. else:
  133. host, port = self.host, self.port
  134. self.cnn = pymysql.connect(host=host,
  135. port=port,
  136. user=self.username,
  137. password=self.password,
  138. autocommit=autocommit,
  139. max_allowed_packet=MAX_ALLOWED_PACKET,
  140. )
  141. if not self.cnn.open:
  142. raise RuntimeError(f'Unable to connect to {self}')
  143. return self.cnn
  144. def set_active_db(self, dbname):
  145. """ set the active database
  146. """
  147. self.cnn.select_db(dbname)
  148. self.active_db = dbname
  149. def close(self):
  150. """ Close the connection to the database
  151. and the ssh tunnel if one is opened
  152. """
  153. if self.cnn:
  154. self.cnn.close()
  155. if self.ssh_tunnel:
  156. self.ssh_tunnel.stop()
  157. logger.debug(f'{self} - connection closed')
  158. def exec_query(self, sql):
  159. """ Execute the sql code and return the resulting cursor
  160. @see https://pymysql.readthedocs.io/en/latest/modules/cursors.html
  161. """
  162. self.cnn.ping(reconnect=True)
  163. cursor = self.cnn.cursor()
  164. if LOG_MYSQL_QUERIES:
  165. logger.debug(sql)
  166. cursor.execute(sql)
  167. return cursor
  168. def db_exists(self, dbname):
  169. """ Return True if the database exists
  170. """
  171. cursor = self.exec_query(f"""SELECT SCHEMA_NAME
  172. FROM INFORMATION_SCHEMA.SCHEMATA
  173. WHERE SCHEMA_NAME = '{dbname}'""")
  174. row = cursor.fetchone()
  175. return row is not None
  176. def list_tables(self, dbname=""):
  177. """ Return a list of tables (but not views!)
  178. for either the currently selected database,
  179. or the one given as a parameter"""
  180. cursor = self.exec_query(
  181. "SHOW FULL TABLES{} WHERE Table_type='BASE TABLE';".format(f" FROM {dbname}" if dbname else ""))
  182. return (row[0] for row in cursor.fetchall())
  183. def list_views(self, dbname=""):
  184. """ Return a list of views
  185. for either the currently selected database,
  186. or the one given as a parameter"""
  187. cursor = self.exec_query(
  188. "SHOW FULL TABLES{} WHERE Table_type='VIEW';".format(f" FROM {dbname}" if dbname else ""))
  189. return (row[0] for row in cursor.fetchall())
  190. def get_view_definition(self, view_name, set_definer=""):
  191. """ Return the SQL create statement for the view
  192. If 'set_definer' is not empty, the username in the 'SET DEFINER' part
  193. of the create statement is replace by the one given
  194. """
  195. cursor = self.exec_query(f"show create view {view_name}")
  196. definition = cursor.fetchone()[1]
  197. if set_definer:
  198. # force a new definer
  199. definition = re.sub(r'DEFINER=`\w+`@`[\w\-.]+`',
  200. f"DEFINER=`{set_definer}`@`\1`",
  201. definition)
  202. return definition
# Operation status
# (value of CloningOperation.status: UNKNOWN until run() is over, then SUCCESS or FAILURE)
UNKNOWN = 0
SUCCESS = 1
FAILURE = 2

# Behaviors for the tables cloning
# (one of these is assigned to each table of the source database by CloningOperation.run)
IGNORE = 0  # the table is not cloned at all
STRUCTURE_ONLY = 1  # the table is created on the target, but its rows are not copied
STRUCTURE_AND_DATA = 2  # -> default behavior
  211. class CloningOperation:
  212. """ A database cloning operation between two Mysql servers
  213. """
  214. def __init__(self, name, dbname, from_server, to_server, is_default=True, ignore_tables=None, structure_only=None,
  215. filter_tables=None, ignore_views=None, compress=True):
  216. self.name = name
  217. self.dbname = dbname
  218. self.from_server = from_server
  219. self.to_server = to_server
  220. self.is_default = is_default
  221. self.compress = compress
  222. self.ignore_tables = [re.compile(r) for r in ignore_tables] if ignore_tables else []
  223. self.structure_only = [re.compile(r) for r in structure_only] if structure_only else []
  224. self.filter_tables = [re.compile(r) for r in filter_tables] if filter_tables else []
  225. self.ignore_views = [re.compile(r) for r in ignore_views] if ignore_views else []
  226. self.status = UNKNOWN
  227. def __repr__(self):
  228. return f"Cloning {self.dbname} from {self.from_server} to {self.to_server}"
  229. def _build_dump_command(self, dump_options=None, tables=None):
  230. """ Build a mysqldump command line and return it as a
  231. ready-to-consume list for Popen
  232. @see https://dev.mysql.com/doc/refman/5.7/en/mysqldump.html#mysqldump-option-summary
  233. """
  234. tables = tables or []
  235. dump_options = dump_options or []
  236. base_cmd = ["mysqldump",
  237. "--single-transaction",
  238. "-u", self.from_server.username,
  239. f"--password={self.from_server.password}",
  240. f"--max-allowed-packet={MAX_ALLOWED_PACKET}",
  241. "--skip-add-drop-table",
  242. "--skip-add-locks",
  243. "--skip-comments",
  244. ]
  245. if self.compress:
  246. base_cmd.append("--compress")
  247. if SHOW_PROGRESSION:
  248. base_cmd.append("--verbose")
  249. if self.from_server.ssh_tunnel:
  250. host, port = self.from_server.ssh_tunnel.LOCAL_ADRESS
  251. base_cmd += ["--host", host,
  252. "--port", str(port)]
  253. return base_cmd + dump_options + [self.dbname] + tables
  254. def _build_restore_command(self):
  255. """ Build a mysql command line and return it as a
  256. ready-to-consume list for Popen
  257. @see https://dev.mysql.com/doc/refman/8.0/en/mysql-command-options.html#option_mysql_quick
  258. """
  259. init_command = f"set global max_allowed_packet={MAX_ALLOWED_PACKET};" \
  260. "set global wait_timeout=28800;" \
  261. "set global interactive_timeout=28800;"
  262. cmd = ["mysql",
  263. "-h", self.to_server.host,
  264. "-P", str(self.to_server.port),
  265. "-u", self.to_server.username,
  266. f"--password={self.to_server.password}",
  267. f"--init-command={init_command}",
  268. "--reconnect",
  269. "--quick",
  270. "--unbuffered",
  271. "--wait",
  272. "--verbose",
  273. "-D", self.dbname
  274. ]
  275. # if LOG_PIPES_OUTPUT:
  276. # cmd.append("--verbose")
  277. if self.compress:
  278. cmd.append("--compress")
  279. return cmd
  280. @staticmethod
  281. def _run_piped_processes(dump_cmd, restore_cmd, tbl_count):
  282. """ Run the dump and the restore commands by piping them
  283. The output of the mysqldump process is piped into the input of the mysql one
  284. """
  285. logger.debug(">>> Dump command: %s", " ".join(map(str, dump_cmd)))
  286. logger.debug(">>> Piped into: %s", " ".join(map(str, restore_cmd)))
  287. mysqldump_handler = MysqldumpHandler(logger.name, logging.INFO, tbl_count)
  288. mysql_handler = MysqlHandler(logger.name, logging.INFO, tbl_count)
  289. try:
  290. # noinspection PyTypeChecker
  291. with Popen(restore_cmd, stdin=PIPE, stdout=mysql_handler, stderr=mysql_handler) as mysql:
  292. # noinspection PyTypeChecker
  293. with Popen(dump_cmd, stdout=PIPE, stderr=mysqldump_handler) as mysqldump:
  294. mysql.stdin.write(mysqldump.stdout.read())
  295. if mysqldump.returncode:
  296. raise RuntimeError('mysqldump returned a non zero code')
  297. if mysql.returncode:
  298. raise RuntimeError('mysql returned a non zero code')
  299. mysql_handler.log_end()
  300. except (OSError, RuntimeError, CalledProcessError) as e:
  301. logger.error("Execution failed: %s", e)
  302. raise RuntimeError(f"An error happened at runtime: {e}")
  303. finally:
  304. mysqldump_handler.close()
  305. mysql_handler.close()
  306. def run(self):
  307. """ Run the cloning op
  308. """
  309. logger.info(f"*** Cloning {self.dbname} ***")
  310. logger.info(f"> From {self.from_server}")
  311. logger.info(f"> To {self.to_server}")
  312. try:
  313. self.from_server.connect()
  314. self.from_server.set_active_db(self.dbname)
  315. logger.debug('Connected to %s', self.from_server)
  316. self.to_server.connect()
  317. logger.debug('Connected to %s', self.to_server)
  318. tables = {}
  319. for tname in self.from_server.list_tables():
  320. if any(rx.match(tname) for rx in self.ignore_tables):
  321. tables[tname] = IGNORE
  322. elif self.filter_tables and not any(rx.match(tname) for rx in self.filter_tables):
  323. tables[tname] = IGNORE
  324. elif any(rx.match(tname) for rx in self.structure_only):
  325. tables[tname] = STRUCTURE_ONLY
  326. else:
  327. tables[tname] = STRUCTURE_AND_DATA
  328. restore_cmd = self._build_restore_command()
  329. # Dump structure: --single-transaction --no-data --routines {dbname} tbname1 tname2 ...
  330. dump_structure_for = [t for t, s in tables.items() if s != IGNORE]
  331. dump_structure_cmd = self._build_dump_command(["--no-data", "--routines"],
  332. dump_structure_for)
  333. # Dump data: --no-create-info --skip-triggers {dbname} tbname1 tname2 ...
  334. dump_data_for = [t for t, s in tables.items() if s == STRUCTURE_AND_DATA]
  335. dump_data_cmd = self._build_dump_command(["--no-create-info", "--skip-triggers"],
  336. dump_data_for)
  337. if tables and not dump_structure_for and not dump_data_for:
  338. logging.warning('No table will be cloned')
  339. # Recreate the target DB
  340. logger.info("(Re)create the database")
  341. self.to_server.exec_query(f"DROP DATABASE IF EXISTS `{self.dbname}`;")
  342. self.to_server.exec_query(f"CREATE SCHEMA `{self.dbname}`;")
  343. self.to_server.set_active_db(self.dbname)
  344. # Run mysqldump
  345. try:
  346. if dump_structure_for:
  347. logger.info(f"Cloning structure for {len(dump_structure_for)} tables (on {len(tables)})...")
  348. self._run_piped_processes(dump_structure_cmd, restore_cmd, len(dump_structure_for))
  349. if dump_data_for:
  350. logger.info(f"Cloning data for {len(dump_data_for)} tables (on {len(tables)})...")
  351. self._run_piped_processes(dump_data_cmd, restore_cmd, len(dump_data_for))
  352. logger.info(f"Cloning views...")
  353. self.from_server.set_active_db(self.dbname)
  354. self.to_server.set_active_db(self.dbname)
  355. for v in self.from_server.list_views(self.dbname):
  356. if any(rx.match(v) for rx in self.ignore_views):
  357. continue
  358. logger.debug('* cloning view %s', v)
  359. definition = self.from_server.get_view_definition(v, self.to_server.username)
  360. try:
  361. self.to_server.exec_query(definition)
  362. except (pymysql.err.ProgrammingError, pymysql.err.InternalError) as e:
  363. logger.error('Unable to create the internal view %s: %s', v, e)
  364. self.status = SUCCESS
  365. logger.info("> the database was successfully cloned")
  366. except RuntimeError:
  367. self.status = FAILURE
  368. logger.error("<!> An error happened while cloning the '%s' database", self.dbname)
  369. finally:
  370. self.from_server.close()
  371. self.to_server.close()
  372. def main(settings, arguments):
  373. prompt = not arguments["--yes"]
  374. logger.info("Start db cloning utility...")
  375. logger.debug(f"Settings: %s", str(settings).replace('\r', '').replace('\n', ''))
  376. logger.debug(f"Arguments: %s", str(arguments).replace('\r', '').replace('\n', ''))
  377. # Load the servers' configuration
  378. servers = {}
  379. for server_name, server_settings in settings['servers'].items():
  380. hostname = server_settings['host']
  381. match = re.search(r"^docker:(\w+)$", hostname)
  382. if match:
  383. logger.debug("resolve IP for docker %s", match.group(1))
  384. ip = resolve_docker_ip(match.group(1))
  385. logger.debug("substitute '%s' to '%s' as hostname", ip, hostname)
  386. hostname = ip
  387. if 'ssh' in server_settings:
  388. ssh_tunnel = SshTunnel(hostname, server_settings['mysql']['port'], **server_settings['ssh'])
  389. else:
  390. ssh_tunnel = None
  391. server = MySqlServer(hostname,
  392. **server_settings['mysql'],
  393. description=server_settings['description'],
  394. ssh_tunnel=ssh_tunnel)
  395. servers[server_name] = server
  396. # Load the cloning ops' configuration
  397. ops = {}
  398. for name, args in settings['operations'].items():
  399. dbname = args['dbname']
  400. from_server = servers[args['from_server']]
  401. to_server = servers[args['to_server']]
  402. kwargs = {k: v for k, v in args.items() if k not in ('dbname', 'from_server', 'to_server')}
  403. op = CloningOperation(name, dbname, from_server, to_server, **kwargs)
  404. ops[name] = op
  405. # Operations to launch
  406. if arguments.get('<opname>', None):
  407. selected_ops = []
  408. for opname in arguments['<opname>']:
  409. try:
  410. selected_ops.append(ops[opname])
  411. except KeyError:
  412. logger.error('No operation found with name %s', opname)
  413. else:
  414. selected_ops = [op for op in ops.values() if op.is_default]
  415. if not selected_ops:
  416. raise RuntimeError('No operation to launch')
  417. # Ask for confirmation (except if '--yes' is in arguments)
  418. if prompt:
  419. logger.debug('Ask for confirmation...')
  420. msg = "The following operations will be launched:\n{}\n" \
  421. "WARNING: the existing local databases will be replaced" \
  422. "".format("\n".join(f"* {op}" for op in selected_ops))
  423. if not ask_confirmation(msg):
  424. logger.info("-- Operation cancelled by user --")
  425. return
  426. logger.debug('> User confirmed')
  427. # Run the cloning operations
  428. for op in selected_ops:
  429. op.run()
  430. failures = [op.name for op in selected_ops if op.status == FAILURE]
  431. if failures:
  432. logger.error("WARNING! the following operations failed: %s", ', '.join(failures))
  433. if __name__ == '__main__':
  434. # load settings from settings.yml file
  435. settings = load_settings()
  436. # parse CLI arguments
  437. arguments = docopt(__doc__, help=__doc__, version=__VERSION__)
  438. with Lockfile(path=HERE / '.clonedb.lock',
  439. on_error=lambda: logger.critical("A cloning process is already running, please wait...")):
  440. main(settings, arguments)