"""Download the audio track of a YouTube video using pytube."""
import time
import urllib
import urllib.error  # explicit: ``import urllib`` alone does not load the submodule
from pathlib import Path

from pytube import YouTube

from core import constants, logging_
from core.exceptions import YoutubeStreamNotFound
from core.logging_ import Logger

# Module-level copy kept for backward compatibility with any external importer;
# the class below reads its own ``YoutubeDownloader.MAX_ATTEMPTS``.
MAX_ATTEMPTS = 5

logger = Logger.get()


class YoutubeDownloader:
    """Static helpers that locate and download a stream for a YouTube URL."""

    # Number of times ``get_stream`` tries to build the YouTube object and
    # pick a stream before giving up.
    MAX_ATTEMPTS = 5

    # Pause between two attempts, in seconds. The previous code called
    # ``time.sleep(500)``, i.e. more than eight minutes per retry.
    # NOTE(review): 500 was presumably meant as milliseconds - confirm the
    # intended delay and adjust this constant if needed.
    RETRY_DELAY_SECONDS = 0.5

    # Directory handed to pytube as the download target.
    OUTPUT_DIR = constants.APP_ROOT / 'data' / 'youtube'

    @staticmethod
    def get_audio_stream(yt):
        """Return the first stream of *yt* matching one of the filters.

        The filters are tried in order: audio-only streams first, then any
        stream with an ``mp4`` file extension.

        Args:
            yt: Object exposing ``streams.filter(**kwargs).first()``
                (a pytube ``YouTube`` instance in this module).

        Returns:
            The first truthy stream found, or ``None`` when no filter
            matches anything.
        """
        filter_sets = (
            {"only_audio": True},
            {"file_extension": 'mp4'},
        )
        for filters in filter_sets:
            stream = yt.streams.filter(**filters).first()
            if stream:
                return stream
        return None

    @staticmethod
    def get_stream(url, progress_callback=None, complete_callback=None):
        """Resolve a downloadable stream for *url*, retrying on HTTP errors.

        Args:
            url: Address of the YouTube video.
            progress_callback: Passed positionally to ``YouTube`` as its
                second argument.
            complete_callback: Passed positionally to ``YouTube`` as its
                third argument.

        Returns:
            The stream selected by ``get_audio_stream``.

        Raises:
            YoutubeStreamNotFound: If the video has no matching stream, or
                every attempt failed with ``urllib.error.HTTPError``.
        """
        attempts = YoutubeDownloader.MAX_ATTEMPTS
        last_error = None

        for attempt in range(1, attempts + 1):
            logger.info(f"download {url} - attempt {attempt}/{attempts}")
            try:
                yt = YouTube(url, progress_callback, complete_callback)
                stream = YoutubeDownloader.get_audio_stream(yt)
            except urllib.error.HTTPError as err:
                last_error = err
                # No point in waiting once the last attempt has failed.
                if attempt < attempts:
                    time.sleep(YoutubeDownloader.RETRY_DELAY_SECONDS)
                continue

            if stream is None:
                # Previously ``None`` leaked to the caller, which then failed
                # with an AttributeError on ``stream.download``.
                raise YoutubeStreamNotFound('Stream not found')
            return stream

        # Keep the last HTTP error attached so the root cause stays visible.
        raise YoutubeStreamNotFound('Stream not found') from last_error

    @staticmethod
    def download(url: str, progress_callback=None, complete_callback=None) -> Path:
        """Download the stream for *url* into ``OUTPUT_DIR``.

        Args:
            url: Address of the YouTube video.
            progress_callback: Forwarded to ``get_stream``.
            complete_callback: Forwarded to ``get_stream``.

        Returns:
            Path of the downloaded file as reported by ``stream.download``.

        Raises:
            YoutubeStreamNotFound: Propagated from ``get_stream``.
        """
        logger.info(f"** Download {url} into {YoutubeDownloader.OUTPUT_DIR}")
        stream = YoutubeDownloader.get_stream(
            url,
            progress_callback,
            complete_callback
        )

        logger.info("stream found - start downloading")
        saved_to = stream.download(
            output_path=YoutubeDownloader.OUTPUT_DIR,
            timeout=5,
            max_retries=3
        )

        # Use the path reported by pytube rather than ``OUTPUT_DIR / title``:
        # the title carries no file extension and may differ from the file
        # name actually written to disk.
        output_path = Path(saved_to)
        logger.info(f"> file downloaded as {output_path}")
        return output_path


if __name__ == "__main__":
    YoutubeDownloader.download(
        'https://youtu.be/nOWjX4BpC24',
    )