"""Download a stream of a YouTube video (audio-only preferred) via pytube."""
import time
import urllib.error  # explicit submodule import; plain `import urllib` does not load it

from pytube import YouTube

from core import constants, logging_
from core.exceptions import YoutubeStreamNotFound

MAX_ATTEMPTS = 5
# Seconds to wait after an HTTP error before the next attempt.
# NOTE(review): 500 s is a long pause — confirm milliseconds were not intended.
RETRY_DELAY_SECONDS = 500

logger = logging_.get("mew")


class YoutubeDownloader:
    """Namespace of static helpers that locate and download a YouTube stream."""

    # Directory handed to pytube as the download target.
    output_dir = constants.APP_ROOT / 'data' / 'youtube'

    @staticmethod
    def get_audio_stream(yt):
        """Return the first stream of *yt* matching the preferred filters.

        Filters are tried in order: audio-only streams first, then any stream
        with an ``mp4`` file extension.

        Args:
            yt: A ``pytube.YouTube`` object.

        Returns:
            The first matching stream, or ``None`` when no filter matches.
        """
        filters = (
            {"only_audio": True},
            {"file_extension": 'mp4'},
        )
        for criteria in filters:
            stream = yt.streams.filter(**criteria).first()
            if stream:
                return stream
        return None

    @staticmethod
    def get_stream(url):
        """Resolve a stream for *url*, retrying on HTTP errors.

        Up to ``MAX_ATTEMPTS`` attempts are made. After a failed attempt the
        call sleeps ``RETRY_DELAY_SECONDS`` before the next one; no sleep
        follows the final attempt, since nothing would be retried afterwards.

        Args:
            url: URL of the YouTube video.

        Returns:
            The stream chosen by ``get_audio_stream``, or ``None`` when no
            stream matched or every attempt raised ``HTTPError``.
        """
        for attempt in range(1, MAX_ATTEMPTS + 1):
            logger.info(f"download {url} - attempt {attempt}/{MAX_ATTEMPTS}")
            try:
                yt = YouTube(url)
                return YoutubeDownloader.get_audio_stream(yt)
            except urllib.error.HTTPError as err:
                logger.info(f"attempt {attempt}/{MAX_ATTEMPTS} failed: {err}")
                if attempt < MAX_ATTEMPTS:
                    time.sleep(RETRY_DELAY_SECONDS)
        return None

    @staticmethod
    def download(url):
        """Download the stream found for *url* into ``output_dir``.

        Args:
            url: URL of the YouTube video.

        Returns:
            The title of the downloaded stream (``stream.title``).

        Raises:
            YoutubeStreamNotFound: If no stream could be resolved for *url*.
        """
        logger.info(f"** Download {url} into {YoutubeDownloader.output_dir}")

        stream = YoutubeDownloader.get_stream(url)
        if stream is None:
            raise YoutubeStreamNotFound('Stream not found')

        logger.info("stream found - start downloading")
        saved_path = stream.download(
            output_path=YoutubeDownloader.output_dir,
            timeout=3,
            max_retries=5,
        )

        # Callers receive the title, as before; the log reports the path that
        # download() returned rather than one rebuilt from the title.
        fname = stream.title
        logger.info(f"> file downloaded as {saved_path}")
        return fname


if __name__ == "__main__":
    YoutubeDownloader.download(
        'https://www.youtube.com/watch?v=nOWjX4BpC24XXXXXX'
    )