| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
import time
import urllib
import urllib.error
from pathlib import Path

from pytube import YouTube

from core import constants, logging_
from core.exceptions import YoutubeStreamNotFound
# Module-level retry limit. The class below reads its own
# YoutubeDownloader.MAX_ATTEMPTS attribute (same value) rather than this name.
MAX_ATTEMPTS = 5
# Logger obtained from the project's logging_ helper; used for the progress
# messages emitted while looking up and downloading a stream.
logger = logging_.get()
class YoutubeDownloader:
    """Locate and download a stream of a YouTube video through pytube.

    All methods are static; the class only groups the helpers and their
    configuration constants.
    """

    # Maximum number of times the stream lookup is attempted per URL.
    MAX_ATTEMPTS = 5
    # Pause between two failed lookup attempts, in seconds (time.sleep takes
    # seconds). The previous hard-coded value was 500, which blocked for more
    # than eight minutes per failure and looks like a milliseconds mix-up.
    RETRY_DELAY_SECONDS = 0.5
    # Directory the downloaded files are written to.
    OUTPUT_DIR = constants.APP_ROOT / 'data' / 'youtube'

    @staticmethod
    def get_audio_stream(yt):
        """Return the first usable stream of *yt*, or ``None`` if none match.

        The filters are tried in order: an audio-only stream first, then any
        stream whose file extension is ``mp4``.

        Args:
            yt: A pytube ``YouTube`` object.

        Returns:
            The first stream matching one of the filters, or ``None``.
        """
        filters = (
            {"only_audio": True},
            {"file_extension": 'mp4'},
        )
        streams = yt.streams
        for criteria in filters:
            stream = streams.filter(**criteria).first()
            if stream:
                return stream
        return None

    @staticmethod
    def get_stream(url, progress_callback=None, complete_callback=None):
        """Look up a downloadable stream for *url*, retrying on HTTP errors.

        Args:
            url: Address of the YouTube video.
            progress_callback: Passed to ``YouTube`` as its second positional
                argument.
            complete_callback: Passed to ``YouTube`` as its third positional
                argument.

        Returns:
            The stream selected by :meth:`get_audio_stream`.

        Raises:
            YoutubeStreamNotFound: If the video exposes no matching stream, or
                if every attempt failed with ``urllib.error.HTTPError``.
        """
        attempts = YoutubeDownloader.MAX_ATTEMPTS
        last_error = None
        for attempt in range(1, attempts + 1):
            logger.info(f"download {url} - attempt {attempt}/{attempts}")
            try:
                yt = YouTube(url, progress_callback, complete_callback)
                stream = YoutubeDownloader.get_audio_stream(yt)
            except urllib.error.HTTPError as err:
                last_error = err
                # Only wait when another attempt will actually follow.
                if attempt < attempts:
                    time.sleep(YoutubeDownloader.RETRY_DELAY_SECONDS)
                continue
            if stream is None:
                # The lookup succeeded but nothing matched: retrying cannot
                # help, and returning None would crash the caller later.
                raise YoutubeStreamNotFound('Stream not found')
            return stream
        raise YoutubeStreamNotFound('Stream not found') from last_error

    @staticmethod
    def download(url, progress_callback=None, complete_callback=None):
        """Download the selected stream of *url* into ``OUTPUT_DIR``.

        Args:
            url: Address of the YouTube video.
            progress_callback: Forwarded to :meth:`get_stream`.
            complete_callback: Forwarded to :meth:`get_stream`.

        Returns:
            ``pathlib.Path`` of the file written by pytube.

        Raises:
            YoutubeStreamNotFound: Propagated from :meth:`get_stream`.
        """
        logger.info(f"** Download {url} into {YoutubeDownloader.OUTPUT_DIR}")
        stream = YoutubeDownloader.get_stream(
            url,
            progress_callback,
            complete_callback,
        )
        logger.info("stream found - start downloading")
        # Use the path pytube reports: the file name on disk is derived from
        # the title but sanitised and given an extension, so
        # OUTPUT_DIR / stream.title does not point at the written file.
        saved_as = stream.download(
            output_path=YoutubeDownloader.OUTPUT_DIR,
            timeout=5,
            max_retries=3,
        )
        output_path = Path(saved_as)
        logger.info(f"> file downloaded as {output_path}")
        return output_path
if __name__ == "__main__":
    # Manual run when the module is executed as a script.
    # NOTE(review): the video id carries a trailing "XXXXXX" — presumably a
    # deliberately invalid URL used to exercise the failure path; confirm.
    sample_url = 'https://www.youtube.com/watch?v=nOWjX4BpC24XXXXXX'
    YoutubeDownloader.download(sample_url)
|