youtube.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
import time
import urllib
import urllib.error
from pathlib import Path

from pytube import YouTube

from core import constants, logging_
from core.exceptions import YoutubeStreamNotFound
  6. MAX_ATTEMPTS = 5
  7. logger = logging_.get()
  8. class YoutubeDownloader:
  9. MAX_ATTEMPTS = 5
  10. OUTPUT_DIR = constants.APP_ROOT / 'data' / 'youtube'
  11. @staticmethod
  12. def get_audio_stream(yt):
  13. args = [
  14. {"only_audio": True},
  15. {"file_extension": 'mp4'}
  16. ]
  17. for a in args:
  18. stream = yt.streams \
  19. .filter(**a) \
  20. .first()
  21. if stream:
  22. return stream
  23. @staticmethod
  24. def get_stream(url, progress_callback=None, complete_callback=None):
  25. for i in range(1, YoutubeDownloader.MAX_ATTEMPTS + 1):
  26. logger.info(f"download {url} - attempt {i}/{YoutubeDownloader.MAX_ATTEMPTS}")
  27. try:
  28. yt = YouTube(url, progress_callback, complete_callback)
  29. return YoutubeDownloader.get_audio_stream(yt)
  30. except urllib.error.HTTPError:
  31. time.sleep(500)
  32. continue
  33. raise YoutubeStreamNotFound('Stream not found')
  34. @staticmethod
  35. def download(url, progress_callback=None, complete_callback=None):
  36. logger.info(f"** Download {url} into {YoutubeDownloader.OUTPUT_DIR}")
  37. stream = YoutubeDownloader.get_stream(
  38. url,
  39. progress_callback,
  40. complete_callback
  41. )
  42. logger.info(f"stream found - start downloading")
  43. stream.download(
  44. output_path=YoutubeDownloader.OUTPUT_DIR,
  45. timeout=5,
  46. max_retries=3
  47. )
  48. output_path = YoutubeDownloader.OUTPUT_DIR / stream.title
  49. logger.info(f"> file downloaded as {output_path}")
  50. return output_path
  51. if __name__ == "__main__":
  52. YoutubeDownloader.download(
  53. 'https://www.youtube.com/watch?v=nOWjX4BpC24XXXXXX',
  54. )