youtube.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. import time
  2. import urllib
  3. from pytube import YouTube
  4. from core import constants, logging_
  5. from core.exceptions import YoutubeStreamNotFound
  6. from core.logging_ import Logger
  7. MAX_ATTEMPTS = 5
  8. logger = Logger.get()
  9. class YoutubeDownloader:
  10. MAX_ATTEMPTS = 5
  11. OUTPUT_DIR = constants.APP_ROOT / 'data' / 'youtube'
  12. @staticmethod
  13. def get_audio_stream(yt):
  14. args = [
  15. {"only_audio": True},
  16. {"file_extension": 'mp4'}
  17. ]
  18. for a in args:
  19. stream = yt.streams \
  20. .filter(**a) \
  21. .first()
  22. if stream:
  23. return stream
  24. @staticmethod
  25. def get_stream(url, progress_callback=None, complete_callback=None):
  26. for i in range(1, YoutubeDownloader.MAX_ATTEMPTS + 1):
  27. logger.info(f"download {url} - attempt {i}/{YoutubeDownloader.MAX_ATTEMPTS}")
  28. try:
  29. yt = YouTube(url, progress_callback, complete_callback)
  30. return YoutubeDownloader.get_audio_stream(yt)
  31. except urllib.error.HTTPError:
  32. time.sleep(500)
  33. continue
  34. raise YoutubeStreamNotFound('Stream not found')
  35. @staticmethod
  36. def download(url, progress_callback=None, complete_callback=None):
  37. logger.info(f"** Download {url} into {YoutubeDownloader.OUTPUT_DIR}")
  38. stream = YoutubeDownloader.get_stream(
  39. url,
  40. progress_callback,
  41. complete_callback
  42. )
  43. logger.info(f"stream found - start downloading")
  44. stream.download(
  45. output_path=YoutubeDownloader.OUTPUT_DIR,
  46. timeout=5,
  47. max_retries=3
  48. )
  49. output_path = YoutubeDownloader.OUTPUT_DIR / stream.title
  50. logger.info(f"> file downloaded as {output_path}")
  51. return output_path
  52. if __name__ == "__main__":
  53. YoutubeDownloader.download(
  54. 'https://youtu.be/nOWjX4BpC24',
  55. )