youtube.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. import time
  2. import urllib
  3. from pytube import YouTube
  4. from core import constants, logging_
  5. from core.exceptions import YoutubeStreamNotFound
  6. MAX_ATTEMPTS = 5
  7. logger = logging_.get("mew")
  8. class YoutubeDownloader:
  9. output_dir = constants.APP_ROOT / 'data' / 'youtube'
  10. @staticmethod
  11. def get_audio_stream(yt):
  12. args = [
  13. {"only_audio": True},
  14. {"file_extension": 'mp4'}
  15. ]
  16. for a in args:
  17. stream = yt.streams \
  18. .filter(**a) \
  19. .first()
  20. if stream:
  21. return stream
  22. @staticmethod
  23. def get_stream(url):
  24. for i in range(1, MAX_ATTEMPTS + 1):
  25. logger.info(f"download {url} - attempt {i}/{MAX_ATTEMPTS}")
  26. try:
  27. yt = YouTube(url)
  28. return YoutubeDownloader.get_audio_stream(yt)
  29. except urllib.error.HTTPError:
  30. time.sleep(500)
  31. continue
  32. @staticmethod
  33. def download(url):
  34. logger.info(f"** Download {url} into {YoutubeDownloader.output_dir}")
  35. stream = YoutubeDownloader.get_stream(url)
  36. if stream is None:
  37. raise YoutubeStreamNotFound('Stream not found')
  38. logger.info(f"stream found - start downloading")
  39. stream.download(
  40. output_path=YoutubeDownloader.output_dir,
  41. timeout=3,
  42. max_retries=5
  43. )
  44. fname = stream.title
  45. logger.info(f"> file downloaded as {YoutubeDownloader.output_dir}/{fname}")
  46. return fname
  47. if __name__ == "__main__":
  48. YoutubeDownloader.download(
  49. 'https://www.youtube.com/watch?v=nOWjX4BpC24XXXXXX'
  50. )