indexer.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import time
  2. from collections import deque
  3. from threading import Thread, Timer, Event
  4. import vlc
  5. from PyQt5.QtCore import pyqtSignal, QObject
  6. from path import Path
  7. from core import db, file_utilities
  8. from core.exceptions import NotSupportedFile
  9. from core.file_utilities import is_media_file_ext, hash_file
  10. from core.logging_ import Logger
  11. from core.models import Track
  12. from core.repositories import MusicFolderRepository, TrackRepository
  13. logger = Logger.get()
  14. class AlreadyIndexed(Exception):
  15. pass
  16. class Emitter(QObject):
  17. filesIndexed = pyqtSignal(list)
  18. musicFolderStatusChanged = pyqtSignal(int)
  19. class Indexer(Thread):
  20. DELAY = 2
  21. def __init__(self):
  22. Thread.__init__(self)
  23. self.stopped = Event()
  24. self.emitter = Emitter()
  25. self.timer = Timer(self.DELAY, self.act)
  26. def run(self):
  27. logger.info('** indexation thread started **')
  28. while not self.stopped.wait(self.DELAY):
  29. # logger.debug("... indexation")
  30. self.act()
  31. def act(self):
  32. # Initialize
  33. session = db.Session()
  34. music_folder_repo = MusicFolderRepository(session)
  35. track_repo = TrackRepository(session)
  36. # Get current data
  37. music_folders = music_folder_repo.get_all()
  38. tracks = track_repo.get_all()
  39. # Index existing
  40. index = {t.path: t for t in tracks}
  41. buffer = deque()
  42. # -- Walk through music folders
  43. # Put new files in buffer
  44. for music_folder in music_folders:
  45. music_folder_path = Path(music_folder.path)
  46. # music folder cant be found
  47. if not music_folder_path.exists():
  48. if music_folder.status == music_folder.STATUS_FOUND:
  49. music_folder.status = music_folder.STATUS_UNAVAILABLE
  50. music_folder_repo.commit()
  51. self.emitter.musicFolderStatusChanged.emit(music_folder.id)
  52. continue
  53. # music folder found
  54. if music_folder.status != music_folder.STATUS_FOUND:
  55. music_folder.status = music_folder.STATUS_FOUND
  56. music_folder_repo.commit()
  57. self.emitter.musicFolderStatusChanged.emit(music_folder.id)
  58. # walk files
  59. for filename in music_folder_path.walkfiles():
  60. # filename already seen
  61. if filename in buffer:
  62. continue
  63. # new file
  64. if filename not in index and is_media_file_ext(filename.ext):
  65. buffer.append(filename)
  66. # file already in db
  67. elif filename in index:
  68. track = index[filename]
  69. if track.status in (Track.STATUS_UNAVAILABLE, Track.STATUS_UNKNOWN):
  70. buffer.append(track.id)
  71. del index[filename]
  72. # Put missing files in buffer
  73. for filename, track in index.items():
  74. if track.id in buffer:
  75. continue
  76. filename = Path(filename)
  77. if not filename.exists() and track.status != Track.STATUS_UNAVAILABLE:
  78. buffer.append(track.id)
  79. # Index buffered tracks
  80. # NB: the tracks are treated from the end to the beginning, so missing files are treated before the new ones
  81. tracks = []
  82. while buffer:
  83. filename_or_id = buffer.pop()
  84. try:
  85. track = self.index(track_repo, filename_or_id, tracks)
  86. tracks.append(track)
  87. except AlreadyIndexed:
  88. pass
  89. except (FileNotFoundError, NotSupportedFile) as e:
  90. logger.warning("Error during indexation: %s" % e)
  91. continue
  92. except IndexError:
  93. break
  94. # Finalize
  95. if tracks:
  96. for track in tracks:
  97. if track.id is None:
  98. track_repo.create(track)
  99. track_repo.commit()
  100. self.emitter.filesIndexed.emit(tracks)
  101. logger.info(f"{len(tracks)} tracks indexed")
  102. @staticmethod
  103. def index(track_repo, filename_or_track_id, previously_indexed=None):
  104. """ index a media file from the filesystem or a track id """
  105. previously_indexed = previously_indexed or []
  106. if type(filename_or_track_id) is int:
  107. track = track_repo.get_by_id(filename_or_track_id)
  108. filename = Path(track.path)
  109. track_hash = track.hash
  110. if not filename.exists() and track.status != Track.STATUS_UNAVAILABLE:
  111. logger.debug('Index - missing: %s' % filename)
  112. track.status = Track.STATUS_UNAVAILABLE
  113. return track
  114. else:
  115. filename = Path(filename_or_track_id)
  116. if not filename.exists():
  117. raise FileNotFoundError(f"File not found: {filename}")
  118. if not is_media_file_ext(filename.ext):
  119. raise NotSupportedFile(f"File's extension {filename.ext} is not supported")
  120. track_hash = hash_file(filename)
  121. if any(t.hash == track_hash for t in previously_indexed):
  122. raise AlreadyIndexed(f"File already indexed")
  123. track = track_repo.get_by_hash(track_hash)
  124. if not track:
  125. track = Track()
  126. elif track.status == Track.STATUS_FOUND:
  127. raise AlreadyIndexed(f"File already indexed")
  128. vlc_media = vlc.Media(filename)
  129. vlc_media.parse()
  130. title = vlc_media.get_meta(vlc.Meta.Title)
  131. if not title or title == '(null)' or title == filename.name:
  132. title = filename.stripext().name
  133. track.title = title
  134. track.format = filename.ext
  135. track.artist = vlc_media.get_meta(vlc.Meta.AlbumArtist) or vlc_media.get_meta(vlc.Meta.Artist)
  136. track.album = vlc_media.get_meta(vlc.Meta.Album)
  137. track.track_num = vlc_media.get_meta(vlc.Meta.TrackNumber)
  138. track.duration = vlc_media.get_duration() // 1000
  139. track.note = ""
  140. track.status = Track.STATUS_FOUND
  141. track.path = filename
  142. track.hash = track_hash
  143. return track
  144. def stop(self):
  145. self.stopped.set()
  146. while self.is_alive():
  147. time.sleep(0.1)
  148. logger.info('** indexation thread stopped **')
  149. if __name__ == '__main__':
  150. indexer = Indexer()
  151. indexer.start()
  152. try:
  153. indexer.join()
  154. except KeyboardInterrupt:
  155. indexer.stop()