Browse Source

add a basic tracks indexer

Olivier Massot 4 years ago
parent
commit
771dd27e65

+ 1 - 0
.gitignore

@@ -8,3 +8,4 @@ htmlcov/
 .coverage
 
 /.project
+*.log

+ 30 - 0
core/file_utilities.py

@@ -0,0 +1,30 @@
+import hashlib
+import mimetypes
+
+MEDIA_EXTS_CACHE = []
+
+
+def media_exts():
+    """ Extensions mapped to an audio or video type by the local system mimetypes (cached in MEDIA_EXTS_CACHE) """
+    if MEDIA_EXTS_CACHE:
+        return MEDIA_EXTS_CACHE
+    mimetypes.init()
+    media_kinds = ('audio', 'video')
+    MEDIA_EXTS_CACHE.extend(e for e, mime in mimetypes.types_map.items() if mime.split('/')[0] in media_kinds)
+    return MEDIA_EXTS_CACHE
+
+
+def is_media_file_ext(ext):
+    """ Tell whether the extension (case and leading dots ignored) is a media one for the local system mimetypes """
+    return ext.lower().lstrip('.') in {known.lower().lstrip('.') for known in media_exts()}
+
+
+HASHER = hashlib.md5()  # kept for backward compatibility only: hash_file no longer uses it
+
+
+def hash_file(filename):
+    """ Return the MD5 hex digest of the content of the given file """
+    hasher = hashlib.md5()  # fresh object per call: update() accumulates, a shared hasher would chain all files
+    with open(filename, 'rb') as f:
+        hasher.update(f.read())
+    return hasher.hexdigest()

+ 111 - 0
core/indexer.py

@@ -0,0 +1,111 @@
+import time
+
+import vlc
+from path import Path
+
+from core.file_utilities import is_media_file_ext, hash_file
+from core.models import Track
+from core.repositories import MusicFolderRepository, TrackRepository
+
+
+class Indexation:
+    """ A single indexation run: scans music folders and stores the media files found as tracks """
+
+    def __init__(self):
+        self.started = False
+        self.music_folder_repo = MusicFolderRepository()
+        self.track_repo = TrackRepository()
+        self.music_folders = []
+        self.index = {}
+        self.processed = set()
+        self.t0 = time.time()
+
+    def start(self):
+        """ Load the stored tracks (keyed by hash) and reset the state of the run """
+        self.index = {t.hash: t for t in self.track_repo.get_all()}
+        self.processed = set()
+        self.t0 = time.time()
+        self.started = True
+
+    def index_file(self, music_folder, filename):
+        """ Create or update the track matching the given file of the given music folder """
+        if not self.started:
+            self.start()
+
+        filename = Path(filename)  # accept plain strings too: `.ext` is read below
+        # hash first: a content already handled during this run is skipped before the costly VLC parsing
+        track_hash = hash_file(filename)
+        if track_hash in self.processed:
+            print(" ... file already indexed, ignore: ", filename)
+            return
+
+        vlc_media = vlc.Media(filename)
+        vlc_media.parse()
+
+        # reuse the stored track when this content is already known, so that it gets updated instead of duplicated
+        track = self.index.get(track_hash)
+        if track is None:
+            track = Track()
+
+        track.profile_id = 0
+        track.music_folder_id = music_folder.id
+        track.title = vlc_media.get_meta(vlc.Meta.Title)
+        track.format = filename.ext
+        track.artist = vlc_media.get_meta(vlc.Meta.AlbumArtist) or vlc_media.get_meta(vlc.Meta.Artist)
+        track.album = vlc_media.get_meta(vlc.Meta.Album)
+        track.track_num = vlc_media.get_meta(vlc.Meta.TrackNumber)
+        # TODO: year, duration and size are not extracted yet
+        track.note = ""
+        track.status = Track.STATUS_FOUND
+        track.path = filename
+        track.hash = track_hash
+
+        if track.id is not None:
+            self.track_repo.update(track)
+            print('updated', filename)
+        else:
+            self.track_repo.create(track)
+            print('created', filename)
+        self.track_repo.commit()
+
+        self.processed.add(track_hash)
+
+    def index_folder(self, music_folder):
+        """ Index every file with a media extension yielded by walkfiles() on the music folder path """
+        if not self.started:
+            self.start()
+
+        for filename in Path(music_folder.path).walkfiles():
+            if not is_media_file_ext(filename.ext):
+                print('   ...  ignored ... ', filename)
+                continue
+            self.index_file(music_folder, filename)
+
+    def index_all(self):
+        """ Start a new run and index all the music folders returned by the repository """
+        self.start()
+
+        for music_folder in self.music_folder_repo.get_all():
+            self.index_folder(music_folder)
+
+
+class Indexer:
+    """ Entry points of the indexation: each call runs on its own new Indexation """
+    @staticmethod
+    def index_file(music_folder, path):
+        """ Index a single file of the given music folder """
+        Indexation().index_file(music_folder, path)
+
+    @staticmethod
+    def index_folder(music_folder):
+        """ Index the files of the given music folder """
+        Indexation().index_folder(music_folder)
+
+    @staticmethod
+    def index_all():
+        """ Index the files of all the music folders """
+        Indexation().index_all()
+
+
+if __name__ == '__main__':
+    Indexer.index_all()  # run as a script: index every music folder returned by the repository

+ 9 - 4
core/models.py

@@ -3,10 +3,10 @@ class Model:
     def __init__(self, id_=None):
         self.id = id_
 
-    def as_fields_and_values(self):
+    def as_fields_and_values(self, exclude_id=False):
         fields, values = [], []
         for attr, val in self.__dict__.items():
-            if attr[0] == '_' or val is None:
+            if attr[0] == '_' or val is None or (exclude_id and attr == 'id'):
                 continue
             fields.append(attr)
             values.append(val)
@@ -42,14 +42,19 @@ class Tag(Model):
 
 
 class Track(Model):
-    def __init__(self, id_=None, profile_id=None, music_folder_id=None, name=None,
+    STATUS_UNKNOWN = 0
+    STATUS_FOUND = 1
+    STATUS_UNAVAILABLE = 2
+    STATUS_UNREADABLE = 3
+
+    def __init__(self, id_=None, profile_id=None, music_folder_id=None, title=None,
                  format_=None, artist=None, album=None, track_num=None, year=None,
                  duration=None, size=None, note=None, status=None, path_=None,
                  hash_=None, origin=None):
         super().__init__(id_)
         self.profile_id = profile_id
         self.music_folder_id = music_folder_id
-        self.name = name
+        self.title = title
         self.format = format_
         self.artist = artist
         self.album = album

+ 4 - 3
core/repositories.py

@@ -39,7 +39,7 @@ class Repository:
         return [self.MODEL_CLS(*row) for row in cur.fetchall()]
 
     def create(self, model, commit=False):
-        fields, values = model.as_fields_and_values()
+        fields, values = model.as_fields_and_values(True)
         self.execute(
             f"INSERT INTO {self.TABLE_NAME} ({', '.join(fields)}) VALUES ({', '.join(['?' for v in values])});",
             *values
@@ -48,9 +48,10 @@ class Repository:
             self.commit()
 
     def update(self, model, commit=False):
-        fields, values = model.as_fields_and_values()
+        fields, values = model.as_fields_and_values(True)
+        values.append(model.id)
         self.execute(
-            f"UPDATE {self.TABLE_NAME} SET ({'=?,'.join(fields)});",
+            f"UPDATE {self.TABLE_NAME} SET {', '.join([f'{f}=?' for f in fields])} WHERE id=?;",
             *values
         )
         if commit:

+ 0 - 9
core/track_index.py

@@ -1,9 +0,0 @@
-from core import constants
-
-
-class TrackIndex:
-    def __init__(self):
-        pass
-
-    def scan(self):
-        pass

+ 1 - 2
core/player.py → core/vlc_.py

@@ -22,7 +22,7 @@ A simple example for VLC python bindings using PyQt5.
 Author: Saveliy Yusufov, Columbia University, sy2685@columbia.edu
 Date: 25 December 2018
 """
-
+import mimetypes
 import platform
 import os
 import sys
@@ -35,7 +35,6 @@ os.environ['PYTHON_VLC_LIB_PATH'] = constants.APP_ROOT / 'core' / 'vlc-core' / '
 if 1:
     import vlc
 
-
 class Player(QtWidgets.QMainWindow):
     """A simple Media Player using VLC and Qt
     """

+ 0 - 0
data/youtube/(Ghost) Riders In the Sky (American Outlaws Live at Nassau Coliseum 1990).mp4 → data/default/youtube/(Ghost) Riders In the Sky (American Outlaws Live at Nassau Coliseum 1990).mp4


+ 1 - 1
requirements.txt

@@ -1,6 +1,6 @@
 PyQt5
 PyQt5-stubs
-python-vlc=3.0.12
+python-vlc~=3.0
 path.py
 pyyaml
 pytube