#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Sound archiver ...
"""Sound archiver: sort music files into per-album directories.

Every file found under the given paths is assigned to a group -- first by
its album tag, then by its artist tag, then by the directory it shares with
files that are already grouped -- and each group is moved (or symlinked) to
``<archive>/<artist> - <album>/``.
"""

import errno
import sys
import os.path
import locale
import re
from glob import glob
from os import walk
import shutil

# Default archive path
ArchivePath = u'~/sharedFiles/Albums'

# Tag readers are optional: a missing binding only disables the matching
# file type instead of aborting the whole program.
try:
    import pyid3lib as ID3v2
except ImportError:
    ID3v2 = None
    sys.stderr.write('# No ID3 (MP3) support\n')
# Ogg.Vorbis module is also used, if available, to support Ogg Vorbis files


def _defaultEncoding():
    """Return the encoding of the default locale, or 'ascii' if unknown."""
    return locale.getdefaultlocale()[1] or 'ascii'


def _toUnicode(text, encoding):
    """Decode a byte string with *encoding*, ignoring undecodable bytes.

    Text that is not a byte string is returned unchanged, so a name that was
    already decoded by the caller is accepted as well.
    """
    if isinstance(text, bytes):
        return text.decode(encoding, 'ignore')
    return text


def _cleanTag(text):
    """Strip surrounding whitespace and remove NUL characters from *text*."""
    return text.strip().replace('\x00', '')


def _safeComponent(text):
    """Replace path separators in *text* with '-'.

    Tag text such as "AC/DC" would otherwise be split into nested
    directories when it is joined into a path.
    """
    for separator in (os.sep, os.altsep):
        if separator:
            text = text.replace(separator, u'-')
    return text


# Ordered (pattern, replacement) rules applied by normalizeName(); the order
# matters because later rules clean up what earlier ones produce.
_NAME_RULES = (
    (u'_+', u' '),
    (u'\\[', u' '),
    (u'\\]', u' '),
    (u'-+', u'-'),
    (u'-\\s+-', u'-'),
    (u'-\\s', u' - '),
    (u'\\s-', u' - '),
    (u'^ + -+ +', u''),
    (u' + -+ +$', u''),
    (u' +', u' '),
)


def normalizeName(uName):
    """Return *uName* with underscores, brackets and dashes tidied up.

    Underscores and square brackets become spaces, dash runs are collapsed
    and surrounded by single spaces, and runs of spaces are squeezed.
    """
    for pattern, replacement in _NAME_RULES:
        uName = re.sub(pattern, replacement, uName)
    return uName


# Maps a lower-case file extension (including the dot) to the class that
# wraps files of that type; '' is the fallback for unknown types.
FileTypes = {}


class AnyFile(object):
    """A file of unknown type: carries names and (empty) tag fields."""

    def __init__(self, rawFileName):
        """Wrap *rawFileName*, keeping both its raw and its normalized name."""
        self.__rawName = os.path.normpath(rawFileName)
        uName = _toUnicode(rawFileName, _defaultEncoding())
        uName = os.path.normpath(uName)
        uName, self.__extension = os.path.splitext(uName)
        # The normalized name still includes the directory part.
        self.setName(normalizeName(uName))
        self.__Artist = ''
        self.__Album = ''
        self.__Title = ''
        self.__Track = None
        self.__Year = None

    def isMusic(self):
        """Return True for file types that carry music tags."""
        return False

    def getRealFileName(self):
        """Return the normalized path of the file as it exists on disk."""
        return self.__rawName

    def getName(self):
        """Return the base name (no directory, no extension)."""
        return os.path.basename(self.__uName)

    def setName(self, name):
        """Set the name (optionally with a directory part)."""
        self.__uName = _cleanTag(name)

    def getDirectory(self):
        """Return the directory part of the current name."""
        return os.path.dirname(self.__uName)

    def getExtension(self):
        """Return the lower-cased extension, including the dot."""
        return self.__extension.lower()

    def getArtist(self):
        return self.__Artist

    def setArtist(self, artist):
        self.__Artist = _cleanTag(artist)

    def getAlbum(self):
        return self.__Album

    def setAlbum(self, album):
        self.__Album = _cleanTag(album)

    def getTitle(self):
        return self.__Title

    def setTitle(self, title):
        self.__Title = _cleanTag(title)

    def getTrack(self):
        return self.__Track

    def setTrack(self, track):
        """Store *track* as an int; falsy values leave the track unchanged."""
        if track:
            self.__Track = int(track)

    def getYear(self):
        return self.__Year

    def setYear(self, year):
        """Store *year* as an int; falsy values leave the year unchanged."""
        if year:
            self.__Year = int(year)


FileTypes[''] = AnyFile

try:
    import ogg.vorbis

    class OggVorbisFile(AnyFile):
        """An Ogg Vorbis file whose tags come from its Vorbis comments."""

        def __init__(self, rawFileName):
            AnyFile.__init__(self, rawFileName)
            vf = ogg.vorbis.VorbisFile(self.getRealFileName())
            vcs = vf.comment().as_dict()
            # Each comment maps to a list of values; the first one is used.
            if 'ARTIST' in vcs:
                self.setArtist(vcs['ARTIST'][0])
            if 'ALBUM' in vcs:
                self.setAlbum(vcs['ALBUM'][0])
            if 'TRACKNUMBER' in vcs:
                self.setTrack(vcs['TRACKNUMBER'][0])
            if 'TITLE' in vcs:
                self.setTitle(vcs['TITLE'][0])
            if 'YEAR' in vcs:
                self.setYear(vcs['YEAR'][0])

        def isMusic(self):
            return True

    FileTypes['.ogg'] = OggVorbisFile
except ImportError:
    sys.stderr.write('# No Ogg Vorbis support\n')


class MP3File(AnyFile):
    """An MP3 file whose tags are read through pyid3lib."""

    def __init__(self, rawFileName):
        AnyFile.__init__(self, rawFileName)
        try:
            id3info = ID3v2.tag(self.getRealFileName())
            textTags = (('artist', self.setArtist),
                        ('album', self.setAlbum),
                        ('title', self.setTitle))
            for attribute, setter in textTags:
                # A tag that is absent surfaces as AttributeError.
                try:
                    value = getattr(id3info, attribute)
                except AttributeError:
                    continue
                setter(_toUnicode(value, 'iso8859-1'))
            try:
                track = id3info.track
            except AttributeError:
                pass
            else:
                # NOTE(review): pyid3lib is expected to return the track as
                # a tuple (number,) or (number, total) -- confirm.  Taking
                # the first element accepts both; unpacking a single value
                # failed on the two-element form.
                self.setTrack(track[0])
        except Exception as e:
            # Best effort: a broken tag must not stop the whole run.
            print(e)
            sys.stderr.write("# ID3 Ooops for " + self.getRealFileName() + "\n")

    def isMusic(self):
        return True


if ID3v2 is not None:
    FileTypes['.mp3'] = MP3File


class Group(object):
    """A set of files that belong together (normally one album)."""

    def __init__(self):
        self.__Artist = ''
        self.__Album = ''
        self.__Directories = {}

    def addFile(self, file):
        """Add *file* and update the group's artist and album from it.

        The artist becomes 'VA' as soon as two files disagree on it
        (compared case-insensitively); the album is taken from the first
        file that has one.
        """
        artist = file.getArtist()
        if not self.__Artist:
            self.__Artist = artist
        elif artist and self.__Artist.lower() != artist.lower():
            self.__Artist = 'VA'
        if not self.__Album:
            self.__Album = file.getAlbum()
        self.__Directories.setdefault(file.getDirectory(), []).append(file)

    def getArtist(self):
        return self.__Artist

    def getAlbum(self):
        return self.__Album

    def getDirectories(self):
        """Return the mapping directory -> files of this group in it."""
        return self.__Directories

    def getFiles(self):
        """Return all files of the group as one flat list."""
        fileList = []
        for files in self.__Directories.values():
            fileList.extend(files)
        return fileList


class GroupManager(object):
    """Keeps the groups, addressed by name."""

    def __init__(self):
        self.__groups = {}

    def addFile(self, gName, file):
        """Add *file* to the group called *gName*, creating it if needed."""
        if gName not in self.__groups:
            self.__groups[gName] = Group()
        self.__groups[gName].addFile(file)

    def getGroups(self):
        return self.__groups.values()

    def getDirectories(self):
        """Return the mapping directory -> groups having files in it."""
        dirList = {}
        for group in self.getGroups():
            for dirName in group.getDirectories():
                dirList.setdefault(dirName, []).append(group)
        return dirList


def normalizeToMatch(s):
    """Return *s* escaped so that it matches literally as a regex.

    re.escape covers every metacharacter, including the backslash and '^'
    that a hand-written character list missed.
    """
    return re.escape(s)


def buildDB(filesByDir):
    """Return the mapping artist -> list of albums (both normalized).

    *filesByDir* maps a directory name to the list of files in it.
    """
    db = {}
    for fileList in filesByDir.values():
        for entry in fileList:
            artist = normalizeName(entry.getArtist()).lower()
            album = normalizeName(entry.getAlbum()).lower()
            db.setdefault(artist, []).append(album)
    return db


def _groupByTag(groupManager, filesByDir, prefix, fileTag, groupTag):
    """Shared implementation of groupByAlbum() and groupByArtist().

    Pass 1 puts every file with a non-empty *fileTag(file)* into the group
    named *prefix* + normalized tag.  Pass 2 adds, for every group, the
    still ungrouped files from the group's directories whose name contains
    *groupTag(group)*.  Returns (groupManager, remaining files by dir).
    """
    remaining = {}
    for dirName, fileList in filesByDir.items():
        leftovers = []
        for entry in fileList:
            tag = fileTag(entry)
            if tag:
                groupManager.addFile(prefix + normalizeName(tag.lower()), entry)
            else:
                leftovers.append(entry)
        remaining[dirName] = leftovers

    for group in groupManager.getGroups():
        tag = groupTag(group)
        if not tag:
            # An empty tag would give an empty pattern matching every file.
            continue
        pattern = re.compile(normalizeToMatch(tag), re.IGNORECASE)
        # Snapshot the keys: addFile() below touches the same dictionary.
        for dirName in list(group.getDirectories().keys()):
            if dirName not in remaining:
                continue
            unmatched = []
            # Build a new list rather than removing from the one iterated.
            for entry in remaining[dirName]:
                if pattern.search(entry.getName()):
                    group.addFile(entry)
                else:
                    unmatched.append(entry)
            remaining[dirName] = unmatched
    # Directories without any group are kept so later passes still see them.
    return groupManager, remaining


def groupByAlbum(groupManager, filesByDir):
    """Group files by album tag, then by album name found in file names.

    Returns (groupManager, files still ungrouped, by directory).
    """
    return _groupByTag(groupManager, filesByDir, u'ALBUM/',
                       lambda entry: entry.getAlbum(),
                       lambda group: group.getAlbum())


def groupByArtist(groupManager, filesByDir):
    """Group files by artist tag, then by artist name found in file names.

    Returns (groupManager, files still ungrouped, by directory).
    """
    return _groupByTag(groupManager, filesByDir, u'ARTIST/',
                       lambda entry: entry.getArtist(),
                       lambda group: group.getArtist())


def _pickGroup(entry, groups):
    """Return the group among *groups* that fits *entry* best, or None.

    A single candidate always wins.  Otherwise a group scores 2 when its
    album and 1 when its artist occurs in the file name; the first group
    with the highest non-zero score is returned.
    """
    if len(groups) == 1:
        return groups[0]
    name = entry.getName()
    best, bestScore = None, 0
    for group in groups:
        points = 0
        for tag, weight in ((group.getAlbum(), 2), (group.getArtist(), 1)):
            if tag and re.search(normalizeToMatch(tag), name, re.IGNORECASE):
                points += weight
        if points > bestScore:
            best, bestScore = group, points
    return best


def groupByDirectory(groupManager, filesByDir):
    """Attach ungrouped files to a group that lives in the same directory.

    Returns (groupManager, files still ungrouped, by directory).
    """
    # Directories that hold both grouped files and leftovers.
    usedDirs = {}
    for dirName, groups in groupManager.getDirectories().items():
        if filesByDir.get(dirName):
            usedDirs[dirName] = groups

    newFilesByDir = {}
    for dirName, fileList in filesByDir.items():
        leftovers = []
        for entry in fileList:
            groups = usedDirs.get(entry.getDirectory())
            target = _pickGroup(entry, groups) if groups else None
            if target is None:
                leftovers.append(entry)
            else:
                target.addFile(entry)
        newFilesByDir[dirName] = leftovers
    return groupManager, newFilesByDir


def findTrack(groupManager, filesByDir):
    """Guess missing track numbers from the file names.

    The group's artist and album are removed from the name first, then the
    first number delimited by spaces, dashes, brackets or underscores is
    used.  Returns its two arguments.
    """
    regexp = re.compile(r'(?:^|[ \-\[_])([0-9]+)(?:[ \-\]_]+)')
    for group in groupManager.getGroups():
        artistPattern = normalizeToMatch(group.getArtist())
        albumPattern = normalizeToMatch(group.getAlbum())
        for entry in group.getFiles():
            if entry.getTrack():
                continue
            fileName = re.sub(artistPattern, '', entry.getName())
            fileName = re.sub(albumPattern, '', fileName)
            m = regexp.search(fileName)
            if m is not None:
                entry.setTrack(int(m.group(1)))
    return groupManager, filesByDir


def makeNames(groupManager, filesByDir):
    """Rename tagged music files to '[NN] Title'.

    Only groups with both an artist and an album are touched.  Returns its
    two arguments.
    """
    for group in groupManager.getGroups():
        if not group.getArtist() or not group.getAlbum():
            continue
        for entry in group.getFiles():
            if entry.isMusic() and entry.getTitle() and entry.getTrack():
                # getName() is a basename: a '/' in the title would cut off
                # everything before it, including the track prefix.
                title = _safeComponent(entry.getTitle())
                entry.setName('[%02d] %s' % (entry.getTrack(), title))
    return groupManager, filesByDir


def _buildFile(fileName):
    """Wrap *fileName* in the class registered for its extension."""
    uName = _toUnicode(fileName, _defaultEncoding())
    extension = os.path.splitext(uName)[1].lower()
    for suffix, Factory in FileTypes.items():
        if extension == suffix.lower():
            return Factory(fileName)
    return AnyFile(fileName)


def main(archive, exportToDir=False, dryRun=False, symLink=False,
         pathSpecs=None):
    """Archive the files found under *pathSpecs* below *archive*.

    archive     -- target root; '~' and environment variables are expanded
    exportToDir -- accepted for compatibility; currently has no effect
    dryRun      -- only print what would be done
    symLink     -- create symbolic links instead of moving the files
    pathSpecs   -- files and directories to scan; when omitted, the
                   module-level ``pathSpecs`` set by the command line
                   entry point is used
    """
    if pathSpecs is None:
        pathSpecs = globals().get('pathSpecs') or []
    encoding = _defaultEncoding()
    archive = os.path.expandvars(os.path.expanduser(archive))
    # Decode once so joining with tag text cannot mix bytes and text.
    archive = _toUnicode(archive, encoding)

    # Build file list
    fileList = []
    for path in pathSpecs:
        if os.path.isfile(path):
            fileList.append(_buildFile(path))
        else:
            for base, _subDirs, names in walk(path):
                for name in names:
                    fileList.append(_buildFile(os.path.join(base, name)))

    filesByDir = {}
    for entry in fileList:
        filesByDir.setdefault(entry.getDirectory(), []).append(entry)

    # Build groups
    groupManager = GroupManager()
    groupManager, filesByDir = groupByAlbum(groupManager, filesByDir)
    groupManager, filesByDir = groupByArtist(groupManager, filesByDir)
    groupManager, filesByDir = groupByDirectory(groupManager, filesByDir)

    # Scan for tracks in the file name if the tag doesn't have a track
    groupManager, filesByDir = findTrack(groupManager, filesByDir)

    # Adjust the names as wished
    groupManager, filesByDir = makeNames(groupManager, filesByDir)

    # Final: create the target directories and transfer the files
    for group in groupManager.getGroups():
        dirName = '%s - %s' % (_safeComponent(group.getArtist()),
                               _safeComponent(group.getAlbum()))
        targetDir = os.path.join(archive, dirName)
        if dryRun:
            print('Creating %s' % targetDir.encode(encoding))
        else:
            try:
                os.makedirs(targetDir.encode(encoding))
            except OSError as e:
                # An already existing target directory is fine.
                if e.errno != errno.EEXIST:
                    raise
        for entry in group.getFiles():
            source = entry.getRealFileName()
            target = os.path.join(targetDir,
                                  entry.getName() + entry.getExtension())
            target = target.encode(encoding)
            if dryRun:
                print('%s -> %s' % (source, target))
            elif symLink:
                # Absolute source, so the link does not depend on the
                # directory the program was started from.
                os.symlink(os.path.abspath(source), target)
            else:
                shutil.move(source, target)

    # Report what could not be assigned to any group
    for dirName, leftovers in filesByDir.items():
        if leftovers:
            print(('Dir left out: ' + dirName).encode(encoding, 'ignore'))
            for entry in leftovers:
                leftName = entry.getName() + entry.getExtension()
                print((' ' + leftName).encode(encoding, 'ignore'))


if __name__ == '__main__':
    from getopt import getopt, GetoptError

    dryRun = False
    exportToDir = False
    archive = ArchivePath
    symLink = False
    try:
        options, pathSpecs = getopt(sys.argv[1:], 's', ['export=', 'dry-run'])
        for o, a in options:
            if o == '--dry-run':
                dryRun = True
            elif o == '-s':
                symLink = True
            elif o == '--export':
                exportToDir = True
                archive = a
        if len(pathSpecs) < 1:
            raise GetoptError(None, None)
    except GetoptError:
        # Print usage and exit
        print('Usage: %s [-s] [--dry-run] [--export=dst] path...' % sys.argv[0])
        print('')
        print(' -s')
        print(' symlink instead of move')
        print(' --export=dst')
        print(' archives into a tree under dst using shorter filenames')
        print(' --dry-run')
        print(" doesn't move files, just prints out a list of actions")
        print('')
        sys.exit(2)
    sys.exit(main(archive, exportToDir, dryRun, symLink, pathSpecs))