by Scottes » Fri Aug 03, 2012 9:09 pm
About 10 years ago I ripped all my CDs to MP3s. Over the years I've done a LOT of work on the tags, getting them to be how I want them. Much of that time was spent rating my MP3s. (Do you have any idea how long it takes to rate 12,000 songs?)
Now that hard drive space is much cheaper, I decided to re-rip all my CDs to FLAC. I am now going through the process of correcting the tags on those FLACs. Since they're the same corrections that I made on the MP3s, I figured that it would be easier to copy the info from the MP3s to the FLACs.
I almost always work in Python, and I've been working on some Python scripts that query MusicBrainz. And it's been 10+ years since I've done VB programming. So I started playing with Python to script against the MediaMonkey database. And I struggled a bit, since I'm not a great programmer and I could not find many Python examples for programming against MM.
Due to a little prodding from Lowlander, I'm posting the following script and hoping that one single Python programmer will get a leg up.
Some of the things in this script that I think beginning Python-MediaMonkey programmers might find useful:
- Simple routines to use QuerySongs, step through the results, and update the database and tags
- The use of codecs.open to be able to log non-ASCII strings like Artist names with accented characters.
- The Levenshtein fuzzy compare of the "Artist + SongTitle" phrases
I found the Levenshtein fuzzy match stuff quite useful. I wrote another script (which I will post once I improve it) which:
- Grabs every unique ArtistName from MediaMonkey
- Compares each ArtistName to every other ArtistName
- Logs out the near-matches
- Then does it all for SongTitles and Albums, too.
This allowed me to find every variation of, as an example:
Crosby, Stills, Nash & Young
Crosby, Stills, Nash and Young
Crosby, Stills, Nash, and Young
Crosby Stills Nash and Young
etc.
Then I was able to filter them in MM and make them all identical. I used to have 6 different variations of CSN&Y, now I have 1. Etcetera. I think that I reduced the number of unique artist names by 30%, which is a whopping improvement for those of us who are fanatic about tags and too lazy to search by hand. Or too eye-strained to see the sometimes subtle differences that can occur.
If you get any use out of this code, please say so.
If you improve it, please post those improvements.
Code: Select all
#!/usr/bin/env python
''' Get unique list of all MP3 songs that have been rated
- Ignore songs with multiple entries, and ignore the rating
Get list of all FLACs
Do a Levenshtein fuzzy compare of the "Artist + SongTitle"
(http://code.google.com/p/pylevenshtein/)
For any matches, copy the Rating from the MP3 to the matching FLAC
Update the database and tags
'''
import win32com.client
import sys
import time
import Levenshtein
import codecs
def comp(mp3s, flacs):
    """Fuzzy-compare every MP3 key against every FLAC key.

    mp3s  -- dict mapping "Artist~Title" to the MP3's rating
    flacs -- iterable of "Artist~Title" strings for the FLACs

    Writes three UTF-8 log files in the current directory:
      mp3-flac_matches.100  pairs whose Levenshtein ratio truncates to 100
      mp3-flac_matches.90   near matches (90 <= ratio < 100) for later clean-up
      mp3-flac_matches.no   MP3 keys that had no 100% match

    Returns a dict of the MP3 keys that had a 100% match, mapped to
    their rating.
    """
    matches = {}
    mp3match = 0
    flacmatch = 0
    print("Comparing MP3s to FLACs")
    # The with-statement closes all three logs even if a comparison or a
    # write raises part-way through.
    with codecs.open("mp3-flac_matches.100", "w", "utf-8") as f100, \
            codecs.open("mp3-flac_matches.90", "w", "utf-8") as f90, \
            codecs.open("mp3-flac_matches.no", "w", "utf-8") as nomatch:
        for mp3 in mp3s:
            foundmatch = False
            for flac in flacs:
                ratio = int(Levenshtein.ratio(mp3, flac) * 100)
                if ratio == 100:
                    f100.write('%3d "%s" ~ "%s"\n' % (ratio, mp3, flac))
                    foundmatch = True
                    flacmatch += 1
                    sys.stdout.write("!")  # progress marker, one per match
                    matches[mp3] = mp3s[mp3]  # Add match and rating to matches{}
                elif ratio >= 90:
                    # Log the near matches, which are probably only a
                    # character or two different
                    f90.write('%3d "%s" ~ "%s"\n' % (ratio, mp3, flac))
            if foundmatch:
                mp3match += 1
            else:
                nomatch.write(mp3 + "\n")
    sys.stdout.write("\n")
    print("Matched %d MP3 ratings to %d FLACs" % (mp3match, flacmatch))
    return matches
def get_rated_mp3s():
    """Query MediaMonkey for every MP3 whose Rating is greater than 0.

    Returns a dict keyed by "ArtistName~Title". When several tracks share
    a key, the highest rating among them is kept.
    """
    mp3s = {}
    print("Getting MP3s")
    SDB = win32com.client.Dispatch('SongsDB.SDBApplication')
    SDB.ShutdownAfterDisconnect = False
    seltracks = SDB.Database.QuerySongs("Songs.Rating > 0 AND Songs.SongPath LIKE '%.mp3'")
    while not seltracks.EOF:
        trk = seltracks.Item
        key = trk.ArtistName + "~" + trk.Title
        rating = trk.Rating
        # `in` replaces the deprecated dict.has_key()
        if key not in mp3s or rating > mp3s[key]:
            mp3s[key] = rating
        seltracks.Next()
    print("Found %d rated MP3s" % len(mp3s))
    SDB = None  # drop the COM reference
    return mp3s
def get_flacs():
    """Return a sorted list of "ArtistName~Title" strings, one per FLAC
    track that MediaMonkey reports (duplicates are kept)."""
    print("Getting FLACs")
    app = win32com.client.Dispatch('SongsDB.SDBApplication')
    app.ShutdownAfterDisconnect = False
    cursor = app.Database.QuerySongs("Songs.SongPath LIKE '%.flac'")
    names = []
    while not cursor.EOF:
        track = cursor.Item
        names.append("~".join((track.ArtistName, track.Title)))
        cursor.Next()
    print("Found %d FLACs" % len(names))
    app = None  # drop the COM reference
    return sorted(names)
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Convert a byte string to Unicode; return anything else unchanged.

    obj      -- any object; only byte strings are decoded
    encoding -- codec used to decode byte strings (default 'utf-8')

    Raises UnicodeDecodeError if the bytes are not valid in `encoding`.
    """
    # On Python 2 `bytes` is an alias of `str`, so this is the same test
    # as "basestring but not unicode", and it also works on Python 3.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
def process_matches(matches):
    """Update the DB and tags of any matching MP3s and FLACs.

    matches -- dict mapping "Artist~Title" keys to the rating to apply.
    Keys are processed in sorted order; each one is handed to updatetags().
    """
    for key in sorted(matches):
        # Split only on the first "~" so a "~" inside the title does not
        # break the two-way unpacking with a ValueError.
        artist, song = key.split("~", 1)
        updatetags(artist, song, matches[key])
def process_now_playing(mp3s):
    """Send every MP3 whose rating is 0 to update_now_playing().

    mp3s -- dict mapping "Artist~Title" keys to ratings.
    """
    # NOTE(review): main() passes the result of get_rated_mp3s(), whose
    # query only selects Rating > 0, so this filter looks like it never
    # matches -- confirm the intended input.
    print("Updating NowPlaying with all MP3s that have a rating of 0")
    for key in sorted(mp3s):
        rating = mp3s[key]
        if rating == 0:
            # Split only on the first "~" so a "~" inside the title does
            # not break the two-way unpacking with a ValueError.
            artist, song = key.split("~", 1)
            update_now_playing(artist, song, rating)
def update_now_playing(artist, song, rating):
    """Add every MP3 matching artist and song to the Now Playing list.

    artist -- artist name to match against Songs.Artist
    song   -- title to match against Songs.SongTitle
    rating -- unused; kept so existing callers keep working
    """
    # SQL string literals are single-quoted and an embedded single quote
    # is escaped by doubling it. This handles names containing ', " or
    # both, so the old "can't quote" bail-out (which wrote to an
    # undefined `log`) is no longer needed.
    qartist = "'" + artist.replace("'", "''") + "'"
    qsong = "'" + song.replace("'", "''") + "'"
    SDB = win32com.client.Dispatch('SongsDB.SDBApplication')
    SDB.ShutdownAfterDisconnect = False
    query = "Songs.Artist=" + qartist + " AND Songs.SongTitle=" + qsong + " AND Songs.SongPath LIKE '%.mp3'"
    seltracks = SDB.Database.QuerySongs(query)
    # Add the songs to the NowPlaying list, to make an easy-to-browse list
    # TO-DO: Clear the NowPlaying list before adding all these tracks
    while not seltracks.EOF:
        SDB.Player.PlaylistAddTrack(seltracks.Item)
        seltracks.Next()
def updatetags(artist, song, rating):
    """Update the Rating tag in all the FLAC(s) that match the Artist & Song.

    artist -- artist name to match against Songs.Artist
    song   -- title to match against Songs.SongTitle
    rating -- value assigned to each matching track's Rating

    Each update is printed and appended to updatetags.log (UTF-8).
    """
    # SQL string literals are single-quoted and an embedded single quote
    # is escaped by doubling it, so names containing ', " or both can all
    # be queried.
    qartist = "'" + artist.replace("'", "''") + "'"
    qsong = "'" + song.replace("'", "''") + "'"
    SDB = win32com.client.Dispatch('SongsDB.SDBApplication')
    SDB.ShutdownAfterDisconnect = False
    query = "Songs.Artist=" + qartist + " AND Songs.SongTitle=" + qsong + " AND Songs.SongPath LIKE '%.flac'"
    seltracks = SDB.Database.QuerySongs(query)
    # The with-statement closes the log on every exit path, including
    # an exception raised while writing tags.
    with codecs.open("updatetags.log", "a", "utf-8") as log:
        while not seltracks.EOF:
            trk = seltracks.Item
            message = "Setting FLAC: %s ~ %s ~ rating: %d" % (qartist, qsong, rating)
            print(message)
            log.write(message + "\n")
            trk.Rating = rating
            trk.WriteTags()
            trk.UpdateDB()
            seltracks.Next()
def main():
    """Run the whole job: load the rated MP3s, fill Now Playing, load the
    FLACs, compare the two sets and apply the matched ratings."""
    rated = get_rated_mp3s()
    process_now_playing(rated)
    process_matches(comp(rated, get_flacs()))
    print("Done.")


if __name__ == "__main__":
    main()
About 10 years ago I ripped all my CDs to MP3s. Over the years I've done a LOT of work on the tags, getting them to be how I want them. Much of that time was spent rating my MP3s. (Do you have any idea how long it takes to rate 12,000 songs?)
Now that hard drive space is much cheaper, I decided to re-rip all my CDs to FLAC. I am now going through the process of correcting the tags on those FLACs. Since they're the same corrections that I made on the MP3s, I figured that it would be easier to copy the info from the MP3s to the FLACs.
I almost always work in Python, and I've been working on some Python scripts that query MusicBrainz. And it's been 10+ years since I've done VB programming. So I started playing with Python to script against the MediaMonkey database. And I struggled a bit, since I'm not a great programmer and I could not find many Python examples for programming against MM.
Due to a little prodding from Lowlander, I'm posting the following script and hoping that one single Python programmer will get a leg up.
Some of the things in this script that I think beginning Python-MediaMonkey programmers might find useful:
- Simple routines to use QuerySongs, step through the results, and update the database and tags
- The use of codecs.open to be able to log non-ASCII strings like Artist names with accented characters.
- The Levenshtein fuzzy compare of the "Artist + SongTitle" phrases
I found the Levenshtein fuzzy match stuff quite useful. I wrote another script (which I will post once I improve it) which:
- Grabs every unique ArtistName from MediaMonkey
- Compares each ArtistName to every other ArtistName
- Logs out the near-matches
- Then does it all for SongTitles and Albums, too.
This allowed me to find every variation of, as an example:
Crosby, Stills, Nash & Young
Crosby, Stills, Nash and Young
Crosby, Stills, Nash, and Young
Crosby Stills Nash and Young
etc.
Then I was able to filter them in MM and make them all identical. I used to have 6 different variations of CSN&Y, now I have 1. Etcetera. I think that I reduced the number of unique artist names by 30%, which is a whopping improvement for those of us who are fanatic about tags and too lazy to search by hand. Or too eye-strained to see the sometimes subtle differences that can occur.
If you get any use out of this code, please say so.
If you improve it, please post those improvements.
[code]#!/usr/bin/env python
''' Get unique list of all MP3 songs that have been rated
- Ignore songs with multiple entries, and ignore the rating
Get list of all FLACs
Do a Levenshtein fuzzy compare of the "Artist + SongTitle"
(http://code.google.com/p/pylevenshtein/)
For any matches, copy the Rating from the MP3 to the matching FLAC
Update the database and tags
'''
import win32com.client
import sys
import time
import Levenshtein
import codecs
def comp(mp3s, flacs):
    """Fuzzy-compare every MP3 key against every FLAC key.

    mp3s  -- dict mapping "Artist~Title" to the MP3's rating
    flacs -- iterable of "Artist~Title" strings for the FLACs

    Writes three UTF-8 log files in the current directory:
      mp3-flac_matches.100  pairs whose Levenshtein ratio truncates to 100
      mp3-flac_matches.90   near matches (90 <= ratio < 100) for later clean-up
      mp3-flac_matches.no   MP3 keys that had no 100% match

    Returns a dict of the MP3 keys that had a 100% match, mapped to
    their rating.
    """
    matches = {}
    mp3match = 0
    flacmatch = 0
    print("Comparing MP3s to FLACs")
    # The with-statement closes all three logs even if a comparison or a
    # write raises part-way through.
    with codecs.open("mp3-flac_matches.100", "w", "utf-8") as f100, \
            codecs.open("mp3-flac_matches.90", "w", "utf-8") as f90, \
            codecs.open("mp3-flac_matches.no", "w", "utf-8") as nomatch:
        for mp3 in mp3s:
            foundmatch = False
            for flac in flacs:
                ratio = int(Levenshtein.ratio(mp3, flac) * 100)
                if ratio == 100:
                    f100.write('%3d "%s" ~ "%s"\n' % (ratio, mp3, flac))
                    foundmatch = True
                    flacmatch += 1
                    sys.stdout.write("!")  # progress marker, one per match
                    matches[mp3] = mp3s[mp3]  # Add match and rating to matches{}
                elif ratio >= 90:
                    # Log the near matches, which are probably only a
                    # character or two different
                    f90.write('%3d "%s" ~ "%s"\n' % (ratio, mp3, flac))
            if foundmatch:
                mp3match += 1
            else:
                nomatch.write(mp3 + "\n")
    sys.stdout.write("\n")
    print("Matched %d MP3 ratings to %d FLACs" % (mp3match, flacmatch))
    return matches
def get_rated_mp3s():
    """Query MediaMonkey for every MP3 whose Rating is greater than 0.

    Returns a dict keyed by "ArtistName~Title". When several tracks share
    a key, the highest rating among them is kept.
    """
    mp3s = {}
    print("Getting MP3s")
    SDB = win32com.client.Dispatch('SongsDB.SDBApplication')
    SDB.ShutdownAfterDisconnect = False
    seltracks = SDB.Database.QuerySongs("Songs.Rating > 0 AND Songs.SongPath LIKE '%.mp3'")
    while not seltracks.EOF:
        trk = seltracks.Item
        key = trk.ArtistName + "~" + trk.Title
        rating = trk.Rating
        # `in` replaces the deprecated dict.has_key()
        if key not in mp3s or rating > mp3s[key]:
            mp3s[key] = rating
        seltracks.Next()
    print("Found %d rated MP3s" % len(mp3s))
    SDB = None  # drop the COM reference
    return mp3s
def get_flacs():
    """Return a sorted list of "ArtistName~Title" strings, one per FLAC
    track that MediaMonkey reports (duplicates are kept)."""
    print("Getting FLACs")
    app = win32com.client.Dispatch('SongsDB.SDBApplication')
    app.ShutdownAfterDisconnect = False
    cursor = app.Database.QuerySongs("Songs.SongPath LIKE '%.flac'")
    names = []
    while not cursor.EOF:
        track = cursor.Item
        names.append("~".join((track.ArtistName, track.Title)))
        cursor.Next()
    print("Found %d FLACs" % len(names))
    app = None  # drop the COM reference
    return sorted(names)
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Convert a byte string to Unicode; return anything else unchanged.

    obj      -- any object; only byte strings are decoded
    encoding -- codec used to decode byte strings (default 'utf-8')

    Raises UnicodeDecodeError if the bytes are not valid in `encoding`.
    """
    # On Python 2 `bytes` is an alias of `str`, so this is the same test
    # as "basestring but not unicode", and it also works on Python 3.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
def process_matches(matches):
    """Update the DB and tags of any matching MP3s and FLACs.

    matches -- dict mapping "Artist~Title" keys to the rating to apply.
    Keys are processed in sorted order; each one is handed to updatetags().
    """
    for key in sorted(matches):
        # Split only on the first "~" so a "~" inside the title does not
        # break the two-way unpacking with a ValueError.
        artist, song = key.split("~", 1)
        updatetags(artist, song, matches[key])
def process_now_playing(mp3s):
    """Send every MP3 whose rating is 0 to update_now_playing().

    mp3s -- dict mapping "Artist~Title" keys to ratings.
    """
    # NOTE(review): main() passes the result of get_rated_mp3s(), whose
    # query only selects Rating > 0, so this filter looks like it never
    # matches -- confirm the intended input.
    print("Updating NowPlaying with all MP3s that have a rating of 0")
    for key in sorted(mp3s):
        rating = mp3s[key]
        if rating == 0:
            # Split only on the first "~" so a "~" inside the title does
            # not break the two-way unpacking with a ValueError.
            artist, song = key.split("~", 1)
            update_now_playing(artist, song, rating)
def update_now_playing(artist, song, rating):
    """Add every MP3 matching artist and song to the Now Playing list.

    artist -- artist name to match against Songs.Artist
    song   -- title to match against Songs.SongTitle
    rating -- unused; kept so existing callers keep working
    """
    # SQL string literals are single-quoted and an embedded single quote
    # is escaped by doubling it. This handles names containing ', " or
    # both, so the old "can't quote" bail-out (which wrote to an
    # undefined `log`) is no longer needed.
    qartist = "'" + artist.replace("'", "''") + "'"
    qsong = "'" + song.replace("'", "''") + "'"
    SDB = win32com.client.Dispatch('SongsDB.SDBApplication')
    SDB.ShutdownAfterDisconnect = False
    query = "Songs.Artist=" + qartist + " AND Songs.SongTitle=" + qsong + " AND Songs.SongPath LIKE '%.mp3'"
    seltracks = SDB.Database.QuerySongs(query)
    # Add the songs to the NowPlaying list, to make an easy-to-browse list
    # TO-DO: Clear the NowPlaying list before adding all these tracks
    while not seltracks.EOF:
        SDB.Player.PlaylistAddTrack(seltracks.Item)
        seltracks.Next()
def updatetags(artist, song, rating):
    """Update the Rating tag in all the FLAC(s) that match the Artist & Song.

    artist -- artist name to match against Songs.Artist
    song   -- title to match against Songs.SongTitle
    rating -- value assigned to each matching track's Rating

    Each update is printed and appended to updatetags.log (UTF-8).
    """
    # SQL string literals are single-quoted and an embedded single quote
    # is escaped by doubling it, so names containing ', " or both can all
    # be queried.
    qartist = "'" + artist.replace("'", "''") + "'"
    qsong = "'" + song.replace("'", "''") + "'"
    SDB = win32com.client.Dispatch('SongsDB.SDBApplication')
    SDB.ShutdownAfterDisconnect = False
    query = "Songs.Artist=" + qartist + " AND Songs.SongTitle=" + qsong + " AND Songs.SongPath LIKE '%.flac'"
    seltracks = SDB.Database.QuerySongs(query)
    # The with-statement closes the log on every exit path, including
    # an exception raised while writing tags.
    with codecs.open("updatetags.log", "a", "utf-8") as log:
        while not seltracks.EOF:
            trk = seltracks.Item
            message = "Setting FLAC: %s ~ %s ~ rating: %d" % (qartist, qsong, rating)
            print(message)
            log.write(message + "\n")
            trk.Rating = rating
            trk.WriteTags()
            trk.UpdateDB()
            seltracks.Next()
def main():
    """Run the whole job: load the rated MP3s, fill Now Playing, load the
    FLACs, compare the two sets and apply the matched ratings."""
    rated = get_rated_mp3s()
    process_now_playing(rated)
    process_matches(comp(rated, get_flacs()))
    print("Done.")


if __name__ == "__main__":
    main()
[/code]