by Thufir Hawat » Thu Mar 13, 2008 11:39 am
Hi GuHu,
i made some improvements to your script:
*) exchanged the Levenshtein distance for the more general Damerau-Levenshtein distance (algorithm from Wikibooks, http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Visual_Basic_for_Applications, with some small changes), which is nearly the same but counts swapped characters as a single difference instead of two. It should be faster too, as this implementation goes along a diagonal stripe of the matrix and does not compare strings which differ in length by more than the given max. But it is still slow, as the complexity is O(|s1|*|s2|) at best.
*) the script now treats only the selected songs in the main window.
*) the script handles album typos too.
*) as I don't want albums like "xxxx - CD1" and "xxxx - CD2" counted as typos, the script ignores " - CD"+$NUMBER$ and " - Vol."+$NUMBER$. If you name the CDs of a set in another way, like (CD?), you have to modify lines 251 to 270.
Code: Select all
' MediaMonkey Script
'
' NAME: Typo 0.2
' AUTHOR: Thufir Hawat
' DATE:2008.03.13
'
' Original-AUTHOR: GuHu
' DATE : 01/18/08
'
'
' INSTALL: Copy to Scripts/auto directory
'
Option Explicit

'Threshold for artist names: FillArtists reports a pair when its distance is below this value.
Dim ArtistMax
ArtistMax = 4

'Threshold for album names: FillAlbums reports a pair when its distance is below this value.
Dim AlbumMax
AlbumMax = 4

'Memo table handed to damerau_levenshtein. It is indexed by the lengths of the
'two strings and is declared at script level (not inside the function) for performance.
Dim d(250,250)

'Tree handles shared by OnStartup and the Fill* callbacks.
Dim Tree, Node, Subnode
Dim Artistnode, Sartistnode
Dim Albumnode, Salbumnode
'Startup hook: adds a "Typos" node under the Files-to-Edit node of the main tree,
'with two child nodes ("Artists" and "Albums") whose children are filled on
'demand by FillArtists and FillAlbums from this script file.
Sub OnStartup
    Set Tree = SDB.MainTree

    'Root node of the typo finder.
    Set Node = Tree.CreateNode
    With Node
        .Caption = SDB.Localize("Typos")
        .IconIndex = 49
    End With
    Tree.AddNode Tree.Node_FilesToEdit, Node, 2
    Node.HasChildren = True

    'Child node listing similar artist names.
    Set Subnode = Tree.CreateNode
    With Subnode
        .Caption = SDB.Localize("Artists")
        .IconIndex = 0
        .UseScript = Script.ScriptPath
        .OnFillChildren = "FillArtists"
    End With
    Tree.AddNode Node, Subnode, 3
    Subnode.HasChildren = True

    'Child node listing similar album names.
    Set Subnode = Tree.CreateNode
    With Subnode
        .Caption = SDB.Localize("Albums")
        .IconIndex = 0
        .UseScript = Script.ScriptPath
        .OnFillChildren = "FillAlbums"
    End With
    Tree.AddNode Node, Subnode, 3
    Subnode.HasChildren = True

    'Leaves a fresh, unattached node in the script-level Sartistnode variable.
    Set Sartistnode = Tree.CreateNode
End Sub
'Returns the Damerau-Levenshtein distance between s1 and s2 (insertions,
'deletions, substitutions and swaps of two adjacent characters each cost 1),
'capped by the limit parameter.
'  s1, s2 - strings to compare (compared case-sensitively with "=").
'  limit  - upper bound; the return value is never larger than limit.
'  result - two-dimensional memo array indexed by (Len(s1), Len(s2)). A cell
'           holds distance + 1 once it is known; Empty or 0 means "not computed".
'           The caller must clear it (Erase) before each new pair of strings.
'           Cells outside the array bounds are simply not memoised, so strings
'           longer than the table are still handled.
Function damerau_levenshtein(s1, s2, limit, result)
    Dim len1, len2
    Dim useMemo, cached, hasCached
    Dim best, final

    len1 = Len(s1)
    len2 = Len(s2)

    'Only touch memo cells that actually exist in the table.
    useMemo = False
    If IsArray(result) Then
        useMemo = (len1 <= UBound(result, 1)) And (len2 <= UBound(result, 2))
    End If

    hasCached = False
    If useMemo Then
        cached = result(len1, len2)
        If IsNumeric(cached) Then hasCached = (cached >= 1)
    End If

    If hasCached Then
        'retrieve intermediate result
        final = cached - 1
    ElseIf Abs(len1 - len2) >= limit Then
        'The length difference alone already reaches the cap.
        final = limit
    ElseIf len1 = 0 Or len2 = 0 Then
        'End of recursion: the rest of the non-empty string must be inserted.
        final = len1 + len2
    ElseIf Mid(s1, 1, 1) = Mid(s2, 1, 1) Then
        'Equal leading characters cost nothing.
        final = damerau_levenshtein(Mid(s1, 2), Mid(s2, 2), limit, result)
    Else
        'The minimum over all edit operations is implemented via the limit
        'parameter: every call returns min(its distance, the limit it was given).
        best = limit - 1
        If Mid(s1, 1, 1) = Mid(s2, 2, 1) And Mid(s1, 2, 1) = Mid(s2, 1, 1) Then
            'Damerau extension counting swapped letters. This is one candidate
            'among the others, not a replacement for them: "ab" -> "bab" is a
            'single insertion even though the leading letters look swapped.
            best = damerau_levenshtein(Mid(s1, 3), Mid(s2, 3), best, result)
        End If
        'substitution (diagonal)
        best = damerau_levenshtein(Mid(s1, 2), Mid(s2, 2), best, result)
        'deletion from s1 (horizontal)
        best = damerau_levenshtein(Mid(s1, 2), s2, best, result)
        'insertion into s1 (vertical)
        best = damerau_levenshtein(s1, Mid(s2, 2), best, result)
        final = 1 + best
    End If

    'returns the distance capped by the limit
    If final < limit Then
        damerau_levenshtein = final
        'store intermediate result; a value below the cap is the exact distance
        If useMemo Then result(len1, len2) = final + 1
    Else
        damerau_levenshtein = limit
    End If
End Function
'Track-list filler for an artist leaf node: adds to the main tracks window
'every song whose Songs.Artist equals the caption of the given node.
'  Node - tree node whose Caption holds the artist name to look up.
Sub FillTracks(Node)
    Dim sql, Tracks, artistName
    Set Tracks = SDB.MainTracksWindow
    'Build a single-quoted SQL string literal and double embedded apostrophes,
    'so names containing " or ' neither break the query nor get read as an
    'identifier.
    artistName = Replace(Node.Caption, "'", "''")
    sql = "WHERE Songs.Artist = '" & artistName & "'"
    Tracks.AddTracksFromQuery(sql)
    Tracks.FinishAdding
End Sub
'Track-list filler for an album leaf node: adds to the main tracks window
'every song whose Songs.Album equals the caption of the given node.
'  Node - tree node whose Caption holds the album name to look up.
Sub FillAlbumTracks(Node)
    Dim sql, Tracks, albumName
    Set Tracks = SDB.MainTracksWindow
    'Build a single-quoted SQL string literal and double embedded apostrophes,
    'so names containing " or ' neither break the query nor get read as an
    'identifier.
    albumName = Replace(Node.Caption, "'", "''")
    sql = "WHERE Songs.Album = '" & albumName & "'"
    Tracks.AddTracksFromQuery(sql)
    Tracks.FinishAdding
End Sub
'Drop handler for artist leaf nodes: renames the artist of every dropped song
'to the caption of the node it was dropped on.
'  destNode - node receiving the drop; its Caption is the new artist name.
'  srcNode  - not used here.
'  SongList - the dropped songs.
'  DropType - not used here.
'  Test     - when True nothing is modified and the function just returns 2.
Function TrackDragDrop( destNode, srcNode, SongList, DropType, Test)
    Dim idx, song

    If Test Then
        TrackDragDrop = 2
        Exit Function
    End If

    For idx = 0 To SongList.Count - 1
        Set song = SongList.Item(idx)
        With song
            'Keep the album artist in step when it mirrored the track artist.
            If .ArtistName = .AlbumArtistName Then
                .AlbumArtistName = destNode.Caption
            End If
            .ArtistName = destNode.Caption
            .UpdateDB
            .UpdateArtist
            .WriteTags
        End With
    Next
End Function
'Drop handler for album leaf nodes: renames the album of every dropped song
'to the caption of the node it was dropped on.
'  destNode - node receiving the drop; its Caption is the new album name.
'  srcNode  - not used here.
'  SongList - the dropped songs.
'  DropType - not used here.
'  Test     - when True nothing is modified and the function just returns 2.
Function AlbumTrackDragDrop( destNode, srcNode, SongList, DropType, Test)
    Dim idx, song

    If Test Then
        AlbumTrackDragDrop = 2
        Exit Function
    End If

    For idx = 0 To SongList.Count - 1
        Set song = SongList.Item(idx)
        With song
            .AlbumName = destNode.Caption
            .UpdateDB
            .UpdateAlbum
            .WriteTags
        End With
    Next
End Function
'Child filler for the "Artists" node: compares every pair of artists in the
'current selection and, for each artist that has at least one similar name
'(distance below ArtistMax), adds a group node containing that artist and
'all of its look-alikes as leaf nodes.
'  Subnode - the node whose children are being filled.
Sub FillArtists(Subnode)
    Dim list, artlist, i, j, art1, art2, dd, a1, a2, isnode
    Dim Progress
    Set Tree = SDB.MainTree
    'Alternative sources: SDB.CurrentSongList, SDB.AllVisibleSongList
    Set list = SDB.SelectedSongList
    Set artlist = list.Artists
    Set Progress = SDB.Progress
    Progress.Text = SDB.Localize("Find similar Artists ...")
    Progress.MaxValue = artlist.count
    For i = 0 To artlist.count - 1
        Progress.Value = i + 1
        If Progress.Terminate Then Exit For
        Set art1 = artlist.item(i)
        a1 = art1.Name
        isnode = False   'becomes True once the group node for a1 exists
        For j = i + 1 To artlist.count - 1
            Set art2 = artlist.item(j)
            a2 = art2.Name
            'The distance is never smaller than the difference in length, so
            'such pairs cannot pass the test below; skipping them avoids
            'clearing the whole memo table for nothing.
            If Abs(Len(a1) - Len(a2)) < ArtistMax Then
                Erase d
                dd = damerau_levenshtein(a1, a2, ArtistMax + 1, d)
                If dd < ArtistMax Then
                    If Not isnode Then
                        isnode = True
                        Set Artistnode = Tree.CreateNode
                        Artistnode.Caption = a1
                        Tree.AddNode Subnode, Artistnode, 3
                        Artistnode.HasChildren = True
                        Typo_AddArtistLeaf Artistnode, a1
                    End If
                    Typo_AddArtistLeaf Artistnode, a2
                End If
            End If
        Next
    Next
End Sub

'Adds one artist leaf (drag-drop target and track source) below parentNode.
Sub Typo_AddArtistLeaf(parentNode, artistName)
    Set Sartistnode = Tree.CreateNode
    Sartistnode.Caption = artistName
    Sartistnode.OnDragDrop = "TrackDragDrop"
    Sartistnode.OnFillTracksFunct = "FillTracks"
    Sartistnode.UseScript = Script.ScriptPath
    Tree.AddNode parentNode, Sartistnode, 3
End Sub
'Child filler for the "Albums" node: compares every pair of albums in the
'current selection and, for each album that has at least one similar name
'(distance below AlbumMax), adds a group node containing that album and all
'of its look-alikes as leaf nodes.
'Names that differ only in a " - CD<number>" or " - Vol.<number>" part are
'treated as discs of one set and are not reported. The comparison works on
'stripped copies; node captions always carry the real album names because
'the captions are used for the track query and for renaming on drop.
'  Subnode - the node whose children are being filled.
Sub FillAlbums(Subnode)
    Dim list, albumlist, i, j, album1, album2, dd, name1, name2, key1, key2, isnode
    Dim Progress
    Set Tree = SDB.MainTree
    'Alternative sources: SDB.CurrentSongList, SDB.AllVisibleSongList
    Set list = SDB.SelectedSongList
    Set albumlist = list.Albums
    Set Progress = SDB.Progress
    Progress.Text = SDB.Localize("Find similar Albums ...")
    Progress.MaxValue = albumlist.count
    For i = 0 To albumlist.count - 1
        Progress.Value = i + 1
        If Progress.Terminate Then Exit For
        Set album1 = albumlist.item(i)
        name1 = album1.Name
        isnode = False   'becomes True once the group node for name1 exists
        For j = i + 1 To albumlist.count - 1
            Set album2 = albumlist.item(j)
            name2 = album2.Name

            'Work on copies so the real names stay intact for later pairs
            'and for the captions.
            key1 = name1
            key2 = name2
            Typo_StripSetMarker key1, key2, " - CD"
            Typo_StripSetMarker key1, key2, " - Vol."

            If key1 <> key2 Then
                'The distance is never smaller than the difference in length,
                'so such pairs cannot pass the test below.
                If Abs(Len(key1) - Len(key2)) < AlbumMax Then
                    Erase d
                    dd = damerau_levenshtein(key1, key2, AlbumMax + 1, d)
                    If dd < AlbumMax Then
                        If Not isnode Then
                            isnode = True
                            Set Albumnode = Tree.CreateNode
                            Albumnode.Caption = name1
                            Tree.AddNode Subnode, Albumnode, 3
                            Albumnode.HasChildren = True
                            Typo_AddAlbumLeaf Albumnode, name1
                        End If
                        Typo_AddAlbumLeaf Albumnode, name2
                    End If
                End If
            End If
        Next
    Next
End Sub

'Adds one album leaf (drag-drop target and track source) below parentNode.
Sub Typo_AddAlbumLeaf(parentNode, albumName)
    Set Salbumnode = Tree.CreateNode
    Salbumnode.Caption = albumName
    Salbumnode.OnDragDrop = "AlbumTrackDragDrop"
    Salbumnode.OnFillTracksFunct = "FillAlbumTracks"
    Salbumnode.UseScript = Script.ScriptPath
    Tree.AddNode parentNode, Salbumnode, 3
End Sub

'Returns True when ch is exactly one decimal digit.
Function Typo_IsDigit(ch)
    Typo_IsDigit = False
    If Len(ch) = 1 Then Typo_IsDigit = (InStr("0123456789", ch) > 0)
End Function

'Returns text without the marker that starts at markerPos (markerLen characters
'long) and without the run of digits that directly follows it.
Function Typo_CutNumberedMarker(text, markerPos, markerLen)
    Dim tailStart
    tailStart = markerPos + markerLen
    Do While Typo_IsDigit(Mid(text, tailStart, 1))
        tailStart = tailStart + 1
    Loop
    Typo_CutNumberedMarker = Left(text, markerPos - 1) & Mid(text, tailStart)
End Function

'Removes the first "marker + number" part from both names, but only when each
'of the two names contains the marker immediately followed by a digit.
Sub Typo_StripSetMarker(ByRef name1, ByRef name2, marker)
    Dim pos1, pos2
    pos1 = InStr(name1, marker)
    pos2 = InStr(name2, marker)
    If pos1 = 0 Or pos2 = 0 Then Exit Sub
    If Not Typo_IsDigit(Mid(name1, pos1 + Len(marker), 1)) Then Exit Sub
    If Not Typo_IsDigit(Mid(name2, pos2 + Len(marker), 1)) Then Exit Sub
    name1 = Typo_CutNumberedMarker(name1, pos1, Len(marker))
    name2 = Typo_CutNumberedMarker(name2, pos2, Len(marker))
End Sub
Hi GuHu,
i made some improvements to your script:
[list]
*) exchanged the Levenshtein distance for the more general Damerau-Levenshtein distance (algorithm from Wikibooks, http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Visual_Basic_for_Applications, with some small changes), which is nearly the same but counts swapped characters as a single difference instead of two. It should be faster too, as this implementation goes along a diagonal stripe of the matrix and does not compare strings which differ in length by more than the given max. But it is still slow, as the complexity is O(|s1|*|s2|) at best.
*) the script now treats only the selected songs in the main window.
*) the script handles album typos too.
*) as I don't want albums like "xxxx - CD1" and "xxxx - CD2" counted as typos, the script ignores " - CD"+$NUMBER$ and " - Vol."+$NUMBER$. If you name the CDs of a set in another way, like (CD?), you have to modify lines 251 to 270.
[/list]
[code]
' MediaMonkey Script
'
' NAME: Typo 0.2
' AUTHOR: Thufir Hawat
' DATE:2008.03.13
'
' Original-AUTHOR: GuHu
' DATE : 01/18/08
'
'
' INSTALL: Copy to Scripts/auto directory
'
Option Explicit

'Threshold for artist names: FillArtists reports a pair when its distance is below this value.
Dim ArtistMax
ArtistMax = 4

'Threshold for album names: FillAlbums reports a pair when its distance is below this value.
Dim AlbumMax
AlbumMax = 4

'Memo table handed to damerau_levenshtein. It is indexed by the lengths of the
'two strings and is declared at script level (not inside the function) for performance.
Dim d(250,250)

'Tree handles shared by OnStartup and the Fill* callbacks.
Dim Tree, Node, Subnode
Dim Artistnode, Sartistnode
Dim Albumnode, Salbumnode
'Startup hook: adds a "Typos" node under the Files-to-Edit node of the main tree,
'with two child nodes ("Artists" and "Albums") whose children are filled on
'demand by FillArtists and FillAlbums from this script file.
Sub OnStartup
    Set Tree = SDB.MainTree

    'Root node of the typo finder.
    Set Node = Tree.CreateNode
    With Node
        .Caption = SDB.Localize("Typos")
        .IconIndex = 49
    End With
    Tree.AddNode Tree.Node_FilesToEdit, Node, 2
    Node.HasChildren = True

    'Child node listing similar artist names.
    Set Subnode = Tree.CreateNode
    With Subnode
        .Caption = SDB.Localize("Artists")
        .IconIndex = 0
        .UseScript = Script.ScriptPath
        .OnFillChildren = "FillArtists"
    End With
    Tree.AddNode Node, Subnode, 3
    Subnode.HasChildren = True

    'Child node listing similar album names.
    Set Subnode = Tree.CreateNode
    With Subnode
        .Caption = SDB.Localize("Albums")
        .IconIndex = 0
        .UseScript = Script.ScriptPath
        .OnFillChildren = "FillAlbums"
    End With
    Tree.AddNode Node, Subnode, 3
    Subnode.HasChildren = True

    'Leaves a fresh, unattached node in the script-level Sartistnode variable.
    Set Sartistnode = Tree.CreateNode
End Sub
'Returns the Damerau-Levenshtein distance between s1 and s2 (insertions,
'deletions, substitutions and swaps of two adjacent characters each cost 1),
'capped by the limit parameter.
'  s1, s2 - strings to compare (compared case-sensitively with "=").
'  limit  - upper bound; the return value is never larger than limit.
'  result - two-dimensional memo array indexed by (Len(s1), Len(s2)). A cell
'           holds distance + 1 once it is known; Empty or 0 means "not computed".
'           The caller must clear it (Erase) before each new pair of strings.
'           Cells outside the array bounds are simply not memoised, so strings
'           longer than the table are still handled.
Function damerau_levenshtein(s1, s2, limit, result)
    Dim len1, len2
    Dim useMemo, cached, hasCached
    Dim best, final

    len1 = Len(s1)
    len2 = Len(s2)

    'Only touch memo cells that actually exist in the table.
    useMemo = False
    If IsArray(result) Then
        useMemo = (len1 <= UBound(result, 1)) And (len2 <= UBound(result, 2))
    End If

    hasCached = False
    If useMemo Then
        cached = result(len1, len2)
        If IsNumeric(cached) Then hasCached = (cached >= 1)
    End If

    If hasCached Then
        'retrieve intermediate result
        final = cached - 1
    ElseIf Abs(len1 - len2) >= limit Then
        'The length difference alone already reaches the cap.
        final = limit
    ElseIf len1 = 0 Or len2 = 0 Then
        'End of recursion: the rest of the non-empty string must be inserted.
        final = len1 + len2
    ElseIf Mid(s1, 1, 1) = Mid(s2, 1, 1) Then
        'Equal leading characters cost nothing.
        final = damerau_levenshtein(Mid(s1, 2), Mid(s2, 2), limit, result)
    Else
        'The minimum over all edit operations is implemented via the limit
        'parameter: every call returns min(its distance, the limit it was given).
        best = limit - 1
        If Mid(s1, 1, 1) = Mid(s2, 2, 1) And Mid(s1, 2, 1) = Mid(s2, 1, 1) Then
            'Damerau extension counting swapped letters. This is one candidate
            'among the others, not a replacement for them: "ab" -> "bab" is a
            'single insertion even though the leading letters look swapped.
            best = damerau_levenshtein(Mid(s1, 3), Mid(s2, 3), best, result)
        End If
        'substitution (diagonal)
        best = damerau_levenshtein(Mid(s1, 2), Mid(s2, 2), best, result)
        'deletion from s1 (horizontal)
        best = damerau_levenshtein(Mid(s1, 2), s2, best, result)
        'insertion into s1 (vertical)
        best = damerau_levenshtein(s1, Mid(s2, 2), best, result)
        final = 1 + best
    End If

    'returns the distance capped by the limit
    If final < limit Then
        damerau_levenshtein = final
        'store intermediate result; a value below the cap is the exact distance
        If useMemo Then result(len1, len2) = final + 1
    Else
        damerau_levenshtein = limit
    End If
End Function
'Track-list filler for an artist leaf node: adds to the main tracks window
'every song whose Songs.Artist equals the caption of the given node.
'  Node - tree node whose Caption holds the artist name to look up.
Sub FillTracks(Node)
    Dim sql, Tracks, artistName
    Set Tracks = SDB.MainTracksWindow
    'Build a single-quoted SQL string literal and double embedded apostrophes,
    'so names containing " or ' neither break the query nor get read as an
    'identifier.
    artistName = Replace(Node.Caption, "'", "''")
    sql = "WHERE Songs.Artist = '" & artistName & "'"
    Tracks.AddTracksFromQuery(sql)
    Tracks.FinishAdding
End Sub
'Track-list filler for an album leaf node: adds to the main tracks window
'every song whose Songs.Album equals the caption of the given node.
'  Node - tree node whose Caption holds the album name to look up.
Sub FillAlbumTracks(Node)
    Dim sql, Tracks, albumName
    Set Tracks = SDB.MainTracksWindow
    'Build a single-quoted SQL string literal and double embedded apostrophes,
    'so names containing " or ' neither break the query nor get read as an
    'identifier.
    albumName = Replace(Node.Caption, "'", "''")
    sql = "WHERE Songs.Album = '" & albumName & "'"
    Tracks.AddTracksFromQuery(sql)
    Tracks.FinishAdding
End Sub
'Drop handler for artist leaf nodes: renames the artist of every dropped song
'to the caption of the node it was dropped on.
'  destNode - node receiving the drop; its Caption is the new artist name.
'  srcNode  - not used here.
'  SongList - the dropped songs.
'  DropType - not used here.
'  Test     - when True nothing is modified and the function just returns 2.
Function TrackDragDrop( destNode, srcNode, SongList, DropType, Test)
    Dim idx, song

    If Test Then
        TrackDragDrop = 2
        Exit Function
    End If

    For idx = 0 To SongList.Count - 1
        Set song = SongList.Item(idx)
        With song
            'Keep the album artist in step when it mirrored the track artist.
            If .ArtistName = .AlbumArtistName Then
                .AlbumArtistName = destNode.Caption
            End If
            .ArtistName = destNode.Caption
            .UpdateDB
            .UpdateArtist
            .WriteTags
        End With
    Next
End Function
'Drop handler for album leaf nodes: renames the album of every dropped song
'to the caption of the node it was dropped on.
'  destNode - node receiving the drop; its Caption is the new album name.
'  srcNode  - not used here.
'  SongList - the dropped songs.
'  DropType - not used here.
'  Test     - when True nothing is modified and the function just returns 2.
Function AlbumTrackDragDrop( destNode, srcNode, SongList, DropType, Test)
    Dim idx, song

    If Test Then
        AlbumTrackDragDrop = 2
        Exit Function
    End If

    For idx = 0 To SongList.Count - 1
        Set song = SongList.Item(idx)
        With song
            .AlbumName = destNode.Caption
            .UpdateDB
            .UpdateAlbum
            .WriteTags
        End With
    Next
End Function
'Child filler for the "Artists" node: compares every pair of artists in the
'current selection and, for each artist that has at least one similar name
'(distance below ArtistMax), adds a group node containing that artist and
'all of its look-alikes as leaf nodes.
'  Subnode - the node whose children are being filled.
Sub FillArtists(Subnode)
    Dim list, artlist, i, j, art1, art2, dd, a1, a2, isnode
    Dim Progress
    Set Tree = SDB.MainTree
    'Alternative sources: SDB.CurrentSongList, SDB.AllVisibleSongList
    Set list = SDB.SelectedSongList
    Set artlist = list.Artists
    Set Progress = SDB.Progress
    Progress.Text = SDB.Localize("Find similar Artists ...")
    Progress.MaxValue = artlist.count
    For i = 0 To artlist.count - 1
        Progress.Value = i + 1
        If Progress.Terminate Then Exit For
        Set art1 = artlist.item(i)
        a1 = art1.Name
        isnode = False   'becomes True once the group node for a1 exists
        For j = i + 1 To artlist.count - 1
            Set art2 = artlist.item(j)
            a2 = art2.Name
            'The distance is never smaller than the difference in length, so
            'such pairs cannot pass the test below; skipping them avoids
            'clearing the whole memo table for nothing.
            If Abs(Len(a1) - Len(a2)) < ArtistMax Then
                Erase d
                dd = damerau_levenshtein(a1, a2, ArtistMax + 1, d)
                If dd < ArtistMax Then
                    If Not isnode Then
                        isnode = True
                        Set Artistnode = Tree.CreateNode
                        Artistnode.Caption = a1
                        Tree.AddNode Subnode, Artistnode, 3
                        Artistnode.HasChildren = True
                        Typo_AddArtistLeaf Artistnode, a1
                    End If
                    Typo_AddArtistLeaf Artistnode, a2
                End If
            End If
        Next
    Next
End Sub

'Adds one artist leaf (drag-drop target and track source) below parentNode.
Sub Typo_AddArtistLeaf(parentNode, artistName)
    Set Sartistnode = Tree.CreateNode
    Sartistnode.Caption = artistName
    Sartistnode.OnDragDrop = "TrackDragDrop"
    Sartistnode.OnFillTracksFunct = "FillTracks"
    Sartistnode.UseScript = Script.ScriptPath
    Tree.AddNode parentNode, Sartistnode, 3
End Sub
'Child filler for the "Albums" node: compares every pair of albums in the
'current selection and, for each album that has at least one similar name
'(distance below AlbumMax), adds a group node containing that album and all
'of its look-alikes as leaf nodes.
'Names that differ only in a " - CD<number>" or " - Vol.<number>" part are
'treated as discs of one set and are not reported. The comparison works on
'stripped copies; node captions always carry the real album names because
'the captions are used for the track query and for renaming on drop.
'  Subnode - the node whose children are being filled.
Sub FillAlbums(Subnode)
    Dim list, albumlist, i, j, album1, album2, dd, name1, name2, key1, key2, isnode
    Dim Progress
    Set Tree = SDB.MainTree
    'Alternative sources: SDB.CurrentSongList, SDB.AllVisibleSongList
    Set list = SDB.SelectedSongList
    Set albumlist = list.Albums
    Set Progress = SDB.Progress
    Progress.Text = SDB.Localize("Find similar Albums ...")
    Progress.MaxValue = albumlist.count
    For i = 0 To albumlist.count - 1
        Progress.Value = i + 1
        If Progress.Terminate Then Exit For
        Set album1 = albumlist.item(i)
        name1 = album1.Name
        isnode = False   'becomes True once the group node for name1 exists
        For j = i + 1 To albumlist.count - 1
            Set album2 = albumlist.item(j)
            name2 = album2.Name

            'Work on copies so the real names stay intact for later pairs
            'and for the captions.
            key1 = name1
            key2 = name2
            Typo_StripSetMarker key1, key2, " - CD"
            Typo_StripSetMarker key1, key2, " - Vol."

            If key1 <> key2 Then
                'The distance is never smaller than the difference in length,
                'so such pairs cannot pass the test below.
                If Abs(Len(key1) - Len(key2)) < AlbumMax Then
                    Erase d
                    dd = damerau_levenshtein(key1, key2, AlbumMax + 1, d)
                    If dd < AlbumMax Then
                        If Not isnode Then
                            isnode = True
                            Set Albumnode = Tree.CreateNode
                            Albumnode.Caption = name1
                            Tree.AddNode Subnode, Albumnode, 3
                            Albumnode.HasChildren = True
                            Typo_AddAlbumLeaf Albumnode, name1
                        End If
                        Typo_AddAlbumLeaf Albumnode, name2
                    End If
                End If
            End If
        Next
    Next
End Sub

'Adds one album leaf (drag-drop target and track source) below parentNode.
Sub Typo_AddAlbumLeaf(parentNode, albumName)
    Set Salbumnode = Tree.CreateNode
    Salbumnode.Caption = albumName
    Salbumnode.OnDragDrop = "AlbumTrackDragDrop"
    Salbumnode.OnFillTracksFunct = "FillAlbumTracks"
    Salbumnode.UseScript = Script.ScriptPath
    Tree.AddNode parentNode, Salbumnode, 3
End Sub

'Returns True when ch is exactly one decimal digit.
Function Typo_IsDigit(ch)
    Typo_IsDigit = False
    If Len(ch) = 1 Then Typo_IsDigit = (InStr("0123456789", ch) > 0)
End Function

'Returns text without the marker that starts at markerPos (markerLen characters
'long) and without the run of digits that directly follows it.
Function Typo_CutNumberedMarker(text, markerPos, markerLen)
    Dim tailStart
    tailStart = markerPos + markerLen
    Do While Typo_IsDigit(Mid(text, tailStart, 1))
        tailStart = tailStart + 1
    Loop
    Typo_CutNumberedMarker = Left(text, markerPos - 1) & Mid(text, tailStart)
End Function

'Removes the first "marker + number" part from both names, but only when each
'of the two names contains the marker immediately followed by a digit.
Sub Typo_StripSetMarker(ByRef name1, ByRef name2, marker)
    Dim pos1, pos2
    pos1 = InStr(name1, marker)
    pos2 = InStr(name2, marker)
    If pos1 = 0 Or pos2 = 0 Then Exit Sub
    If Not Typo_IsDigit(Mid(name1, pos1 + Len(marker), 1)) Then Exit Sub
    If Not Typo_IsDigit(Mid(name2, pos2 + Len(marker), 1)) Then Exit Sub
    name1 = Typo_CutNumberedMarker(name1, pos1, Len(marker))
    name2 = Typo_CutNumberedMarker(name2, pos2, Len(marker))
End Sub
[/code]