 | OO-Snippets: Simple natural sort algorithmCommons| Keywords | com.sun.star.util.TextSearch, com.sun.star.util.SearchOptions, com.sun.star.util.SearchResult, Quicksort, Regular Expression, RegExp |
|---|
| Language | OOBasic |
|---|
| Application | Office |
|---|
| Authors | Paolo Mantovani (initial)
|
|---|
| Supported Versions | 1.1.x 2.0.x |
|---|
| Supported OS | All |
|---|
| Question | Sorting strings which contains numbers
I need to sort a list of file names that are the output of a mail merge.
Each filename has the same prefix and a serial ending number.
I tried to sort filenames with a quicksort algorithm, but the output appear like this:
myfileprefix0.sxw
myfileprefix1.sxw
myfileprefix10.sxw
myfileprefix11.sxw
...
myfileprefix2.sxw
myfileprefix20.sxw
myfileprefix21.sxw
...
...
and so on...
The result is formally correct, because files are sorted alphabetically, but an human wouldn't never sorted the list in that way.
|
|---|
| Answer | This is a common problem and was already solved in many languages and contexts. What you need is called "natural" sort algorithm. see for example http://www.naturalordersort.org/ or search with google for "natural sort algorithm" The following code contains a very simple implementation of this algorithm. The main work is performed by the function NaturalStrComp() This function calls RegExpStrReplace() to make its work. This one is based on the service css.util.TextSearch Performances are slow. StarBasic language is not fast as compiled ones and further it does not offer advanced functions for string crunching natively. Anyway, maybe some little optimizations are stll possible. To use this code: copy it into an empty module and run the Sub Test() on top of the module to see an example. |
|---|
Option Explicit
Sub Test
Dim List(20)
Dim I As Integer
Dim iCaseSens As Integer
Dim iNatural As Integer
Dim iReversed As Integer
For I = 0 To 20
List(I) = "mytestfile" & Int(199*rnd) & ".sxw"
Next I
msgbox "not sorted"
msgbox Join( list(), chr(10))
msgbox "alphabetically sorted"
ExtendedSortStringList(list())
msgbox Join( list(), chr(10))
msgbox """natural"" sorted (slow)"
iCaseSens = 0
iNatural = 1
iReversed = 0
ExtendedSortStringList(list(), iCaseSens, iNatural, iReversed)
msgbox Join( list(), chr(10))
End Sub
Sub ExtendedSortStringList(Data(), _
Optional iCaseSensitive As Integer, _
Optional iNaturalSort As Integer, _
Optional iReversed As Integer)
Dim iCaseSens As Integer
Dim iNatural As Integer
Dim iRev As Integer
Dim iMin As Long
Dim iMax As Long
If Not IsMissing(iCaseSensitive) Then
iCaseSens = iCaseSensitive
End If
If Not IsMissing(iNaturalSort) Then
iNatural = iNaturalSort
End If
If Not IsMissing(iReversed) Then
iRev = iReversed
End If
iMin = LBound(Data())
iMax = UBound(Data())
If iNatural = 0 Then
If iRev = 0 Then
SimpleStringSort(Data(), iCaseSens, iMin, iMax)
Else
SimpleStringSortReversed(Data(), iCaseSens, iMin, iMax)
End If
Else
If iRev = 0 Then
NaturalStringSort(Data(), iCaseSens, iMin, iMax)
Else
NaturalStringSortReversed(Data(), iCaseSens, iMin, iMax)
End If
End if
End Sub
Sub SimpleStringSort(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If StrComp(Data(Left), Data(Lower), iCaseSensitive) <= 0 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
SimpleStringSort(Data(), iCaseSensitive, Lower, Left - 1)
SimpleStringSort(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
Sub SimpleStringSortReversed(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If StrComp(Data(Left), Data(Lower), iCaseSensitive) = 1 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
SimpleStringSortReversed(Data(), iCaseSensitive, Lower, Left - 1)
SimpleStringSortReversed(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
Sub NaturalStringSort(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If NaturalStrComp(Data(Left), Data(Lower), iCaseSensitive) <= 0 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
NaturalStringSort(Data(), iCaseSensitive, Lower, Left - 1)
NaturalStringSort(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
Sub NaturalStringSortReversed(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If NaturalStrComp(Data(Left), Data(Lower), iCaseSensitive) = 1 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
NaturalStringSortReversed(Data(), iCaseSensitive, Lower, Left - 1)
NaturalStringSortReversed(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
Function NaturalStrComp(sText1 As String, sText2 As String, iCaseSensitive As Integer) As Integer
Dim sLocText1 As String
Dim sLocText2 As String
Dim mNumList1()
Dim mNumList2()
Dim iNum1 As Integer
Dim iNum2 As Integer
Dim I As Integer
Dim iResult As Integer
sLocText1 = RegExpStrReplace(sText1, "[0-9]+", "#")
sLocText2 = RegExpStrReplace(sText2, "[0-9]+", "#")
If StrComp(sLocText1, sLocText2, iCaseSensitive) = 0 Then
sLocText1 = RegExpStrReplace(sText1, "[^0-9]+",Chr(0))
sLocText2 = RegExpStrReplace(sText2, "[^0-9]+",Chr(0))
mNumList1() = Split(sLocText1,Chr(0))
mNumList2() = Split(sLocText2,Chr(0))
For I = LBound(mNumList1()) To UBound(mNumList1())
iNum1 = mNumList1(I)
iNum2 = mNumList2(I)
Select Case iNum1
Case Is = iNum2
iResult = 0
Case Is > iNum2
iResult = 1
Exit For
Case Is < iNum2
iResult = -1
Exit For
End Select
Next I
Else
iResult = StrComp(sText1, sText2, iCaseSensitive)
End if
NaturalStrComp = iResult
End Function
Function RegExpStrReplace(ByVal sText As String, sSearchRegExp As String, sReplace As String) As String
Static oTextSearch As Object
Static oSearchOpts As Object
Dim oResult As Object
Dim iStartPos As Integer
Dim iTextLen As Integer
Dim iMatchStartPos As Integer
Dim iMatchEndPos As Integer
Dim iMatchLen As Integer
Dim sMatchString As String
Dim sLocText As String
If IsNull(oTextSearch) Then
oTextSearch = CreateUnoService("com.sun.star.util.TextSearch")
oSearchOpts = CreateUnoStruct("com.sun.star.util.SearchOptions")
With oSearchOpts
.searchFlag = com.sun.star.util.SearchFlags.REG_EXTENDED
.algorithmType = com.sun.star.util.SearchAlgorithms.REGEXP
End With
End if
oSearchOpts.searchString = sSearchRegExp
oTextSearch.setOptions(oSearchOpts)
oResult = oTextSearch.searchForward(sText, iStartPos, Len(sText) )
Do While oResult.subRegExpressions > 0
iMatchStartPos = oResult.startOffset(0)
iMatchEndPos = oResult.endOffset(0)
iMatchLen = iMatchEndPos - iMatchStartPos
sLocText = Left(sText, iMatchStartPos)
sLocText = sLocText & sReplace
sLocText = sLocText & Right(sText, Len(sText) - iMatchEndPos)
sText = sLocText
iStartPos = iMatchStartPos + Len(sReplace)
oResult = oTextSearch.searchForward(sText, iStartPos, Len(sText) )
Loop
RegExpStrReplace = sText
End Function
Sub SwapElements(Data(), I As Long, J As Long)
Dim vTemp As Variant
vTemp = Data(I)
Data(I) = Data(J)
Data(J) = vTemp
End Sub
|
Changelog| Date | User | Modification |
|---|
| 2005-06-16 | paolomantovani | Initial version |
|