OO-Snippets: Simple natural sort algorithm

Commons

Keywordscom.sun.star.util.TextSearch, com.sun.star.util.SearchOptions, com.sun.star.util.SearchResult, Quicksort, Regular Expression, RegExp
LanguageOOBasic
ApplicationOffice
AuthorsPaolo Mantovani (initial)
Supported Versions1.1.x  2.0.x  
Supported OSAll  
QuestionSorting strings which contains numbers

I need to sort a list of file names that are the output of a mail merge.

Each filename has the same prefix and a serial ending number.

I tried to sort filenames with a quicksort algorithm, but the output appear like this:

  • myfileprefix0.sxw
  • myfileprefix1.sxw
  • myfileprefix10.sxw
  • myfileprefix11.sxw
  • ...

  • myfileprefix2.sxw
  • myfileprefix20.sxw
  • myfileprefix21.sxw
  • ...

    ...

    and so on...

    The result is formally correct, because files are sorted alphabetically, but an human wouldn't never sorted the list in that way.

    Answer

    This is a common problem and was already solved in many languages and contexts.

    What you need is called "natural" sort algorithm.

    see for example

    http://www.naturalordersort.org/

    or search with google for "natural sort algorithm"

    The following code contains a very simple implementation of this algorithm.

    The main work is performed by the function NaturalStrComp()

    This function calls RegExpStrReplace() to make its work.

    This one is based on the service css.util.TextSearch

    Performances are slow.

    StarBasic language is not fast as compiled ones and further it does not offer advanced functions for string crunching natively.

    Anyway, maybe some little optimizations are stll possible.

    To use this code:

    copy it into an empty module and run the Sub Test() on top of the module to see an example.

    Code-Snippet-Listing (snippet-source)

    REM  *****  BASIC  *****
    '_______________________________________________________________________________________________
    '	
    '	String sorting functions
    '_______________________________________________________________________________________________
    '
    '
    ' Declarations:
    Option Explicit
    
    
    '_______________________________________________________________________________________________
    Sub Test
    Dim List(20)
    Dim I As Integer
    Dim iCaseSens As Integer
    Dim iNatural As Integer
    Dim iReversed As Integer
    	
    	'generate an unsorted list of file names
    	For I = 0 To 20
    		List(I) = "mytestfile" & Int(199*rnd) & ".sxw"
    	Next I
    	
    
    	msgbox "not sorted"
    	msgbox Join( list(), chr(10))
    	
    	msgbox "alphabetically sorted"
    	ExtendedSortStringList(list())
    	msgbox Join( list(), chr(10))
    	
    	msgbox """natural"" sorted (slow)"
    	'set up some flags
    	iCaseSens = 0
    	iNatural = 1
    	iReversed = 0
    	ExtendedSortStringList(list(), iCaseSens, iNatural, iReversed)
    	msgbox Join( list(), chr(10))
    
    End Sub
    
    
    '_______________________________________________________________________________________________
    Sub ExtendedSortStringList(Data(), _
    							Optional iCaseSensitive As Integer, _
    							Optional iNaturalSort As Integer, _
    							Optional iReversed As Integer)
    
    Dim iCaseSens As Integer
    Dim iNatural As Integer
    Dim iRev As Integer
    Dim iMin As Long
    Dim iMax As Long
    
    	If Not IsMissing(iCaseSensitive) Then
    		iCaseSens = iCaseSensitive
    	End If
    
    	If Not IsMissing(iNaturalSort) Then
    		iNatural = iNaturalSort
    	End If
    	
    	If Not IsMissing(iReversed) Then
    		iRev = iReversed
    	End If
    	
    	iMin = LBound(Data())
    	iMax = UBound(Data())
    	
    	If iNatural = 0 Then
    		If iRev = 0 Then
    			SimpleStringSort(Data(), iCaseSens, iMin, iMax)
    		Else
    			SimpleStringSortReversed(Data(), iCaseSens, iMin, iMax)	
    		End If
    	Else
    		If iRev = 0 Then
    			NaturalStringSort(Data(), iCaseSens, iMin, iMax)
    		Else
    			NaturalStringSortReversed(Data(), iCaseSens, iMin, iMax)
    		End If
    	End if
    	
    End Sub
    
    
    '_______________________________________________________________________________________________
    Sub SimpleStringSort(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
    Dim Right As Long
    Dim Left  As Long
    
    	If Lower < Upper Then
    		Left = Lower + 1
    		Right = Upper + 1
    		
    		Do While Left < Right
    			If StrComp(Data(Left), Data(Lower), iCaseSensitive) <= 0 Then
    				Left = Left + 1
    			Else
    				Right = Right - 1
    				SwapElements(Data(), Left, Right)
    			End If
    		Loop
    		
    		Left = Left - 1
    		SwapElements(Data(), Lower, Left)
    		SimpleStringSort(Data(), iCaseSensitive, Lower, Left - 1)
    		SimpleStringSort(Data(), iCaseSensitive, Right, Upper)
    	End If
    End Sub
    
    
    '_______________________________________________________________________________________________
    Sub SimpleStringSortReversed(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
    Dim Right As Long
    Dim Left  As Long
    
    	If Lower < Upper Then
    		Left = Lower + 1
    		Right = Upper + 1
    		
    		Do While Left < Right
    			If StrComp(Data(Left), Data(Lower), iCaseSensitive) = 1 Then
    				Left = Left + 1
    			Else
    				Right = Right - 1
    				SwapElements(Data(), Left, Right)
    			End If
    		Loop
    		
    		Left = Left - 1
    		SwapElements(Data(), Lower, Left)
    		SimpleStringSortReversed(Data(), iCaseSensitive, Lower, Left - 1)
    		SimpleStringSortReversed(Data(), iCaseSensitive, Right, Upper)
    	End If
    End Sub
    
    
    '_______________________________________________________________________________________________
    Sub NaturalStringSort(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
    Dim Right As Long
    Dim Left  As Long
    
    	If Lower < Upper Then
    		Left = Lower + 1
    		Right = Upper + 1
    		
    		Do While Left < Right
    			If NaturalStrComp(Data(Left), Data(Lower), iCaseSensitive) <= 0 Then
    				Left = Left + 1
    			Else
    				Right = Right - 1
    				SwapElements(Data(), Left, Right)
    			End If
    		Loop
    		
    		Left = Left - 1
    		SwapElements(Data(), Lower, Left)
    		NaturalStringSort(Data(), iCaseSensitive, Lower, Left - 1)
    		NaturalStringSort(Data(), iCaseSensitive, Right, Upper)
    	End If
    End Sub
    
    
    '_______________________________________________________________________________________________
    Sub NaturalStringSortReversed(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
    Dim Right As Long
    Dim Left  As Long
    
    	If Lower < Upper Then
    		Left = Lower + 1
    		Right = Upper + 1
    		
    		Do While Left < Right
    			If NaturalStrComp(Data(Left), Data(Lower), iCaseSensitive) = 1 Then
    				Left = Left + 1
    			Else
    				Right = Right - 1
    				SwapElements(Data(), Left, Right)
    			End If
    		Loop
    		
    		Left = Left - 1
    		SwapElements(Data(), Lower, Left)
    		NaturalStringSortReversed(Data(), iCaseSensitive, Lower, Left - 1)
    		NaturalStringSortReversed(Data(), iCaseSensitive, Right, Upper)
    	End If
    End Sub
    
    
    '_______________________________________________________________________________________________
    Function NaturalStrComp(sText1 As String, sText2 As String, iCaseSensitive As Integer) As Integer
    						
    Dim sLocText1 As String
    Dim sLocText2 As String
    Dim mNumList1()
    Dim mNumList2()
    Dim iNum1 As Integer
    Dim iNum2 As Integer
    Dim I As Integer
    Dim iResult As Integer
    	
    	'replace numbers with a placeholder
    	sLocText1 = RegExpStrReplace(sText1, "[0-9]+", "#")
    	sLocText2 = RegExpStrReplace(sText2, "[0-9]+", "#")	
    	
    	'if the two strings are equal we will evaluate the numbers
    	If StrComp(sLocText1, sLocText2, iCaseSensitive) = 0 Then
    		'estract numbers from strings
    		sLocText1 = RegExpStrReplace(sText1, "[^0-9]+",Chr(0))
    		sLocText2 = RegExpStrReplace(sText2, "[^0-9]+",Chr(0))
    		
    		mNumList1() = Split(sLocText1,Chr(0))
    		mNumList2() = Split(sLocText2,Chr(0))
    		
    		'note that the two lists have the same number of elements
    		For I = LBound(mNumList1()) To UBound(mNumList1())
    			iNum1 = mNumList1(I)
    			iNum2 = mNumList2(I)
    			Select Case iNum1
    				Case Is = iNum2
    					iResult = 0
    					
    				Case Is > iNum2
    					iResult = 1
    					Exit For
    					
    				Case Is < iNum2
    					iResult = -1
    					Exit For
    					
    			End Select
    		Next I
    		
    	Else
    		'evaluate strings
    		iResult = StrComp(sText1, sText2, iCaseSensitive)
    	End if
    	
    	NaturalStrComp = iResult
    	
    End Function
    
    
    '_______________________________________________________________________________________________
    Function RegExpStrReplace(ByVal sText As String, sSearchRegExp As String, sReplace As String) As String
    'Notice:
    'in general, this function should allow to set the flag "CaseSensitive". 
    'Since in this implementation this flag would never be  used, in order to preserve
    'performances the flag and relative code has been removed
    
    Static oTextSearch As Object
    Static oSearchOpts As Object
    
    Dim oResult As Object
    
    Dim iStartPos As Integer
    Dim iTextLen As Integer
    Dim iMatchStartPos As Integer
    Dim iMatchEndPos As Integer
    Dim iMatchLen As Integer
    
    Dim sMatchString As String
    Dim sLocText As String
    	
    	'initialize the service only if needed
    	If IsNull(oTextSearch) Then
    		oTextSearch = CreateUnoService("com.sun.star.util.TextSearch")
    		oSearchOpts = CreateUnoStruct("com.sun.star.util.SearchOptions")
    		With oSearchOpts
    			.searchFlag = com.sun.star.util.SearchFlags.REG_EXTENDED
    			.algorithmType = com.sun.star.util.SearchAlgorithms.REGEXP
    		End With
    	End if
    	
    	oSearchOpts.searchString = sSearchRegExp
    	oTextSearch.setOptions(oSearchOpts)
    	
    	'do a first search
    	oResult = oTextSearch.searchForward(sText, iStartPos, Len(sText) )
    
    	'iterate the S&R for all occourrences found	
    	Do While oResult.subRegExpressions > 0
    		
    		iMatchStartPos = oResult.startOffset(0)
    		iMatchEndPos = oResult.endOffset(0)
    		iMatchLen = iMatchEndPos - iMatchStartPos
    		
    		'replace the match in the copy of the original text
    		sLocText = Left(sText, iMatchStartPos)
    		sLocText = sLocText & sReplace
    		sLocText = sLocText & Right(sText, Len(sText) - iMatchEndPos)
    		sText = sLocText
    		
    		'next search will start after the current replaced match
    		iStartPos = iMatchStartPos + Len(sReplace)
    		
    		'do the next search
    		oResult = oTextSearch.searchForward(sText, iStartPos, Len(sText) )
    	Loop
    		
    	RegExpStrReplace = sText
    
    End Function
    
    
    '_______________________________________________________________________________________________
    Sub SwapElements(Data(), I As Long, J As Long)
    Dim vTemp As Variant
    
    	vTemp = Data(I)
    	Data(I) = Data(J)
    	Data(J) = vTemp
    	
    End Sub
    

    Changelog

    DateUserModification
    2005-06-16paolomantovaniInitial version

    and