From 10b7ad9d0cf758d2eb4cb9eee8312b2fef70c2a5 Mon Sep 17 00:00:00 2001 From: Rika Ichinose Date: Mon, 17 Jun 2024 10:44:13 +0300 Subject: [PATCH] Faster val(str, enum). --- rtl/inc/sstrings.inc | 123 ++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/rtl/inc/sstrings.inc b/rtl/inc/sstrings.inc index f94212d74f..cabfea144f 100644 --- a/rtl/inc/sstrings.inc +++ b/rtl/inc/sstrings.inc @@ -1624,37 +1624,6 @@ end; {$ifndef FPC_STR_ENUM_INTERN} function fpc_val_enum_shortstr(str2ordindex:pointer;const s:shortstring;out code:valsint):longint; [public, alias:'FPC_VAL_ENUM_SHORTSTR']; compilerproc; - function string_compare(const s1,s2:shortstring):sizeint; - - {We cannot use the > and < operators to compare a string here, because we if the string is - not found in the enum, we need to return the position of error in "code". Code equals the - highest matching character of all string compares, which is only known inside the string - comparison.} - - var i,l:byte; - c1,c2:AnsiChar; - - begin - l:=length(s1); - if length(s1)>length(s2) then - l:=length(s2); - i:=1; - while i<=l do - begin - c1:=s1[i]; - c2:=s2[i]; - if c1<>c2 then - break; - inc(i); - end; - if i>code then - code:=i; - if i<=l then - string_compare:=byte(c1)-byte(c2) - else - string_compare:=length(s1)-length(s2); - end; - type Psorted_array=^Tsorted_array; Tsorted_array={$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif}record o:longint; @@ -1667,43 +1636,79 @@ type Psorted_array=^Tsorted_array; data:array[0..0] of Tsorted_array; end; -var l,h,m:cardinal; - c:sizeint; - sorted_array:^Tsorted_array; - spaces:byte; - t:shortstring; +var l,r,l2,r2,m,sp,isp:SizeInt; + c:char; + cs:Pstring; begin {Val for numbers accepts spaces at the start, so lets do the same for enums. Skip spaces at the start of the string.} - spaces:=1; - code:=1; - while (spaces<=length(s)) and (s[spaces]=' ') do - inc(spaces); - t:=upcase(copy(s,spaces,255)); - sorted_array:=pointer(@Pstring_to_ord(str2ordindex)^.data); - {Use a binary search to get the string.} - l:=1; - h:=Pstring_to_ord(str2ordindex)^.count; + sp:=1; + while (sp<=length(s)) and (s[sp]=' ') do + inc(sp); + + { Let input be “abd” and sorted names be: _hm a aa ab aba abb abc abd ac ad b c + Start: L ┘R (R points PAST the last item in the range.) + After iteration 0 (“a” analyzed): L ┘R + After iteration 1 (“ab” analyzed): L ┘R + After iteration 2 (“abd” analyzed): L ┘R } + l:=0; + r:=Pstring_to_ord(str2ordindex)^.count; + dec(sp); { sp/isp are incremented at the beginning of the loop so that 'continue's advance sp/isp. } + isp:=0; { isp is the position without spaces. } repeat - m:=(l+h) div 2; - c:=string_compare(t,upcase(sorted_array[m-1].s^)); - if c>0 then - l:=m+1 - else if c<0 then - h:=m-1 - else + inc(sp); + if sp>length(s) then break; - if l>h then + inc(isp); + c:=UpCase(s[sp]); + cs:=Psorted_array(Pstring_to_ord(str2ordindex)^.data)[l].s; + { Among all strings beginning with, say, ‘ab’, the ‘ab’ itself will be the first. + So after this check, “isp ≤ length(any string in the range)” is guaranteed. } + if isp>length(cs^) then begin - {Not found...} - inc(code,spaces-1); {Add skipped spaces again.} - {The result of val in case of error is undefined, don't assign a function result.} - exit; + inc(l); + if l=r then + break; end; + if UpCase(cs^[isp])=c then { Shortcut: L may be already correct (enums often have common prefixes). } + begin + if l+1=r then { Shortcut: the only string left (enums often have different suffixes). } + continue; + end + else + begin + r2:=r; { Search for new L. } + repeat + m:=SizeUint(l+r2) div 2; + if UpCase(Psorted_array(Pstring_to_ord(str2ordindex)^.data)[m].s^[isp])