mirror of
				https://gitlab.com/freepascal.org/lazarus/lazarus.git
				synced 2025-11-04 14:49:34 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			439 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			439 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
{
 | 
						|
 *****************************************************************************
 | 
						|
 *                                                                           *
 | 
						|
 *  This file is part of the LazUtils package                                *
 | 
						|
 *                                                                           *
 | 
						|
 *  See the file COPYING.modifiedLGPL.txt, included in this distribution,    *
 | 
						|
 *  for details about the copyright.                                         *
 | 
						|
 *                                                                           *
 | 
						|
 *  This program is distributed in the hope that it will be useful,          *
 | 
						|
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of           *
 | 
						|
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                     *
 | 
						|
 *                                                                           *
 | 
						|
 *****************************************************************************
 | 
						|
}
 | 
						|
unit paswstring;
 | 
						|
 | 
						|
{$mode objfpc}
 | 
						|
{$inline on}
 | 
						|
//{$define PASWSTRING_VERBOSE}
 | 
						|
//{.$define PASWSTRING_SUPPORT_NONUTF8_ANSISTRING} disabled by default because
 | 
						|
// non utf-8 ansistring is rare in UNIXes and lconvencoding makes the executable big
 | 
						|
 | 
						|
interface
 | 
						|
 | 
						|
uses
 | 
						|
  SysUtils, lazutf8
 | 
						|
  {$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}, lconvencoding{$endif}
 | 
						|
  ;
 | 
						|
 | 
						|
{$IFNDEF VER2_7}
 | 
						|
procedure SetPasWidestringManager;
 | 
						|
{$ENDIF}
 | 
						|
 | 
						|
implementation
 | 
						|
 | 
						|
{$IFNDEF VER2_7}
 | 
						|
procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
 | 
						|
 | 
						|
// len comes in widechars, not bytes
 | 
						|
procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
 | 
						|
var
 | 
						|
  widestr: widestring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('Wide2AnsiMove START');{$endif}
 | 
						|
  // Copy the originating string taking into account the specified length
 | 
						|
  SetLength(widestr, len);
 | 
						|
  System.Move(source^, widestr[1], len * SizeOf(WideChar));
 | 
						|
 | 
						|
  // Now convert it, using UTF-16 -> UTF-8
 | 
						|
  dest := UTF16ToUTF8(widestr);
 | 
						|
  {$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
 | 
						|
  // And correct to the real Ansi encoding
 | 
						|
  dest := ConvertEncoding(dest, EncodingUTF8, GetDefaultTextEncoding());
 | 
						|
  {$endif}
 | 
						|
end;
 | 
						|
 | 
						|
procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
 | 
						|
var
 | 
						|
  ansistr: ansistring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('Ansi2WideMove START');{$endif}
 | 
						|
  // Copy the originating string taking into account the specified length
 | 
						|
  SetLength(ansistr, len);
 | 
						|
  System.Move(source^, ansistr[1], len);
 | 
						|
 | 
						|
  {$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
 | 
						|
  // Convert to UTF-8
 | 
						|
  ansistr := ConvertEncoding(ansistr, GetDefaultTextEncoding(), EncodingUTF8);
 | 
						|
  {$endif}
 | 
						|
  // Now convert it, using UTF-8 -> UTF-16
 | 
						|
  dest := UTF8ToUTF16(ansistr);
 | 
						|
end;
 | 
						|
 | 
						|
function LowerWideString(const s : WideString) : WideString;
 | 
						|
var
 | 
						|
  str: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('LowerWideString START');{$endif}
 | 
						|
  str := UTF16ToUTF8(s);
 | 
						|
  str := UTF8LowerCase(str);
 | 
						|
  Result := UTF8ToUTF16(str);
 | 
						|
end;
 | 
						|
 | 
						|
function UpperWideString(const s : WideString) : WideString;
 | 
						|
var
 | 
						|
  str: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('UpperWideString START');{$endif}
 | 
						|
  str := UTF16ToUTF8(s);
 | 
						|
  str := UTF8UpperCase(str);
 | 
						|
  Result := UTF8ToUTF16(str);
 | 
						|
end;
 | 
						|
 | 
						|
procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('EnsureAnsiLen START');{$endif}
 | 
						|
  if (len>length(s)) then
 | 
						|
    if (length(s) < 10*256) then
 | 
						|
      setlength(s,length(s)+10)
 | 
						|
    else
 | 
						|
      setlength(s,length(s)+length(s) shr 8);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('ConcatCharToAnsiStr START');{$endif}
 | 
						|
  EnsureAnsiLen(s,index);
 | 
						|
  pchar(@s[index])^:=c;
 | 
						|
  inc(index);
 | 
						|
end;
 | 
						|
 | 
						|
function LowerAnsiString(const s : AnsiString) : AnsiString;
 | 
						|
var
 | 
						|
  Str: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('LowerAnsiString START');{$endif}
 | 
						|
  Str := SysToUTF8(s);
 | 
						|
  Str := UTF8LowerCase(Str);
 | 
						|
  Result := UTF8ToSys(Str);
 | 
						|
end;
 | 
						|
 | 
						|
function UpperAnsiString(const s : AnsiString) : AnsiString;
 | 
						|
var
 | 
						|
  Str: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('UpperAnsiString START');{$endif}
 | 
						|
  Str := SysToUTF8(s);
 | 
						|
  Str := UTF8UpperCase(Str);
 | 
						|
  Result := UTF8ToSys(Str);
 | 
						|
end;
 | 
						|
 | 
						|
// Just do a simple byte comparison
 | 
						|
// A more complex analysis would require normalization
 | 
						|
function WideCompareStr(const s1, s2 : WideString) : PtrInt;
 | 
						|
var
 | 
						|
  count, count1, count2: integer;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('WideCompareStr START');{$endif}
 | 
						|
  result := 0;
 | 
						|
  Count1 := Length(S1);
 | 
						|
  Count2 := Length(S2);
 | 
						|
  if Count1>Count2 then
 | 
						|
    Count:=Count2
 | 
						|
  else
 | 
						|
    Count:=Count1;
 | 
						|
  result := SysUtils.CompareMemRange(Pointer(S1),Pointer(S2), Count*2);
 | 
						|
  if result=0 then
 | 
						|
    result:=Count1-Count2;
 | 
						|
end;
 | 
						|
 | 
						|
function WideCompareText(const s1, s2 : WideString): PtrInt;
 | 
						|
var
 | 
						|
  a, b: WideString;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('WideCompareText START');{$endif}
 | 
						|
  a:=LowerWidestring(s1);
 | 
						|
  b:=LowerWidestring(s2);
 | 
						|
  result := WideCompareStr(a,b);
 | 
						|
end;
 | 
						|
 | 
						|
function CharLengthPChar(const Str: PChar): PtrInt;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('CharLengthPChar START');{$endif}
 | 
						|
  Result := UTF8CharacterLength(Str);
 | 
						|
end;
 | 
						|
 | 
						|
function AnsiCompareStr(const s1, s2: ansistring): PtrInt;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiCompareStr START');{$endif}
 | 
						|
  Result := SysUtils.CompareStr(s1, s2);
 | 
						|
end;
 | 
						|
 | 
						|
// Similar to AnsiCompareStr, but with PChar
 | 
						|
function StrCompAnsi(s1,s2 : PChar): PtrInt;
 | 
						|
var
 | 
						|
  ansi1, ansi2: ansistring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('StrCompAnsi START');{$endif}
 | 
						|
  ansi1 := StrPas(S1);
 | 
						|
  ansi2 := StrPas(S2);
 | 
						|
  Result := SysUtils.CompareStr(ansi1, ansi2);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
function AnsiCompareText(const S1, S2: ansistring): PtrInt;
 | 
						|
var
 | 
						|
  str1, str2: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiCompareText START');{$endif}
 | 
						|
  str1 := SysToUTF8(S1);
 | 
						|
  str2 := SysToUTF8(S2);
 | 
						|
  Result := UTF8CompareText(str1, str2);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
function AnsiStrIComp(S1, S2: PChar): PtrInt;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrIComp START');{$endif}
 | 
						|
  Result := AnsiCompareText(StrPas(s1),StrPas(s2));
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
 | 
						|
  var
 | 
						|
    a, b: pchar;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrLComp START');{$endif}
 | 
						|
  Result := 0;
 | 
						|
  if (maxlen=0) then
 | 
						|
    exit(0);
 | 
						|
  if (s1[maxlen]<>#0) then
 | 
						|
    begin
 | 
						|
      getmem(a,maxlen+1);
 | 
						|
      move(s1^,a^,maxlen);
 | 
						|
      a[maxlen]:=#0;
 | 
						|
    end
 | 
						|
  else
 | 
						|
    a:=s1;
 | 
						|
  if (s2[maxlen]<>#0) then
 | 
						|
    begin
 | 
						|
      getmem(b,maxlen+1);
 | 
						|
      move(s2^,b^,maxlen);
 | 
						|
      b[maxlen]:=#0;
 | 
						|
    end
 | 
						|
  else
 | 
						|
    b:=s2;
 | 
						|
  result:=StrCompAnsi(a,b);
 | 
						|
  if (a<>s1) then
 | 
						|
    freemem(a);
 | 
						|
  if (b<>s2) then
 | 
						|
    freemem(b);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
 | 
						|
  var
 | 
						|
    a, b: ansistring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrLIComp START');{$endif}
 | 
						|
  if (maxlen=0) then
 | 
						|
    exit(0);
 | 
						|
  setlength(a,maxlen);
 | 
						|
  move(s1^,a[1],maxlen);
 | 
						|
  setlength(b,maxlen);
 | 
						|
  move(s2^,b[1],maxlen);
 | 
						|
  result:=AnsiCompareText(a,b);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
 | 
						|
var
 | 
						|
  newlen: sizeint;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('ansi2pchar START');{$endif}
 | 
						|
  newlen:=length(s);
 | 
						|
  if newlen>strlen(orgp) then
 | 
						|
    fpc_rangeerror;
 | 
						|
  p:=orgp;
 | 
						|
  if (newlen>0) then
 | 
						|
    move(s[1],p[0],newlen);
 | 
						|
  p[newlen]:=#0;
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
function AnsiStrLower(Str: PChar): PChar;
 | 
						|
var
 | 
						|
  temp: ansistring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrLower START');{$endif}
 | 
						|
  temp:=loweransistring(str);
 | 
						|
  ansi2pchar(temp,str,result);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
function AnsiStrUpper(Str: PChar): PChar;
 | 
						|
var
 | 
						|
  temp: ansistring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('AnsiStrUpper START');{$endif}
 | 
						|
  temp:=upperansistring(str);
 | 
						|
  ansi2pchar(temp,str,result);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
procedure InitThread;
 | 
						|
begin
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
procedure FiniThread;
 | 
						|
begin
 | 
						|
end;
 | 
						|
 | 
						|
{ Unicode }
 | 
						|
 | 
						|
procedure Unicode2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
 | 
						|
var
 | 
						|
  widestr: unicodestring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('Unicode2AnsiMove START');{$endif}
 | 
						|
  // Copy the originating string taking into account the specified length
 | 
						|
  SetLength(widestr, len);
 | 
						|
  System.Move(source^, widestr[1], len*2);
 | 
						|
 | 
						|
  // Now convert it, using UTF-16 -> UTF-8
 | 
						|
  dest := UTF16ToUTF8(widestr);
 | 
						|
  {$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
 | 
						|
  // And correct to the real Ansi encoding
 | 
						|
  dest := ConvertEncoding(dest, EncodingUTF8, GetDefaultTextEncoding());
 | 
						|
  {$endif}
 | 
						|
end;
 | 
						|
 | 
						|
procedure Ansi2UnicodeMove(source:pchar;var dest:UnicodeString;len:SizeInt);
 | 
						|
var
 | 
						|
  ansistr: ansistring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('Ansi2UnicodeMove START');{$endif}
 | 
						|
  // Copy the originating string taking into account the specified length
 | 
						|
  SetLength(ansistr, len);
 | 
						|
  System.Move(source^, ansistr[1], len);
 | 
						|
 | 
						|
  {$ifdef PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
 | 
						|
  // Convert to UTF-8
 | 
						|
  ansistr := ConvertEncoding(ansistr, GetDefaultTextEncoding(), EncodingUTF8);
 | 
						|
  {$endif}
 | 
						|
  // Now convert it, using UTF-8 -> UTF-16
 | 
						|
  dest := UTF8ToUTF16(ansistr);
 | 
						|
end;
 | 
						|
 | 
						|
function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
 | 
						|
var
 | 
						|
  str: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('UpperUnicodeString START');{$endif}
 | 
						|
  str := UTF16ToUTF8(s);
 | 
						|
  str := UTF8UpperCase(str);
 | 
						|
  Result := UTF8ToUTF16(str);
 | 
						|
end;
 | 
						|
 | 
						|
function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
 | 
						|
var
 | 
						|
  str: utf8string;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('LowerUnicodeString START');{$endif}
 | 
						|
  str := UTF16ToUTF8(s);
 | 
						|
  str := UTF8LowerCase(str);
 | 
						|
  Result := UTF8ToUTF16(str);
 | 
						|
end;
 | 
						|
 | 
						|
// Just do a simple byte comparison
 | 
						|
// A more complex analysis would require normalization
 | 
						|
function PasUnicodeCompareStr(const s1, s2 : unicodestring) : PtrInt;
 | 
						|
var
 | 
						|
  count, count1, count2: integer;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('PasUnicodeCompareStr START');{$endif}
 | 
						|
  result := 0;
 | 
						|
  Count1 := Length(S1);
 | 
						|
  Count2 := Length(S2);
 | 
						|
  if Count1>Count2 then
 | 
						|
    Count:=Count2
 | 
						|
  else
 | 
						|
    Count:=Count1;
 | 
						|
  result := SysUtils.CompareMemRange(Pointer(S1),Pointer(S2), Count*2);
 | 
						|
  if result=0 then
 | 
						|
    result:=Count1-Count2;
 | 
						|
end;
 | 
						|
 | 
						|
function PasUnicodeCompareText(const s1, s2 : unicodestring): PtrInt;
 | 
						|
var
 | 
						|
  a, b: unicodestring;
 | 
						|
begin
 | 
						|
  {$ifdef PASWSTRING_VERBOSE}WriteLn('PasUnicodeCompareText START');{$endif}
 | 
						|
  a:=LowerWidestring(s1);
 | 
						|
  b:=LowerWidestring(s2);
 | 
						|
  result := WideCompareStr(a,b);
 | 
						|
end;
 | 
						|
 | 
						|
Procedure SetPasWideStringManager;
 | 
						|
Var
 | 
						|
  PasWideStringManager : TUnicodeStringManager;
 | 
						|
begin
 | 
						|
  PasWideStringManager:=widestringmanager;
 | 
						|
  PasWideStringManager.Wide2AnsiMoveProc:=@Wide2AnsiMove;
 | 
						|
  PasWideStringManager.Ansi2WideMoveProc:=@Ansi2WideMove;
 | 
						|
 | 
						|
  //    UpperUTF8 : procedure(p:PUTF8String);
 | 
						|
  PasWideStringManager.UpperWideStringProc:=@UpperWideString;
 | 
						|
  //    UpperUCS4 : procedure(p:PUCS4Char);
 | 
						|
  //    LowerUTF8 : procedure(p:PUTF8String);
 | 
						|
  PasWideStringManager.LowerWideStringProc:=@LowerWideString;
 | 
						|
  //    LowerUCS4 : procedure(p:PUCS4Char);
 | 
						|
 | 
						|
  {
 | 
						|
    CompUTF8 : function(p1,p2:PUTF8String) : shortint;
 | 
						|
    CompUCS2 : function(p1,p2:PUCS2Char) : shortint;
 | 
						|
    CompUCS4 : function(p1,p2:PUC42Char) : shortint;
 | 
						|
  }
 | 
						|
  PasWideStringManager.CompareWideStringProc:=@WideCompareStr;
 | 
						|
  PasWideStringManager.CompareTextWideStringProc:=@WideCompareText;
 | 
						|
 | 
						|
  { return value: number of code points in the string. Whenever an invalid
 | 
						|
    code point is encountered, all characters part of this invalid code point
 | 
						|
    are considered to form one "character" and the next character is
 | 
						|
    considered to be the start of a new (possibly also invalid) code point }
 | 
						|
  PasWideStringManager.CharLengthPCharProc:=@CharLengthPChar;
 | 
						|
 | 
						|
  { Ansi }
 | 
						|
  PasWideStringManager.UpperAnsiStringProc:=@UpperAnsiString;
 | 
						|
  PasWideStringManager.LowerAnsiStringProc:=@LowerAnsiString;
 | 
						|
  PasWideStringManager.CompareStrAnsiStringProc:=@AnsiCompareStr;
 | 
						|
  PasWideStringManager.CompareTextAnsiStringProc:=@AnsiCompareText;
 | 
						|
  PasWideStringManager.StrCompAnsiStringProc:=@StrCompAnsi;
 | 
						|
  PasWideStringManager.StrICompAnsiStringProc:=@AnsiStrIComp;
 | 
						|
  PasWideStringManager.StrLCompAnsiStringProc:=@AnsiStrLComp;
 | 
						|
  PasWideStringManager.StrLICompAnsiStringProc:=@AnsiStrLIComp;
 | 
						|
  PasWideStringManager.StrLowerAnsiStringProc:=@AnsiStrLower;
 | 
						|
  PasWideStringManager.StrUpperAnsiStringProc:=@AnsiStrUpper;
 | 
						|
  PasWideStringManager.ThreadInitProc:=@InitThread;
 | 
						|
  PasWideStringManager.ThreadFiniProc:=@FiniThread;
 | 
						|
 | 
						|
  { Unicode }
 | 
						|
  PasWideStringManager.Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
 | 
						|
  PasWideStringManager.Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
 | 
						|
  PasWideStringManager.UpperUnicodeStringProc:=@UpperUnicodeString;
 | 
						|
  PasWideStringManager.LowerUnicodeStringProc:=@LowerUnicodeString;
 | 
						|
  PasWideStringManager.CompareUnicodeStringProc:=@PasUnicodeCompareStr;
 | 
						|
  PasWideStringManager.CompareTextUnicodeStringProc:=@PasUnicodeCompareText;
 | 
						|
 | 
						|
  SetUnicodeStringManager(PasWideStringManager);
 | 
						|
end;
 | 
						|
 | 
						|
 | 
						|
initialization
 | 
						|
  SetPasWideStringManager;
 | 
						|
{$ENDIF}
 | 
						|
end.
 |