mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-12-17 14:20:40 +01:00
* Fix bug ID #37315: add some delphi compatibility functions
git-svn-id: trunk@47393 -
This commit is contained in:
parent
4a38d1b34e
commit
a8db09b37e
@ -1,4 +1,18 @@
|
|||||||
unit WideStrUtils;
|
{
|
||||||
|
Delphi/Kylix compatibility unit: String handling routines.
|
||||||
|
|
||||||
|
This file is part of the Free Pascal run time library.
|
||||||
|
Copyright (c) 1999-2005 by the Free Pascal development team
|
||||||
|
|
||||||
|
See the file COPYING.FPC, included in this distribution,
|
||||||
|
for details about the copyright.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
**********************************************************************}
|
||||||
|
unit widestrutils;
|
||||||
|
|
||||||
{$mode objfpc}
|
{$mode objfpc}
|
||||||
{$H+}
|
{$H+}
|
||||||
@ -7,7 +21,7 @@ unit WideStrUtils;
|
|||||||
interface
|
interface
|
||||||
|
|
||||||
uses
|
uses
|
||||||
SysUtils;
|
SysUtils, Classes;
|
||||||
|
|
||||||
function WideStringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString;
|
function WideStringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString;
|
||||||
function WideReplaceStr(const AText, AFromText, AToText: WideString): WideString; inline;
|
function WideReplaceStr(const AText, AFromText, AToText: WideString): WideString; inline;
|
||||||
@ -17,9 +31,81 @@ function UnicodeStringReplace(const S, OldPattern, NewPattern: UnicodeString; Fl
|
|||||||
function UnicodeReplaceStr(const AText, AFromText, AToText: UnicodeString): UnicodeString; inline;
|
function UnicodeReplaceStr(const AText, AFromText, AToText: UnicodeString): UnicodeString; inline;
|
||||||
function UnicodeReplaceText(const AText, AFromText, AToText: UnicodeString): UnicodeString; inline;
|
function UnicodeReplaceText(const AText, AFromText, AToText: UnicodeString): UnicodeString; inline;
|
||||||
|
|
||||||
|
type
|
||||||
|
TEncodeType = (etUSASCII, etUTF8, etANSI);
|
||||||
|
|
||||||
|
const
|
||||||
|
sUTF8BOMString: array[1..3] of char = (#$EF, #$BB, #$BF);
|
||||||
|
|
||||||
|
function HasUTF8BOM(S: TStream): boolean; overload;
|
||||||
|
function HasUTF8BOM(const S: RawByteString): boolean; overload;
|
||||||
|
function HasExtendCharacter(const S: RawByteString): boolean;
|
||||||
|
function DetectUTF8Encoding(const S: RawByteString): TEncodeType;
|
||||||
|
function IsUTF8String(const S: RawByteString): boolean;
|
||||||
|
|
||||||
|
//PartialAllowed must be set to true if the buffer is smaller than the file.
|
||||||
|
function IsBufferUtf8(buf:PAnsiChar;PartialAllowed:boolean):boolean;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
|
{
|
||||||
|
The IsBufferUtf8 function code was created by Christian Ghisler (ghisler.com)
|
||||||
|
Christian gave code to open-source at Total Commander public forum
|
||||||
|
}
|
||||||
|
|
||||||
|
const bytesFromUTF8:array[AnsiChar] of byte = (
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 32
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 64
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 96
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //128
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //160
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //192
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //224
|
||||||
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5); //256
|
||||||
|
|
||||||
|
|
||||||
|
function IsFirstUTF8Char(thechar:AnsiChar):boolean; inline;
|
||||||
|
{The remaining bytes in a multi-byte sequence have 10 as their two most significant bits.}
|
||||||
|
begin
|
||||||
|
result:=(byte(thechar) and (128+64))<>128;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function IsSecondaryUTF8Char(thechar:AnsiChar):boolean; inline;
|
||||||
|
{The remaining bytes in a multi-byte sequence have 10 as their two most significant bits.}
|
||||||
|
begin
|
||||||
|
result:=(byte(thechar) and (128+64))=128;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function IsBufferUtf8(buf:PAnsiChar;PartialAllowed:boolean):boolean;
|
||||||
|
{Buffer contains only valid UTF-8 characters, no secondary alone,
|
||||||
|
no primary without the correct nr of secondary}
|
||||||
|
var p:PAnsiChar;
|
||||||
|
utf8bytes:integer;
|
||||||
|
hadutf8bytes:boolean;
|
||||||
|
begin
|
||||||
|
p:=buf;
|
||||||
|
hadutf8bytes:=false;
|
||||||
|
result:=false;
|
||||||
|
utf8bytes:=0;
|
||||||
|
while p^<>#0 do
|
||||||
|
begin
|
||||||
|
if utf8bytes>0 then
|
||||||
|
begin {Expecting secondary AnsiChar}
|
||||||
|
hadutf8bytes:=true;
|
||||||
|
if not IsSecondaryUTF8Char(p^) then exit; {Fail!}
|
||||||
|
dec(utf8bytes);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
if IsFirstUTF8Char(p^) then
|
||||||
|
utf8bytes:=bytesFromUTF8[p^]
|
||||||
|
else
|
||||||
|
//if IsSecondaryUTF8Char(p^) then //Alexey: redundant check
|
||||||
|
exit; {Fail!}
|
||||||
|
inc(p);
|
||||||
|
end;
|
||||||
|
result:=hadutf8bytes and (PartialAllowed or (utf8bytes=0));
|
||||||
|
end;
|
||||||
|
|
||||||
function WideReplaceStr(const AText, AFromText, AToText: WideString): WideString; inline;
|
function WideReplaceStr(const AText, AFromText, AToText: WideString): WideString; inline;
|
||||||
begin
|
begin
|
||||||
Result := WideStringReplace(AText, AFromText, AToText, [rfReplaceAll]);
|
Result := WideStringReplace(AText, AFromText, AToText, [rfReplaceAll]);
|
||||||
@ -52,5 +138,74 @@ begin
|
|||||||
Result:= sysutils.UnicodeStringReplace(S,OldPattern,NewPattern,Flags);
|
Result:= sysutils.UnicodeStringReplace(S,OldPattern,NewPattern,Flags);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function HasUTF8BOM(S: TStream): boolean;
|
||||||
|
var
|
||||||
|
OldPos: Int64;
|
||||||
|
Buf: array[1..3] of char;
|
||||||
|
begin
|
||||||
|
Result := false;
|
||||||
|
if S.Size<3 then exit;
|
||||||
|
FillChar(Buf, SizeOf(Buf), 0);
|
||||||
|
try
|
||||||
|
OldPos := S.Position;
|
||||||
|
S.Position := 0;
|
||||||
|
if S.Read(Buf, 3)<>3 then exit;
|
||||||
|
Result :=
|
||||||
|
(Buf[1]=sUTF8BOMString[1]) and
|
||||||
|
(Buf[2]=sUTF8BOMString[2]) and
|
||||||
|
(Buf[3]=sUTF8BOMString[3]);
|
||||||
|
finally
|
||||||
|
S.Position := OldPos;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function HasUTF8BOM(const S: RawByteString): boolean;
|
||||||
|
begin
|
||||||
|
Result := (Length(S)>=3) and
|
||||||
|
(S[1]=sUTF8BOMString[1]) and
|
||||||
|
(S[2]=sUTF8BOMString[2]) and
|
||||||
|
(S[3]=sUTF8BOMString[3]);
|
||||||
|
end;
|
||||||
|
|
||||||
|
function HasExtendCharacter(const S: RawByteString): boolean;
|
||||||
|
var
|
||||||
|
i: integer;
|
||||||
|
begin
|
||||||
|
for i := 1 to Length(S) do
|
||||||
|
if Ord(S[i])>=$80 then
|
||||||
|
begin
|
||||||
|
Result := true;
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
Result := false;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function DetectUTF8Encoding(const S: RawByteString): TEncodeType;
|
||||||
|
var
|
||||||
|
FirstExtChar, i: integer;
|
||||||
|
begin
|
||||||
|
FirstExtChar := 0;
|
||||||
|
for i := 1 to Length(S) do
|
||||||
|
if Ord(S[i])>=$80 then
|
||||||
|
begin
|
||||||
|
FirstExtChar := i;
|
||||||
|
Break;
|
||||||
|
end;
|
||||||
|
|
||||||
|
if FirstExtChar=0 then
|
||||||
|
Result := etUSASCII
|
||||||
|
else
|
||||||
|
if IsBufferUtf8(@S[FirstExtChar], false) then
|
||||||
|
Result := etUTF8
|
||||||
|
else
|
||||||
|
Result := etANSI;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function IsUTF8String(const S: RawByteString): boolean;
|
||||||
|
begin
|
||||||
|
Result := DetectUTF8Encoding(S) = etUTF8;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user