mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-16 10:59:10 +02:00
* Fix bug ID #37315: add some delphi compatibility functions
git-svn-id: trunk@47393 -
This commit is contained in:
parent
4a38d1b34e
commit
a8db09b37e
@ -1,4 +1,18 @@
|
||||
unit WideStrUtils;
|
||||
{
|
||||
Delphi/Kylix compatibility unit: String handling routines.
|
||||
|
||||
This file is part of the Free Pascal run time library.
|
||||
Copyright (c) 1999-2005 by the Free Pascal development team
|
||||
|
||||
See the file COPYING.FPC, included in this distribution,
|
||||
for details about the copyright.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
**********************************************************************}
|
||||
unit widestrutils;
|
||||
|
||||
{$mode objfpc}
|
||||
{$H+}
|
||||
@ -7,7 +21,7 @@ unit WideStrUtils;
|
||||
interface
|
||||
|
||||
uses
|
||||
SysUtils;
|
||||
SysUtils, Classes;
|
||||
|
||||
function WideStringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString;
|
||||
function WideReplaceStr(const AText, AFromText, AToText: WideString): WideString; inline;
|
||||
@ -17,9 +31,81 @@ function UnicodeStringReplace(const S, OldPattern, NewPattern: UnicodeString; Fl
|
||||
function UnicodeReplaceStr(const AText, AFromText, AToText: UnicodeString): UnicodeString; inline;
|
||||
function UnicodeReplaceText(const AText, AFromText, AToText: UnicodeString): UnicodeString; inline;
|
||||
|
||||
type
|
||||
TEncodeType = (etUSASCII, etUTF8, etANSI);
|
||||
|
||||
const
|
||||
sUTF8BOMString: array[1..3] of char = (#$EF, #$BB, #$BF);
|
||||
|
||||
function HasUTF8BOM(S: TStream): boolean; overload;
|
||||
function HasUTF8BOM(const S: RawByteString): boolean; overload;
|
||||
function HasExtendCharacter(const S: RawByteString): boolean;
|
||||
function DetectUTF8Encoding(const S: RawByteString): TEncodeType;
|
||||
function IsUTF8String(const S: RawByteString): boolean;
|
||||
|
||||
//PartialAllowed must be set to true if the buffer is smaller than the file.
|
||||
function IsBufferUtf8(buf:PAnsiChar;PartialAllowed:boolean):boolean;
|
||||
|
||||
implementation
|
||||
|
||||
{
|
||||
The IsBufferUtf8 function code was created by Christian Ghisler (ghisler.com)
|
||||
Christian gave code to open-source at Total Commander public forum
|
||||
}
|
||||
|
||||
const bytesFromUTF8:array[AnsiChar] of byte = (
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 32
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 64
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 96
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //128
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //160
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //192
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //224
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5); //256
|
||||
|
||||
|
||||
function IsFirstUTF8Char(thechar:AnsiChar):boolean; inline;
|
||||
{The remaining bytes in a multi-byte sequence have 10 as their two most significant bits.}
|
||||
begin
|
||||
result:=(byte(thechar) and (128+64))<>128;
|
||||
end;
|
||||
|
||||
function IsSecondaryUTF8Char(thechar:AnsiChar):boolean; inline;
|
||||
{The remaining bytes in a multi-byte sequence have 10 as their two most significant bits.}
|
||||
begin
|
||||
result:=(byte(thechar) and (128+64))=128;
|
||||
end;
|
||||
|
||||
function IsBufferUtf8(buf:PAnsiChar;PartialAllowed:boolean):boolean;
|
||||
{Buffer contains only valid UTF-8 characters, no secondary alone,
|
||||
no primary without the correct nr of secondary}
|
||||
var p:PAnsiChar;
|
||||
utf8bytes:integer;
|
||||
hadutf8bytes:boolean;
|
||||
begin
|
||||
p:=buf;
|
||||
hadutf8bytes:=false;
|
||||
result:=false;
|
||||
utf8bytes:=0;
|
||||
while p^<>#0 do
|
||||
begin
|
||||
if utf8bytes>0 then
|
||||
begin {Expecting secondary AnsiChar}
|
||||
hadutf8bytes:=true;
|
||||
if not IsSecondaryUTF8Char(p^) then exit; {Fail!}
|
||||
dec(utf8bytes);
|
||||
end
|
||||
else
|
||||
if IsFirstUTF8Char(p^) then
|
||||
utf8bytes:=bytesFromUTF8[p^]
|
||||
else
|
||||
//if IsSecondaryUTF8Char(p^) then //Alexey: redundant check
|
||||
exit; {Fail!}
|
||||
inc(p);
|
||||
end;
|
||||
result:=hadutf8bytes and (PartialAllowed or (utf8bytes=0));
|
||||
end;
|
||||
|
||||
function WideReplaceStr(const AText, AFromText, AToText: WideString): WideString; inline;
|
||||
begin
|
||||
Result := WideStringReplace(AText, AFromText, AToText, [rfReplaceAll]);
|
||||
@ -52,5 +138,74 @@ begin
|
||||
Result:= sysutils.UnicodeStringReplace(S,OldPattern,NewPattern,Flags);
|
||||
end;
|
||||
|
||||
function HasUTF8BOM(S: TStream): boolean;
|
||||
var
|
||||
OldPos: Int64;
|
||||
Buf: array[1..3] of char;
|
||||
begin
|
||||
Result := false;
|
||||
if S.Size<3 then exit;
|
||||
FillChar(Buf, SizeOf(Buf), 0);
|
||||
try
|
||||
OldPos := S.Position;
|
||||
S.Position := 0;
|
||||
if S.Read(Buf, 3)<>3 then exit;
|
||||
Result :=
|
||||
(Buf[1]=sUTF8BOMString[1]) and
|
||||
(Buf[2]=sUTF8BOMString[2]) and
|
||||
(Buf[3]=sUTF8BOMString[3]);
|
||||
finally
|
||||
S.Position := OldPos;
|
||||
end;
|
||||
end;
|
||||
|
||||
function HasUTF8BOM(const S: RawByteString): boolean;
|
||||
begin
|
||||
Result := (Length(S)>=3) and
|
||||
(S[1]=sUTF8BOMString[1]) and
|
||||
(S[2]=sUTF8BOMString[2]) and
|
||||
(S[3]=sUTF8BOMString[3]);
|
||||
end;
|
||||
|
||||
function HasExtendCharacter(const S: RawByteString): boolean;
|
||||
var
|
||||
i: integer;
|
||||
begin
|
||||
for i := 1 to Length(S) do
|
||||
if Ord(S[i])>=$80 then
|
||||
begin
|
||||
Result := true;
|
||||
exit;
|
||||
end;
|
||||
Result := false;
|
||||
end;
|
||||
|
||||
function DetectUTF8Encoding(const S: RawByteString): TEncodeType;
|
||||
var
|
||||
FirstExtChar, i: integer;
|
||||
begin
|
||||
FirstExtChar := 0;
|
||||
for i := 1 to Length(S) do
|
||||
if Ord(S[i])>=$80 then
|
||||
begin
|
||||
FirstExtChar := i;
|
||||
Break;
|
||||
end;
|
||||
|
||||
if FirstExtChar=0 then
|
||||
Result := etUSASCII
|
||||
else
|
||||
if IsBufferUtf8(@S[FirstExtChar], false) then
|
||||
Result := etUTF8
|
||||
else
|
||||
Result := etANSI;
|
||||
end;
|
||||
|
||||
function IsUTF8String(const S: RawByteString): boolean;
|
||||
begin
|
||||
Result := DetectUTF8Encoding(S) = etUTF8;
|
||||
end;
|
||||
|
||||
|
||||
end.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user