xmlread.pp, moved decoder procedures to xmlutils.pp, so they can be reused by other code.

git-svn-id: trunk@15737 -
This commit is contained in:
sergei 2010-08-08 03:27:31 +00:00
parent 118f1d645b
commit f138637678
2 changed files with 142 additions and 132 deletions

View File

@ -496,137 +496,6 @@ begin
end;
end;
function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
var
cnt: Cardinal;
begin
cnt := OutCnt; // num of widechars
if cnt > InCnt div sizeof(WideChar) then
cnt := InCnt div sizeof(WideChar);
Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
Dec(InCnt, cnt*sizeof(WideChar));
Dec(OutCnt, cnt);
Result := cnt;
end;
function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
var
I: Integer;
cnt: Cardinal;
InPtr: PChar;
begin
cnt := OutCnt; // num of widechars
if cnt > InCnt div sizeof(WideChar) then
cnt := InCnt div sizeof(WideChar);
InPtr := InBuf;
for I := 0 to cnt-1 do
begin
OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
Inc(InPtr, 2);
end;
Dec(InCnt, cnt*sizeof(WideChar));
Dec(OutCnt, cnt);
Result := cnt;
end;
function Decode_88591(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
var
I: Integer;
cnt: Cardinal;
begin
cnt := OutCnt; // num of widechars
if cnt > InCnt then
cnt := InCnt;
for I := 0 to cnt-1 do
OutBuf[I] := WideChar(ord(InBuf[I]));
Dec(InCnt, cnt);
Dec(OutCnt, cnt);
Result := cnt;
end;
function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
const
MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
var
i, j, bc: Cardinal;
Value: Cardinal;
begin
result := 0;
i := OutCnt;
while (i > 0) and (InCnt > 0) do
begin
bc := 1;
Value := ord(InBuf^);
if Value < $80 then
OutBuf^ := WideChar(Value)
else
begin
if Value < $C2 then
begin
Result := -1;
Break;
end;
Inc(bc);
if Value > $DF then
begin
Inc(bc);
if Value > $EF then
begin
Inc(bc);
if Value > $F7 then // never encountered in the tests.
begin
Result := -1;
Break;
end;
end;
end;
if InCnt < bc then
Break;
j := 1;
while j < bc do
begin
if InBuf[j] in [#$80..#$BF] then
Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
else
begin
Result := -1;
Break;
end;
Inc(j);
end;
Value := Value and MaxCode[bc];
// RFC2279 check
if Value <= MaxCode[bc-1] then
begin
Result := -1;
Break;
end;
case Value of
0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
$10000..$10FFFF:
begin
if i < 2 then Break;
OutBuf^ := WideChar($D7C0 + (Value shr 10));
OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
Inc(OutBuf); // once here
Dec(i);
end
else
begin
Result := -1;
Break;
end;
end;
end;
Inc(OutBuf);
Inc(InBuf, bc);
Dec(InCnt, bc);
Dec(i);
end;
if Result >= 0 then
Result := OutCnt-i;
OutCnt := i;
end;
function Is_8859_1(const AEncoding: string): Boolean;
begin
@ -1002,7 +871,7 @@ begin
// see rmt-e2e-61, it now fails but for a completely different reason.
FillChar(NewDecoder, sizeof(TDecoder), 0);
if Is_8859_1(AEncoding) then
FDecoder.Decode := @Decode_88591
FDecoder.Decode := @Decode_8859_1
else if FindDecoder(AEncoding, NewDecoder) then
FDecoder := NewDecoder
else

View File

@ -160,6 +160,13 @@ procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
{ Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
{$i names.inc}
implementation
@ -905,6 +912,140 @@ begin
CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
end;
{ standard decoders }
function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
var
cnt: Cardinal;
begin
cnt := OutCnt; // num of widechars
if cnt > InCnt div sizeof(WideChar) then
cnt := InCnt div sizeof(WideChar);
Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
Dec(InCnt, cnt*sizeof(WideChar));
Dec(OutCnt, cnt);
Result := cnt;
end;
function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
var
I: Integer;
cnt: Cardinal;
InPtr: PChar;
begin
cnt := OutCnt; // num of widechars
if cnt > InCnt div sizeof(WideChar) then
cnt := InCnt div sizeof(WideChar);
InPtr := InBuf;
for I := 0 to cnt-1 do
begin
OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
Inc(InPtr, 2);
end;
Dec(InCnt, cnt*sizeof(WideChar));
Dec(OutCnt, cnt);
Result := cnt;
end;
function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
var
I: Integer;
cnt: Cardinal;
begin
cnt := OutCnt; // num of widechars
if cnt > InCnt then
cnt := InCnt;
for I := 0 to cnt-1 do
OutBuf[I] := WideChar(ord(InBuf[I]));
Dec(InCnt, cnt);
Dec(OutCnt, cnt);
Result := cnt;
end;
function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
const
MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
var
i, j, bc: Cardinal;
Value: Cardinal;
begin
result := 0;
i := OutCnt;
while (i > 0) and (InCnt > 0) do
begin
bc := 1;
Value := ord(InBuf^);
if Value < $80 then
OutBuf^ := WideChar(Value)
else
begin
if Value < $C2 then
begin
Result := -1;
Break;
end;
Inc(bc);
if Value > $DF then
begin
Inc(bc);
if Value > $EF then
begin
Inc(bc);
if Value > $F7 then // never encountered in the tests.
begin
Result := -1;
Break;
end;
end;
end;
if InCnt < bc then
Break;
j := 1;
while j < bc do
begin
if InBuf[j] in [#$80..#$BF] then
Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
else
begin
Result := -1;
Break;
end;
Inc(j);
end;
Value := Value and MaxCode[bc];
// RFC2279 check
if Value <= MaxCode[bc-1] then
begin
Result := -1;
Break;
end;
case Value of
0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
$10000..$10FFFF:
begin
if i < 2 then Break;
OutBuf^ := WideChar($D7C0 + (Value shr 10));
OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
Inc(OutBuf); // once here
Dec(i);
end
else
begin
Result := -1;
Break;
end;
end;
end;
Inc(OutBuf);
Inc(InBuf, bc);
Dec(InCnt, bc);
Dec(i);
end;
if Result >= 0 then
Result := OutCnt-i;
OutCnt := i;
end;
initialization