+ added support for logical/visual cursor mapping

This commit is contained in:
mazen 2004-09-06 15:29:25 +00:00
parent 4add7ccbc6
commit ac3ef8d40a

View File

@ -10,110 +10,326 @@ interface
type type
TUCS32Char = Cardinal; TUCS32Char = Cardinal;
TUTF8Char = String[3]; TUCS16Char = Word;
TUTF8Str = UTF8String; TUTF8Char = String[4];
TUTF8String = UTF8String;
TDirection=( TDirection=(
drNONE, drNONE,
drRTL, drRTL,
drLTR drLTR
); );
function UnicodeToUtf8(aChar:TUCS32Char):TUTF8Char; {****************************Conversion routines*******************************}
function UnicodeToUtf8(aChar:WideChar):TUTF8Char; {Converts an UCS 16/32 bits charcater to UTF8 character}
procedure insert(CharToInsert:TUTF8Char; var uString:TUTF8Str; var CursorPos:Integer); function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
{Converts a wide char UCS 16 bits chcarcter to UTF8 character}
function UnicodeToUTF8(aChar:WideChar):TUTF8Char;
{Converts an UTF8 character to UCS 32 bits character}
function UTF8ToUnicode(const UTF8Char:TUTF8Char):TUCS32Char;
{Converts an UTF8 string to a double byte string}
function UTF8ToDoubleByteString(const UTF8Str:TUTF8String):String;
function UTF8ToDoubleByte(UTF8Str:PChar; Len:Cardinal; DBStr:PByte):Cardinal;
{****************************Logical aspects***********************************}
{Returns the number of logical characters}
function LLength(const UTF8Str:TUTF8String):Cardinal;
{Converts visual position to logical position}
function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
{Returns character at a given logical position according to paragraph direction}
function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
{****************************Visual aspects************************************}
{Returns the number of visual characters}
function VLength(const UTF8Str:TUTF8String):Cardinal;
function VLength(p: PChar; Count:Cardinal):Cardinal;
{Converts a logical position to a visual position}
function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
{Returns character at a given visual position according to paragraph direction}
function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
implementation implementation
function UnicodeToUtf8(aChar:TUCS32Char):TUTF8Char; function ComputeCharLength(p:PChar):Cardinal;
begin
if ord(p^)<%11000000
then
{regular single byte character (#0 is a normal char, this is UTF8Charascal ;)}
Result:=1
else if ((ord(p^) and %11100000) = %11000000)
then
if (ord(p[1]) and %11000000) = %10000000 then
Result:=2
else
Result:=1
else if ((ord(p^) and %11110000) = %11100000)
then
if ((ord(p[1]) and %11000000) = %10000000)
and ((ord(p[2]) and %11000000) = %10000000)
then
Result:=3
else
Result:=1
else if ((ord(p^) and %11111000) = %11110000)
then
if ((ord(p[1]) and %11000000) = %10000000)
and ((ord(p[2]) and %11000000) = %10000000)
and ((ord(p[3]) and %11000000) = %10000000)
then
Result:=4
else
Result:=1
else
Result:=1
end;
{****************************Conversion routines*******************************}
function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
begin begin
case aChar of case aChar of
0..$7f: 0..$7f:
begin begin
Result[1]:=char(aChar); Result[1]:=char(aChar);
SetLength(Result,1); SetLength(UnicodeToUTF8,1);
end; end;
$80..$7ff: $80..$7ff:
begin begin
Result[1]:=char($c0 or (aChar shr 6)); Result[1]:=char($c0 or (aChar shr 6));
Result[2]:=char($80 or (aChar and $3f)); Result[2]:=char($80 or (aChar and $3f));
SetLength(Result,2); SetLength(UnicodeToUTF8,2);
end; end;
else $800..$ffff:
begin begin
Result[1]:=char($e0 or (aChar shr 12));
Result[2]:=char($80 or ((aChar shr 6)and $3f));
Result[3]:=char($80 or (aChar and $3f));
SetLength(Result,3); SetLength(Result,3);
Result[1]:=char($e0 or (aChar shr 12));
Result[2]:=char($80 or ((aChar shr 6) and $3f));
Result[3]:=char($80 or (aChar and $3f));
end; end;
$10000..$1fffff:
begin
SetLength(UnicodeToUTF8,4);
Result[1]:=char($f0 or (aChar shr 18));
Result[2]:=char($80 or ((aChar shr 12) and $3f));
Result[3]:=char($80 or ((aChar shr 6) and $3f));
Result[4]:=char($80 or (aChar and $3f));
end;
else
SetLength(UnicodeToUTF8, 0);
end; end;
end; end;
function UnicodeToUtf8(aChar:WideChar):TUTF8Char; function UnicodeToUTF8(aChar:WideChar):TUTF8Char;
begin begin
UnicodeToUtf8(Word(aChar)); Result := UnicodeToUTF8(Word(aChar));
end; end;
procedure insert(CharToInsert:TUTF8Char; var uString:TUTF8Str; var CursorPos:Integer); function UTF8ToUnicode(const UTF8Char:TUTF8Char):TUCS32Char;
begin
case ComputeCharLength(@UTF8Char[1]) of
1:{regular single byte character (#0 is a normal char, this is UTF8Charascal ;)}
Result := ord(UTF8Char[1]);
2:
Result := ((ord(UTF8Char[1]) and %00011111) shl 6)
or (ord(UTF8Char[2]) and %00111111);
3:
Result := ((ord(UTF8Char[1]) and %00011111) shl 12)
or ((ord(UTF8Char[1]) and %00111111) shl 6)
or (ord(UTF8Char[2]) and %00111111);
4:
Result := ((ord(UTF8Char[1]) and %00011111) shl 18)
or ((ord(UTF8Char[2]) and %00111111) shl 12)
or ((ord(UTF8Char[3]) and %00111111) shl 6)
or (ord(UTF8Char[4]) and %00111111);
else
Result := $FFFFFFFF;
end
end;
function UTF8ToDoubleByteString(const UTF8Str: TUTF8String): string;
var
Len: Integer;
begin
Len:=VLength(UTF8Str);
SetLength(Result,Len*2);
if Len=0 then exit;
UTF8ToDoubleByte(PChar(UTF8Str),length(UTF8Str),PByte(Result));
end;
function UTF8ToDoubleByte(UTF8Str: PChar; Len:Cardinal; DBStr: PByte):Cardinal;
var
SrcPos: PChar;
CharLen: LongInt;
DestPos: PByte;
u: Cardinal;
begin
SrcPos:=UTF8Str;
DestPos:=DBStr;
Result:=0;
while Len>0 do begin
u:=UTF8ToUnicode(SrcPos);
DestPos^:=byte((u shr 8) and $ff);
inc(DestPos);
DestPos^:=byte(u and $ff);
inc(DestPos);
inc(SrcPos,CharLen);
dec(Len,CharLen);
inc(Result);
end;
end;
{****************************Logical aspects***********************************}
function LLength(const UTF8Str:TUTF8String):Cardinal;
begin
Result := Length(UTF8Str);
end;
function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
var var
{At beginning of the line we don't know which direction, thus the first {At beginning of the line we don't know which direction, thus the first
character usually decides of paragrph direction} character usually decides of paragrph direction}
dir:TDirection; LeftCursorPos, RightCursorPos:Integer;
LeftCursorPos, RightCursorPos, InsertPos:Integer;
uLen:Integer; uLen:Integer;
begin begin
dir := drNONE; uLen := Length(UTF8Str);
uLen := Length(uString);
LeftCursorPos := 1; LeftCursorPos := 1;
RightCursorPos := 1; RightCursorPos := 1;
InsertPos := 1; Result := 1;
if(uLen > 0) then if(uLen > 0) then
repeat repeat
case uString[InsertPos] of case UTF8Str[Result] of
#32,'{','}','/'://Does not change direction, this is a neutral character; #32,'{','}','/'://Does not change direction, this is a neutral character;
begin begin
if(dir = drLTR) then if(pDir = drLTR) then
Inc(RightCursorPos); Inc(RightCursorPos);
end; end;
#$d8,#$d9://Arabic #$d8,#$d9://Arabic
begin begin
dir := drRTL; pDir := drRTL;
Inc(InsertPos);//Consume control character Inc(Result);//Consume control character
end; end;
else //Latin else //Latin
begin begin
dir := drLTR; pDir := drLTR;
RightCursorPos := LeftCursorPos + 1; RightCursorPos := LeftCursorPos + 1;
end; end;
end; end;
Inc(LeftCursorPos); Inc(LeftCursorPos);
Inc(InsertPos); Inc(Result);
until(InsertPos > uLen) or until(Result > uLen) or
((dir = drLTR) and (LeftCursorPos > CursorPos)) or ((pDir = drLTR) and (LeftCursorPos > vp)) or
((dir = drRTL) and (RightCursorPos > CursorPos)); ((pDir = drRTL) and (RightCursorPos > vp));
//WriteLn('uLen=',uLen,' InsertPos=',InsertPos,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos); //WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
if(InsertPos > uLen) if(Result > uLen)
then begin then begin
if(CursorPos > LeftCursorPos) then begin if(vp > LeftCursorPos) then begin
Inc(InsertPos, CursorPos - LeftCursorPos); Inc(Result, vp - LeftCursorPos);
LeftCursorPos := CursorPos; LeftCursorPos := vp;
end; end;
Inc(LeftCursorPos); Inc(LeftCursorPos);
if(CursorPos > RightCursorPos) then if(vp > RightCursorPos) then
if(dir = drLTR) then if(pDir = drLTR) then
RightCursorPos := CursorPos; RightCursorPos := vp;
uString := uString + StringOfChar(' ', InsertPos - uLen);
end; end;
//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos); //WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
System.insert(CharToInsert, uString,InsertPos); Result := Result;
case CharToInsert[1] of end;
function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
begin
while(lp > 0) and (UTF8Str[lp] > #128) do
Dec(lp);
if lp = 0
then
Exit('');
Move(Result, UTF8Str[lp], SizeOf(Result));
SetLength(Result, ComputeCharLength(@Result[1]));
end;
{****************************Visual aspects************************************}
function VLength(const UTF8Str:TUTF8String):Cardinal;
begin
Result := VLength(PChar(UTF8Str),LLength(UTF8Str));
end;
function VLength(p:PChar; Count:Cardinal):Cardinal;
var
CharLen: LongInt;
begin
VLength:=0;
while (Count>0) do begin
inc(Result);
CharLen:=ComputeCharLength(p);
inc(p,CharLen);
dec(Count,CharLen);
end;
end;
function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
var
{At beginning of the line we don't know which direction, thus the first
character usually decides of paragrph direction}
LeftCursorPos, RightCursorPos:Integer;
uLen:Integer;
begin
uLen := Length(UTF8Str);
LeftCursorPos := 1;
RightCursorPos := 1;
Result := 1;
if(uLen > 0) then
repeat
case UTF8Str[Result] of
#32,'{','}','/'://Does not change direction, this is a neutral character;
begin
if(pDir = drLTR) then
Inc(RightCursorPos);
end;
#$d8,#$d9://Arabic
begin
pDir := drRTL;
Inc(Result);//Consume control character
end;
else //Latin
begin
pDir := drLTR;
RightCursorPos := LeftCursorPos + 1;
end;
end;
Inc(LeftCursorPos);
Inc(Result);
until(Result > uLen) or
((pDir = drLTR) and (LeftCursorPos > lp)) or
((pDir = drRTL) and (RightCursorPos > lp));
//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
if(Result > uLen)
then begin
if(lp > LeftCursorPos) then begin
Inc(Result, lp - LeftCursorPos);
LeftCursorPos := lp;
end;
Inc(LeftCursorPos);
if(lp > RightCursorPos) then
if(pDir = drLTR) then
RightCursorPos := lp;
end;
//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
Result := Result;
{ case dir of
#32: #32:
CursorPos := LeftCursorPos; CursorPos := LeftCursorPos;
#$d8,#$d9: #$d8,#$d9:
CursorPos := RightCursorPos; CursorPos := RightCursorPos;
else else
CursorPos := LeftCursorPos; CursorPos := LeftCursorPos;
end; end;}
//WriteLn('InsertPos=',InsertPos,' New CursorPos=',CursorPos); //WriteLn('Result=',Result,' New CursorPos=',CursorPos);
end; end;
function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
begin
end;
function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
var
CharLen: LongInt;
begin
Result:=LCharOf(UTF8Str,LPos(UTF8Str, vp, dir));
end;
end. end.