mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-07 01:05:56 +02:00
+ added support for logical/visual cursor mapping
This commit is contained in:
parent
4add7ccbc6
commit
ac3ef8d40a
@ -10,110 +10,326 @@ interface
|
||||
|
||||
type
|
||||
TUCS32Char = Cardinal;
|
||||
TUTF8Char = String[3];
|
||||
TUTF8Str = UTF8String;
|
||||
TUCS16Char = Word;
|
||||
TUTF8Char = String[4];
|
||||
TUTF8String = UTF8String;
|
||||
TDirection=(
|
||||
drNONE,
|
||||
drRTL,
|
||||
drLTR
|
||||
);
|
||||
|
||||
function UnicodeToUtf8(aChar:TUCS32Char):TUTF8Char;
|
||||
function UnicodeToUtf8(aChar:WideChar):TUTF8Char;
|
||||
procedure insert(CharToInsert:TUTF8Char; var uString:TUTF8Str; var CursorPos:Integer);
|
||||
{****************************Conversion routines*******************************}
|
||||
{Converts an UCS 16/32 bits charcater to UTF8 character}
|
||||
function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
|
||||
{Converts a wide char UCS 16 bits chcarcter to UTF8 character}
|
||||
function UnicodeToUTF8(aChar:WideChar):TUTF8Char;
|
||||
{Converts an UTF8 character to UCS 32 bits character}
|
||||
function UTF8ToUnicode(const UTF8Char:TUTF8Char):TUCS32Char;
|
||||
{Converts an UTF8 string to a double byte string}
|
||||
function UTF8ToDoubleByteString(const UTF8Str:TUTF8String):String;
|
||||
function UTF8ToDoubleByte(UTF8Str:PChar; Len:Cardinal; DBStr:PByte):Cardinal;
|
||||
{****************************Logical aspects***********************************}
|
||||
{Returns the number of logical characters}
|
||||
function LLength(const UTF8Str:TUTF8String):Cardinal;
|
||||
{Converts visual position to logical position}
|
||||
function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
|
||||
{Returns character at a given logical position according to paragraph direction}
|
||||
function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
|
||||
{****************************Visual aspects************************************}
|
||||
{Returns the number of visual characters}
|
||||
function VLength(const UTF8Str:TUTF8String):Cardinal;
|
||||
function VLength(p: PChar; Count:Cardinal):Cardinal;
|
||||
{Converts a logical position to a visual position}
|
||||
function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
|
||||
function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
|
||||
{Returns character at a given visual position according to paragraph direction}
|
||||
function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
|
||||
|
||||
implementation
|
||||
|
||||
function UnicodeToUtf8(aChar:TUCS32Char):TUTF8Char;
|
||||
function ComputeCharLength(p:PChar):Cardinal;
|
||||
begin
|
||||
if ord(p^)<%11000000
|
||||
then
|
||||
{regular single byte character (#0 is a normal char, this is UTF8Charascal ;)}
|
||||
Result:=1
|
||||
else if ((ord(p^) and %11100000) = %11000000)
|
||||
then
|
||||
if (ord(p[1]) and %11000000) = %10000000 then
|
||||
Result:=2
|
||||
else
|
||||
Result:=1
|
||||
else if ((ord(p^) and %11110000) = %11100000)
|
||||
then
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000)
|
||||
then
|
||||
Result:=3
|
||||
else
|
||||
Result:=1
|
||||
else if ((ord(p^) and %11111000) = %11110000)
|
||||
then
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000)
|
||||
and ((ord(p[3]) and %11000000) = %10000000)
|
||||
then
|
||||
Result:=4
|
||||
else
|
||||
Result:=1
|
||||
else
|
||||
Result:=1
|
||||
end;
|
||||
{****************************Conversion routines*******************************}
|
||||
function UnicodeToUTF8(aChar:TUCS32Char):TUTF8Char;
|
||||
begin
|
||||
case aChar of
|
||||
0..$7f:
|
||||
begin
|
||||
Result[1]:=char(aChar);
|
||||
SetLength(Result,1);
|
||||
SetLength(UnicodeToUTF8,1);
|
||||
end;
|
||||
$80..$7ff:
|
||||
begin
|
||||
Result[1]:=char($c0 or (aChar shr 6));
|
||||
Result[2]:=char($80 or (aChar and $3f));
|
||||
SetLength(Result,2);
|
||||
SetLength(UnicodeToUTF8,2);
|
||||
end;
|
||||
else
|
||||
$800..$ffff:
|
||||
begin
|
||||
Result[1]:=char($e0 or (aChar shr 12));
|
||||
Result[2]:=char($80 or ((aChar shr 6)and $3f));
|
||||
Result[3]:=char($80 or (aChar and $3f));
|
||||
SetLength(Result,3);
|
||||
Result[1]:=char($e0 or (aChar shr 12));
|
||||
Result[2]:=char($80 or ((aChar shr 6) and $3f));
|
||||
Result[3]:=char($80 or (aChar and $3f));
|
||||
end;
|
||||
$10000..$1fffff:
|
||||
begin
|
||||
SetLength(UnicodeToUTF8,4);
|
||||
Result[1]:=char($f0 or (aChar shr 18));
|
||||
Result[2]:=char($80 or ((aChar shr 12) and $3f));
|
||||
Result[3]:=char($80 or ((aChar shr 6) and $3f));
|
||||
Result[4]:=char($80 or (aChar and $3f));
|
||||
end;
|
||||
else
|
||||
SetLength(UnicodeToUTF8, 0);
|
||||
end;
|
||||
end;
|
||||
|
||||
function UnicodeToUtf8(aChar:WideChar):TUTF8Char;
|
||||
function UnicodeToUTF8(aChar:WideChar):TUTF8Char;
|
||||
begin
|
||||
UnicodeToUtf8(Word(aChar));
|
||||
Result := UnicodeToUTF8(Word(aChar));
|
||||
end;
|
||||
|
||||
procedure insert(CharToInsert:TUTF8Char; var uString:TUTF8Str; var CursorPos:Integer);
|
||||
function UTF8ToUnicode(const UTF8Char:TUTF8Char):TUCS32Char;
|
||||
begin
|
||||
case ComputeCharLength(@UTF8Char[1]) of
|
||||
1:{regular single byte character (#0 is a normal char, this is UTF8Charascal ;)}
|
||||
Result := ord(UTF8Char[1]);
|
||||
2:
|
||||
Result := ((ord(UTF8Char[1]) and %00011111) shl 6)
|
||||
or (ord(UTF8Char[2]) and %00111111);
|
||||
3:
|
||||
Result := ((ord(UTF8Char[1]) and %00011111) shl 12)
|
||||
or ((ord(UTF8Char[1]) and %00111111) shl 6)
|
||||
or (ord(UTF8Char[2]) and %00111111);
|
||||
4:
|
||||
Result := ((ord(UTF8Char[1]) and %00011111) shl 18)
|
||||
or ((ord(UTF8Char[2]) and %00111111) shl 12)
|
||||
or ((ord(UTF8Char[3]) and %00111111) shl 6)
|
||||
or (ord(UTF8Char[4]) and %00111111);
|
||||
else
|
||||
Result := $FFFFFFFF;
|
||||
end
|
||||
end;
|
||||
|
||||
function UTF8ToDoubleByteString(const UTF8Str: TUTF8String): string;
|
||||
var
|
||||
Len: Integer;
|
||||
begin
|
||||
Len:=VLength(UTF8Str);
|
||||
SetLength(Result,Len*2);
|
||||
if Len=0 then exit;
|
||||
UTF8ToDoubleByte(PChar(UTF8Str),length(UTF8Str),PByte(Result));
|
||||
end;
|
||||
|
||||
function UTF8ToDoubleByte(UTF8Str: PChar; Len:Cardinal; DBStr: PByte):Cardinal;
|
||||
var
|
||||
SrcPos: PChar;
|
||||
CharLen: LongInt;
|
||||
DestPos: PByte;
|
||||
u: Cardinal;
|
||||
begin
|
||||
SrcPos:=UTF8Str;
|
||||
DestPos:=DBStr;
|
||||
Result:=0;
|
||||
while Len>0 do begin
|
||||
u:=UTF8ToUnicode(SrcPos);
|
||||
DestPos^:=byte((u shr 8) and $ff);
|
||||
inc(DestPos);
|
||||
DestPos^:=byte(u and $ff);
|
||||
inc(DestPos);
|
||||
inc(SrcPos,CharLen);
|
||||
dec(Len,CharLen);
|
||||
inc(Result);
|
||||
end;
|
||||
end;
|
||||
{****************************Logical aspects***********************************}
|
||||
function LLength(const UTF8Str:TUTF8String):Cardinal;
|
||||
begin
|
||||
Result := Length(UTF8Str);
|
||||
end;
|
||||
|
||||
function LPos(const UTF8Str:TUTF8String; vp:Cardinal; pDir:TDirection):Cardinal;
|
||||
var
|
||||
{At beginning of the line we don't know which direction, thus the first
|
||||
character usually decides of paragrph direction}
|
||||
dir:TDirection;
|
||||
LeftCursorPos, RightCursorPos, InsertPos:Integer;
|
||||
LeftCursorPos, RightCursorPos:Integer;
|
||||
uLen:Integer;
|
||||
begin
|
||||
dir := drNONE;
|
||||
uLen := Length(uString);
|
||||
uLen := Length(UTF8Str);
|
||||
LeftCursorPos := 1;
|
||||
RightCursorPos := 1;
|
||||
InsertPos := 1;
|
||||
Result := 1;
|
||||
if(uLen > 0) then
|
||||
repeat
|
||||
case uString[InsertPos] of
|
||||
case UTF8Str[Result] of
|
||||
#32,'{','}','/'://Does not change direction, this is a neutral character;
|
||||
begin
|
||||
if(dir = drLTR) then
|
||||
if(pDir = drLTR) then
|
||||
Inc(RightCursorPos);
|
||||
end;
|
||||
#$d8,#$d9://Arabic
|
||||
begin
|
||||
dir := drRTL;
|
||||
Inc(InsertPos);//Consume control character
|
||||
pDir := drRTL;
|
||||
Inc(Result);//Consume control character
|
||||
end;
|
||||
else //Latin
|
||||
begin
|
||||
dir := drLTR;
|
||||
pDir := drLTR;
|
||||
RightCursorPos := LeftCursorPos + 1;
|
||||
end;
|
||||
end;
|
||||
Inc(LeftCursorPos);
|
||||
Inc(InsertPos);
|
||||
until(InsertPos > uLen) or
|
||||
((dir = drLTR) and (LeftCursorPos > CursorPos)) or
|
||||
((dir = drRTL) and (RightCursorPos > CursorPos));
|
||||
//WriteLn('uLen=',uLen,' InsertPos=',InsertPos,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
||||
if(InsertPos > uLen)
|
||||
Inc(Result);
|
||||
until(Result > uLen) or
|
||||
((pDir = drLTR) and (LeftCursorPos > vp)) or
|
||||
((pDir = drRTL) and (RightCursorPos > vp));
|
||||
//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
||||
if(Result > uLen)
|
||||
then begin
|
||||
if(CursorPos > LeftCursorPos) then begin
|
||||
Inc(InsertPos, CursorPos - LeftCursorPos);
|
||||
LeftCursorPos := CursorPos;
|
||||
if(vp > LeftCursorPos) then begin
|
||||
Inc(Result, vp - LeftCursorPos);
|
||||
LeftCursorPos := vp;
|
||||
end;
|
||||
Inc(LeftCursorPos);
|
||||
if(CursorPos > RightCursorPos) then
|
||||
if(dir = drLTR) then
|
||||
RightCursorPos := CursorPos;
|
||||
uString := uString + StringOfChar(' ', InsertPos - uLen);
|
||||
if(vp > RightCursorPos) then
|
||||
if(pDir = drLTR) then
|
||||
RightCursorPos := vp;
|
||||
end;
|
||||
//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
||||
System.insert(CharToInsert, uString,InsertPos);
|
||||
case CharToInsert[1] of
|
||||
Result := Result;
|
||||
end;
|
||||
|
||||
function LCharOf(UTF8Str:TUTF8String; lp:Cardinal):TUTF8Char;
|
||||
begin
|
||||
while(lp > 0) and (UTF8Str[lp] > #128) do
|
||||
Dec(lp);
|
||||
if lp = 0
|
||||
then
|
||||
Exit('');
|
||||
Move(Result, UTF8Str[lp], SizeOf(Result));
|
||||
SetLength(Result, ComputeCharLength(@Result[1]));
|
||||
end;
|
||||
{****************************Visual aspects************************************}
|
||||
function VLength(const UTF8Str:TUTF8String):Cardinal;
|
||||
begin
|
||||
Result := VLength(PChar(UTF8Str),LLength(UTF8Str));
|
||||
end;
|
||||
|
||||
function VLength(p:PChar; Count:Cardinal):Cardinal;
|
||||
var
|
||||
CharLen: LongInt;
|
||||
begin
|
||||
VLength:=0;
|
||||
while (Count>0) do begin
|
||||
inc(Result);
|
||||
CharLen:=ComputeCharLength(p);
|
||||
inc(p,CharLen);
|
||||
dec(Count,CharLen);
|
||||
end;
|
||||
end;
|
||||
|
||||
function VPos(const UTF8Str:TUTF8String; lp:Cardinal; pDir, cDir:TDirection):Cardinal;
|
||||
var
|
||||
{At beginning of the line we don't know which direction, thus the first
|
||||
character usually decides of paragrph direction}
|
||||
LeftCursorPos, RightCursorPos:Integer;
|
||||
uLen:Integer;
|
||||
begin
|
||||
uLen := Length(UTF8Str);
|
||||
LeftCursorPos := 1;
|
||||
RightCursorPos := 1;
|
||||
Result := 1;
|
||||
if(uLen > 0) then
|
||||
repeat
|
||||
case UTF8Str[Result] of
|
||||
#32,'{','}','/'://Does not change direction, this is a neutral character;
|
||||
begin
|
||||
if(pDir = drLTR) then
|
||||
Inc(RightCursorPos);
|
||||
end;
|
||||
#$d8,#$d9://Arabic
|
||||
begin
|
||||
pDir := drRTL;
|
||||
Inc(Result);//Consume control character
|
||||
end;
|
||||
else //Latin
|
||||
begin
|
||||
pDir := drLTR;
|
||||
RightCursorPos := LeftCursorPos + 1;
|
||||
end;
|
||||
end;
|
||||
Inc(LeftCursorPos);
|
||||
Inc(Result);
|
||||
until(Result > uLen) or
|
||||
((pDir = drLTR) and (LeftCursorPos > lp)) or
|
||||
((pDir = drRTL) and (RightCursorPos > lp));
|
||||
//WriteLn('uLen=',uLen,' Result=',Result,' CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
||||
if(Result > uLen)
|
||||
then begin
|
||||
if(lp > LeftCursorPos) then begin
|
||||
Inc(Result, lp - LeftCursorPos);
|
||||
LeftCursorPos := lp;
|
||||
end;
|
||||
Inc(LeftCursorPos);
|
||||
if(lp > RightCursorPos) then
|
||||
if(pDir = drLTR) then
|
||||
RightCursorPos := lp;
|
||||
end;
|
||||
//WriteLn('CursorPos=',CursorPos,' LeftCursorPos=',LeftCursorPos,' RightCursorPos=',RightCursorPos);
|
||||
Result := Result;
|
||||
{ case dir of
|
||||
#32:
|
||||
CursorPos := LeftCursorPos;
|
||||
#$d8,#$d9:
|
||||
CursorPos := RightCursorPos;
|
||||
else
|
||||
CursorPos := LeftCursorPos;
|
||||
end;
|
||||
//WriteLn('InsertPos=',InsertPos,' New CursorPos=',CursorPos);
|
||||
end;}
|
||||
//WriteLn('Result=',Result,' New CursorPos=',CursorPos);
|
||||
end;
|
||||
|
||||
function VPos(UTF8Char:PChar; Len:integer; BytePos:integer):Cardinal;
|
||||
begin
|
||||
end;
|
||||
|
||||
|
||||
function VCharOf(UTF8Str:TUTF8String; vp:Cardinal; dir:TDirection):TUTF8Char;
|
||||
var
|
||||
CharLen: LongInt;
|
||||
begin
|
||||
Result:=LCharOf(UTF8Str,LPos(UTF8Str, vp, dir));
|
||||
end;
|
||||
|
||||
end.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user