mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-13 09:29:10 +02:00
LazReport, fix word wrapping/breaking of UTF-8 strings
git-svn-id: trunk@23057 -
This commit is contained in:
parent
3ccb54ad89
commit
b5ca260573
@ -2449,7 +2449,7 @@ begin
|
|||||||
CanBreak := True;
|
CanBreak := True;
|
||||||
end;
|
end;
|
||||||
if CanBreak then
|
if CanBreak then
|
||||||
Result := Result + Chr(UTF8CharToByteIndex(pchar(s),length(s),i));
|
Result := Result + Chr(i);
|
||||||
Inc(i);
|
Inc(i);
|
||||||
until i > Len - 2;
|
until i > Len - 2;
|
||||||
end;
|
end;
|
||||||
@ -2463,6 +2463,7 @@ var
|
|||||||
size, size1, maxwidth: Integer;
|
size, size1, maxwidth: Integer;
|
||||||
b: TWordBreaks;
|
b: TWordBreaks;
|
||||||
WCanvas: TCanvas;
|
WCanvas: TCanvas;
|
||||||
|
desc: string;
|
||||||
|
|
||||||
procedure OutLine(const str: String);
|
procedure OutLine(const str: String);
|
||||||
var
|
var
|
||||||
@ -2481,9 +2482,11 @@ var
|
|||||||
|
|
||||||
procedure WrapLine(const s: String);
|
procedure WrapLine(const s: String);
|
||||||
var
|
var
|
||||||
i, cur, beg, last: Integer;
|
i, cur, beg, last, len: Integer;
|
||||||
WasBreak, CRLF: Boolean;
|
WasBreak, CRLF: Boolean;
|
||||||
|
ch: TUTF8char;
|
||||||
begin
|
begin
|
||||||
|
|
||||||
CRLF := False;
|
CRLF := False;
|
||||||
for i := 1 to Length(s) do
|
for i := 1 to Length(s) do
|
||||||
begin
|
begin
|
||||||
@ -2493,108 +2496,107 @@ var
|
|||||||
break;
|
break;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
last := 1; beg := 1;
|
last := 1; beg := 1;
|
||||||
{$IFDEF DebugLR}
|
|
||||||
debugLn(' WrapLine: init "',dbgstr(s),'" wcanvas.txtw=',dbgs(WCanvas.TextWidth(s)),
|
|
||||||
' maxwidth=',dbgs(maxwidth),' crlf=',dbgs(crlf));
|
|
||||||
{$ENDIF}
|
|
||||||
if not CRLF and ((Length(s) <= 1) or (WCanvas.TextWidth(s) <= maxwidth)) then
|
if not CRLF and ((Length(s) <= 1) or (WCanvas.TextWidth(s) <= maxwidth)) then
|
||||||
begin
|
begin
|
||||||
{$IFDEF DebugLR}
|
|
||||||
debugLn(' WrapLine: fast crlf=',dbgs(crlf),' wcanvas.txtw=',dbgs(Wcanvas.TextWidth(s)));
|
|
||||||
{$ENDIF}
|
|
||||||
OutLine(s + #1)
|
OutLine(s + #1)
|
||||||
end else
|
end else
|
||||||
begin
|
begin
|
||||||
|
|
||||||
cur := 1;
|
cur := 1;
|
||||||
while cur <= Length(s) do
|
Len := UTF8Desc(S, Desc);
|
||||||
|
|
||||||
|
while cur <= Len do
|
||||||
begin
|
begin
|
||||||
if s[cur] in [#10, #13] then
|
Ch := UTF8Char(s, cur, Desc);
|
||||||
|
|
||||||
|
// check for items with soft-breaks
|
||||||
|
if (Ch=#10) or (Ch=#13) then
|
||||||
begin
|
begin
|
||||||
{$IFDEF DebugLR}
|
OutLine(UTF8Range(s, beg, cur - beg, Desc) + #1);
|
||||||
debugLn(' Wrapline: crlf beg=',dbgs(beg),' cur=',dbgs(cur));
|
while (cur<Len) and UTF8CharIn(ch, [#10,#13]) do
|
||||||
{$ENDIF}
|
begin
|
||||||
OutLine(Copy(s, beg, cur - beg) + #1);
|
inc(cur);
|
||||||
while (cur < Length(s)) and (s[cur] in [#10, #13]) do Inc(cur);
|
if cur<=len then
|
||||||
|
ch := UTF8Char(s, cur, desc);
|
||||||
|
end;
|
||||||
|
|
||||||
beg := cur; last := beg;
|
beg := cur; last := beg;
|
||||||
if s[cur] in [#13, #10] then begin
|
if UTF8CharIn(Ch,[#13, #10]) then
|
||||||
{$IFDEF DebugLR}
|
exit
|
||||||
debugln(' Wrapline: Exiting as cur is in crlf cur=',dbgs(cur));
|
else
|
||||||
{$ENDIF}
|
|
||||||
Exit;
|
|
||||||
end else
|
|
||||||
continue;
|
continue;
|
||||||
end;
|
end;
|
||||||
if s[cur] <> ' ' then
|
|
||||||
if WCanvas.TextWidth(Copy(s, beg, cur - beg + 1)) > maxwidth then
|
if ch <> ' ' then
|
||||||
|
if WCanvas.TextWidth(UTF8Range(s, beg, cur - beg + 1, Desc)) > maxwidth then
|
||||||
begin
|
begin
|
||||||
|
|
||||||
WasBreak := False;
|
WasBreak := False;
|
||||||
if (Flags and flWordBreak) <> 0 then
|
if (Flags and flWordBreak) <> 0 then
|
||||||
begin
|
begin
|
||||||
|
|
||||||
|
// in case of breaking in the middle, get the full word
|
||||||
i := cur;
|
i := cur;
|
||||||
while (i <= Length(s)) and not (s[i] in spaces) do
|
while (i <= Len) and not UTF8CharIn(ch, [' ', '.', ',', '-']) do
|
||||||
|
begin
|
||||||
Inc(i);
|
Inc(i);
|
||||||
{$IFDEF DebugLR}
|
if i<=len then
|
||||||
debugln(' wrapline: to BreakWord: ini=',dbgs(last+1),' fin=',dbgs(i-last-1));
|
ch := UTF8Char(s, i, Desc);
|
||||||
{$ENDIF}
|
end;
|
||||||
b := BreakWord(Copy(s, last + 1, i - last - 1));
|
|
||||||
|
// find word's break points using some simple hyphenator algorithm
|
||||||
|
// TODO: implement interface so users can use their own hyphenator
|
||||||
|
// algorithm
|
||||||
|
b := BreakWord(UTF8Range(s, last, i - last, Desc));
|
||||||
|
|
||||||
|
// if word can be broken in many segments, find the last segment that
|
||||||
|
// fits within maxwidth
|
||||||
if Length(b) > 0 then
|
if Length(b) > 0 then
|
||||||
begin
|
begin
|
||||||
i := 1;
|
i := 1;
|
||||||
cur := last;
|
|
||||||
while (i <= Length(b)) and
|
while (i <= Length(b)) and
|
||||||
(WCanvas.TextWidth(Copy(s, beg, last - beg + 1 + Ord(b[i])) + '-') <= maxwidth) do
|
(WCanvas.TextWidth(UTF8Range(s, beg, last - beg + Ord(b[i]), Desc) + '-') <= maxwidth) do
|
||||||
begin
|
begin
|
||||||
WasBreak := True;
|
WasBreak := True;
|
||||||
cur := last + Ord(b[i]);
|
cur := last + Ord(b[i]); // cur now points to next char after breaking word
|
||||||
Inc(i);
|
Inc(i);
|
||||||
end;
|
end;
|
||||||
last := cur;
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
// last now points to nex char to be processed
|
||||||
|
last := cur;
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
if last = beg then last := cur;
|
|
||||||
if WasBreak then begin
|
|
||||||
{$IFDEF DebugLR}
|
|
||||||
debugln(' wrapline: wasbreak=true, beg=',dbgs(beg),' fin=',dbgs(last-beg+1));
|
|
||||||
{$ENDIF}
|
|
||||||
OutLine(Copy(s, beg, last - beg + 1) + '-');
|
|
||||||
end else if s[last] = ' ' then begin
|
|
||||||
{$IFDEF DebugLR}
|
|
||||||
debugln(' wrapline: s[last]=" ", beg=',dbgs(beg),' fin=',dbgs(last-beg));
|
|
||||||
{$ENDIF}
|
|
||||||
OutLine(Copy(s, beg, last - beg))
|
|
||||||
end else begin
|
|
||||||
{$IFDEF DebugLR}
|
|
||||||
debugln(' wrapline: s[last]<>" ", beg=',dbgs(beg),' fin=',dbgs(last-beg));
|
|
||||||
{$ENDIF}
|
|
||||||
OutLine(Copy(s, beg, last - beg + 1));
|
|
||||||
end;
|
|
||||||
if ((Flags and flWordBreak) <> 0) and not WasBreak and (last = cur) then
|
|
||||||
begin
|
begin
|
||||||
beg := cur + 1;
|
if last = beg then
|
||||||
cur := Length(s);
|
last := cur;
|
||||||
break;
|
|
||||||
end;
|
end;
|
||||||
if (Flags and flWordBreak) = 0 then
|
|
||||||
|
if WasBreak then
|
||||||
begin
|
begin
|
||||||
if last = cur then
|
// if word has been broken, output the partial word plus an hyphen
|
||||||
begin
|
OutLine(UTF8Range(s, beg, last - beg, Desc) + '-');
|
||||||
beg := cur;
|
end else
|
||||||
break;
|
begin
|
||||||
end;
|
// output the portion of word that fits maxwidth
|
||||||
|
OutLine(UTF8Range(s, beg, last - beg, Desc));
|
||||||
|
// if space was found, advance to next no space char
|
||||||
|
if s[last] = ' ' then
|
||||||
|
last := last + 1;
|
||||||
end;
|
end;
|
||||||
beg := last + 1; last := beg;
|
|
||||||
|
beg := last;
|
||||||
end;
|
end;
|
||||||
if s[cur] in spaces then last := cur;
|
|
||||||
|
if UTF8CharIn(Ch, [' ', '.', ',', '-']) then
|
||||||
|
last := cur;
|
||||||
Inc(cur);
|
Inc(cur);
|
||||||
end;
|
end;
|
||||||
if beg <> cur then begin
|
|
||||||
{$IFDEF DebugLR}
|
if beg <> cur then
|
||||||
debugln(' wrapline: beg<>cur, beg=',dbgs(beg),' fin=',dbgs(cur-beg+1));
|
OutLine(UTF8Range(s, beg, cur - beg + 1, Desc) + #1);
|
||||||
{$ENDIF}
|
|
||||||
OutLine(Copy(s, beg, cur - beg + 1) + #1);
|
|
||||||
end;
|
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
@ -53,10 +53,20 @@ function StrTofrTypeObject(St : string) : Byte;
|
|||||||
|
|
||||||
function lrGetUnBrackedStr(const S:string):string; //remove '[' from begion of string and ']' from end
|
function lrGetUnBrackedStr(const S:string):string; //remove '[' from begion of string and ']' from end
|
||||||
|
|
||||||
|
// utf8 tools
|
||||||
|
function UTF8Desc(S:string; var Desc: string): Integer;
|
||||||
|
function UTF8Char(S:string; index:Integer; Desc:string): TUTF8Char;
|
||||||
|
function UTF8Range(S:string; index,count:Integer; Desc:String):string;
|
||||||
|
function UTF8Index(S:string; index:integer; desc:string): Integer;
|
||||||
|
function UTF8CharIn(ch:TUTF8Char; const arrstr:array of string): boolean;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
uses LR_Class;
|
uses LR_Class;
|
||||||
|
|
||||||
|
var
|
||||||
|
LocalDescri: string;
|
||||||
|
|
||||||
procedure frInitFont(aFont : TFont; aColor : TColor; aSize : Integer; aStyle : TFontStyles);
|
procedure frInitFont(aFont : TFont; aColor : TColor; aSize : Integer; aStyle : TFontStyles);
|
||||||
begin
|
begin
|
||||||
with aFont do
|
with aFont do
|
||||||
@ -530,4 +540,92 @@ begin
|
|||||||
Result:=S;
|
Result:=S;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function UTF8Desc(S: string; var Desc: string): Integer;
|
||||||
|
var
|
||||||
|
i,b: Integer;
|
||||||
|
begin
|
||||||
|
i := 1;
|
||||||
|
Result := 0;
|
||||||
|
SetLength(Desc, Length(S));
|
||||||
|
while i<=Length(s) do begin
|
||||||
|
b := UTF8CharacterStrictLength(@S[i]);
|
||||||
|
inc(i,b);
|
||||||
|
inc(Result);
|
||||||
|
Desc[Result] := Char(b);
|
||||||
|
end;
|
||||||
|
Setlength(Desc, Result);
|
||||||
|
end;
|
||||||
|
|
||||||
|
function UTF8Char(S: string; index: Integer; Desc: string): TUTF8Char;
|
||||||
|
var
|
||||||
|
i,j: Integer;
|
||||||
|
begin
|
||||||
|
Result := '';
|
||||||
|
if (index<1) or (index>Length(Desc)) then begin
|
||||||
|
//Result := #$EF#$BF#$BD // replacement character
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
|
||||||
|
i:=0; j:=1;
|
||||||
|
while i<Length(Desc) do begin
|
||||||
|
inc(i);
|
||||||
|
if i=index then begin
|
||||||
|
Move(S[j],Result[1],ord(Desc[i]));
|
||||||
|
Result[0]:=Desc[i];
|
||||||
|
break;
|
||||||
|
end;
|
||||||
|
inc(j, ord(Desc[i]));
|
||||||
|
end;
|
||||||
|
|
||||||
|
end;
|
||||||
|
|
||||||
|
function UTF8Range(S: string; index, count: Integer; Desc: String
|
||||||
|
): string;
|
||||||
|
var
|
||||||
|
c,i: Integer;
|
||||||
|
begin
|
||||||
|
result := '';
|
||||||
|
c := 0;
|
||||||
|
i := index;
|
||||||
|
while (Count>0) and (i<=Length(Desc)) do begin
|
||||||
|
c := c + ord(Desc[i]);
|
||||||
|
inc(i);
|
||||||
|
Dec(Count);
|
||||||
|
end;
|
||||||
|
i := UTF8Index(S, Index, Desc);
|
||||||
|
if i>0 then begin
|
||||||
|
SetLength(Result, c);
|
||||||
|
Move(S[i],Result[1],c);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// this assume index is in valid range
|
||||||
|
function UTF8Index(S: string; index: integer; desc: string): Integer;
|
||||||
|
var
|
||||||
|
i,c: integer;
|
||||||
|
begin
|
||||||
|
result := 0;
|
||||||
|
c := 0;
|
||||||
|
for i:=1 to Length(Desc) do begin
|
||||||
|
inc(c);
|
||||||
|
if i=index then begin
|
||||||
|
result := c;
|
||||||
|
break;
|
||||||
|
end;
|
||||||
|
c := c + ord(Desc[i]) - 1;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function UTF8CharIn(ch:TUTF8Char; const arrstr: array of string): boolean;
|
||||||
|
var
|
||||||
|
i: Integer;
|
||||||
|
begin
|
||||||
|
result := false;
|
||||||
|
for i:=low(arrstr) to high(arrstr) do
|
||||||
|
if arrstr[i]=ch then begin
|
||||||
|
result := true;
|
||||||
|
break;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
Loading…
Reference in New Issue
Block a user