diff --git a/components/lazreport/source/lr_class.pas b/components/lazreport/source/lr_class.pas index c7cc89a8d5..28eb02d49c 100644 --- a/components/lazreport/source/lr_class.pas +++ b/components/lazreport/source/lr_class.pas @@ -2449,7 +2449,7 @@ begin CanBreak := True; end; if CanBreak then - Result := Result + Chr(UTF8CharToByteIndex(pchar(s),length(s),i)); + Result := Result + Chr(i); Inc(i); until i > Len - 2; end; @@ -2463,6 +2463,7 @@ var size, size1, maxwidth: Integer; b: TWordBreaks; WCanvas: TCanvas; + desc: string; procedure OutLine(const str: String); var @@ -2481,9 +2482,11 @@ var procedure WrapLine(const s: String); var - i, cur, beg, last: Integer; + i, cur, beg, last, len: Integer; WasBreak, CRLF: Boolean; + ch: TUTF8char; begin + CRLF := False; for i := 1 to Length(s) do begin @@ -2493,108 +2496,107 @@ var break; end; end; + last := 1; beg := 1; - {$IFDEF DebugLR} - debugLn(' WrapLine: init "',dbgstr(s),'" wcanvas.txtw=',dbgs(WCanvas.TextWidth(s)), - ' maxwidth=',dbgs(maxwidth),' crlf=',dbgs(crlf)); - {$ENDIF} if not CRLF and ((Length(s) <= 1) or (WCanvas.TextWidth(s) <= maxwidth)) then begin - {$IFDEF DebugLR} - debugLn(' WrapLine: fast crlf=',dbgs(crlf),' wcanvas.txtw=',dbgs(Wcanvas.TextWidth(s))); - {$ENDIF} OutLine(s + #1) end else begin + cur := 1; - while cur <= Length(s) do + Len := UTF8Desc(S, Desc); + + while cur <= Len do begin - if s[cur] in [#10, #13] then + Ch := UTF8Char(s, cur, Desc); + + // check for items with soft-breaks + if (Ch=#10) or (Ch=#13) then begin - {$IFDEF DebugLR} - debugLn(' Wrapline: crlf beg=',dbgs(beg),' cur=',dbgs(cur)); - {$ENDIF} - OutLine(Copy(s, beg, cur - beg) + #1); - while (cur < Length(s)) and (s[cur] in [#10, #13]) do Inc(cur); + OutLine(UTF8Range(s, beg, cur - beg, Desc) + #1); + while (cur maxwidth then + + if ch <> ' ' then + if WCanvas.TextWidth(UTF8Range(s, beg, cur - beg + 1, Desc)) > maxwidth then begin + WasBreak := False; if (Flags and flWordBreak) <> 0 then begin + + // in case of breaking in the middle, get the full word i := cur; - while (i <= Length(s)) and not (s[i] in spaces) do + while (i <= Len) and not UTF8CharIn(ch, [' ', '.', ',', '-']) do + begin Inc(i); - {$IFDEF DebugLR} - debugln(' wrapline: to BreakWord: ini=',dbgs(last+1),' fin=',dbgs(i-last-1)); - {$ENDIF} - b := BreakWord(Copy(s, last + 1, i - last - 1)); + if i<=len then + ch := UTF8Char(s, i, Desc); + end; + + // find word's break points using some simple hyphenator algorithm + // TODO: implement interface so users can use their own hyphenator + // algorithm + b := BreakWord(UTF8Range(s, last, i - last, Desc)); + + // if word can be broken in many segments, find the last segment that + // fits within maxwidth if Length(b) > 0 then begin i := 1; - cur := last; while (i <= Length(b)) and - (WCanvas.TextWidth(Copy(s, beg, last - beg + 1 + Ord(b[i])) + '-') <= maxwidth) do + (WCanvas.TextWidth(UTF8Range(s, beg, last - beg + Ord(b[i]), Desc) + '-') <= maxwidth) do begin WasBreak := True; - cur := last + Ord(b[i]); + cur := last + Ord(b[i]); // cur now points to next char after breaking word Inc(i); end; - last := cur; end; + + // last now points to nex char to be processed + last := cur; end else - if last = beg then last := cur; - if WasBreak then begin - {$IFDEF DebugLR} - debugln(' wrapline: wasbreak=true, beg=',dbgs(beg),' fin=',dbgs(last-beg+1)); - {$ENDIF} - OutLine(Copy(s, beg, last - beg + 1) + '-'); - end else if s[last] = ' ' then begin - {$IFDEF DebugLR} - debugln(' wrapline: s[last]=" ", beg=',dbgs(beg),' fin=',dbgs(last-beg)); - {$ENDIF} - OutLine(Copy(s, beg, last - beg)) - end else begin - {$IFDEF DebugLR} - debugln(' wrapline: s[last]<>" ", beg=',dbgs(beg),' fin=',dbgs(last-beg)); - {$ENDIF} - OutLine(Copy(s, beg, last - beg + 1)); - end; - if ((Flags and flWordBreak) <> 0) and not WasBreak and (last = cur) then begin - beg := cur + 1; - cur := Length(s); - break; + if last = beg then + last := cur; end; - if (Flags and flWordBreak) = 0 then + + if WasBreak then begin - if last = cur then - begin - beg := cur; - break; - end; + // if word has been broken, output the partial word plus an hyphen + OutLine(UTF8Range(s, beg, last - beg, Desc) + '-'); + end else + begin + // output the portion of word that fits maxwidth + OutLine(UTF8Range(s, beg, last - beg, Desc)); + // if space was found, advance to next no space char + if s[last] = ' ' then + last := last + 1; end; - beg := last + 1; last := beg; + + beg := last; end; - if s[cur] in spaces then last := cur; + + if UTF8CharIn(Ch, [' ', '.', ',', '-']) then + last := cur; Inc(cur); end; - if beg <> cur then begin - {$IFDEF DebugLR} - debugln(' wrapline: beg<>cur, beg=',dbgs(beg),' fin=',dbgs(cur-beg+1)); - {$ENDIF} - OutLine(Copy(s, beg, cur - beg + 1) + #1); - end; + + if beg <> cur then + OutLine(UTF8Range(s, beg, cur - beg + 1, Desc) + #1); end; end; diff --git a/components/lazreport/source/lr_utils.pas b/components/lazreport/source/lr_utils.pas index bba6fd1ec0..2243a11238 100644 --- a/components/lazreport/source/lr_utils.pas +++ b/components/lazreport/source/lr_utils.pas @@ -53,10 +53,20 @@ function StrTofrTypeObject(St : string) : Byte; function lrGetUnBrackedStr(const S:string):string; //remove '[' from begion of string and ']' from end +// utf8 tools +function UTF8Desc(S:string; var Desc: string): Integer; +function UTF8Char(S:string; index:Integer; Desc:string): TUTF8Char; +function UTF8Range(S:string; index,count:Integer; Desc:String):string; +function UTF8Index(S:string; index:integer; desc:string): Integer; +function UTF8CharIn(ch:TUTF8Char; const arrstr:array of string): boolean; + implementation uses LR_Class; +var + LocalDescri: string; + procedure frInitFont(aFont : TFont; aColor : TColor; aSize : Integer; aStyle : TFontStyles); begin with aFont do @@ -530,4 +540,92 @@ begin Result:=S; end; +function UTF8Desc(S: string; var Desc: string): Integer; +var + i,b: Integer; +begin + i := 1; + Result := 0; + SetLength(Desc, Length(S)); + while i<=Length(s) do begin + b := UTF8CharacterStrictLength(@S[i]); + inc(i,b); + inc(Result); + Desc[Result] := Char(b); + end; + Setlength(Desc, Result); +end; + +function UTF8Char(S: string; index: Integer; Desc: string): TUTF8Char; +var + i,j: Integer; +begin + Result := ''; + if (index<1) or (index>Length(Desc)) then begin + //Result := #$EF#$BF#$BD // replacement character + exit; + end; + + i:=0; j:=1; + while i0) and (i<=Length(Desc)) do begin + c := c + ord(Desc[i]); + inc(i); + Dec(Count); + end; + i := UTF8Index(S, Index, Desc); + if i>0 then begin + SetLength(Result, c); + Move(S[i],Result[1],c); + end; +end; + +// this assume index is in valid range +function UTF8Index(S: string; index: integer; desc: string): Integer; +var + i,c: integer; +begin + result := 0; + c := 0; + for i:=1 to Length(Desc) do begin + inc(c); + if i=index then begin + result := c; + break; + end; + c := c + ord(Desc[i]) - 1; + end; +end; + +function UTF8CharIn(ch:TUTF8Char; const arrstr: array of string): boolean; +var + i: Integer; +begin + result := false; + for i:=low(arrstr) to high(arrstr) do + if arrstr[i]=ch then begin + result := true; + break; + end; +end; + end.