codetools: SplitStringConstant: added UTF8 support

git-svn-id: trunk@30442 -
This commit is contained in:
mattias 2011-04-23 22:50:43 +00:00
parent f529fa5679
commit 2dd6f59bd1
2 changed files with 84 additions and 17 deletions

View File

@ -179,7 +179,7 @@ function StringToPascalConst(const s: string): string;
// string constants // string constants
function SplitStringConstant(const StringConstant: string; function SplitStringConstant(const StringConstant: string;
FirstLineLength, OtherLineLengths, Indent: integer; FirstLineLength, OtherLineLengths, Indent: integer;
const NewLine: string): string; const aLineBreak: string): string;
procedure ImproveStringConstantStart(const ACode: string; var StartPos: integer); procedure ImproveStringConstantStart(const ACode: string; var StartPos: integer);
procedure ImproveStringConstantEnd(const ACode: string; var EndPos: integer); procedure ImproveStringConstantEnd(const ACode: string; var EndPos: integer);
@ -4309,7 +4309,7 @@ end;
function SplitStringConstant(const StringConstant: string; function SplitStringConstant(const StringConstant: string;
FirstLineLength, OtherLineLengths, Indent: integer; FirstLineLength, OtherLineLengths, Indent: integer;
const NewLine: string): string; const aLineBreak: string): string;
{ Split long string constants { Split long string constants
If possible it tries to split on word boundaries. If possible it tries to split on word boundaries.
@ -4332,9 +4332,10 @@ const
stctStart = 'S'; // ' start char stctStart = 'S'; // ' start char
stctEnd = 'E'; // ' end char stctEnd = 'E'; // ' end char
stctWordStart = 'W'; // word char after non word char stctWordStart = 'W'; // word char after non word char
stctQuotation1 = 'Q'; // first ' of a double '' stctQuotation1 = '1'; // first ' of a double ''
stctQuotation2 = 'M'; // second ' of a double '' stctQuotation2 = '2'; // second ' of a double ''
stctChar = 'C'; // normal character stctChar = 'C'; // normal character
stctMBC = 'M'; // follow character of multi byte char
stctHash = '#'; // hash stctHash = '#'; // hash
stctHashNumber = '0'; // hash number stctHashNumber = '0'; // hash number
stctLineEnd10 = #10; // hash number is 10 stctLineEnd10 = #10; // hash number is 10
@ -4348,17 +4349,34 @@ var
ParsedSrc: string; ParsedSrc: string;
ParsedLen: integer; ParsedLen: integer;
SplitPos: integer; SplitPos: integer;
i: Integer;
procedure ParseSrc; procedure ParseSrc;
var var
APos: Integer; APos: Integer;
procedure MarkMBC;
// Step APos over the character that starts at Src[APos] and tag its follow
// bytes in ParsedSrc with stctMBC.
// The call sites visible in this chunk assign ParsedSrc[APos] for the first
// byte themselves before calling, so this procedure only writes the
// remaining bytes.
var
l: LongInt;
begin
// byte length of the character at APos as reported by UTF8CharacterLength
l:=UTF8CharacterLength(@Src[APos]);
// skip the first byte; its marker is not written here
inc(APos);
dec(l);
// NOTE(review): the bound is a strict '<', so index ParsedLen itself is never
// tagged as stctMBC here, while the enclosing loop runs 'while APos<=ParsedLen'.
// A follow byte landing exactly on ParsedLen would then be classified by the
// caller's loop instead - confirm this is intended.
while (l>0) and (APos<ParsedLen) do begin
ParsedSrc[APos]:=stctMBC;
inc(APos);
dec(l);
end;
end;
var
NumberStart: Integer; NumberStart: Integer;
Number: Integer; Number: Integer;
begin begin
SetLength(ParsedSrc,CurLineMax+1);
APos:=1; APos:=1;
ParsedLen:=CurLineMax+1; ParsedLen:=CurLineMax+1;
if ParsedLen>SrcLen then ParsedLen:=SrcLen; if ParsedLen>SrcLen then ParsedLen:=SrcLen;
SetLength(ParsedSrc,CurLineMax+1);
while APos<=ParsedLen do begin while APos<=ParsedLen do begin
if Src[APos]='''' then begin if Src[APos]='''' then begin
ParsedSrc[APos]:=stctStart; ParsedSrc[APos]:=stctStart;
@ -4376,15 +4394,16 @@ var
ParsedSrc[APos-1]:=stctEnd; ParsedSrc[APos-1]:=stctEnd;
break; break;
end; end;
end else begin end else if Src[APos] in ['A'..'Z','a'..'z',#128..#255] then begin
// normal char // normal word char
if (Src[APos] in ['A'..'Z','a'..'z']) if (APos>1) and (Src[APos-1] in ['A'..'Z','a'..'z',#128..#255]) then
and (APos>1) ParsedSrc[APos]:=stctChar
and (ParsedSrc[APos-1]=stctChar)
and (not (Src[APos-1] in ['A'..'Z','a'..'z'])) then
ParsedSrc[APos]:=stctWordStart
else else
ParsedSrc[APos]:=stctChar; ParsedSrc[APos]:=stctWordStart;
MarkMBC;
end else begin
// other char in string constant
ParsedSrc[APos]:=stctWordStart;
inc(APos); inc(APos);
end; end;
end; end;
@ -4418,7 +4437,7 @@ var
end else begin end else begin
// junk // junk
ParsedSrc[APos]:=stctJunk; ParsedSrc[APos]:=stctJunk;
inc(APos); MarkMBC;
end; end;
end; end;
end; end;
@ -4444,7 +4463,7 @@ var
NewSplitPos: Integer; NewSplitPos: Integer;
begin begin
if SplitPos>0 then exit; if SplitPos>0 then exit;
// check if there is a newline character constant // check if there is a aLineBreak character constant
HashPos:=SearchCharLeftToRight(stctLineEnd10)-1; HashPos:=SearchCharLeftToRight(stctLineEnd10)-1;
if (HashPos<1) then begin if (HashPos<1) then begin
HashPos:=SearchCharLeftToRight(stctLineEnd13)-1; HashPos:=SearchCharLeftToRight(stctLineEnd13)-1;
@ -4528,7 +4547,7 @@ var
CurIndent:=CurLineMax-10; CurIndent:=CurLineMax-10;
if CurIndent<0 then CurIndent:=0; if CurIndent<0 then CurIndent:=0;
// add indent spaces to Result // add indent spaces to Result
Result:=Result+NewLine+GetIndentStr(CurIndent)+'+'; Result:=Result+aLineBreak+GetIndentStr(CurIndent)+'+';
// calculate next maximum line length // calculate next maximum line length
CurLineMax:=CurLineMax-CurIndent-1; CurLineMax:=CurLineMax-CurIndent-1;
end; end;
@ -4542,8 +4561,9 @@ begin
CurLineMax:=FirstLineLength; CurLineMax:=FirstLineLength;
//DebugLn('SplitStringConstant FirstLineLength=',FirstLineLength, //DebugLn('SplitStringConstant FirstLineLength=',FirstLineLength,
//' OtherLineLengths=',OtherLineLengths,' Indent=',Indent,' '); //' OtherLineLengths=',OtherLineLengths,' Indent=',Indent,' ');
i:=0;
repeat repeat
//DebugLn('SrcLen=',SrcLen,' CurMaxLine=',CurLineMax); //DebugLn(['SrcLen=',SrcLen,' CurMaxLine=',CurLineMax]);
//DebugLn('Src="',Src,'"'); //DebugLn('Src="',Src,'"');
//DebugLn('Result="',Result,'"'); //DebugLn('Result="',Result,'"');
if SrcLen<=CurLineMax then begin if SrcLen<=CurLineMax then begin
@ -4553,12 +4573,21 @@ begin
end; end;
// split line -> search nice split position // split line -> search nice split position
ParseSrc; ParseSrc;
//debugln(['ParsedSrc=',ParsedSrc]);
SplitPos:=0; SplitPos:=0;
SplitAtNewLineCharConstant; SplitAtNewLineCharConstant;
SplitBetweenConstants; SplitBetweenConstants;
SplitAtWordBoundary; SplitAtWordBoundary;
SplitDefault; SplitDefault;
if SplitPos<=1 then begin
// no split possible
Result:=Result+Src;
break;
end;
//debugln(['SplitStringConstant SplitPos=',SplitPos]);
Split; Split;
inc(i);
if i>10 then break;
until false; until false;
//DebugLn('END Result="',Result,'"'); //DebugLn('END Result="',Result,'"');
//DebugLn('SplitStringConstant END---------------------------------'); //DebugLn('SplitStringConstant END---------------------------------');

View File

@ -278,6 +278,7 @@ function NeedRTLAnsi: boolean;// true if system encoding is not UTF-8
procedure SetNeedRTLAnsi(NewValue: boolean); procedure SetNeedRTLAnsi(NewValue: boolean);
function UTF8ToSys(const s: string): string;// as UTF8ToAnsi but more independent of widestringmanager function UTF8ToSys(const s: string): string;// as UTF8ToAnsi but more independent of widestringmanager
function SysToUTF8(const s: string): string;// as AnsiToUTF8 but more independent of widestringmanager function SysToUTF8(const s: string): string;// as AnsiToUTF8 but more independent of widestringmanager
function UTF8CharacterLength(p: PChar): integer;
// file operations // file operations
function FileExistsUTF8(const Filename: string): boolean; function FileExistsUTF8(const Filename: string): boolean;
@ -531,6 +532,43 @@ begin
Result:=s; Result:=s;
end; end;
function UTF8CharacterLength(p: PChar): integer;
// Returns the number of bytes of the UTF-8 character starting at p^.
//   p = nil                        -> 0
//   lead byte below %11000000      -> 1 (ASCII, #0, or a stray follow byte)
//   valid 2/3/4 byte lead with all
//   required follow bytes present  -> 2, 3 or 4
//   anything else (truncated or
//   invalid sequence)              -> 1

  function HasFollowBytes(Count: integer): boolean;
  // True if p[1]..p[Count] all match the bit pattern 10xxxxxx.
  // Stops at the first mismatch, so bytes behind a terminating #0
  // are never inspected.
  var
    k: integer;
  begin
    Result:=false;
    for k:=1 to Count do
      if (ord(p[k]) and %11000000)<>%10000000 then
        exit;
    Result:=true;
  end;

var
  Lead: byte;
  Expected: integer;
begin
  Result:=0;
  if p=nil then
    exit;
  Lead:=ord(p^);
  // derive the announced sequence length from the lead byte
  case Lead of
    $C0..$DF: Expected:=2; // 110xxxxx
    $E0..$EF: Expected:=3; // 1110xxxx
    $F0..$F7: Expected:=4; // 11110xxx
  else
    Expected:=1;           // single byte, follow byte or invalid lead
  end;
  // an incomplete sequence is treated as a single byte
  if (Expected>1) and not HasFollowBytes(Expected-1) then
    Expected:=1;
  Result:=Expected;
end;
function FileExistsUTF8(const Filename: string): boolean; function FileExistsUTF8(const Filename: string): boolean;
begin begin
Result:=SysUtils.FileExists(UTF8ToSys(Filename)); Result:=SysUtils.FileExists(UTF8ToSys(Filename));