mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-07-29 09:36:34 +02:00
codetools: SplitStringConstant: added UTF8 support
git-svn-id: trunk@30442 -
This commit is contained in:
parent
f529fa5679
commit
2dd6f59bd1
@ -179,7 +179,7 @@ function StringToPascalConst(const s: string): string;
|
|||||||
// string constants
|
// string constants
|
||||||
function SplitStringConstant(const StringConstant: string;
|
function SplitStringConstant(const StringConstant: string;
|
||||||
FirstLineLength, OtherLineLengths, Indent: integer;
|
FirstLineLength, OtherLineLengths, Indent: integer;
|
||||||
const NewLine: string): string;
|
const aLineBreak: string): string;
|
||||||
procedure ImproveStringConstantStart(const ACode: string; var StartPos: integer);
|
procedure ImproveStringConstantStart(const ACode: string; var StartPos: integer);
|
||||||
procedure ImproveStringConstantEnd(const ACode: string; var EndPos: integer);
|
procedure ImproveStringConstantEnd(const ACode: string; var EndPos: integer);
|
||||||
|
|
||||||
@ -4309,7 +4309,7 @@ end;
|
|||||||
|
|
||||||
function SplitStringConstant(const StringConstant: string;
|
function SplitStringConstant(const StringConstant: string;
|
||||||
FirstLineLength, OtherLineLengths, Indent: integer;
|
FirstLineLength, OtherLineLengths, Indent: integer;
|
||||||
const NewLine: string): string;
|
const aLineBreak: string): string;
|
||||||
{ Split long string constants
|
{ Split long string constants
|
||||||
If possible it tries to split on word boundaries.
|
If possible it tries to split on word boundaries.
|
||||||
|
|
||||||
@ -4332,9 +4332,10 @@ const
|
|||||||
stctStart = 'S'; // ' start char
|
stctStart = 'S'; // ' start char
|
||||||
stctEnd = 'E'; // ' end char
|
stctEnd = 'E'; // ' end char
|
||||||
stctWordStart = 'W'; // word char after non word char
|
stctWordStart = 'W'; // word char after non word char
|
||||||
stctQuotation1 = 'Q'; // first ' of a double ''
|
stctQuotation1 = '1'; // first ' of a double ''
|
||||||
stctQuotation2 = 'M'; // second ' of a double ''
|
stctQuotation2 = '2'; // second ' of a double ''
|
||||||
stctChar = 'C'; // normal character
|
stctChar = 'C'; // normal character
|
||||||
|
stctMBC = 'M'; // follow character of multi byte char
|
||||||
stctHash = '#'; // hash
|
stctHash = '#'; // hash
|
||||||
stctHashNumber = '0'; // hash number
|
stctHashNumber = '0'; // hash number
|
||||||
stctLineEnd10 = #10; // hash number is 10
|
stctLineEnd10 = #10; // hash number is 10
|
||||||
@ -4348,17 +4349,34 @@ var
|
|||||||
ParsedSrc: string;
|
ParsedSrc: string;
|
||||||
ParsedLen: integer;
|
ParsedLen: integer;
|
||||||
SplitPos: integer;
|
SplitPos: integer;
|
||||||
|
i: Integer;
|
||||||
|
|
||||||
procedure ParseSrc;
|
procedure ParseSrc;
|
||||||
var
|
var
|
||||||
APos: Integer;
|
APos: Integer;
|
||||||
|
|
||||||
|
procedure MarkMBC;
// Step over the UTF-8 character that starts at Src[APos].
// The marker of the lead byte itself is not touched here; only the
// follow bytes are tagged with stctMBC in ParsedSrc. On return APos
// points behind the last byte that was consumed.
var
  FollowBytes: LongInt;
begin
  // number of bytes that belong to this character besides the lead byte
  FollowBytes:=UTF8CharacterLength(@Src[APos])-1;
  // skip the lead byte
  inc(APos);
  // NOTE(review): the bound is APos<ParsedLen (not <=), so a follow byte
  // located exactly at ParsedLen is not marked here - confirm this is intended.
  while (FollowBytes>0) and (APos<ParsedLen) do begin
    ParsedSrc[APos]:=stctMBC;
    inc(APos);
    dec(FollowBytes);
  end;
end;
|
||||||
|
|
||||||
|
var
|
||||||
NumberStart: Integer;
|
NumberStart: Integer;
|
||||||
Number: Integer;
|
Number: Integer;
|
||||||
begin
|
begin
|
||||||
SetLength(ParsedSrc,CurLineMax+1);
|
|
||||||
APos:=1;
|
APos:=1;
|
||||||
ParsedLen:=CurLineMax+1;
|
ParsedLen:=CurLineMax+1;
|
||||||
if ParsedLen>SrcLen then ParsedLen:=SrcLen;
|
if ParsedLen>SrcLen then ParsedLen:=SrcLen;
|
||||||
|
SetLength(ParsedSrc,CurLineMax+1);
|
||||||
while APos<=ParsedLen do begin
|
while APos<=ParsedLen do begin
|
||||||
if Src[APos]='''' then begin
|
if Src[APos]='''' then begin
|
||||||
ParsedSrc[APos]:=stctStart;
|
ParsedSrc[APos]:=stctStart;
|
||||||
@ -4376,15 +4394,16 @@ var
|
|||||||
ParsedSrc[APos-1]:=stctEnd;
|
ParsedSrc[APos-1]:=stctEnd;
|
||||||
break;
|
break;
|
||||||
end;
|
end;
|
||||||
end else begin
|
end else if Src[APos] in ['A'..'Z','a'..'z',#128..#255] then begin
|
||||||
// normal char
|
// normal word char
|
||||||
if (Src[APos] in ['A'..'Z','a'..'z'])
|
if (APos>1) and (Src[APos-1] in ['A'..'Z','a'..'z',#128..#255]) then
|
||||||
and (APos>1)
|
ParsedSrc[APos]:=stctChar
|
||||||
and (ParsedSrc[APos-1]=stctChar)
|
|
||||||
and (not (Src[APos-1] in ['A'..'Z','a'..'z'])) then
|
|
||||||
ParsedSrc[APos]:=stctWordStart
|
|
||||||
else
|
else
|
||||||
ParsedSrc[APos]:=stctChar;
|
ParsedSrc[APos]:=stctWordStart;
|
||||||
|
MarkMBC;
|
||||||
|
end else begin
|
||||||
|
// other char in string constant
|
||||||
|
ParsedSrc[APos]:=stctWordStart;
|
||||||
inc(APos);
|
inc(APos);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
@ -4418,7 +4437,7 @@ var
|
|||||||
end else begin
|
end else begin
|
||||||
// junk
|
// junk
|
||||||
ParsedSrc[APos]:=stctJunk;
|
ParsedSrc[APos]:=stctJunk;
|
||||||
inc(APos);
|
MarkMBC;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
@ -4444,7 +4463,7 @@ var
|
|||||||
NewSplitPos: Integer;
|
NewSplitPos: Integer;
|
||||||
begin
|
begin
|
||||||
if SplitPos>0 then exit;
|
if SplitPos>0 then exit;
|
||||||
// check if there is a newline character constant
|
// check if there is a line break character constant
|
||||||
HashPos:=SearchCharLeftToRight(stctLineEnd10)-1;
|
HashPos:=SearchCharLeftToRight(stctLineEnd10)-1;
|
||||||
if (HashPos<1) then begin
|
if (HashPos<1) then begin
|
||||||
HashPos:=SearchCharLeftToRight(stctLineEnd13)-1;
|
HashPos:=SearchCharLeftToRight(stctLineEnd13)-1;
|
||||||
@ -4528,7 +4547,7 @@ var
|
|||||||
CurIndent:=CurLineMax-10;
|
CurIndent:=CurLineMax-10;
|
||||||
if CurIndent<0 then CurIndent:=0;
|
if CurIndent<0 then CurIndent:=0;
|
||||||
// add indent spaces to Result
|
// add indent spaces to Result
|
||||||
Result:=Result+NewLine+GetIndentStr(CurIndent)+'+';
|
Result:=Result+aLineBreak+GetIndentStr(CurIndent)+'+';
|
||||||
// calculate next maximum line length
|
// calculate next maximum line length
|
||||||
CurLineMax:=CurLineMax-CurIndent-1;
|
CurLineMax:=CurLineMax-CurIndent-1;
|
||||||
end;
|
end;
|
||||||
@ -4542,8 +4561,9 @@ begin
|
|||||||
CurLineMax:=FirstLineLength;
|
CurLineMax:=FirstLineLength;
|
||||||
//DebugLn('SplitStringConstant FirstLineLength=',FirstLineLength,
|
//DebugLn('SplitStringConstant FirstLineLength=',FirstLineLength,
|
||||||
//' OtherLineLengths=',OtherLineLengths,' Indent=',Indent,' ');
|
//' OtherLineLengths=',OtherLineLengths,' Indent=',Indent,' ');
|
||||||
|
i:=0;
|
||||||
repeat
|
repeat
|
||||||
//DebugLn('SrcLen=',SrcLen,' CurMaxLine=',CurLineMax);
|
//DebugLn(['SrcLen=',SrcLen,' CurMaxLine=',CurLineMax]);
|
||||||
//DebugLn('Src="',Src,'"');
|
//DebugLn('Src="',Src,'"');
|
||||||
//DebugLn('Result="',Result,'"');
|
//DebugLn('Result="',Result,'"');
|
||||||
if SrcLen<=CurLineMax then begin
|
if SrcLen<=CurLineMax then begin
|
||||||
@ -4553,12 +4573,21 @@ begin
|
|||||||
end;
|
end;
|
||||||
// split line -> search nice split position
|
// split line -> search nice split position
|
||||||
ParseSrc;
|
ParseSrc;
|
||||||
|
//debugln(['ParsedSrc=',ParsedSrc]);
|
||||||
SplitPos:=0;
|
SplitPos:=0;
|
||||||
SplitAtNewLineCharConstant;
|
SplitAtNewLineCharConstant;
|
||||||
SplitBetweenConstants;
|
SplitBetweenConstants;
|
||||||
SplitAtWordBoundary;
|
SplitAtWordBoundary;
|
||||||
SplitDefault;
|
SplitDefault;
|
||||||
|
if SplitPos<=1 then begin
|
||||||
|
// no split possible
|
||||||
|
Result:=Result+Src;
|
||||||
|
break;
|
||||||
|
end;
|
||||||
|
//debugln(['SplitStringConstant SplitPos=',SplitPos]);
|
||||||
Split;
|
Split;
|
||||||
|
inc(i);
|
||||||
|
if i>10 then break;
|
||||||
until false;
|
until false;
|
||||||
//DebugLn('END Result="',Result,'"');
|
//DebugLn('END Result="',Result,'"');
|
||||||
//DebugLn('SplitStringConstant END---------------------------------');
|
//DebugLn('SplitStringConstant END---------------------------------');
|
||||||
|
@ -278,6 +278,7 @@ function NeedRTLAnsi: boolean;// true if system encoding is not UTF-8
|
|||||||
procedure SetNeedRTLAnsi(NewValue: boolean);
|
procedure SetNeedRTLAnsi(NewValue: boolean);
|
||||||
function UTF8ToSys(const s: string): string;// as UTF8ToAnsi but more independent of widestringmanager
|
function UTF8ToSys(const s: string): string;// as UTF8ToAnsi but more independent of widestringmanager
|
||||||
function SysToUTF8(const s: string): string;// as AnsiToUTF8 but more independent of widestringmanager
|
function SysToUTF8(const s: string): string;// as AnsiToUTF8 but more independent of widestringmanager
|
||||||
|
function UTF8CharacterLength(p: PChar): integer;
|
||||||
|
|
||||||
// file operations
|
// file operations
|
||||||
function FileExistsUTF8(const Filename: string): boolean;
|
function FileExistsUTF8(const Filename: string): boolean;
|
||||||
@ -531,6 +532,43 @@ begin
|
|||||||
Result:=s;
|
Result:=s;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function UTF8CharacterLength(p: PChar): integer;
// Returns the length in bytes of the UTF-8 character starting at p.
//   p = nil                                   -> 0
//   lead byte below %11000000                 -> 1 (#0 counts as a character)
//   valid 2, 3 or 4 byte sequence             -> 2, 3 or 4
//   lead byte announcing a sequence whose follow bytes are not all
//   of the form %10xxxxxx, or any other byte  -> 1

  function IsFollowByte(c: char): boolean;
  // true if c has the bit pattern %10xxxxxx
  begin
    Result:=(ord(c) and %11000000) = %10000000;
  end;

var
  Lead: byte;
  Expected: integer;
  k: integer;
begin
  if p=nil then begin
    Result:=0;
    exit;
  end;
  Lead:=ord(p^);
  // derive the announced sequence length from the lead byte
  case Lead of
    %11000000..%11011111: Expected:=2;
    %11100000..%11101111: Expected:=3;
    %11110000..%11110111: Expected:=4;
  else
    // plain single byte, stray follow byte or unsupported lead byte
    Expected:=1;
  end;
  Result:=Expected;
  // verify the follow bytes front to back and stop at the first mismatch,
  // so no byte behind an invalid one (e.g. a terminating #0) is read
  for k:=1 to Expected-1 do
    if not IsFollowByte(p[k]) then begin
      Result:=1;
      break;
    end;
end;
|
||||||
|
|
||||||
function FileExistsUTF8(const Filename: string): boolean;
|
function FileExistsUTF8(const Filename: string): boolean;
|
||||||
begin
|
begin
|
||||||
Result:=SysUtils.FileExists(UTF8ToSys(Filename));
|
Result:=SysUtils.FileExists(UTF8ToSys(Filename));
|
||||||
|
Loading…
Reference in New Issue
Block a user