mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-11-05 11:59:24 +01:00
lazutf8: utf8trim: trim no break spaces
git-svn-id: trunk@35903 -
This commit is contained in:
parent
0e0af79ac7
commit
9bb0b8ea71
@ -74,6 +74,7 @@ type
|
|||||||
u8tKeepEnd,
|
u8tKeepEnd,
|
||||||
u8tKeepTabs,
|
u8tKeepTabs,
|
||||||
u8tKeepLineBreaks,
|
u8tKeepLineBreaks,
|
||||||
|
u8tKeepNoBreakSpaces,
|
||||||
u8tKeepControlCodes // excluding tabs and line breaks
|
u8tKeepControlCodes // excluding tabs and line breaks
|
||||||
);
|
);
|
||||||
TUTF8TrimFlags = set of TUTF8TrimFlag;
|
TUTF8TrimFlags = set of TUTF8TrimFlag;
|
||||||
@ -2328,9 +2329,11 @@ var
|
|||||||
u: Cardinal;
|
u: Cardinal;
|
||||||
StartP: PtrUInt;
|
StartP: PtrUInt;
|
||||||
l: Integer;
|
l: Integer;
|
||||||
|
KeepAllNonASCII: boolean;
|
||||||
begin
|
begin
|
||||||
Result:=s;
|
Result:=s;
|
||||||
if Result='' then exit;
|
if Result='' then exit;
|
||||||
|
KeepAllNonASCII:=[u8tKeepControlCodes,u8tKeepNoBreakSpaces]*Flags=[u8tKeepControlCodes,u8tKeepNoBreakSpaces];
|
||||||
if not (u8tKeepStart in Flags) then begin
|
if not (u8tKeepStart in Flags) then begin
|
||||||
// trim start
|
// trim start
|
||||||
p:=PChar(Result);
|
p:=PChar(Result);
|
||||||
@ -2356,13 +2359,18 @@ begin
|
|||||||
break;
|
break;
|
||||||
#128..#255:
|
#128..#255:
|
||||||
begin
|
begin
|
||||||
if u8tKeepControlCodes in Flags then break;
|
if KeepAllNonASCII then break;
|
||||||
u:=UTF8CharacterToUnicode(p,l);
|
u:=UTF8CharacterToUnicode(p,l);
|
||||||
if (l<=1) then break; // invalid character
|
if (l<=1) then break; // invalid character
|
||||||
case u of
|
case u of
|
||||||
128..159, // C1 set of control codes
|
128..159, // C1 set of control codes
|
||||||
8206, 8207: // left-to-right, right-to-left mark
|
8206, 8207: // left-to-right, right-to-left mark
|
||||||
;
|
if u8tKeepControlCodes in Flags then break;
|
||||||
|
160, // no break space
|
||||||
|
$2007, // figure space
|
||||||
|
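$2026, // narrow no-break space -- NOTE(review): U+2026 is HORIZONTAL ELLIPSIS; NARROW NO-BREAK SPACE is U+202F, so this constant looks wrong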
|
||||||
|
$FEFF: // zero width no-break space
|
||||||
|
if u8tKeepNoBreakSpaces in Flags then break;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
end;
|
end;
|
||||||
@ -2398,17 +2406,23 @@ begin
|
|||||||
break;
|
break;
|
||||||
#128..#255:
|
#128..#255:
|
||||||
begin
|
begin
|
||||||
if u8tKeepControlCodes in Flags then break;
|
if KeepAllNonASCII then break;
|
||||||
StartP:=UTF8FindNearestCharStart(PChar(Result),length(Result),p-PChar(Result));
|
StartP:=UTF8FindNearestCharStart(PChar(Result),length(Result),p-PChar(Result));
|
||||||
u:=UTF8CharacterToUnicode(PChar(Result)+StartP,l);
|
u:=UTF8CharacterToUnicode(PChar(Result)+StartP,l);
|
||||||
if (l<=1) then break; // invalid character
|
if (l<=1) then break; // invalid character
|
||||||
case u of
|
case u of
|
||||||
128..159, // C1 set of control codes
|
128..159, // C1 set of control codes
|
||||||
8206, 8207: // left-to-right, right-to-left mark
|
8206, 8207: // left-to-right, right-to-left mark
|
||||||
p:=PChar(Result)+StartP;
|
if u8tKeepControlCodes in Flags then break;
|
||||||
|
160, // no break space
|
||||||
|
$2007, // figure space
|
||||||
|
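$2026, // narrow no-break space -- NOTE(review): U+2026 is HORIZONTAL ELLIPSIS; NARROW NO-BREAK SPACE is U+202F, so this constant looks wrong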
|
||||||
|
$FEFF: // zero width no-break space
|
||||||
|
if u8tKeepNoBreakSpaces in Flags then break;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
end;
|
end;
|
||||||
|
p:=PChar(Result)+StartP;
|
||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user