mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-12 17:49:07 +02:00
* synchronized with trunk
git-svn-id: branches/unicodekvm@48727 -
This commit is contained in:
commit
4a56b9eaf8
@ -1,3 +1,33 @@
|
||||
{ GraphemeBreakProperty Unicode data unit.
|
||||
|
||||
Copyright (C) 2021 Nikolay Nikolov <nickysn@users.sourceforge.net>
|
||||
|
||||
This library is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version with the following modification:
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent modules, and
|
||||
to copy and distribute the resulting executable under terms of your choice,
|
||||
provided that you also meet, for each linked independent module, the terms
|
||||
and conditions of the license of that module. An independent module is a
|
||||
module which is not derived from or based on this library. If you modify
|
||||
this library, you may extend this exception to your version of the library,
|
||||
but you are not obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with this library; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
}
|
||||
|
||||
unit graphemebreakproperty;
|
||||
|
||||
{$MODE objfpc}
|
||||
@ -25,6 +55,29 @@ type
|
||||
gbpGlue_After_Zwj,
|
||||
gbpE_Base_GAZ);
|
||||
|
||||
{ TUnicodeStringExtendedGraphemeClustersEnumerator }
|
||||
|
||||
TUnicodeStringExtendedGraphemeClustersEnumerator = class
private
  { The string whose clusters are being enumerated. }
  FStr: UnicodeString;
  { Indices into FStr (counted in UTF-16 code units):
    FCurrentIndexStart..FCurrentIndexEnd delimit the cluster returned by
    Current; FNextIndexEnd is the index of the last code unit of the
    look-ahead code point. }
  FCurrentIndexStart, FCurrentIndexEnd, FNextIndexEnd: SizeInt;
  { Code point most recently appended to the current cluster, and the
    look-ahead code point that follows it. }
  FCurrentCodePoint, FNextCodePoint: UCS4Char;
  { Grapheme break properties of the two code points above. }
  FCurrentGBP, FNextGBP: TGraphemeBreakProperty;
  { Number of consecutive regional indicator code points seen so far. }
  FRI_Sequence_Length: Integer;
  { True while inside an emoji base followed only by Extend code points. }
  FE_Base_EBG_Extend_Sequence: Boolean;
  { Decodes the next code point of FStr into the look-ahead fields. }
  procedure FetchNextChar;
  { Getter for the Current property. }
  function GetCurrent: UnicodeString;
public
  { Starts an enumeration over S. }
  constructor Create(const S: UnicodeString);
  { Returns the instance itself, so it can be used in a for..in loop. }
  function GetEnumerator: TUnicodeStringExtendedGraphemeClustersEnumerator;
  { Advances to the next cluster; returns False when FStr is exhausted. }
  function MoveNext: Boolean;
  { The cluster selected by the last successful MoveNext call. }
  property Current: UnicodeString read GetCurrent;
end;
|
||||
|
||||
function GetGraphemeBreakProperty(Ch: UCS4Char): TGraphemeBreakProperty;
|
||||
|
||||
implementation
|
||||
@ -34,4 +87,94 @@ begin
|
||||
{$I graphemebreakproperty_code.inc}
|
||||
end;
|
||||
|
||||
{ TUnicodeStringExtendedGraphemeClustersEnumerator }
|
||||
|
||||
{ Returns the substring of FStr that spans the current cluster, i.e. the
  code units FCurrentIndexStart..FCurrentIndexEnd (both inclusive). }
function TUnicodeStringExtendedGraphemeClustersEnumerator.GetCurrent: UnicodeString;
var
  ClusterLength: SizeInt;
begin
  ClusterLength := FCurrentIndexEnd - FCurrentIndexStart + 1;
  Exit(Copy(FStr, FCurrentIndexStart, ClusterLength));
end;
|
||||
|
||||
{ Advances the look-ahead by one code point.

  FNextIndexEnd is moved to the last UTF-16 code unit of the next code point,
  FNextCodePoint receives its decoded value and FNextGBP its grapheme break
  property. Past the end of FStr the code point is reported as 0. }
procedure TUnicodeStringExtendedGraphemeClustersEnumerator.FetchNextChar;
var
  StrLen: SizeInt;
  LowUnit: Word;
begin
  StrLen := Length(FStr);
  Inc(FNextIndexEnd);
  if FNextIndexEnd > StrLen then
    FNextCodePoint := 0
  else
  begin
    FNextCodePoint := Ord(FStr[FNextIndexEnd]);
    { A high surrogate is only combined when another code unit follows... }
    if (FNextCodePoint >= $D800) and (FNextCodePoint <= $DBFF) and (FNextIndexEnd < StrLen) then
    begin
      LowUnit := Ord(FStr[FNextIndexEnd + 1]);
      { ...and that code unit is a low surrogate; otherwise the lone
        surrogate value is kept as the code point. }
      if (LowUnit >= $DC00) and (LowUnit <= $DFFF) then
      begin
        Inc(FNextIndexEnd);
        FNextCodePoint := $10000 + (((FNextCodePoint - $D800) shl 10) or (LowUnit - $DC00));
      end;
    end;
  end;
  FNextGBP := GetGraphemeBreakProperty(FNextCodePoint);
end;
|
||||
|
||||
{ Prepares an enumeration over S: resets all positions and rule state, then
  pre-loads the first code point so that MoveNext always has a look-ahead. }
constructor TUnicodeStringExtendedGraphemeClustersEnumerator.Create(const S: UnicodeString);
begin
  { Rule state carried between code points. }
  FE_Base_EBG_Extend_Sequence := False;
  FRI_Sequence_Length := 0;

  { Positions: index 0 means "before the first code unit". }
  FNextIndexEnd := 0;
  FCurrentIndexEnd := 0;
  FCurrentIndexStart := 0;

  FStr := S;

  { Must come last: it reads FStr and FNextIndexEnd. }
  FetchNextChar;
end;
|
||||
|
||||
{ The object acts as its own enumerator: this simply hands back Self. }
function TUnicodeStringExtendedGraphemeClustersEnumerator.GetEnumerator: TUnicodeStringExtendedGraphemeClustersEnumerator;
begin
  Exit(Self);
end;
|
||||
|
||||
{ Advances to the next extended grapheme cluster.

  On success FCurrentIndexStart..FCurrentIndexEnd delimit the new cluster and
  True is returned. False is returned once the previous cluster ended at the
  end of FStr.

  The loop appends one code point at a time to the cluster and then decides,
  from the property of that code point and of the look-ahead code point,
  whether the two must stay together. }
function TUnicodeStringExtendedGraphemeClustersEnumerator.MoveNext: Boolean;
const
  ControlLike = [gbpControl, gbpCR, gbpLF];
var
  StrLen: SizeInt;
  KeepTogether: Boolean;
begin
  Result := False;
  StrLen := Length(FStr);
  FCurrentIndexStart := FCurrentIndexEnd + 1;
  if FCurrentIndexStart <= StrLen then
  begin
    Result := True;
    repeat
      { Consume the look-ahead code point into the current cluster. }
      FCurrentGBP := FNextGBP;
      FCurrentCodePoint := FNextCodePoint;
      FCurrentIndexEnd := FNextIndexEnd;

      { Track the length of the running regional indicator sequence.
        NOTE(review): this identifier is spelled with a 'gpb' prefix, unlike
        the other gbp* values; presumably it matches the enum declaration,
        which is not visible here - confirm. }
      if FCurrentGBP = gpbRegional_Indicator then
        Inc(FRI_Sequence_Length)
      else
        FRI_Sequence_Length := 0;

      { Track whether we are inside "emoji base, then only Extend". }
      FE_Base_EBG_Extend_Sequence :=
        (FCurrentGBP in [gbpE_Base, gbpE_Base_GAZ]) or
        (FE_Base_EBG_Extend_Sequence and (FCurrentGBP = gbpExtend));

      FetchNextChar;

      if FNextIndexEnd > StrLen then
        { Nothing follows: the cluster ends at the end of the string. }
        KeepTogether := False
      else if (FCurrentGBP = gbpCR) and (FNextGBP = gbpLF) then
        { Do not break between a CR and LF. }
        KeepTogether := True
      else if (FCurrentGBP in ControlLike) or (FNextGBP in ControlLike) then
        { Otherwise, break before and after controls. }
        KeepTogether := False
      else
        KeepTogether :=
          { Do not break Hangul syllable sequences. }
          ((FCurrentGBP = gbpL) and (FNextGBP in [gbpL, gbpV, gbpLV, gbpLVT])) or
          ((FCurrentGBP in [gbpLV, gbpV]) and (FNextGBP in [gbpV, gbpT])) or
          ((FCurrentGBP in [gbpLVT, gbpT]) and (FNextGBP = gbpT)) or
          { Do not break before extending characters or ZWJ. }
          (FNextGBP in [gbpExtend, gbpZWJ]) or
          { Only for extended grapheme clusters:
            Do not break before SpacingMarks, or after Prepend characters. }
          (FCurrentGBP = gbpPrepend) or (FNextGBP = gbpSpacingMark) or
          { Do not break within emoji modifier sequences or emoji zwj sequences. }
          ((FCurrentGBP = gbpZWJ) and (FNextGBP in [gbpGlue_After_Zwj, gbpE_Base_GAZ])) or
          (FE_Base_EBG_Extend_Sequence and (FNextGBP = gbpE_Modifier)) or
          { Do not break within emoji flag sequences: keep two regional
            indicators together when an odd number of them precedes the
            candidate break point. }
          ((FCurrentGBP = gpbRegional_Indicator) and (FNextGBP = gpbRegional_Indicator) and
           Odd(FRI_Sequence_Length));
      { Otherwise, break everywhere. }
    until not KeepTogether;
  end;
end;
|
||||
|
||||
end.
|
||||
|
Loading…
Reference in New Issue
Block a user