mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-05-14 16:12:41 +02:00
347 lines
9.5 KiB
XML
347 lines
9.5 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<fpdoc-descriptions>
|
|
<package name="lazutils">
|
|
|
|
<!--
|
|
====================================================================
|
|
lazutf16
|
|
====================================================================
|
|
-->
|
|
|
|
<module name="lazutf16">
|
|
<short>
|
|
Contains routines used for UTF-16 character and string operations.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<file>lazutf16.pas</file> includes string routines which are based on UTF-16
|
|
implementations, although it might also include routines for other encodings.
|
|
</p>
|
|
<p>
|
|
A UTF-16 based implementation for LowerCase, for example, is faster in
|
|
WideString and UnicodeString than the default UTF-8 implementation.
|
|
</p>
|
|
<p>
|
|
Currently this unit includes only UTF8LowerCaseViaTables which is based on
|
|
a UTF-16 table, but it might be extended to include various UTF-16 routines.
|
|
</p>
|
|
<p>
|
|
<file>lazutf16.pas</file> is part of the <file>LazUtils</file> package.
|
|
</p>
|
|
</descr>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF16CharacterLength">
|
|
<short>
|
|
Gets the length of the UTF-16 character in the specified PWideChar value.
|
|
</short>
|
|
<descr>
|
|
Uses the endian-ness for the platform. Returns 0, 1, or 2.
|
|
</descr>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF16CharacterLength.Result">
|
|
<short>Length of the UTF-16 character in the value, or 0 when Nil.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16CharacterLength.p">
|
|
<short>PWideChar value examined in the routine.</short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF16Length">
|
|
<short>Gets the length for the specified value in UTF-16 characters.</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF16Length.Result">
|
|
<short></short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16Length.s">
|
|
<short></short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16Length.p">
|
|
<short></short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16Length.WordCount">
|
|
<short></short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF16Copy">
|
|
<short>
|
|
Copies a number of UTF-16 characters at the given character position in the
|
|
specified value.
|
|
</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF16Copy.Result">
|
|
<short>UnicodeString with the values copied in the routine.</short>
|
|
</element>
|
|
<element name="UTF16Copy.s">
|
|
<short>UnicodeString with the values examined in the routine.</short>
|
|
</element>
|
|
<element name="UTF16Copy.StartCharIndex">
|
|
<short>
|
|
1-based starting character (code point) position in the Unicode string.
|
|
</short>
|
|
</element>
|
|
<element name="UTF16Copy.CharCount">
|
|
<short>Number of characters (code points) copied in the routine.</short>
|
|
</element>
|
|
|
|
<element name="UTF16CharStart">
|
|
<short/>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF16CharStart.Result">
|
|
<short/>
|
|
</element>
|
|
<element name="UTF16CharStart.P">
|
|
<short>PWideChar value with the values examined in the routine.</short>
|
|
</element>
|
|
<element name="UTF16CharStart.Len">
|
|
<short>Len is the length in words of P.</short>
|
|
</element>
|
|
<element name="UTF16CharStart.CharIndex">
|
|
<short>
|
|
CharIndex is the position of the desired UnicodeChar (starting at 0).
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF16Pos">
|
|
<short>Pos implemented for UTF-16-encoded values.</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF16Pos</var> is a <var>PtrInt</var> function used to get the character
|
|
index in SearchInText where the value in SearchForText is located. StartPos
|
|
allows the search to begin at a specific character (code point).
|
|
</p>
|
|
<p>
|
|
The return value is the 1-based UTF-16 character index where the SearchForText
|
|
starts in SearchInText, or 0 when not found.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF16Pos.Result">
|
|
<short>
|
|
Character index where the SearchForText starts in SearchInText, or 0 when not
|
|
found.
|
|
</short>
|
|
</element>
|
|
<element name="UTF16Pos.SearchForText">
|
|
<short>UTF-16-encoded value to locate in SearchInText.</short>
|
|
</element>
|
|
<element name="UTF16Pos.SearchInText">
|
|
<short>UTF-16-encoded value searched in the routine.</short>
|
|
</element>
|
|
<element name="UTF16Pos.StartPos">
|
|
<short>
|
|
Optional starting position (in UTF-16 code points, not in words).
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF16CharacterToUnicode">
|
|
<short>
|
|
Converts ordinal values for UTF-16 code points in p to their Unicode equivalent.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
UTF16CharacterToUnicode converts 16-bit values in p to the equivalent Unicode
|
|
value.
|
|
</p>
|
|
<p>
|
|
Unpaired surrogates are invalid in any UTFs. These include any value in the
|
|
range $D800..$DBFF not followed by a value in the range $DC00..$DFFF, or any
|
|
value in the range $DC00..$DFFF not preceded by a value in the range
|
|
$D800..$DBFF.
|
|
</p>
|
|
<p>
|
|
UTF16CharacterToUnicode ensures that ordinal value(s) in the reserved
|
|
range(s) are converted to the correct Unicode value. CharLen is updated to
|
|
reflect whether the values in p are a character represented by a single
|
|
UTF-16 code point (1), or requires 2 code points for the surrogate pair (2).
|
|
It is set to 0 when p contains an invalid UTF-16 code point.
|
|
</p>
|
|
<p>
|
|
The return value contains the Cardinal value for the Unicode code point, or 0
|
|
when p contains an invalid UTF-16 code point.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF16CharacterToUnicode.Result">
|
|
<short>Unicode code point for the values in p.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16CharacterToUnicode.p">
|
|
<short>UTF-16 code points examined and converted in the routine.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16CharacterToUnicode.CharLen">
|
|
<short>Number of UTF-16 code points for the converted character.</short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UnicodeToUTF16">
|
|
<short>
|
|
Converts a Unicode character value to its UTF-16 equivalent as a WideString
|
|
value.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Cardinal values below $10000 result in a single WideChar code value for the
|
|
code point. Other cardinal values result in 2 WideChar values in the result to
|
|
represent the UTF-16 code point.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UnicodeToUTF16.Result">
|
|
<short>WideString value with the UTF-16 code point for the Unicode character.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UnicodeToUTF16.u">
|
|
<short>Unicode character value converted in the routine.</short>
|
|
</element>
|
|
|
|
<element name="IsUTF16CharValid">
|
|
<short></short>
|
|
<descr>
|
|
<p>
|
|
Based on the specification defined by the Unicode consortium, at:
|
|
</p>
|
|
<p>
|
|
<url href="http://unicode.org/faq/utf_bom.html#utf16-7">
|
|
http://unicode.org/faq/utf_bom.html#utf16-7
|
|
</url>
|
|
</p>
|
|
<p>
|
|
Q: Are there any 16-bit values that are invalid?
|
|
</p>
|
|
<p>
|
|
A: Unpaired surrogates are invalid in UTFs. These include any value in the
|
|
range D800 to DBFF not followed by a value in the range DC00 to DFFF, or
|
|
any value in the range DC00 to DFFF not preceded by a value in the range
|
|
D800 to DBFF. [AF]
|
|
</p>
|
|
<p>
|
|
Use ANextChar = #0 to indicate that there is no next char.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="IsUTF16CharValid.Result">
|
|
<short/>
|
|
</element>
|
|
<element name="IsUTF16CharValid.AChar">
|
|
<short/>
|
|
</element>
|
|
<element name="IsUTF16CharValid.ANextChar">
|
|
<short/>
|
|
</element>
|
|
|
|
<element name="IsUTF16StringValid">
|
|
<short>
|
|
Determines if the specified WideString contains valid UTF-16 code points.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Examines the content in AWideStr for valid UTF-16 characters. Calls
|
|
IsUTF16CharValid for consecutive code point pairs.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="IsUTF16StringValid.Result">
|
|
<short>
|
|
True if the specified WideString contains valid UTF-16 code points.
|
|
</short>
|
|
</element>
|
|
<element name="IsUTF16StringValid.AWideStr">
|
|
<short>WideString examined in the routine.</short>
|
|
</element>
|
|
|
|
<element name="Utf16StringReplace">
|
|
<short/>
|
|
<descr>
|
|
<p>
|
|
Same as <var>SysUtils.StringReplace</var> but for WideStrings and
|
|
UnicodeStrings, since it's not available in FPC yet.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="Utf16StringReplace.Result">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.S">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.OldPattern">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.NewPattern">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.Flags">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.Count">
|
|
<short/>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UnicodeLowercase">
|
|
<short>Converts a Unicode character value to its lowercase equivalent.</short>
|
|
<descr>
|
|
<p>
|
|
Uses internal tables to map Unicode character ranges common to both UTF-16
|
|
and UTF-32.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UnicodeLowercase.Result">
|
|
<short>Cardinal value for the lowercase equivalent of u.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UnicodeLowercase.u">
|
|
<short>Unicode character value converted to lowercase in the routine.</short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF8LowerCaseViaTables">
|
|
<short>
|
|
Converts a UTF-8-encoded string to lowercase Unicode values using internal
|
|
case tables.
|
|
</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF8LowerCaseViaTables.Result">
|
|
<short>String with the lowercase Unicode values for s.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF8LowerCaseViaTables.s">
|
|
<short>
|
|
String with UTF-8 values converted to lowercase Unicode in the routine.
|
|
</short>
|
|
</element>
|
|
|
|
</module>
|
|
<!-- lazutf16 -->
|
|
</package>
|
|
</fpdoc-descriptions>
|