mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-08 05:18:25 +02:00
320 lines
9.4 KiB
XML
320 lines
9.4 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<fpdoc-descriptions>
|
|
<package name="lazutils">
|
|
|
|
<!--
|
|
====================================================================
|
|
lazutf16
|
|
====================================================================
|
|
-->
|
|
|
|
<module name="lazutf16">
|
|
<short>Contains routines used for UTF-16 character and string operations.</short>
|
|
<descr>
|
|
<p>
|
|
<file>lazutf16.pas</file> includes string routines which are based on UTF-16
|
|
implementations, although it might also include routines for other encodings.
|
|
</p>
|
|
<p>
|
|
A UTF-16 based implementation for LowerCase, for example, is faster in
|
|
WideString and UnicodeString than the default UTF-8 implementation.
|
|
</p>
|
|
<p>
|
|
Currently this unit includes only UTF8LowerCaseViaTables which is based on
|
|
a UTF-16 table, but it might be extended to include various UTF-16 routines.
|
|
</p>
|
|
<p>
|
|
This file is part of the <file>LazUtils</file> package.
|
|
</p>
|
|
</descr>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF16CharacterLength">
|
|
<short>Gets the length of the UTF-16 character in the specified PWideChar value.</short>
|
|
<descr>
|
|
Uses the endian-ness for the platform. Returns 0, 1, or 2.
|
|
</descr>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF16CharacterLength.Result">
|
|
<short>Length of the UTF-16 character in the value, or 0 when Nil.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16CharacterLength.p">
|
|
<short>PWideChar value examined in the routine.</short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF16Length">
|
|
<short>Gets the length for the specified value in UTF-16 characters.</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF16Length.Result">
|
|
<short></short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16Length.s">
|
|
<short></short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16Length.p">
|
|
<short></short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16Length.WordCount">
|
|
<short></short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF16Copy">
|
|
<short>
|
|
Copies a number of UTF-16 characters at the given character position in the specified value.
|
|
</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF16Copy.Result">
|
|
<short>UnicodeString with the values copied in the routine.</short>
|
|
</element>
|
|
<element name="UTF16Copy.s">
|
|
<short>UnicodeString with the values examined in the routine.</short>
|
|
</element>
|
|
<element name="UTF16Copy.StartCharIndex">
|
|
<short>1-based starting character (code point) position in the Unicode string.</short>
|
|
</element>
|
|
<element name="UTF16Copy.CharCount">
|
|
<short>Number of characters (code points) copied in the routine.</short>
|
|
</element>
|
|
|
|
<element name="UTF16CharStart">
|
|
<short/>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF16CharStart.Result">
|
|
<short/>
|
|
</element>
|
|
<element name="UTF16CharStart.P">
|
|
<short>PWideChar value with the values examined in the routine.</short>
|
|
</element>
|
|
<element name="UTF16CharStart.Len">
|
|
<short>Len is the length in words of P.</short>
|
|
</element>
|
|
<element name="UTF16CharStart.CharIndex">
|
|
<short>CharIndex is the position of the desired UnicodeChar (starting at 0).</short>
|
|
</element>
|
|
|
|
<element name="UTF16Pos">
|
|
<short>Pos implemented for UTF-16-encoded values.</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF16Pos</var> is a <var>PtrInt</var> function used to get the character
|
|
index in SearchInText where the value in SearchForText is located. StartPos
|
|
allows the search to begin at a specific character (code point).
|
|
</p>
|
|
<p>
|
|
The return value is the 1-based UTF-16 character index where the SearchForText
|
|
starts in SearchInText, or 0 when not found.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF16Pos.Result">
|
|
<short>
|
|
Character index where the SearchForText starts in SearchInText, or 0 when not found.
|
|
</short>
|
|
</element>
|
|
<element name="UTF16Pos.SearchForText">
|
|
<short>UTF-16-encoded value to locate in SearchInText.</short>
|
|
</element>
|
|
<element name="UTF16Pos.SearchInText">
|
|
<short>UTF-16-encoded value searched in the routine.</short>
|
|
</element>
|
|
<element name="UTF16Pos.StartPos">
|
|
<short>Optional starting position (in UTF-16 code points, not in words).</short>
|
|
</element>
|
|
|
|
<element name="UTF16CharacterToUnicode">
|
|
<short>Converts ordinal values for UTF-16 code points in p to their Unicode equivalent.</short>
|
|
<descr>
|
|
<p>
|
|
UTF16CharacterToUnicode converts 16-bit values in p to the equivalent Unicode value.
|
|
</p>
|
|
<p>
|
|
Unpaired surrogates are invalid in any UTF. These include any value in the range
|
|
$D800..$DBFF not followed by a value in the range $DC00..$DFFF, or any value in
|
|
the range $DC00..$DFFF not preceded by a value in the range $D800..$DBFF.
|
|
</p>
|
|
<p>
|
|
UTF16CharacterToUnicode ensures that ordinal value(s) in the reserved range(s)
|
|
are converted to the correct Unicode value. CharLen is updated to reflect whether
|
|
the values in p are a character represented by a single UTF-16 code point (1), or
|
|
require 2 code points for the surrogate pair (2). It is set to 0 when p contains an
|
|
invalid UTF-16 code point.
|
|
</p>
|
|
<p>
|
|
The return value contains the Cardinal value for the Unicode code point, or 0 when p
|
|
contains an invalid UTF-16 code point.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF16CharacterToUnicode.Result">
|
|
<short>Unicode code point for the values in p.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16CharacterToUnicode.p">
|
|
<short>UTF-16 code points examined and converted in the routine.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF16CharacterToUnicode.CharLen">
|
|
<short>Number of UTF-16 code points for the converted character.</short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UnicodeToUTF16">
|
|
<short>Converts a Unicode character value to its UTF-16 equivalent as a WideString value.</short>
|
|
<descr>
|
|
<p>
|
|
Cardinal values below $10000 result in a single WideChar code value for the
|
|
code point. Other cardinal values result in 2 WideChar values in the result to
|
|
represent the UTF-16 code point.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UnicodeToUTF16.Result">
|
|
<short>WideString value with the UTF-16 code point for the Unicode character.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UnicodeToUTF16.u">
|
|
<short>Unicode character value converted in the routine.</short>
|
|
</element>
|
|
|
|
<element name="IsUTF16CharValid">
|
|
<short></short>
|
|
<descr>
|
|
<p>
|
|
Based on the specification defined by the Unicode consortium, at:
|
|
</p>
|
|
<p>
|
|
<url href="http://unicode.org/faq/utf_bom.html#utf16-7">
|
|
http://unicode.org/faq/utf_bom.html#utf16-7
|
|
</url>
|
|
</p>
|
|
<p>
|
|
Q: Are there any 16-bit values that are invalid?
|
|
</p>
|
|
<p>
|
|
A: Unpaired surrogates are invalid in UTFs. These include any value in the
|
|
range D800 to DBFF not followed by a value in the range DC00 to DFFF, or
|
|
any value in the range DC00 to DFFF not preceded by a value in the range
|
|
D800 to DBFF. [AF]
|
|
</p>
|
|
<p>
|
|
Use ANextChar = #0 to indicate that there is no next char.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="IsUTF16CharValid.Result">
|
|
<short/>
|
|
</element>
|
|
<element name="IsUTF16CharValid.AChar">
|
|
<short/>
|
|
</element>
|
|
<element name="IsUTF16CharValid.ANextChar">
|
|
<short/>
|
|
</element>
|
|
|
|
<element name="IsUTF16StringValid">
|
|
<short>Determines if the specified WideString contains valid UTF-16 code points.</short>
|
|
<descr>
|
|
<p>
|
|
Examines the content in AWideStr for valid UTF-16 characters. Calls
|
|
IsUTF16CharValid for consecutive code point pairs.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="IsUTF16StringValid.Result">
|
|
<short>True if the specified WideString contains valid UTF-16 code points.</short>
|
|
</element>
|
|
<element name="IsUTF16StringValid.AWideStr">
|
|
<short>WideString examined in the routine.</short>
|
|
</element>
|
|
|
|
<element name="Utf16StringReplace">
|
|
<short/>
|
|
<descr>
|
|
<p>
|
|
Same as <var>SysUtils.StringReplace</var> but for WideStrings and UnicodeStrings,
|
|
since it's not available in FPC yet.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="Utf16StringReplace.Result">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.S">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.OldPattern">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.NewPattern">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.Flags">
|
|
<short/>
|
|
</element>
|
|
<element name="Utf16StringReplace.Count">
|
|
<short/>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UnicodeLowercase">
|
|
<short>Converts a Unicode character value to its lowercase equivalent.</short>
|
|
<descr>
|
|
<p>
|
|
Uses internal tables to map Unicode character ranges common to both UTF-16 and UTF-32.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UnicodeLowercase.Result">
|
|
<short>Cardinal value for the lowercase equivalent of u.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UnicodeLowercase.u">
|
|
<short>Unicode character value converted to lowercase in the routine.</short>
|
|
</element>
|
|
|
|
<!-- function Visibility: default -->
|
|
<element name="UTF8LowerCaseViaTables">
|
|
<short>
|
|
Converts a UTF-8-encoded string to lowercase Unicode values using internal case tables.
|
|
</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<!-- function result Visibility: default -->
|
|
<element name="UTF8LowerCaseViaTables.Result">
|
|
<short>String with the lowercase Unicode values for s.</short>
|
|
</element>
|
|
<!-- argument Visibility: default -->
|
|
<element name="UTF8LowerCaseViaTables.s">
|
|
<short>String with UTF-8 values converted to lowercase Unicode in the routine.</short>
|
|
</element>
|
|
|
|
</module>
|
|
<!-- lazutf16 -->
|
|
</package>
|
|
</fpdoc-descriptions>
|