lazarus/docs/xml/lazutils/lazutf16.xml

<?xml version="1.0" encoding="UTF-8"?>
<fpdoc-descriptions>
<package name="lazutils">

<!--
====================================================================
lazutf16
====================================================================
-->

<module name="lazutf16">
<short>
Contains routines used for UTF-16 character and string operations.
</short>
<descr>
<p>
<file>lazutf16.pas</file> includes string routines which are based on UTF-16
implementations, although it might also include routines for other encodings.
</p>
<p>
A UTF-16 based implementation for LowerCase, for example, is faster in
WideString and UnicodeString then the default UTF-8 implementation.
</p>
<p>
Currently this unit includes only UTF8LowerCaseViaTables which is based on
a UTF-16 table, but it might be extended to include various UTF-16 routines.
</p>
<p>
<file>lazutf16.pas</file> is part of the <file>LazUtils</file> package.
</p>
</descr>

<!-- function Visibility: default -->
<element name="UTF16CharacterLength">
<short>
Gets the length of the UTF-16 character in the specified PWideChar value.
</short>
<descr>
Uses the endian-ness for the platform. Returns 0, 1, or 2.
</descr>
</element>
<!-- function result Visibility: default -->
<element name="UTF16CharacterLength.Result">
<short>Length of the UTF-16 character in the value, or 0 when Nil.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16CharacterLength.p">
<short>PWideChar value examined in the routine.</short>
</element>

<!-- function Visibility: default -->
<element name="UTF16Length">
<short>Gets the length for the specified value in UTF-16 characters.</short>
<descr/>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UTF16Length.Result">
<short></short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16Length.s">
<short></short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16Length.p">
<short></short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16Length.WordCount">
<short></short>
</element>

<!-- function Visibility: default -->
<element name="UTF16Copy">
<short>
Copies a number of UTF-16 characters at the given character position in the
specified value.
</short>
<descr/>
<seealso/>
</element>
<element name="UTF16Copy.Result">
<short>UnicodeString with the values copied in the routine.</short>
</element>
<element name="UTF16Copy.s">
<short>UnicodeString with the values examined in the routine.</short>
</element>
<element name="UTF16Copy.StartCharIndex">
<short>
1-based staring character (code point) position in the Unicode string.
</short>
</element>
<element name="UTF16Copy.CharCount">
<short>Number of characters (code points) copied in the the routine.</short>
</element>

<element name="UTF16CharStart">
<short/>
<descr/>
<seealso/>
</element>
<element name="UTF16CharStart.Result">
<short/>
</element>
<element name="UTF16CharStart.P">
<short>PWideChar value with the values examined in the routine.</short>
</element>
<element name="UTF16CharStart.Len">
<short>Len is the length in words of P.</short>
</element>
<element name="UTF16CharStart.CharIndex">
<short>
CharIndex is the position of the desired UnicodeChar (starting at 0).
</short>
</element>

<element name="UTF16Pos">
<short>Pos implemented for UTF-16-encoded values.</short>
<descr>
<p>
<var>UTF16Pos</var> is a <var>PtrInt</var> function used to get the character
index in SearchInText where the value in SearchForText is located. StartPos
allows the search to begin at a specific character (code point).
</p>
<p>
The return value is the 1-based UTF-16 character index where the SearchForText
starts in SearchInText, or 0 when not found.
</p>
</descr>
<seealso/>
</element>
<element name="UTF16Pos.Result">
<short>
Character index where the SearchForText starts in SearchInText, or 0 when not
found.
</short>
</element>
<element name="UTF16Pos.SearchForText">
<short>UTF-16-encoded value to locate in SearchInText.</short>
</element>
<element name="UTF16Pos.SearchInText">
<short>UTF-16-encoded value searched in the routine.</short>
</element>
<element name="UTF16Pos.StartPos">
<short>
Optional starting position (in UTF-16 code points, not in words).
</short>
</element>

<element name="UTF16CharacterToUnicode">
<short>
Converts ordinal values for UTF-16 code points in p to its Unicode equivalent.
</short>
<descr>
<p>
UTF16CharacterToUnicode converts 16-bit values in p to the equivalent Unicode
value.
</p>
<p>
Unpaired surrogates are invalid in any UTFs. These include any value in the
range $D800..$DBFF not followed by a value in the range $DC00..$DFFF, or any
value in the range $DC00..$DFFF not preceded by a value in the range
$D800..$DBFF.
</p>
<p>
UTF16CharacterToUnicode ensures that ordinal value(s) in the reserved
range(s) are converted to the correct Unicode value. CharLen is updated to
reflect whether the values in p are a character represented by a single
UTF-16 code point (1), or requires 2 code points for the surrogate pair (2).
It is set to 0 when p contains an invalid UTF-16 code point.
</p>
<p>
The return value contains the Cardinal value for the Unicode code point, or 0
when p contains an invalid UTF-16 code point.
</p>
</descr>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UTF16CharacterToUnicode.Result">
<short>Unicode code point for the values in p.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16CharacterToUnicode.p">
<short>UTF-16 code points examined and converted in the routine.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16CharacterToUnicode.CharLen">
<short>Number of UTF-16 code points for the converted character.</short>
</element>

<!-- function Visibility: default -->
<element name="UnicodeToUTF16">
<short>
Converts a Unicode character value to its UTF-16 equivalent as a WideString
value.
</short>
<descr>
<p>
Cardinal values below $10000 result in a single WideChar code value for the
code point. Other cardinal values result in 2 WideChar values in the result to
represent the UTF-16 code point.
</p>
</descr>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UnicodeToUTF16.Result">
<short>WideString value with UTF-16 code point the Unicode character.</short>
</element>
<!-- argument Visibility: default -->
<element name="UnicodeToUTF16.u">
<short>Unicode character value converted in the routine.</short>
</element>

<element name="IsUTF16CharValid">
<short></short>
<descr>
<p>
Based on the specification defined by the Unicode consortium, at:
</p>
<p>
<url href="http://unicode.org/faq/utf_bom.html#utf16-7">
http://unicode.org/faq/utf_bom.html#utf16-7
</url>
</p>
<p>
Q: Are there any 16-bit values that are invalid?
</p>
<p>
A: Unpaired surrogates are invalid in UTFs. These include any value in the
range D800 to DBFF not followed by a value in the range DC00 to DFFF, or
any value in the range DC00 to DFFF not preceded by a value in the range
D800 to DBFF. [AF]
</p>
<p>
Use ANextChar = #0 to indicate that there is no next char.
</p>
</descr>
<seealso/>
</element>
<element name="IsUTF16CharValid.Result">
<short/>
</element>
<element name="IsUTF16CharValid.AChar">
<short/>
</element>
<element name="IsUTF16CharValid.ANextChar">
<short/>
</element>

<element name="IsUTF16StringValid">
<short>
Determines if the specified WideString contains valid UTF-16 code points.
</short>
<descr>
<p>
Examines the content in AWideStr for valid UTF-16 characters. Calls
IsUTF16CharValid for consecutive code point pairs.
</p>
</descr>
<seealso/>
</element>
<element name="IsUTF16StringValid.Result">
<short>
True if the specified WideString contains valid UTF-16 code points.
</short>
</element>
<element name="IsUTF16StringValid.AWideStr">
<short>WideString examined in the routine.</short>
</element>

<element name="Utf16StringReplace">
<short/>
<descr>
<p>
Same as <var>SysUtil.StringReplace</var> but for WideStrings and
UnicodeStrings, since it's not available in FPC yet.
</p>
</descr>
<seealso/>
</element>
<element name="Utf16StringReplace.Result">
<short/>
</element>
<element name="Utf16StringReplace.S">
<short/>
</element>
<element name="Utf16StringReplace.OldPattern">
<short/>
</element>
<element name="Utf16StringReplace.NewPattern">
<short/>
</element>
<element name="Utf16StringReplace.Flags">
<short/>
</element>
<element name="Utf16StringReplace.Count">
<short/>
</element>

<!-- function Visibility: default -->
<element name="UnicodeLowercase">
<short>Converts a Unicode character value to its lowercase equivalent.</short>
<descr>
<p>
Uses internal tables to map Unicode character ranges common to both UTF-16
and UTF-32.
</p>
</descr>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UnicodeLowercase.Result">
<short>Cardinal value for the lowercase equivalent of u.</short>
</element>
<!-- argument Visibility: default -->
<element name="UnicodeLowercase.u">
<short>Unicode character vale converted to lowercase in the routine.</short>
</element>

<!-- function Visibility: default -->
<element name="UTF8LowerCaseViaTables">
<short>
Converts a UTF-8-encoded string to lowercase Unicode values using internal
case tables.
</short>
<descr/>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UTF8LowerCaseViaTables.Result">
<short>String with the lowercase Unicode values for s.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF8LowerCaseViaTables.s">
<short>
String with UTF-8 values converted to lowercase Unicode in the routine.
</short>
</element>

</module>
<!-- lazutf16 -->
</package>
</fpdoc-descriptions>