Docs: LazUtils/lazutf8. Adds UTF8CodepointCount topics for changes in c8a1f93a,

2025-12-05 05:17:25 +01:00 · 2024-08-29 17:01:04 +01:00 · 2024-08-29 17:01:04 +01:00 · 108411ee2d
commit 108411ee2d
parent 589b5c0aa8
1 changed files with 64 additions and 0 deletions
--- a/docs/xml/lazutils/lazutf8.xml
+++ b/docs/xml/lazutils/lazutf8.xml
@@ -632,6 +632,70 @@ Even faster UTF-8 character counting
 <short>Number of byte values in the UTF-8-encoded string.</short>
 </element>

+<element name="UTF8CodepointCount">
+<short>
+Gets the number of UTF-8-encoded codepoints in the specified value.
+</short>
+<descr>
+<p>
+<var>UTF8CodepointCount</var> is an overloaded <var>PtrInt</var> function used 
+to determine the number of UTF-8 codepoints found in the specified value. The 
+overloaded variants allow the value to be specified using either the String or 
+the PChar type.
+</p>
+<p>
+UTF8CodepointCount iterates over the byte values in the s or p arguments, and 
+increments the return value when a valid UTF-8 codepoint is found. Valid 
+codepoints include those represented by combining character combinations. 
+UTF8CodepointLen (in system.pp) is called to get the size for each 
+of the UTF-8 codepoints. The process is repeated until all of the bytes in the 
+input value have been examined, or a codepoint with a length of zero (0) is encountered.
+</p>
+<p>
+The return value is zero (0) if the s or p arguments are empty, or when the 
+ByteCount argument is zero (0).
+</p>
+</descr>
+<version>
+Added in LazUtils version 4.0. (c8a1f93a)
+</version>
+<notes>
+<note>
+I wrote a test application to compare the results for UTF8Length and 
+UTF8CodepointCount. They return exactly the same values for the UTF-8 strings I 
+cribbed from the Unicode web site. 
+So basically, why is this routine needed?
+</note>
+</notes>
+<seealso>
+<link id="UTF8CodepointSize"/>
+<link id="UTF8Length"/>
+<link id="UTF8LengthFast"/>
+<link id="UTF8CharacterLength"/>
+<link id="#rtl.system.UTF8CodepointLen">UTF8CodepointLen</link>
+</seealso>
+</element>
+<element name="UTF8CodepointCount.Result">
+<short>
+PtrInt value with the number of codepoints found, including combining characters.
+</short>
+</element>
+<element name="UTF8CodepointCount.s">
+<short>
+String with the codepoints examined in the routine.
+</short>
+</element>
+<element name="UTF8CodepointCount.p">
+<short>
+PChar value with the codepoints examined in the routine.
+</short>
+</element>
+<element name="UTF8CodepointCount.ByteCount">
+<short>
+Number of bytes in the PChar value.
+</short>
+</element>
+
 <element name="UTF8CodepointToUnicode">
 <short>
 Converts a UTF-8-encoded character to its unique Unicode U+XXXX character