mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-06 00:58:04 +02:00
3213 lines
96 KiB
XML
3213 lines
96 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
|
|
Documentation for LCL (Lazarus Component Library) and LazUtils (Lazarus
|
|
Utilities) are published under the Creative Commons Attribution-ShareAlike 4.0
|
|
International public license.
|
|
|
|
https://creativecommons.org/licenses/by-sa/4.0/legalcode.txt
|
|
https://gitlab.com/freepascal.org/lazarus/lazarus/-/blob/main/docs/cc-by-sa-4-0.txt
|
|
|
|
Copyright (c) 1997-2025, by the Lazarus Development Team.
|
|
|
|
-->
|
|
<fpdoc-descriptions>
|
|
<package name="lazutils">
|
|
<!--
|
|
====================================================================
|
|
LazUTF8
|
|
====================================================================
|
|
-->
|
|
<module name="LazUTF8">
|
|
<short>
|
|
Routines for managing UTF-8-encoded strings.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<file>lazutf8.pas</file> contains useful routines for managing UTF-8-encoded
|
|
strings. All routines are thread-safe unless explicitly stated.
|
|
</p>
|
|
<p>
|
|
<file>lazutf8.pas</file> is part of the <file>LazUtils</file> package.
|
|
</p>
|
|
</descr>
|
|
|
|
<!-- unresolved externals -->
|
|
<element name="cwstring"/>
|
|
<element name="FPCAdds"/>
|
|
<element name="Windows"/>
|
|
<element name="Classes"/>
|
|
<element name="SysUtils"/>
|
|
<element name="StrUtils"/>
|
|
|
|
<element name="NeedRTLAnsi">
|
|
<short>
|
|
Indicates if the OS requires use of AnsiToUTF8 and UTF8ToAnsi for the RTL.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>NeedRTLAnsi</var> is a <var>Boolean</var> function that indicates if the
|
|
OS requires use of <var>AnsiToUTF8</var> and <var>UTF8ToAnsi</var> for the
|
|
RTL. AnsiToUTF8 and UTF8ToAnsi need a widestring manager under Linux, BSD,
|
|
and Mac OSX. Normally these operating systems use UTF-8 as the system encoding so the
|
|
<var>WideStringManager</var> is not needed.
|
|
</p>
|
|
<p>
|
|
For the Windows environment, NeedRTLAnsi is <b>True</b> if the default system
|
|
code page is not <var>CP_UTF8</var>. For UNIX-like environments, NeedRTLAnsi
|
|
is <b>True</b> when any of the <b>LC_ALL</b>, <b>LC_MESSAGES</b>, or
|
|
<b>LANG</b> environment variables contain a language code other than UTF-8.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="#rtl.system.DefaultSystemCodePage">DefaultSystemCodePage</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="NeedRTLAnsi.Result">
|
|
<short>True when the system encoding is not UTF-8.</short>
|
|
</element>
|
|
|
|
<element name="SetNeedRTLAnsi">
|
|
<short>Sets the value for the unit global variable.</short>
|
|
<descr></descr>
|
|
<seealso>
|
|
<link id="NeedRTLAnsi"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="SetNeedRTLAnsi.NewValue">
|
|
<short>New value for the variable.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ToSys">
|
|
<short>
|
|
Ensures UTF-8 characters (or format settings) are converted to the system
|
|
code page.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8ToSys</var> is an overloaded function used to convert the specified
|
|
string value (or format settings) to the system codepage for the platform.
|
|
UTF8ToSys works like <var>UTF8ToAnsi</var>, but is more independent of
|
|
WideStringManager. For platforms where UTF8_RTL is not defined, and
|
|
NeedRTLAnsi returns <b>True</b>, UTF8ToAnsi is called to convert non-ASCII
|
|
values in <var>s</var>. For platforms where UTF8_RTL is defined, the value in
|
|
s is used without modification.
|
|
</p>
|
|
<p>
|
|
An overloaded variant of the function handles <var>TFormatSettings</var> for
|
|
the platform. The return value for the function is the specified values in
|
|
<var>AFormatSettings</var> after being updated to reflect the system codepage
|
|
for the platform. For platforms where UTF8_RTL is not defined, the values in
|
|
the following format settings are updated: <var>CurrencyString</var>,
|
|
<var>LongMonthNames</var>, <var>ShortMonthNames</var>,
|
|
<var>LongDayNames</var>, and <var>ShortDayNames</var>.
|
|
</p>
|
|
<p>
|
|
No actions are needed for platforms where UTF8_RTL is defined.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="#rtl.system.Utf8ToAnsi">Utf8ToAnsi</link>
|
|
<link id="#rtl.sysutils.TFormatSettings">TFormatSettings</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8ToSys.Result">
|
|
<short>Value for the string after conversion.</short>
|
|
</element>
|
|
<element name="UTF8ToSys.s">
|
|
<short>Value to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8ToSys.AFormatSettings">
|
|
<short>Format settings to examine in the function.</short>
|
|
</element>
|
|
|
|
<element name="SysToUTF8">
|
|
<short>
|
|
Converts strings (and format settings) from the system codepage to UTF-8.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>SysToUTF8</var> is an overloaded function used to convert strings (and
|
|
format settings) from the system codepage to UTF-8. SysToUTF8 works like
|
|
<var>AnsiToUTF8</var>, but has no reliance on the widestring manager on
|
|
platforms where UTF8_RTL is defined. For platforms where UTF8_RTL is not
|
|
defined, and NeedRTLAnsi contains <b>True</b>, non-ASCII values are converted
|
|
to UTF-8 by calling <var>AnsiToUTF8</var>.
|
|
</p>
|
|
<p>
|
|
An overloaded variant of the function handles <var>TFormatSettings</var> for
|
|
the platform. The return value for the function is the values specified in
|
|
AFormatSettings after conversion from the system codepage to UTF-8. The
|
|
values in the following format settings are updated:
|
|
<var>CurrencyString</var>, <var>LongMonthNames</var>,
|
|
<var>ShortMonthNames</var>, <var>LongDayNames</var>, and
|
|
<var>ShortDayNames</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="#rtl.system.AnsiToUTF8">AnsiToUTF8</link>
|
|
<link id="#rtl.sysutils.TFormatSettings">TFormatSettings</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="SysToUTF8.Result">
|
|
<short>Values after conversion to UTF-8.</short>
|
|
</element>
|
|
<element name="SysToUTF8.s">
|
|
<short>Values to examine in the function.</short>
|
|
</element>
|
|
<element name="SysToUTF8.AFormatSettings">
|
|
<short>Format settings to examine in the function.</short>
|
|
</element>
|
|
|
|
<element name="ConsoleToUTF8">
|
|
<short>
|
|
Converts an OEM-encoded string to UTF8.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>ConsoleToUTF8</var> is a <var>String</var> function used to convert an
|
|
OEM-encoded string to UTF8. The implementation of ConsoleToUTF8 is
|
|
OS-specific, and essentially handles differences between various Windows
|
|
platforms where use of <var>OemToChar</var> and <var>WinCPToUTF8</var> are
|
|
required. For UNIX-like environments, the value in s is converted by calling
|
|
<var>SysToUTF8</var>.
|
|
</p>
|
|
<p>
|
|
ConsoleToUTF8 is used in the implementation of the
|
|
<var>GetEnvironmentStringUTF8</var> and <var>GetEnvironmentVariableUTF8</var>
|
|
functions.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="SysToUTF8"/>
|
|
<link id="WinCPToUTF8"/>
|
|
<link id="GetEnvironmentStringUTF8"/>
|
|
<link id="GetEnvironmentVariableUTF8"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="ConsoleToUTF8.Result">
|
|
<short>UTF-8-encoded value for the specified string.</short>
|
|
</element>
|
|
<element name="ConsoleToUTF8.s">
|
|
<short>Value to convert in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ToConsole">
|
|
<short>
|
|
Converts a UTF-8-encoded string to console (OEM) encoding.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8ToConsole</var> converts a UTF-8-encoded string to console (OEM)
|
|
encoding as used in <var>Write</var> and <var>WriteLn</var>. The
|
|
implementation is platform specific.
|
|
</p>
|
|
<p>
|
|
For the Windows environment, either <var>UTF8ToSys</var> or
|
|
<var>UTF8ToWinCP</var> is used to convert the value to the codepage or
|
|
character set needed in RTL. The Windows <var>CharToOem</var> API is used to
|
|
prepare the return value. In UNIX-like environments, <var>UTF8ToSys</var> is
|
|
used to get the return value.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8ToSys"/>
|
|
<link id="UTF8ToWinCP"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8ToConsole.Result">
|
|
<short>OEM-encoded value for the string.</short>
|
|
</element>
|
|
<element name="UTF8ToConsole.s">
|
|
<short>UTF-8-encoded input values.</short>
|
|
</element>
|
|
|
|
<element name="WinCPToUTF8">
|
|
<short>
|
|
Converts the string from Windows code page to UTF-8.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Converts the string from Windows code page to UTF-8. Used with some
|
|
Windows-specific functions. For all Windows versions supporting 8-bit
|
|
codepages (but not WinCE).
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="WinCPToUTF8.Result">
|
|
<short>UTF-8-encoded values for the string.</short>
|
|
</element>
|
|
<element name="WinCPToUTF8.s">
|
|
<short>Input values in Windows codepage encoding.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ToWinCP">
|
|
<short>
|
|
Converts the UTF-8-encoded string to the Windows code page encoding.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Converts the UTF-8-encoded string to the Windows code page encoding. Used by
|
|
<var>Write</var> and <var>WriteLn</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8ToWinCP.Result">
|
|
<short>Values in the Windows codepage encoding.</short>
|
|
</element>
|
|
<element name="UTF8ToWinCP.s">
|
|
<short>UTF-8-encoded input values.</short>
|
|
</element>
|
|
|
|
<element name="ParamStrUTF8">
|
|
<short>
|
|
Gets the specified command line parameter and converts it to a UTF-8-encoded
|
|
string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>ParamStrUTF8</var> is a <var>String</var> function used to convert the
|
|
specified command line parameter to a UTF-8-encoded string. The
|
|
implementation for ParamStrUTF8 is OS- or platform-specific. For UNIX-like
|
|
environments, SysToUTF8 is called to convert the value for the command line
|
|
parameter at the position in Param. For Windows platforms, the stub for the
|
|
Ansi or WideString version of ParamStrUTF8 is called. ParamStrUTF8 is the
|
|
UTF-8-enabled counterpart to the ParamStr routine in RTL.
|
|
</p>
|
|
<p>
|
|
<var>Param</var> is the ordinal position for the requested parameter value.
|
|
Param should be in the range <b>0..<var>ParamCount</var></b>. Values in Param
|
|
outside this range cause an empty string ('') to be returned for a parameter
|
|
value.
|
|
</p>
|
|
<p>
|
|
In most cases, the parameter at position <b>0</b> contains the name and
|
|
optional path to the executable file for the application. For cross-platform
|
|
compatibility, use the ExeName property in TCustomApplication to get the path
|
|
and name for the binary instead. Subsequent index positions contain any command
|
|
line arguments passed to the executable.
|
|
</p>
|
|
<p>
|
|
The return value contains the UTF-8-encoded string with the value for the
|
|
parameter at the specified position, or an empty string when not present.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="SysToUTF8"/>
|
|
<link id="#fcl.custapp.TCustomApplication.ExeName">TCustomApplication.ExeName</link>
|
|
<link id="#rtl.system.ParamStr">ParamStr</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="ParamStrUTF8.Result">
|
|
<short>
|
|
UTF-8-encoded value for the specified command line parameter.
|
|
</short>
|
|
</element>
|
|
<element name="ParamStrUTF8.Param">
|
|
<short>
|
|
Ordinal position for the command line parameter retrieved in the routine.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="GetFormatSettingsUTF8">
|
|
<short>
|
|
Gets the TFormatSettings for the platform.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>GetFormatSettingsUTF8</var> is a procedure used to get the
|
|
<var>TFormatSettings</var> for the Locale or Language Code for the platform.
|
|
GetFormatSettingsUTF8 is defined for Windows environments only, and calls
|
|
<var>GetLocaleFormatSettingsUTF8</var> using the ThreadLocale or Language
|
|
Code ID needed for the platform.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="GetFormatSettingsUTF8"/>
|
|
<link id="GetLocaleFormatSettingsUTF8"/>
|
|
</seealso>
|
|
</element>
|
|
|
|
<element name="GetLocaleFormatSettingsUTF8">
|
|
<short>
|
|
Gets format settings for a specific Language Code ID.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>GetLocaleFormatSettingsUTF8</var> is a procedure used to get the
|
|
<var>TFormatSettings</var> for the Locale or Language Code for the platform.
|
|
GetLocaleFormatSettingsUTF8 is defined for Windows environments only.
|
|
</p>
|
|
<p>
|
|
GetLocaleFormatSettingsUTF8 ensures that values in the format settings use
|
|
the Language Code ID for the platform. The following format settings are
|
|
converted to their Locale-specific values:
|
|
</p>
|
|
<ul>
|
|
<li>ShortMonthNames</li>
|
|
<li>LongMonthNames</li>
|
|
<li>ShortDayNames</li>
|
|
<li>LongDayNames</li>
|
|
<li>DateSeparator</li>
|
|
<li>ShortDateFormat</li>
|
|
<li>LongDateFormat</li>
|
|
<li>TimeSeparator</li>
|
|
<li>TimeAMString</li>
|
|
<li>TimePMString</li>
|
|
<li>ShortTimeFormat</li>
|
|
<li>LongTimeFormat</li>
|
|
<li>CurrencyString</li>
|
|
<li>CurrencyFormat</li>
|
|
<li>NegCurrFormat</li>
|
|
<li>ThousandSeparator</li>
|
|
<li>DecimalSeparator</li>
|
|
<li>CurrencyDecimals</li>
|
|
<li>ListSeparator</li>
|
|
</ul>
|
|
<p>
|
|
In LCL version 3.0 or higher, LongTimeFormat and ShortTimeFormat can contain
|
|
AM / PM format specifiers; e.g. 'hh:nn:ss AMPM'.
|
|
</p>
|
|
</descr>
|
|
<version>
|
|
Modified in LCL version 3.0 to return 12-hour time formats using AM / PM in
|
|
format settings.
|
|
</version>
|
|
<seealso>
|
|
<link id="#rtl.sysutils.TFormatSettings">TFormatSettings</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="GetLocaleFormatSettingsUTF8.LCID">
|
|
<short>Language Code ID.</short>
|
|
</element>
|
|
<element name="GetLocaleFormatSettingsUTF8.AFormatSettings">
|
|
<short>The locale-specific format settings for the platform.</short>
|
|
</element>
|
|
|
|
<element name="GetEnvironmentVariableCountUTF8">
|
|
<short>
|
|
Returns the number of system environment variables.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Returns the number of UTF-8-encoded system environment variables. Used
|
|
together with <var>GetEnvironmentStringUTF8</var>.
|
|
</p>
|
|
</descr>
|
|
</element>
|
|
<element name="GetEnvironmentVariableCountUTF8.Result">
|
|
<short>Number of variables in the system environment.</short>
|
|
</element>
|
|
|
|
<element name="GetEnvironmentStringUTF8">
|
|
<short>
|
|
Returns a system environment string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Returns a UTF-8-encoded system environment string stored at the specified
|
|
position. The value in <var>Index</var> is in the range
|
|
1..GetEnvironmentVariableCountUTF8. For Unix and Windows the string normally
|
|
is in the form 'name=value'. Beware that Windows knows some special formats,
|
|
e.g. '=C:=SomePath'. Nota bene: Raymond Chen called these "bookkeeping
|
|
variables" which emulate the MS-DOS tracking mechanism for the current
|
|
directory on different drives.
|
|
</p>
|
|
<p>
|
|
Use <var>GetEnvironmentVariableUTF8</var> to look up values for environment
|
|
variables by name.
|
|
</p>
|
|
</descr>
|
|
</element>
|
|
<element name="GetEnvironmentStringUTF8.Result">
|
|
<short>Value for the environment variable at the specified position.</short>
|
|
</element>
|
|
<element name="GetEnvironmentStringUTF8.Index">
|
|
<short>Position for the environment variable.</short>
|
|
</element>
|
|
|
|
<element name="GetEnvironmentVariableUTF8">
|
|
<short>
|
|
Returns the value of a system environment variable.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Returns the value of an environment variable stored in the form
|
|
'EnvVar=value'. See <var>GetEnvironmentStringUTF8</var> to retrieve the whole
|
|
list of environment strings.
|
|
</p>
|
|
</descr>
|
|
</element>
|
|
<element name="GetEnvironmentVariableUTF8.Result">
|
|
<short>Value for the specified environment variable name.</short>
|
|
</element>
|
|
<element name="GetEnvironmentVariableUTF8.EnvVar">
|
|
<short>Environment variable with the value retrieved in the routine.</short>
|
|
</element>
|
|
|
|
<element name="SysErrorMessageUTF8">
|
|
<short>
|
|
Gets the UTF-8-encoded system error message for the specified error code.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>SysErrorMessageUTF8</var> is used to get the UTF-8-encoded system error
|
|
message for the specified error code. SysErrorMessageUTF8 calls the
|
|
<var>SysUtils.SysErrorMessage</var> function and converts the error message
|
|
using <var>SysToUTF8</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="SysErrorMessageUTF8.Result">
|
|
<short>UTF-8-encoded value for the system error message.</short>
|
|
</element>
|
|
<element name="SysErrorMessageUTF8.ErrorCode">
|
|
<short>Numeric system error code for the message.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CodepointSize">
|
|
<short>
|
|
Returns the size of the UTF-8 codepoint in bytes.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Returns the size of the UTF-8 codepoint in bytes. The return value is for a
|
|
single codepoint.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CodepointSize.Result">
|
|
<short>Number of bytes for the codepoint.</short>
|
|
</element>
|
|
<element name="UTF8CodepointSize.p">
|
|
<short>UTF-8-encoded value to examine in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CodepointSizeFast">
|
|
<short>
|
|
Fast version of UTF8CodepointSize.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Fast version of <var>UTF8CodepointSize</var>. Assumes the UTF-8 codepoint is
|
|
valid. The return value is for a single codepoint.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CodepointSizeFast.Result">
|
|
<short>Number of bytes for the codepoint.</short>
|
|
</element>
|
|
<element name="UTF8CodepointSizeFast.p">
|
|
<short>Encoded values to examine in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CharacterLength">
|
|
<short>
|
|
Returns the number of bytes needed for the UTF-8 codepoint starting at p.
|
|
</short>
|
|
<descr>
|
|
<remark>
|
|
Deprecated. Use UTF8CodepointSize instead.
|
|
</remark>
|
|
<p>
|
|
It returns 0 if p is nil. It returns 1 if p is a 1-byte UTF-8 codepoint or p
|
|
is an invalid UTF-8 sequence. Otherwise it returns a number 2..4. It does not
|
|
check for malicious codepoints like #$c0#$80, nor for undefined codepoints
|
|
like #$f3#$a0#$87#$b9. Use UTF8CharacterLength to step through a string with
|
|
a simple loop:
|
|
</p>
|
|
<code>
|
|
while p^ &lt;&gt; #0 do
|
|
begin
|
|
inc(p, UTF8CharacterLength(p));
|
|
end;
|
|
</code>
|
|
<p>
|
|
Even if p contains invalid UTF-8 codepoints it will run through the string
|
|
without overflow.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8CharacterStrictLength">UTF8CharacterStrictLength</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8CharacterLength.Result">
|
|
<short>Number of bytes required for the UTF-8 codepoint, or 0 (zero).</short>
|
|
</element>
|
|
<element name="UTF8CharacterLength.p">
|
|
<short>Pointer to the value examined in the routine.</short>
|
|
</element>
|
|
|
|
<element name="UTF8Length">
|
|
<short>
|
|
Gets the length of a UTF-8-encoded string in codepoints.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8Length</var> is a function used to get the character length for the
|
|
specified UTF-8-encoded string. The return value contains the number of
|
|
UTF-8-encoded characters (or codepoints) found in the byte values for the
|
|
string.
|
|
</p>
|
|
<p>
|
|
An overloaded variant of the function is provided which uses the
|
|
<var>PChar</var> type to specify the byte values in the string. Internally,
|
|
the String variant casts its value to a PChar type and calls the overloaded
|
|
variant.
|
|
</p>
|
|
<p>
|
|
UTF8Length iterates over the bytes in the UTF-8-encoded string data, and
|
|
calls UTF8CodepointSize to determine the number of bytes needed for each
|
|
codepoint. Use UTF8LengthFast for a version of the routine optimized for
|
|
speed.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8CodepointSize"/>
|
|
<link id="UTF8LengthFast"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8Length.Result">
|
|
<short>Number of codepoints in the byte values for the string.</short>
|
|
</element>
|
|
<element name="UTF8Length.s">
|
|
<short>UTF-8-encoded string to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8Length.p">
|
|
<short>Pointer to the UTF-8-encoded string to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8Length.ByteCount">
|
|
<short>Number of byte values in the UTF-8-encoded string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8LengthFast">
|
|
<short>
|
|
Fast version of UTF8Length.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8LengthFast</var> is an overloaded <var>PtrInt</var> function used to
|
|
get the length of a UTF-8-encoded string in codepoints. UTF8LengthFast is the
|
|
fast version of <var>UTF8Length</var>. It does not call the UTF8CodepointSize
|
|
function. The UTF-8-encoded string data is assumed to be valid. The native
|
|
data size for the CPU is used to process blocks of UTF-8-encoded data. For a
|
|
64-bit CPU, this means that 8 bytes are read and processed at once.
|
|
</p>
|
|
<p>
|
|
The overloaded variants allow the UTF-8-encoded data to be specified as
|
|
either a String type, or a null-terminated PChar type. Internally, the
|
|
String-based variant casts its data to a PChar type and calls the overloaded
|
|
variant.
|
|
</p>
|
|
<p>
|
|
UTF8LengthFast is a Free Pascal implementation of the C routine provided by
|
|
Colin Percival:
|
|
</p>
|
|
<p>
|
|
<url
|
|
href="https://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html">
|
|
Even faster UTF-8 character counting
|
|
</url>
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8Length"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8LengthFast.Result">
|
|
<short>Number of codepoints in the string.</short>
|
|
</element>
|
|
<element name="UTF8LengthFast.s">
|
|
<short>String with UTF-8-encoded values.</short>
|
|
</element>
|
|
<element name="UTF8LengthFast.p">
|
|
<short>Pointer to the String with UTF-8-encoded values.</short>
|
|
</element>
|
|
<element name="UTF8LengthFast.ByteCount">
|
|
<short>Number of byte values in the UTF-8-encoded string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CodepointCount">
|
|
<short>
|
|
Gets the number of valid UTF-8 codepoints in the specified value.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CodepointCount</var> is an overloaded <var>SizeInt</var> function used
|
|
to determine the number of UTF-8 codepoints found in the specified value. It is
|
|
similar to the UTF8Length routine, but excludes any invalid codepoints found in
|
|
the input value from the count in the return value. The overloaded variants
|
|
allow the input value to be specified using either the String or the PChar type.
|
|
</p>
|
|
<p>
|
|
UTF8CodepointCount iterates over the byte values in the s or p arguments, and
|
|
increments the return value when a valid UTF-8 codepoint is found.
|
|
UTF8CodepointLen (in system.pp) is called to get the size for each of the
|
|
UTF-8 codepoints. Valid codepoints include those represented using combining
|
|
characters. The process is repeated until all of the bytes in the input value
|
|
have been examined, or until a codepoint with a length of zero (0) is
|
|
encountered.
|
|
</p>
|
|
<p>
|
|
The return value is zero (0) if the s or p arguments are empty, or when the
|
|
ByteCount argument is zero (0).
|
|
</p>
|
|
<p>
|
|
For example:
|
|
</p>
|
|
<code>
|
|
// var
|
|
// Utf8Str, InvalidUtf8Str: String;
|
|
// Cnt, Len: Integer;
|
|
|
|
{A macron (decomposed)}
|
|
Utf8Str := 'A' + #$CC#$84;
|
|
|
|
{invalid single byte UTF-8}
|
|
InvalidUtf8Str := #$C0#$C1#$F5#$F6#$F7#$F8#$F9#$FA#$FB#$FC#$FD#$FE#$FF;
|
|
|
|
Cnt := UTF8CodePointCount(Utf8Str); // Cnt = 2
|
|
Len := UTF8Length(Utf8Str); // Len = 2
|
|
|
|
Cnt := UTF8CodePointCount(InvalidUtf8Str); // Cnt = 0
|
|
Len := UTF8Length(InvalidUtf8Str); // Len = 13
|
|
|
|
Cnt := UTF8CodePointCount(InvalidUtf8Str + Utf8Str); // Cnt = 2
|
|
Len := UTF8Length(InvalidUtf8Str + Utf8Str); // Len = 15
|
|
</code>
|
|
</descr>
|
|
<version>
|
|
Added in LazUtils version 4.0.
|
|
</version>
|
|
<seealso>
|
|
<link id="UTF8Length"/>
|
|
<link id="UTF8CodepointSize"/>
|
|
<link id="UTF8LengthFast"/>
|
|
<link id="UTF8CharacterLength"/>
|
|
<link id="#rtl.system.UTF8CodepointLen">UTF8CodepointLen</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8CodepointCount.Result">
|
|
<short>
|
|
Integer value with the number of valid codepoints including combining
|
|
characters.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CodepointCount.s">
|
|
<short>
|
|
String with the codepoints examined in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CodepointCount.p">
|
|
<short>
|
|
PChar type with the codepoints examined in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CodepointCount.ByteCount">
|
|
<short>
|
|
Number of bytes in the PChar value.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8CodepointToUnicode">
|
|
<short>
|
|
Converts a UTF-8-encoded character to its unique Unicode U+XXXX character
|
|
value.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CodepointToUnicode</var> is a <var>Cardinal</var> function used to
|
|
convert a UTF-8-encoded character to its representation as a unique Unicode
|
|
U+XXXX hexadecimal character value. For example: The letter 'A' (Decimal 65)
|
|
is expressed in Unicode as U+0041.
|
|
</p>
|
|
<p>
|
|
<var>CodepointLen</var> is an output variable used to store the number of
|
|
UTF-8-encoded bytes needed for the codepoint. It will normally contain a
|
|
value in the range 1..4 (the number of possible bytes used in the UTF-8
|
|
encoding scheme). It can contain 0 (zero) when p is an empty PChar value.
|
|
</p>
|
|
<p>
|
|
The return value for the function contains the hexadecimal Unicode character
|
|
value as a Cardinal data type. It can contain 0 (zero) when the value in p is
|
|
not a valid UTF-8-encoded character.
|
|
</p>
|
|
<p>
|
|
Use <var>UTF8FixBroken</var> to fix invalid UTF-8 encoding in the string.
|
|
</p>
|
|
<p>
|
|
Use UnicodeToUTF8 to convert a Unicode character value to its UTF-8-encoded
|
|
value.
|
|
</p>
|
|
<remark>
|
|
UTF8CodepointToUnicode does not check whether the codepoint is actually
|
|
defined in Unicode tables.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CodepointToUnicode.Result">
|
|
<short>Unicode character value for the UTF-8 character.</short>
|
|
</element>
|
|
<element name="UTF8CodepointToUnicode.p">
|
|
<short>The UTF-8-encoded string value.</short>
|
|
</element>
|
|
<element name="UTF8CodepointToUnicode.CodepointLen">
|
|
<short>Number of bytes needed for the codepoint.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CharacterToUnicode">
|
|
<short>
|
|
Returns the codepoint at p and the number of bytes to skip.
|
|
</short>
|
|
<descr>
|
|
<remark>
|
|
Deprecated. Use UTF8CodepointToUnicode instead.
|
|
</remark>
|
|
<p>
|
|
If p=nil then CharLen and result are 0 otherwise CharLen>0. If there is an
|
|
encoding error the Result is 0 and CharLen=1 to skip forward. It is safe to
|
|
do:
|
|
</p>
|
|
<code>
|
|
var
|
|
s: string;
|
|
p:=1;
|
|
while p &lt;= length(s) do
|
|
begin
|
|
UTF8CharacterToUnicode(@s[p], CharLen);
|
|
inc(p, CharLen);
|
|
end;
|
|
</code>
|
|
<p>
|
|
For speed reasons, this function only checks for 1, 2, 3, or 4 byte encoding
|
|
errors. It does not check whether the codepoint is defined in the Unicode
|
|
table.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CharacterToUnicode.Result"/>
|
|
<element name="UTF8CharacterToUnicode.p"/>
|
|
<element name="UTF8CharacterToUnicode.CharLen"/>
|
|
|
|
<element name="UnicodeToUTF8">
|
|
<short>
|
|
Encodes the given code point as a UTF-8 sequence of 1 to 4 bytes.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UnicodeToUTF8</var> is an <var>Integer</var> function used to convert
|
|
the Unicode character value in CodePoint to the sequence of bytes needed for
|
|
the UTF-8 encoding. UnicodeToUTF8 stores the UTF-8-encoded byte values for
|
|
the Unicode character in the <var>Buf</var> parameter.
|
|
</p>
|
|
<p>
|
|
The return value contains the number of bytes required for the UTF-8-encoded
|
|
value (in the range 1..4). If it contains 0 (zero), the Unicode codepoint was
|
|
invalid and an <var>Exception</var> is raised.
|
|
</p>
|
|
<remark>
|
|
UnicodeToUTF8 does not process #0 byte values for the codepoint, as done for
|
|
UTF-32.
|
|
</remark>
|
|
</descr>
|
|
<errors>
|
|
<p>
|
|
Raises an <var>Exception</var> when CodePoint is an
|
|
invalid Unicode character value. Raised with the message 'UnicodeToUTF8:
|
|
invalid Unicode: XXXXXXXX'.
|
|
</p>
|
|
</errors>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UnicodeToUTF8.Result">
|
|
<short>Number of bytes needed for the UTF-8-encoded value.</short>
|
|
</element>
|
|
<element name="UnicodeToUTF8.Codepoint">
|
|
<short>Unicode character value to convert in the function.</short>
|
|
</element>
|
|
<element name="UnicodeToUTF8.Buf">
|
|
<short>Stores the UTF-8-encoded byte values for the codepoint.</short>
|
|
</element>
|
|
|
|
<element name="UnicodeToUTF8SkipErrors">
|
|
<short>
|
|
Stores a single Unicode codepoint as a UTF-8-encoded value in the buffer.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UnicodeToUTF8SkipErrors</var> is a simple and fast function used to
|
|
write a single Unicode codepoint as a UTF-8-encoded value in Buf. It returns
|
|
the number of bytes written. It does not append a terminating null (#0)
|
|
character. It does not check if the codepoint actually exists in Unicode
|
|
tables. It returns 0 if the codepoint can not be represented as a 1 to 4 byte
|
|
UTF-8 sequence.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UnicodeToUTF8SkipErrors.Result">
|
|
<short>Number of bytes written to Buf, or 0 if the codepoint cannot be encoded.</short>
|
|
</element>
|
|
<element name="UnicodeToUTF8SkipErrors.Codepoint">
|
|
<short>Codepoint (Unicode character) to convert in the function.</short>
|
|
</element>
|
|
<element name="UnicodeToUTF8SkipErrors.Buf">
|
|
<short>Buffer where the converted value is stored.</short>
|
|
</element>
|
|
|
|
<element name="UnicodeToUTF8Inline">
|
|
<short>
|
|
Encodes the given code point as a UTF-8 sequence of 1 to 4 bytes.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UnicodeToUTF8Inline</var> is an <var>Integer</var> function used to
|
|
convert the Unicode character value in <var>CodePoint</var> to the sequence
|
|
of bytes needed for the UTF-8 encoding. UnicodeToUTF8Inline stores the
|
|
UTF-8-encoded byte values for the Unicode character in the <var>Buf</var>
|
|
parameter.
|
|
</p>
|
|
<p>
|
|
The return value contains the number of bytes required for the UTF-8-encoded
|
|
value (in the range 1..4).
|
|
</p>
|
|
<p>
|
|
Used in the implementation of <var>UnicodeToUTF8</var> and
|
|
<var>UnicodeToUTF8SkipErrors</var>.
|
|
</p>
|
|
<remark>
|
|
UnicodeToUTF8Inline does not process #0 byte values for the codepoint, as
|
|
done for UTF-32.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UnicodeToUTF8Inline.Result">
|
|
<short>Number of bytes required for the UTF-8-encoded value.</short>
|
|
</element>
|
|
<element name="UnicodeToUTF8Inline.CodePoint">
|
|
<short>Unicode character value to convert.</short>
|
|
</element>
|
|
<element name="UnicodeToUTF8Inline.Buf">
|
|
<short>Destination where encoded byte values are stored.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ToDoubleByteString">
|
|
<short>
|
|
Converts UTF-8 values to their DBCS equivalent.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8ToDoubleByteString</var> is a <var>String</var> function used to
|
|
convert UTF-8-encoded values to the representation used in Double Byte
|
|
Character Sets (DBCS).
|
|
</p>
|
|
<p>
|
|
UTF8ToDoubleByteString calls <var>UTF8Length</var> to get the number of
|
|
codepoints (or characters) in s, and calls <var>UTF8ToDoubleByte</var> to
|
|
perform the conversion. Each codepoint is converted to Unicode by calling
|
|
<var>UTF8CodepointToUnicode</var>.
|
|
</p>
|
|
<p>
|
|
The return value is a String type with the byte values from the conversion,
|
|
or an empty string ('') when s does not contain a valid UTF-8-encoded string.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8ToDoubleByteString.Result">
|
|
<short>DBCS values for the specified codepoints.</short>
|
|
</element>
|
|
<element name="UTF8ToDoubleByteString.s">
|
|
<short>UTF-8-encoded values to convert in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ToDoubleByte">
|
|
<short>
|
|
Converts a UTF-8-encoded string to its DBCS representation.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8ToDoubleByte</var> is used to convert UTF-8-encoded values to the
|
|
representation used in Double Byte Character Sets (DBCS). UTF8ToDoubleByte
|
|
calls <var>UTF8CodepointToUnicode</var> to process each of the codepoints in
|
|
<var>UTF8Str</var>.
|
|
</p>
|
|
<p>
|
|
The return value contains the number of double byte values converted in the function. The converted values are stored in <var>DBStr</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8ToDoubleByte.Result">
|
|
<short>Number of double bytes converted in the function.</short>
|
|
</element>
|
|
<element name="UTF8ToDoubleByte.UTF8Str">
|
|
<short>UTF-8-encoded values to convert in the function.</short>
|
|
</element>
|
|
<element name="UTF8ToDoubleByte.Len">
|
|
<short>Length of the UTF-8-encoded input values.</short>
|
|
</element>
|
|
<element name="UTF8ToDoubleByte.DBStr">
|
|
<short>Storage for the Double Byte values.</short>
|
|
</element>
|
|
|
|
<element name="UTF8FindNearestCharStart">
|
|
<short>
|
|
Finds the start of the UTF-8 character at the specified position.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Find the start of the UTF-8 character which contains <var>BytePos</var>. If
|
|
BytePos is not part of a valid UTF-8 Codepoint the function returns BytePos.
|
|
BytePos values start at position 0.
|
|
</p>
|
|
<p>
|
|
Len is the length in bytes.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8FindNearestCharStart.Result">
|
|
<short>Position where the codepoint containing BytePos begins.</short>
|
|
</element>
|
|
<element name="UTF8FindNearestCharStart.UTF8Str">
|
|
<short>Values to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8FindNearestCharStart.Len">
|
|
<short>Length of the input values.</short>
|
|
</element>
|
|
<element name="UTF8FindNearestCharStart.BytePos">
|
|
<short>Offset into UTF8Str for the initial byte value.</short>
|
|
</element>
|
|
|
|
<element name="Utf8TryFindCodepointStart">
|
|
<short>
|
|
Tries to find the start of a valid UTF-8 codepoint in a string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>Utf8TryFindCodepointStart</var> is a <var>Boolean</var> function which
|
|
tries to find the start of a valid UTF-8 codepoint at the specified position
|
|
in <var>AString</var>.
|
|
</p>
|
|
<p>
|
|
The return value contains <b>True</b> if the bytes at the specified position
|
|
are a valid UTF-8 codepoint (1 - 4 bytes). When the return value is
|
|
<b>True</b>, the value in CurPos is updated to contain the position in
|
|
AString where the UTF-8 codepoint begins. Otherwise, the value in CurPos is
|
|
unchanged. Please note, when CurPos points beyond the end of AString you will
|
|
get a crash!
|
|
</p>
|
|
<remark>
|
|
UTF8CodepointStrictSize will <b>NOT</b> "look" beyond the terminating #0 in a
|
|
PChar, so this is safe with AnsiString values.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="Utf8TryFindCodepointStart.Result">
|
|
<short>
|
|
True when the bytes at the specified position are a valid UTF-8 codepoint.
|
|
</short>
|
|
</element>
|
|
<element name="Utf8TryFindCodepointStart.AString">
|
|
<short>Pointer to the string to examine in the function.</short>
|
|
</element>
|
|
<element name="Utf8TryFindCodepointStart.CurPos">
|
|
<short>
|
|
Pointer to the first position in the string examined in the function.
|
|
</short>
|
|
</element>
|
|
<element name="Utf8TryFindCodepointStart.CodepointLen">
|
|
<short>Number of bytes in the codepoint, or 0 when invalid.</short>
|
|
</element>
|
|
<element name="Utf8TryFindCodepointStart.Index">
|
|
<short>Initial position in the string examined in the function.</short>
|
|
</element>
|
|
<element name="Utf8TryFindCodepointStart.CharLen">
|
|
<short>Number of bytes required for the UTF-8 codepoint.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CodepointStart">
|
|
<short>
|
|
Finds the n-th UTF-8 codepoint.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Finds the n-th UTF-8 codepoint, ignoring BIDI.
|
|
</p>
|
|
<p>
|
|
Len is the length in bytes for the values in UTF8Str. CodepointIndex is the
|
|
position of the desired codepoint (starting at 0), in characters.
|
|
</p>
|
|
<p>
|
|
The return value contains the byte values for the codepoint, or Nil when a
|
|
valid codepoint was not found.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CodepointStart.Result">
|
|
<short>Byte values for the codepoint, or Nil.</short>
|
|
</element>
|
|
<element name="UTF8CodepointStart.UTF8Str">
|
|
<short>Values to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8CodepointStart.Len">
|
|
<short>Length in bytes for the input values.</short>
|
|
</element>
|
|
<element name="UTF8CodepointStart.CodepointIndex">
|
|
<short>Character position for the desired codepoint (zero-based).</short>
|
|
</element>
|
|
|
|
<element name="UTF8CharStart">
|
|
<short>
|
|
Deprecated. Use UTF8CodepointStart instead.
|
|
</short>
|
|
<descr>
|
|
<remark>
|
|
Deprecated. Use UTF8CodepointStart instead.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CharStart.Result"/>
|
|
<element name="UTF8CharStart.UTF8Str"/>
|
|
<element name="UTF8CharStart.Len"/>
|
|
<element name="UTF8CharStart.CharIndex"/>
|
|
|
|
<element name="UTF8CodepointToByteIndex">
|
|
<short>
|
|
Finds the byte index of the n-th UTF-8 codepoint.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CodepointToByteIndex</var> is a <var>PtrInt</var> function used to
|
|
find the byte index in UTF8Str where the n-th UTF-8 codepoint is located. It
|
|
calls UTF8CodepointStart to get a pointer to the requested codepoint position.
|
|
</p>
|
|
<p>
|
|
The return value contains the difference between the pointer offsets in each
|
|
of the PChar values. The return value is -1 when a codepoint is not found at
|
|
the specified position.
|
|
</p>
|
|
<p>
|
|
UTF8CodepointToByteIndex ignores BIDI mode.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8CodepointStart"/>
|
|
<link id="UTF8CharToByteIndex"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8CodepointToByteIndex.Result">
|
|
<short>
|
|
Byte position where the requested UTF-8 codepoint is located, or -1 when a codepoint is not available for the index value.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CodepointToByteIndex.UTF8Str">
|
|
<short>
|
|
PChar with the multi-byte UTF-8-encoded values examined in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CodepointToByteIndex.Len">
|
|
<short>
|
|
Length of the PChar value in UTF8Str in bytes.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CodepointToByteIndex.CodepointIndex">
|
|
<short>
|
|
Position of the codepoint requested in the routine. This is zero-based, like the CodepointIndex argument in UTF8CodepointStart.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8CharToByteIndex">
|
|
<short>
|
|
Deprecated. Use UTF8CodepointToByteIndex instead.
|
|
</short>
|
|
<descr>
|
|
<remark>
|
|
Deprecated. Use UTF8CodepointToByteIndex instead.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CharToByteIndex.Result"/>
|
|
<element name="UTF8CharToByteIndex.UTF8Str"/>
|
|
<element name="UTF8CharToByteIndex.Len"/>
|
|
<element name="UTF8CharToByteIndex.CharIndex"/>
|
|
|
|
<element name="UTF8FixBroken">
|
|
<short>
|
|
Replaces all invalid UTF-8 characters in a string with the specified character.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8FixBroken</var> is an overloaded routine used to replace all invalid
|
|
UTF-8 characters in the specified value with a replacement character. The
|
|
overloaded variants allow the UTF-8-encoded content to be specified using
|
|
either a PChar or a String type.
|
|
</p>
|
|
<p>
|
|
<var>ReplaceChar</var> contains the character used to replace any invalid UTF-8
|
|
characters found in the input value. The default value for ReplaceChar is the
|
|
Space character (Hex $20 Decimal 32).
|
|
</p>
|
|
<p>
|
|
The PChar variant examines the specified byte values to determine when an
|
|
invalid UTF-8 codepoint is found. This includes 1, 2, or 3 byte values, those
|
|
that fall outside of the ranges allowed in UTF-8, and common byte sequences
|
|
used to inject XSS vulnerabilities. UTF8FixBroken stops processing at the first
|
|
occurrence of the byte value #0 (Decimal 0). UTF-8 byte sequences updated in
|
|
the routine are stored in the original PChar argument.
|
|
</p>
|
|
<p>
|
|
The String variant converts the input argument to a PChar type and calls
|
|
FindInvalidUTF8Codepoint to locate invalid UTF-8 byte sequences. If invalid
|
|
bytes are found, UniqueString is called to get a new reference-counted String
|
|
for the return value generated by calling the overloaded PChar variant.
|
|
</p>
|
|
</descr>
|
|
<version>
|
|
Modified in LazUtils version 4.0 to include the ReplaceChar argument.
|
|
</version>
|
|
<seealso>
|
|
<link id="FindInvalidUTF8Codepoint"/>
|
|
<link id="#rtl.system.UniqueString">UniqueString</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8FixBroken.P">
|
|
<short>
|
|
PChar with the UTF-8-encoded values examined in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8FixBroken.S">
|
|
<short>
|
|
String with the UTF-8-encoded values examined in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8FixBroken.ReplaceChar">
|
|
<short>
|
|
Character used to replace invalid codepoints in the input argument. The default
|
|
value for the argument is the Space character (decimal 32 hex $20).
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8CodepointStrictSize">
|
|
<short>Gets the number of bytes needed for the UTF-8 codepoint.</short>
|
|
<descr>
|
|
<p>
|
|
Gets the number of bytes needed for the UTF-8 codepoint in <var>P</var>. The
|
|
return value contains the number of bytes needed for the codepoint (in the
|
|
range 1..4), or 0 (zero) when P is not assigned or the codepoint is invalid.
|
|
</p>
|
|
<remark>
|
|
UTF8CodepointStrictSize stops examining the byte values in P when #0 (Decimal
|
|
0) is encountered.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CodepointStrictSize.Result">
|
|
<short>Number of bytes needed for the codepoint.</short>
|
|
</element>
|
|
<element name="UTF8CodepointStrictSize.P">
|
|
<short>UTF-8-encoded values to examine.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CharacterStrictLength">
|
|
<short>
|
|
Returns the length in bytes (1..4) for a valid UTF-8 character. Otherwise 0.
|
|
</short>
|
|
<descr>
|
|
<remark>
|
|
Deprecated. Use UTF8CodepointStrictSize instead.
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CharacterStrictLength.Result"/>
|
|
<element name="UTF8CharacterStrictLength.P"/>
|
|
|
|
<element name="UTF8CStringToUTF8String">
|
|
<short>
|
|
Copies from a C-style string with UTF-8 encoding to a UTF-8 string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CStringToUTF8String</var> is a <var>String</var> function used to
|
|
copy the specified number of characters (codepoints) from a C-style string
|
|
with UTF-8 encoding. The return value is a UTF-8-encoded string with C-style
|
|
special characters converted to their common equivalents. The following
|
|
C-style quoted characters are handled in the function:
|
|
</p>
|
|
<dl>
|
|
<dt>\t</dt>
|
|
<dd>Converted to a Tab character (Decimal 9)</dd>
|
|
<dt>\"</dt>
|
|
<dd>Converted to a Double Quote character (Decimal 34)</dd>
|
|
<dt>\\</dt>
|
|
<dd>Converted to a Reverse Solidus character (Decimal 92)</dd>
|
|
<dt>\n</dt>
|
|
<dd>Converted to the LineEnding sequence for the OS or platform</dd>
|
|
</dl>
|
|
<p>
|
|
The return value is a string which contains the number of codepoints in
|
|
<var>SourceStart</var> specified in <var>SourceLen</var>, or an empty string
|
|
('') when SourceLen is 0 (zero).
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CStringToUTF8String.Result">
|
|
<short>UTF-8-encoded string with C-style quoting removed.</short>
|
|
</element>
|
|
<element name="UTF8CStringToUTF8String.SourceStart">
|
|
<short>PChar with the UTF-8-encoded C-style string.</short>
|
|
</element>
|
|
<element name="UTF8CStringToUTF8String.SourceLen">
|
|
<short>Number of codepoints to copy in the method.</short>
|
|
</element>
|
|
|
|
<element name="UTF8Pos">
|
|
<short>
|
|
Returns the character index where the search text starts in the string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Returns the character index where <var>SearchForText</var> starts in
|
|
<var>SearchInText</var>. An optional <var>StartPos</var> can be given to
|
|
start searching at a given character index. StartPos starts at 1.
|
|
</p>
|
|
<p>
|
|
Returns 0 if the search text is not found in the string.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8Pos.Result">
|
|
<short>Character position where the search text was located.</short>
|
|
</element>
|
|
<element name="UTF8Pos.SearchForText">
|
|
<short>Value to locate in the string.</short>
|
|
</element>
|
|
<element name="UTF8Pos.SearchInText">
|
|
<short>String to search for the specified value.</short>
|
|
</element>
|
|
|
|
<element name="UTF8PosP">
|
|
<short>
|
|
Returns a pointer to the position where SearchForText starts in SearchInText,
|
|
or Nil when not found.
|
|
</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8PosP.Result">
|
|
<short>
|
|
Pointer to the character value where SearchForText was located in
|
|
SearchInText, or Nil when not found.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8PosP.SearchForText">
|
|
<short>
|
|
Pointer to the character(s) to locate in SearchInText.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8PosP.SearchForTextLen">
|
|
<short>
|
|
Number of bytes in SearchForText.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8PosP.SearchInText">
|
|
<short>
|
|
Pointer to the character values examined in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8PosP.SearchInTextLen">
|
|
<short>
|
|
Number of bytes in SearchInText.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8Copy">
|
|
<short>
|
|
Copies the specified number of codepoints from the UTF-8-encoded string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8Copy</var> is a <var>String</var> function used to copy
|
|
UTF-8-encoded values from <var>s</var> starting at the position in
|
|
<var>StartCharIndex</var>. <var>CharCount</var> specifies the number of
|
|
multi-byte characters (or codepoints) to include in the return value. The
|
|
return value is an empty string ('') when s is not a valid UTF-8-encoded
|
|
string.
|
|
</p>
|
|
<p>
|
|
UTF8Copy behaves like a substring function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8Copy.Result">
|
|
<short>String with codepoints copied from the specified source.</short>
|
|
</element>
|
|
<element name="UTF8Copy.s">
|
|
<short>String with values to copy in the function.</short>
|
|
</element>
|
|
<element name="UTF8Copy.StartCharIndex">
|
|
<short>Initial character position for the copy operation.</short>
|
|
</element>
|
|
<element name="UTF8Copy.CharCount">
|
|
<short>Number of characters (codepoints) to copy in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8Delete">
|
|
<short>
|
|
Deletes characters (or codepoints) in a UTF-8-encoded string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8Delete</var> is an overloaded procedure used to delete characters
|
|
(or codepoints) in a UTF-8-encoded string starting at a given position.
|
|
</p>
|
|
<p>
|
|
<var>StartCharIndex</var> contains the character position in s where values
|
|
will be removed. <var>StartCharIndex</var> refers to codepoints and not
|
|
individual byte or character values. A single character can be expressed as
|
|
1-4 byte values in UTF-8 encoding. <var>CharCount</var> indicates the number
|
|
of codepoints to remove in the function.
|
|
</p>
|
|
<p>
|
|
The value in <var>s</var> is updated directly in the function.
|
|
</p>
|
|
<p>
|
|
An overloaded variant of the procedure is provided for platforms where the
|
|
Win1252 code page is used. On these platforms, raw byte values in s
|
|
are converted to the UTF-8 code page prior to performing the delete operation.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8Delete.s">
|
|
<short>String with values to delete in the procedure.</short>
|
|
</element>
|
|
<element name="UTF8Delete.StartCharIndex">
|
|
<short>Initial character position where values will be deleted.</short>
|
|
</element>
|
|
<element name="UTF8Delete.CharCount">
|
|
<short>
|
|
Number of characters (or codepoints) to remove in the procedure.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8Insert">
|
|
<short>
|
|
Inserts the specified UTF-8 values into a string at the specified character
|
|
position.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8Insert</var> inserts the specified values into a string at the
|
|
specified character position. UTF8Insert is an overloaded procedure. The
|
|
variants allow the string arguments to be specified as either String or
|
|
UTF8String types.
|
|
</p>
|
|
<p>
|
|
<var>source</var> is the UTF-8-encoded values inserted in the routine.
|
|
</p>
|
|
<p>
|
|
<var>s</var> is the string where the values from source are inserted at the
|
|
specified character position.
|
|
</p>
|
|
<p>
|
|
The value in <var>StartCharIndex</var> starts at <b>1</b>, and represents the
|
|
n-th codepoint (or character) in the destination string (s) where the values
|
|
are inserted.
|
|
</p>
|
|
<p>
|
|
UTF8Insert calls UTF8CodepointStart to determine the position in s where the
|
|
codepoint represented by StartCharIndex is located. No actions are performed in
|
|
the routine if a valid codepoint is not found at the position specified in
|
|
StartCharIndex. The RTL Insert routine is called to insert the UTF-8-encoded
|
|
values from source into s.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8CodepointStart"/>
|
|
<link id="UTF8Delete"/>
|
|
<link id="UTF8Pos"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8Insert.source">
|
|
<short>
|
|
String with the values inserted in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8Insert.s">
|
|
<short>
|
|
String where the values from source are inserted.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8Insert.StartCharIndex">
|
|
<short>
|
|
Starting character position (1-based) where the inserted values from source are
|
|
stored in s.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8StringReplace">
|
|
<short>
|
|
Replaces one or more values in a UTF-8-encoded string which match a given
|
|
pattern.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8StringReplace</var> is an overloaded <var>String</var> function
|
|
which replaces values in a string matching a given pattern.
|
|
</p>
|
|
<p>
|
|
<var>S</var> is the UTF-8-encoded string to update in the function.
|
|
<var>OldPattern</var> is a pattern with the values to be replaced in S.
|
|
<var>NewPattern</var> is the replacement value for OldPattern in S.
|
|
</p>
|
|
<p>
|
|
<var>Flags</var> contains <var>TReplaceFlags</var> values and controls the
|
|
options enabled in the operation. <var>rfIgnoreCase</var> causes
|
|
case-insensitive comparisons to be used for values in S and OldPattern; both
|
|
values are converted to lowercase copies for the purpose.
|
|
<var>rfReplaceAll</var> causes all occurrences of OldPattern to be replaced
|
|
with NewPattern in S. If the flag is omitted, only the first occurrence of
|
|
OldPattern in S is replaced in the routine.
|
|
</p>
|
|
<p>
|
|
<var>ALanguage</var> is the 2-digit ISO 639-1 Language Code, like 'es' or
|
|
'de', used when converting values to lowercase for case-insensitive search.
|
|
The default value is an empty string ('') and offers maximum speed when the
|
|
language is not significant.
|
|
</p>
|
|
<p>
|
|
<var>Count</var> is an output variable used to return the actual number of
|
|
replacements performed in the function.
|
|
</p>
|
|
<p>
|
|
UTF8StringReplace provides support for UTF-8 codepoints which have different
|
|
sizes (byte counts) for the uppercase and lowercase variants of patterns. It
|
|
ensures that the return value is resized (when needed) to account for
|
|
individual codepoint sizes altered in S due to case conversion.
|
|
</p>
|
|
<p>
|
|
The return value is a UTF-8-encoded string with the updated values from S
|
|
following replacements.
|
|
</p>
|
|
<p>
|
|
No actions are performed in the routine if OldPattern is an empty string ('').
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8StringReplace.Result">
|
|
<short>UTF-8-encoded values after the replace operation.</short>
|
|
</element>
|
|
<element name="UTF8StringReplace.S">
|
|
<short>Original UTF-8-encoded values to examine.</short>
|
|
</element>
|
|
<element name="UTF8StringReplace.OldPattern">
|
|
<short>Pattern to replace in the function.</short>
|
|
</element>
|
|
<element name="UTF8StringReplace.NewPattern">
|
|
<short>Replacement values for the operation.</short>
|
|
</element>
|
|
<element name="UTF8StringReplace.Flags">
|
|
<short>Replace options enabled in the function.</short>
|
|
</element>
|
|
<element name="UTF8StringReplace.ALanguage">
|
|
<short>Language Code used for locale-specific lowercase conversions.</short>
|
|
</element>
|
|
<element name="UTF8StringReplace.Count">
|
|
<short>Number of times the search pattern was replaced in the string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8LowerCase">
|
|
<short>
|
|
Converts the specified string to lowercase using Unicode case mapping rules.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8LowerCase</var> is a <var>String</var> function used to convert the
|
|
UTF-8-encoded value in AInStr to its lowercase equivalent. UTF8LowerCase uses
|
|
Unicode Data defined on the Unicode.org website at
|
|
ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt. FTP link removed.
|
|
The conversion is performed using the Case Mapping Rules defined
|
|
in https://www.ksu.ru/eng/departments/ktk/test/perl/lib/unicode/UCDFF301.html#CaseMappings [dead link removed].
|
|
</p>
|
|
<p>
|
|
ALanguage indicates the language code to use for the conversion. ALanguage
|
|
should be specified using ISO 639-1 format, which uses 2 characters to
|
|
represent each language. If the language has no code in ISO 639-1, then the
|
|
3-character code from ISO 639-2 should be used. For example: "tr" for the Turkish
|
|
language locale. Special handling is provided in the function for Turkish
|
|
('tr') and Azeri ('az') language codes. ALanguage can be set to an empty
|
|
string ('') for maximum speed in the conversion.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8LowerCase.Result">
|
|
<short>Lowercase values for the specified string.</short>
|
|
</element>
|
|
<element name="UTF8LowerCase.AInStr">
|
|
<short>Values to convert in the function.</short>
|
|
</element>
|
|
<element name="UTF8LowerCase.ALanguage">
|
|
<short>Language code for the operation.</short>
|
|
</element>
|
|
|
|
<element name="UTF8LowerString">
|
|
<short>
|
|
Converts the specified string to lowercase using Unicode case mapping rules.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Calls UTF8LowerCase to get the return value for the function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8LowerString.Result">
|
|
<short>Lowercase values for the specified string.</short>
|
|
</element>
|
|
<element name="UTF8LowerString.S">
|
|
<short>String value to convert in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8UpperCase">
|
|
<short>
|
|
Converts the specified string to uppercase using Unicode case mapping rules.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8UpperCase</var> is a <var>String</var> function used to convert the
|
|
UTF-8-encoded value in AInStr to its uppercase equivalent. UTF8UpperCase uses
|
|
Unicode Data as defined at the Unicode.org website.
|
|
[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt] FTP link removed.
|
|
The conversion is performed using the Case Mapping Rules defined at
|
|
https://www.ksu.ru/eng/departments/ktk/test/perl/lib/unicode/UCDFF301.html#CaseMappings. Dead link removed.
|
|
</p>
|
|
<p>
|
|
ALanguage indicates the language code to use for the conversion. ALanguage
|
|
should be specified using ISO 639-1 format, which uses 2 characters to
|
|
represent each language. If the language has no code in ISO 639-1, then the
|
|
3-character code from ISO 639-2 should be used. For example: "tr" for the Turkish
|
|
language locale. Special handling is provided in the function for Turkish
|
|
('tr') and Azeri ('az') language codes. ALanguage can be set to an empty
|
|
string ('') for maximum speed in the conversion.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8UpperCase.Result">
|
|
<short>Uppercase values for the specified string.</short>
|
|
</element>
|
|
<element name="UTF8UpperCase.AInStr">
|
|
<short>Values to convert in the function.</short>
|
|
</element>
|
|
<element name="UTF8UpperCase.ALanguage">
|
|
<short>Language code for the operation.</short>
|
|
</element>
|
|
|
|
<element name="UTF8UpperString">
|
|
<short>
|
|
Inline variant of UTF8UpperCase.
|
|
</short>
|
|
<descr>
|
|
Inline variant of UTF8UpperCase.
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8UpperString.Result">
|
|
<short>Uppercase values for the string.</short>
|
|
</element>
|
|
<element name="UTF8UpperString.s">
|
|
<short>Values to convert in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8UpperCaseFast">
|
|
<short>
|
|
Gets the uppercase value for the specified text. Optimized to improve speed for ASCII content in the argument.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8UpperCaseFast</var> examines the ordinal values for the characters in
|
|
<var>AText</var> to determine how the return value is derived. It is optimized
|
|
for ASCII content (byte values in the range 1..127). It converts individual
|
|
characters in the range ['a'..'z'] by subtracting 32 from their ordinal values.
|
|
</p>
|
|
<p>
|
|
If a non-ASCII byte value is found in AText, the return value is derived by
|
|
calling UTF8UpperCase with the value in AText as an argument.
|
|
</p>
|
|
</descr>
|
|
<version>
|
|
Added in LazUtils version 4.0.
|
|
</version>
|
|
<seealso>
|
|
<link id="UTF8UpperCase"/>
|
|
<link id="UTF8LowerCaseFast"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8UpperCaseFast.Result">
|
|
<short>
|
|
Uppercase value for the specified text.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8UpperCaseFast.AText">
|
|
<short>
|
|
String with the content examined and converted to its uppercase representation.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8LowerCaseFast">
|
|
<short>
|
|
Gets the lowercase value for the specified text. Optimized to improve speed for ASCII content in the argument.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8LowerCaseFast</var> examines the ordinal values for the characters in
|
|
<var>AText</var> to determine how the return value is derived. It is optimized
|
|
for ASCII content (byte values in the range 1..127). It converts individual
|
|
characters in the range ['A'..'Z'] by adding 32 to their ordinal values.
|
|
</p>
|
|
<p>
|
|
If a non-ASCII byte value is found in AText, the return value is derived by
|
|
calling UTF8LowerCase with the value in AText as an argument.
|
|
</p>
|
|
</descr><version>
|
|
Added in LazUtils version 4.0.
|
|
</version>
|
|
<seealso>
|
|
<link id="UTF8LowerCase"/>
|
|
<link id="UTF8UpperCase"/>
|
|
<link id="UTF8UpperCaseFast"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8LowerCaseFast.Result">
|
|
<short>
|
|
Lowercase value for the specified text.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8LowerCaseFast.AText">
|
|
<short>
|
|
String with the content examined and converted to its lowercase representation.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8SwapCase">
|
|
<short>
|
|
Provides a simplistic implementation of UTF8UpperCase and UTF8LowerCase.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8SwapCase</var> provides a "naive" implementation that uses
|
|
<var>UTF8UpperCase</var> and <var>UTF8LowerCase</var>. Performance is
|
|
acceptable for short and reasonably long strings, but it could benefit from
|
|
better performance and lower memory consumption.
|
|
</p>
|
|
<p>
|
|
AInStr contains a UTF-8-encoded string with values to convert in the function.
|
|
Each character in AInStr will have its case "toggled" in the function. In
|
|
other words, an uppercase character is converted to lowercase, and vice versa.
|
|
</p>
|
|
<p>
|
|
ALanguage indicates the language code to use for the conversion. ALanguage
|
|
should be specified using ISO 639-1 format, which uses 2 characters to
|
|
represent each language. If the language has no code in ISO 639-1, then the
|
|
3-character code from ISO 639-2 should be used. For example: "tr" for the
|
|
Turkish language locale. Special handling is provided in the function for
|
|
Turkish ('tr') and Azeri ('az') language codes. ALanguage can be set to an
|
|
empty string ('') for maximum speed in the conversion.
|
|
</p>
|
|
<p>
|
|
No actions are performed in the method when the number of bytes for the
|
|
converted value differs from the number of bytes in the original value. In
|
|
this case, the return value contains the unmodified string in AInStr. The
|
|
return value is an empty string ('') when AInStr is an empty string ('').
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8SwapCase.Result">
|
|
<short>String with the converted case values.</short>
|
|
</element>
|
|
<element name="UTF8SwapCase.AInStr">
|
|
<short>Original values for the conversion.</short>
|
|
</element>
|
|
<element name="UTF8SwapCase.ALanguage">
|
|
<short>Language code for the locale used in the conversion.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ProperCase">
|
|
<short>
|
|
Capitalizes the first letter of each word in the string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8ProperCase</var> is a <var>String</var> function used to capitalize
|
|
the first letter of each word in the specified string. WordDelims is a set
|
|
which contains the system characters used as word boundaries in the string.
|
|
</p>
|
|
<p>
|
|
UTF8ProperCase converts all of the values in AInStr to their lowercase
|
|
equivalents, before converting letters following a word delimiter to
|
|
uppercase.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8ProperCase.Result">
|
|
<short>Converted values for the string.</short>
|
|
</element>
|
|
<element name="UTF8ProperCase.AInStr">
|
|
<short>Values to convert in the function.</short>
|
|
</element>
|
|
<element name="UTF8ProperCase.WordDelims">
|
|
<short>Characters used as word delimiters.</short>
|
|
</element>
|
|
|
|
<element name="FindInvalidUTF8Codepoint">
|
|
<short>
|
|
Finds the position where an invalid UTF-8 codepoint is found in the string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>FindInvalidUTF8Codepoint</var> is a <var>PtrInt</var> function used to
|
|
find the position where an invalid UTF-8 codepoint is located in the
|
|
specified value. The return value contains <b>-1</b> when none of the values
|
|
in p are invalid, or the zero-based offset into p where the invalid encoding
|
|
was located.
|
|
</p>
|
|
<p>
|
|
<var>StopOnNonUTF8</var> indicates if the function should exit when an
|
|
encoded value is found that is not defined for the UTF-8 encoding, or for
|
|
single byte characters inserted in the middle of a UTF-8 encoding (used in
|
|
XSS attacks).
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="FindInvalidUTF8Codepoint.Result">
|
|
<short>Offset into the string for the error.</short>
|
|
</element>
|
|
<element name="FindInvalidUTF8Codepoint.p">
|
|
<short>Values to examine in the function.</short>
|
|
</element>
|
|
<element name="FindInvalidUTF8Codepoint.Count">
|
|
<short>Length of the input values.</short>
|
|
</element>
|
|
<element name="FindInvalidUTF8Codepoint.StopOnNonUTF8">
|
|
<short>True to exit on a malformed codepoint.</short>
|
|
</element>
|
|
|
|
<element name="FindInvalidUTF8Character">
|
|
<short>
|
|
Returns -1 if OK, otherwise byte index of invalid UTF-8 codepoint.
|
|
</short>
|
|
<descr>
|
|
<remark>
|
|
Deprecated. Use FindInvalidUTF8Codepoint instead.
|
|
</remark>
|
|
<p>
|
|
It always stops on irregular codepoints. For example Codepoint 0 is normally
|
|
encoded as #0, but it can also be encoded as #192#0. Because most software
|
|
does not check this, it can be exploited and is a security risk. If
|
|
StopOnNonUTF8 is <b>False</b> it will ignore undefined codes. For example
|
|
#128. By default it stops on such codes.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="FindInvalidUTF8Character.Result"/>
|
|
<element name="FindInvalidUTF8Character.p"/>
|
|
<element name="FindInvalidUTF8Character.Count"/>
|
|
<element name="FindInvalidUTF8Character.StopOnNonASCII"/>
|
|
|
|
<element name="UTF8StringOfChar">
|
|
<short>
|
|
Creates a string filled with the specified number of given codepoints.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8StringOfChar</var> is a function used to create a UTF-8-encoded
|
|
string filled with the specified number of occurrences of the given
|
|
codepoint. <var>AUtf8Char</var> is the UTF-8 codepoint to reproduce in the
|
|
function. No actions are performed if AUtf8Char is an empty string (''), or
|
|
contains a malformed UTF-8 codepoint.
|
|
</p>
|
|
<p>
|
|
The return value is filled with byte values for the codepoint (1 to 4 bytes
|
|
as per the UTF-8 encoding). The process is repeated until the number of
|
|
codepoints in <var>N</var> have been stored in the return value.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8StringOfChar.Result">
|
|
<short>
|
|
String with the specified number of occurrences of the codepoint.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8StringOfChar.AUtf8Char">
|
|
<short>Codepoint to reproduce in the function.</short>
|
|
</element>
|
|
<element name="UTF8StringOfChar.N">
|
|
<short>Number of occurrences to include in the return value.</short>
|
|
</element>
|
|
|
|
<element name="UTF8AddChar">
|
|
<short>
|
|
Adds the specified number of UTF-8 codepoints to a string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8AddChar</var> is a <var>String</var> function used to add the
|
|
specified number of UTF-8 codepoints to a string. <var>AUtf8Char</var> is the
|
|
UTF-8-encoded codepoint to add to string value in <var>S</var>. <var>N</var>
|
|
indicates the number of times the codepoint should be added to the string.
|
|
</p>
|
|
<p>
|
|
No actions are performed in the function when AUtf8Char is an empty string
|
|
(''), or contains a malformed UTF-8 codepoint.
|
|
</p>
|
|
<remark>
|
|
Values added to the string in S are inserted at the beginning of the string
|
|
(prepended).
|
|
</remark>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8AddChar.Result">
|
|
<short>Updated value for the string.</short>
|
|
</element>
|
|
<element name="UTF8AddChar.AUtf8Char">
|
|
<short>Codepoint to prepend to the string value.</short>
|
|
</element>
|
|
<element name="UTF8AddChar.S">
|
|
<short>Original values for the string.</short>
|
|
</element>
|
|
<element name="UTF8AddChar.N">
|
|
<short>Number of codepoints to prepend to the string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8AddCharR">
|
|
<short>
|
|
Appends the specified number of UTF-8 codepoints to a string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8AddCharR</var> is a <var>String</var> function used to append the
|
|
specified number of UTF-8 codepoints to a string. <var>AUtf8Char</var> is the
|
|
UTF-8-encoded codepoint to add to string value in <var>S</var>. <var>N</var>
|
|
indicates the number of times the codepoint should be appended to the string.
|
|
</p>
|
|
<p>
|
|
No actions are performed in the function when AUtf8Char is an empty string
|
|
(''), or contains a malformed UTF-8 codepoint.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8AddCharR.Result">
|
|
<short>Updated value for the string.</short>
|
|
</element>
|
|
<element name="UTF8AddCharR.AUtf8Char">
|
|
<short>Codepoint to append to the string value.</short>
|
|
</element>
|
|
<element name="UTF8AddCharR.S">
|
|
<short>Original values for the string.</short>
|
|
</element>
|
|
<element name="UTF8AddCharR.N">
|
|
<short>Number of codepoints to append to the string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8PadLeft">
|
|
<short>
|
|
Adds the specified number of values in AUtf8Char to the beginning of a string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8PadLeft</var> is used to add the specified number of values in
|
|
<var>AUtf8Char</var> to the beginning of a string. The default value for
|
|
AUtf8Char is #32 ([SPACE]), but can contain any valid UTF-8 codepoint (1 to 4
|
|
bytes). UTF8PadLeft calls <var>Utf8AddChar</var> to create the return value
|
|
for the function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8PadLeft.Result">
|
|
<short>
|
|
Updated value for the string with characters inserted at the beginning.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8PadLeft.S">
|
|
<short>Original string value to modify in the function.</short>
|
|
</element>
|
|
<element name="UTF8PadLeft.N">
|
|
<short>Number of codepoints desired in the modified string.</short>
|
|
</element>
|
|
<element name="UTF8PadLeft.AUtf8Char">
|
|
<short>UTF-8 codepoint to insert into the string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8PadRight">
|
|
<short>
|
|
Appends the specified number of UTF-8 codepoints to the end of a string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8PadRight</var> is used to append the specified number of UTF-8
|
|
codepoints to the end of a string. The default value for <var>AUtf8Char</var>
|
|
is #32 ([SPACE]), but can contain any valid UTF-8 codepoint (1 to 4 bytes).
|
|
UTF8PadRight calls <var>Utf8AddCharR</var> to create the return value for the
|
|
function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8PadRight.Result">
|
|
<short>Updated value for the string.</short>
|
|
</element>
|
|
<element name="UTF8PadRight.S">
|
|
<short>Original string to modify in the function.</short>
|
|
</element>
|
|
<element name="UTF8PadRight.N">
|
|
<short>Number of codepoints desired in the modified string.</short>
|
|
</element>
|
|
<element name="UTF8PadRight.AUtf8Char">
|
|
<short>Codepoint to append to the string value.</short>
|
|
</element>
|
|
|
|
<element name="UTF8PadCenter">
|
|
<short>
|
|
Center aligns a string to the specified length.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8PadCenter</var> is used to center align a string to the specified
|
|
length (number of codepoints). <var>N</var> indicates the length of the
|
|
modified string after padding on the left and right with the UTF-8 codepoint
|
|
in <var>AUtf8Char</var>. The default value for AUtf8Char is #32 ([SPACE]),
|
|
but can contain any valid UTF-8 codepoint (1 to 4 bytes).
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8PadCenter.Result">
|
|
<short>Modified value for the string after center alignment.</short>
|
|
</element>
|
|
<element name="UTF8PadCenter.S">
|
|
<short>Original string value.</short>
|
|
</element>
|
|
<element name="UTF8PadCenter.N">
|
|
<short>Desired length for the string (in codepoints).</short>
|
|
</element>
|
|
<element name="UTF8PadCenter.AUtf8Char">
|
|
<short>UTF-8 codepoint used as a padding character.</short>
|
|
</element>
|
|
|
|
<element name="UTF8LeftStr">
|
|
<short>
|
|
Gets the specified number of characters (codepoints) at the start of the
|
|
string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8LeftStr</var> is used to get the specified number of characters
|
|
(codepoints) at the beginning of the UTF-8-encoded string. UTF8LeftStr calls
|
|
<var>Utf8Copy</var> to get the return value for the function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8LeftStr.Result">
|
|
<short>Values from the specified string.</short>
|
|
</element>
|
|
<element name="UTF8LeftStr.AText">
|
|
<short>Original string to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8LeftStr.ACount">
|
|
<short>Number of characters (codepoints) to get from the string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8RightStr">
|
|
<short>
|
|
Gets the specified number of characters (codepoints) at the end of the string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8RightStr</var> is used to get the specified number of characters
|
|
(codepoints) at the end of the UTF-8-encoded string. UTF8RightStr calls
|
|
<var>Utf8Copy</var> to get the return value for the function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8RightStr.Result">
|
|
<short>Values from the string.</short>
|
|
</element>
|
|
<element name="UTF8RightStr.AText">
|
|
<short>Original string to examine in the function.</short>
|
|
</element>
|
|
<element name="UTF8RightStr.ACount">
|
|
<short>Number of characters (codepoints) to get from the string.</short>
|
|
</element>
|
|
|
|
<element name="UTF8QuotedStr">
|
|
<short>
|
|
Performs safe quoting for the specified UTF-8-encoded string value.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8QuotedStr</var> is a <var>String</var> function used to double all
|
|
occurrences of the byte sequence in the Quote argument. It works like the
|
|
QuotedStr or AnsiQuotedStr routines from the RTL <file>sysutils</file> unit,
|
|
but allows the Quote character to contain a valid multi-byte UTF-8 codepoint.
|
|
Processing in the routine is halted when the #0 (Decimal 0) character is
|
|
encountered.
|
|
</p>
|
|
<p>
|
|
Like its counterparts, UTF8QuotedStr encloses the return value with the
|
|
character specified in the Quote argument.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="#rtl.sysutils.QuotedStr">QuotedStr</link>
|
|
<link id="#rtl.sysutils.AnsiQuotedStr">AnsiQuotedStr</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8QuotedStr.Result">
|
|
<short>
|
|
Value in S after safe UTF-8 quoting has been applied.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8QuotedStr.S">
|
|
<short>
|
|
String with the values examined and quoted in the routine.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8QuotedStr.Quote">
|
|
<short>
|
|
Byte sequence with the quote character used in the routine.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8StartsText">
|
|
<short>
|
|
Determines if a string starts with the specified value.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8StartsText</var> determines if the value in <var>AText</var> begins
|
|
with the value in <var>ASubText</var>. Both values can contain a valid
|
|
UTF-8-encoded string. The return value is <b>False</b> when ASubText is an
|
|
empty string (''), or ASubText contains more characters (codepoints) than the
|
|
value in AText.
|
|
</p>
|
|
<p>
|
|
UTF8StartsText calls <var>Utf8Copy</var> and
|
|
<var>UTF8CompareLatinTextFast</var> to perform a case-insensitive comparison
|
|
between the values.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8StartsText.Result">
|
|
<short>True when the string starts with the specified text.</short>
|
|
</element>
|
|
<element name="UTF8StartsText.ASubText">
|
|
<short>Value to locate at the start of the string.</short>
|
|
</element>
|
|
<element name="UTF8StartsText.AText">
|
|
<short>String to examine in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8EndsText">
|
|
<short>
|
|
Determines if a string ends with the specified value.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8EndsText</var> determines if the value in <var>AText</var> ends with
|
|
the value in <var>ASubText</var>. Both values can contain a valid
|
|
UTF-8-encoded string. The return value is <b>False</b> when ASubText is an
|
|
empty string (''), or ASubText contains more characters (codepoints) than the
|
|
value in AText.
|
|
</p>
|
|
<p>
|
|
UTF8EndsText calls <var>Utf8Copy</var> and
|
|
<var>UTF8CompareLatinTextFast</var> to perform a case-insensitive comparison
|
|
between the values.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8EndsText.Result">
|
|
<short>True when the string ends with the specified text.</short>
|
|
</element>
|
|
<element name="UTF8EndsText.ASubText">
|
|
<short>Value to locate at the end of the string.</short>
|
|
</element>
|
|
<element name="UTF8EndsText.AText">
|
|
<short>String to examine in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8ReverseString">
|
|
<short>
|
|
Reverses the order of codepoints in the specified string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8ReverseString</var> is used to create a string with the specified
|
|
content in reverse order. p contains the UTF-8-encoded values for the
|
|
original string.
|
|
</p>
|
|
<p>
|
|
ByteCount indicates the total number of bytes needed to represent the
|
|
codepoints in <var>p</var>.
|
|
</p>
|
|
<p>
|
|
UTF8ReverseString calls <var>UTF8CodepointSize</var> and moves the needed
|
|
number of byte values in p to the return value for the function.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8CodepointSize"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8ReverseString.Result">
|
|
<short>String with the reversed text values.</short>
|
|
</element>
|
|
<element name="UTF8ReverseString.p">
|
|
<short>PChar type with values reversed in the routine.</short>
|
|
</element>
|
|
<element name="UTF8ReverseString.ByteCount">
|
|
<short>Number of bytes reversed in the routine.</short>
|
|
</element>
|
|
<element name="UTF8ReverseString.AText">
|
|
<short>String with the values reversed in the routine.</short>
|
|
</element>
|
|
|
|
<element name="UTF8RPos">
|
|
<short>
|
|
Gets the right-most position in the Source string for the value in Substr.
|
|
</short>
|
|
<descr></descr>
|
|
<seealso>
|
|
<link id="UTF8Length"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8RPos.Result">
|
|
<short>Pointer to the position in Source.</short>
|
|
</element>
|
|
<element name="UTF8RPos.Substr">
|
|
<short>Value to locate in Source.</short>
|
|
</element>
|
|
<element name="UTF8RPos.Source">
|
|
<short>String with values examined in the routine.</short>
|
|
</element>
|
|
|
|
<element name="UTF8WrapText">
|
|
<short>
|
|
Creates a word-wrapped version of the specified string.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8WrapText</var> is an overloaded <var>String</var> function used to
|
|
wrap lines of text in <var>S</var> at the number of characters (codepoints)
|
|
specified in <var>MaxCol</var>.
|
|
</p>
|
|
<p>
|
|
The overloaded variants allow additional parameters to be provided with the
|
|
EOL character sequence and a set of characters where a line break can be
|
|
inserted. Default characters are used in <var>BreakChars</var> for the
|
|
variant without a BreakChars argument. They include: ' ' (Space), '-' (Dash),
|
|
and #9 (Tab). <var>BreakStr</var> contains the end-of-line sequence used to
|
|
represent a line break inserted into the return value.
|
|
</p>
|
|
<p>
|
|
Use <var>Indent</var> to specify the number of Space (#32) characters inserted
|
|
as indentation at the beginning of each word-wrapped line. The default value is
|
|
0 (zero) and omits indentation in the word-wrapped lines. A negative value in
|
|
Indent causes the argument to be set to 0.
|
|
</p>
|
|
<p>
|
|
The Indent argument affects the number of UTF-8 characters allowed in each
|
|
word-wrapped line. When set to a positive non-zero value, the maximum number of
|
|
characters allowed per line is <b>MaxCol - Indent</b>.
|
|
</p>
|
|
<p>
|
|
No actions are performed in the function when S is an empty string (''),
|
|
MaxCol is set to 0 (zero), or BreakChars is an empty set ([]).
|
|
</p>
|
|
</descr>
|
|
<version>
|
|
Modified in LazUtils 4.0 to include the overload with an indentation argument.
|
|
</version>
|
|
<seealso>
|
|
<link id="#lazutils.lazstringutils.BreakString">BreakString</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8WrapText.Result">
|
|
<short>Word-wrapped version of the specified text.</short>
|
|
</element>
|
|
<element name="UTF8WrapText.S">
|
|
<short>String with values word-wrapped in the routine.</short>
|
|
</element>
|
|
<element name="UTF8WrapText.BreakStr">
|
|
<short>End-of-line sequence used in the routine.</short>
|
|
</element>
|
|
<element name="UTF8WrapText.BreakChars">
|
|
<short>Set of characters where a line break can be inserted.</short>
|
|
</element>
|
|
<element name="UTF8WrapText.MaxCol">
|
|
<short>Maximum line width in number of UTF-8 characters.</short>
|
|
</element>
|
|
<element name="UTF8WrapText.Indent">
|
|
<short>
|
|
Number of Space (#32) characters used to indent the individual lines of
|
|
word-wrapped text.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="IsPureAscii">
|
|
<short>
|
|
Determines whether the specified string contains only single-byte ASCII
|
|
characters.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Used in the implementation of the TStringListUTF8Fast.InsertItem method.
|
|
</p>
|
|
</descr>
|
|
<version>
|
|
Added in LazUtils version 3.2.
|
|
</version>
|
|
<seealso>
|
|
<link id="TStringListUTF8Fast.InsertItem"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="IsPureAscii.Result">
|
|
<short>
|
|
Returns <b>True</b> if all of the characters in S have a value not greater than $7F.
|
|
</short>
|
|
</element>
|
|
<element name="IsPureAscii.S">
|
|
<short>
|
|
String with the characters examined in the method.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="TEscapeMode">
|
|
<short>
|
|
Represents styles used to escape control characters.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>TEscapeMode</var> is an enumerated type with values that determine the
|
|
output style for escaped characters in <var>Utf8EscapeControlChars</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="TEscapeMode.emPascal">
|
|
<short>Pascal-style escape characters '#27'</short>
|
|
</element>
|
|
<element name="TEscapeMode.emHexPascal">
|
|
<short>Pascal-style hexadecimal strings '#$1B'</short>
|
|
</element>
|
|
<element name="TEscapeMode.emHexC">
|
|
<short>C-style hexadecimal strings '\0x1B'</short>
|
|
</element>
|
|
<element name="TEscapeMode.emC">
|
|
<short>C-style strings '\e'</short>
|
|
</element>
|
|
<element name="TEscapeMode.emAsciiControlNames">
|
|
<short>ASCII-style control names '[ESC]'</short>
|
|
</element>
|
|
|
|
<element name="Utf8EscapeControlChars">
|
|
<short>
|
|
Translates control characters in a UTF-8-encoded string into human readable
|
|
format.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>Utf8EscapeControlChars</var> translates control characters inside a
|
|
UTF-8-encoded string into human readable format. Characters in the range
|
|
#0..#31 are converted into the human-readable values for the control
|
|
characters in the format specified by <var>EscapeMode</var>, including:
|
|
</p>
|
|
<dl>
|
|
<dt>emPascal</dt>
|
|
<dd>Pascal-style escape characters '#27'</dd>
|
|
<dt>emHexPascal</dt>
|
|
<dd>Pascal-style hexadecimal strings '#$1B'</dd>
|
|
<dt>emHexC</dt>
|
|
<dd>C-style hexadecimal strings '\0x1B'</dd>
|
|
<dt>emC</dt>
|
|
<dd>C-style strings '\e'</dd>
|
|
<dt>emAsciiControlNames</dt>
|
|
<dd>ASCII-style control names '[ESC]'</dd>
|
|
</dl>
|
|
<p>
|
|
Utf8EscapeControlChars calls <var>FindInvalidUTF8Codepoint</var> to see if
|
|
<var>S</var> contains any invalid codepoints for the UTF-8 encoding.
|
|
<var>UTF8FixBroken</var> is called to repair the input value.
|
|
</p>
|
|
<p>
|
|
Utf8EscapeControlChars iterates over the characters in S, and converts any
|
|
character value in the eligible range using an internal lookup table for the
|
|
value in EscapeMode. All other character values (or values in multi-byte
|
|
UTF-8 code points) are included in the return value in their unmodified form.
|
|
</p>
|
|
<p>
|
|
Mainly used as a diagnostic or logging tool.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8FixBroken"/>
|
|
<link id="TEscapeMode"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="Utf8EscapeControlChars.Result">
|
|
<short>String with the escaped values for control characters in S.</short>
|
|
</element>
|
|
<element name="Utf8EscapeControlChars.S">
|
|
<short>UTF-8 encoded string with values converted in the routine.</short>
|
|
</element>
|
|
<element name="Utf8EscapeControlChars.EscapeMode">
|
|
<short>Controls the human readable format for escaped characters.</short>
|
|
</element>
|
|
|
|
<element name="TUTF8TrimFlag">
|
|
<short>
|
|
Controls trimming actions performed in UTF8Trim.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>TUTF8TrimFlag</var> is an enumerated type with values that control
|
|
trimming actions performed in the <var>UTF8Trim</var> function.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="TUTF8TrimFlags"/>
|
|
<link id="UTF8Trim"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="TUTF8TrimFlag.u8tKeepStart">
|
|
<short>Keeps leading whitespace.</short>
|
|
</element>
|
|
<element name="TUTF8TrimFlag.u8tKeepEnd">
|
|
<short>Keeps trailing whitespace.</short>
|
|
</element>
|
|
<element name="TUTF8TrimFlag.u8tKeepTabs">
|
|
<short>Keeps tab characters.</short>
|
|
</element>
|
|
<element name="TUTF8TrimFlag.u8tKeepLineBreaks">
|
|
<short>Keeps line breaks.</short>
|
|
</element>
|
|
<element name="TUTF8TrimFlag.u8tKeepNoBreakSpaces">
|
|
<short>Keeps no-break space characters.</short>
|
|
</element>
|
|
<element name="TUTF8TrimFlag.u8tKeepControlCodes">
|
|
<short>Keeps control codes other than tabs and line breaks.</short>
|
|
</element>
|
|
|
|
<element name="TUTF8TrimFlags">
|
|
<short>
|
|
Stores values from the TUTF8TrimFlag enumeration.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>TUTF8TrimFlags</var> is a set type used to store values from the
|
|
<var>TUTF8TrimFlag</var> enumeration. TUTF8TrimFlags is the type passed in
|
|
arguments to the <var>UTF8Trim</var> function.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="TUTF8TrimFlag"/>
|
|
<link id="UTF8Trim"/>
|
|
</seealso>
|
|
</element>
|
|
|
|
<element name="UTF8Trim">
|
|
<short>
|
|
Removes leading and trailing whitespace or control characters.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8Trim</var> removes spaces, tabs, line breaks and control characters
|
|
at both the start and the end of the UTF-8-encoded value in <var>s</var>. Use
|
|
<var>Flags</var> to delete at the start only or at the end only, or to not
|
|
delete line breaks. Control characters are the Unicode sets C0 and C1, and
|
|
the left-to-right and right-to-left marks.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8Trim.Result">
|
|
<short>Trimmed values for the string.</short>
|
|
</element>
|
|
<element name="UTF8Trim.s">
|
|
<short>String with values to trim.</short>
|
|
</element>
|
|
<element name="UTF8Trim.Flags">
|
|
<short>Actions to perform in the function.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CompareStr">
|
|
<short>
|
|
Compares the UTF-8-encoded string values.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CompareStr</var> is a function used to compare the specified
|
|
UTF-8-encoded string values. The return value indicates the relative sort
|
|
order for the compared values, and includes:
|
|
</p>
|
|
<dl>
|
|
<dt>0</dt>
|
|
<dd>Values are the same</dd>
|
|
<dt>&lt;0</dt>
|
|
<dd>Value S1 comes before S2 in an alphabetic sort order</dd>
|
|
<dt>&gt;0</dt>
|
|
<dd>Value S1 comes after S2 in an alphabetic sort order</dd>
|
|
</dl>
|
|
<p>
|
|
Internally, UTF8CompareStr calls <var>WideCompareText</var> using the values
|
|
in S1 and S2 converted to UTF-16 code points.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CompareStr.Result">
|
|
<short>Relative order for the compared values.</short>
|
|
</element>
|
|
<element name="UTF8CompareStr.S1">
|
|
<short>First value for the comparison.</short>
|
|
</element>
|
|
<element name="UTF8CompareStr.S2">
|
|
<short>Second value for the comparison.</short>
|
|
</element>
|
|
<element name="UTF8CompareStr.Count1">
|
|
<short>Length of the first value.</short>
|
|
</element>
|
|
<element name="UTF8CompareStr.Count2">
|
|
<short>Length of the second value.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CompareStrP">
|
|
<short>Compares the specified PChar values.</short>
|
|
<descr>
|
|
<p>
|
|
Calls UTF8CompareStr to get the return value for the function.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CompareStrP.Result">
|
|
<short>Relative order for the compared values.</short>
|
|
</element>
|
|
<element name="UTF8CompareStrP.S1">
|
|
<short>First PChar value for the comparison.</short>
|
|
</element>
|
|
<element name="UTF8CompareStrP.S2">
|
|
<short>Second PChar value for the comparison.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CompareText">
|
|
<short>
|
|
Case-insensitive comparison of two UTF-8-encoded values.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CompareText</var> is a function used to perform a case-insensitive
|
|
comparison between the specified UTF-8-encoded values. The return value
|
|
indicates the relative sort order for the compared values, and includes:
|
|
</p>
|
|
<dl>
|
|
<dt>0</dt>
|
|
<dd>Values are the same</dd>
|
|
<dt>&lt; 0</dt>
|
|
<dd>Value S1 comes before S2 in an alphabetic sort order</dd>
|
|
<dt>&gt; 0</dt>
|
|
<dd>Value S1 comes after S2 in an alphabetic sort order</dd>
|
|
</dl>
|
|
<p>
|
|
Internally, UTF8CompareText uses <var>WideCompareText</var> when multi-byte
|
|
codepoints are found in the compared values. This function guarantees proper
|
|
collation on all supported platforms.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CompareText.Result">
|
|
<short>Relative order for the compared values.</short>
|
|
</element>
|
|
<element name="UTF8CompareText.S1">
|
|
<short>First value for the comparison.</short>
|
|
</element>
|
|
<element name="UTF8CompareText.S2">
|
|
<short>Second value for the comparison.</short>
|
|
</element>
|
|
|
|
<element name="UTF8CompareTextP">
|
|
<short>
|
|
Performs a case-insensitive comparison for the specified UTF-8-encoded PChar
|
|
values.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Converts values in S1 and S2 to UTF-16 encoding, and calls WideCompareText to
|
|
get the return value for the case-insensitive comparison. The return value
|
|
contains the relative difference between the compared values. For instance:
|
|
</p>
|
|
<ul>
|
|
<li>&lt;0 when S1&lt;S2.</li>
|
|
<li>0 when S1=S2.</li>
|
|
<li>&gt;0 when S1&gt;S2.</li>
|
|
</ul>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CompareTextP.Result">
|
|
<short>
|
|
Integer result for the case-insensitive comparison.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CompareTextP.S1">
|
|
<short>
|
|
PChar with the values used in the comparison.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CompareTextP.S2">
|
|
<short>
|
|
PChar with the values used in the comparison.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8CompareLatinTextFast">
|
|
<short>
|
|
Deprecated. Use UTF8CompareText or AnsiCompareText instead.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CompareLatinTextFast</var> is like UTF8CompareText, but does not
|
|
return strict alphabetical order. The order is deterministic and good for
|
|
binary search and similar uses. It avoids the conversions from UTF-8 to UTF-16
|
|
needed to use WideCompareText.
|
|
</p>
|
|
<p>
|
|
UTF8CompareLatinTextFast optimizes the comparison of values using single-byte
|
|
encoding by converting uppercase characters to lowercase characters for the
|
|
comparison. Multi-byte portions (with character values larger than Decimal
|
|
127) are optimized to ignore leading byte sequences common to both compared
|
|
values.
|
|
</p>
|
|
<p>
|
|
Otherwise, the routine falls back to AnsiCompareText to compare lowercase
|
|
ASCII values in S1 and S2.
|
|
</p>
|
|
<p>
|
|
The return value is a pointer to an Integer where the relative sort order for
|
|
the compared values is stored.
|
|
</p>
|
|
<dl>
|
|
<dt>&lt;0</dt>
|
|
<dd>S1 comes before S2 in the sort order.</dd>
|
|
<dt>&gt;0</dt>
|
|
<dd>S1 comes after S2 in the sort order.</dd>
|
|
<dt>0 (zero)</dt>
|
|
<dd>S1 and S2 have the same value in the sort order.</dd>
|
|
</dl>
|
|
</descr>
|
|
<version>
|
|
Deprecated in LazUtils version 3.2 (Feb 2024).
|
|
</version>
|
|
<seealso>
|
|
<link id="UTF8CompareText"/>
|
|
<link id="#rtl.sysutils.AnsiCompareText">AnsiCompareText</link>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF8CompareLatinTextFast.Result">
|
|
<short>
|
|
Pointer to an Integer with the relative order for the compared values.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CompareLatinTextFast.S1">
|
|
<short>
|
|
UTF-8-encoded String value used in the comparison.
|
|
</short>
|
|
</element>
|
|
<element name="UTF8CompareLatinTextFast.S2">
|
|
<short>
|
|
UTF-8-encoded String value used in the comparison.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8CompareStrCollated">
|
|
<short>
|
|
Deprecated. Use UTF8CompareStr instead.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8CompareStrCollated</var> is used to compare two strings using
|
|
language-specific sorting. The return value contains the relative sort order
|
|
for the compared values, as defined for <var>UTF8CompareStr</var>.
|
|
</p>
|
|
</descr>
|
|
<version>
|
|
Deprecated in LazUtils version 3.2 (Feb 2024).
|
|
</version>
|
|
<seealso/>
|
|
</element>
|
|
<element name="UTF8CompareStrCollated.Result">
|
|
<short>Relative order for the compared values.</short>
|
|
</element>
|
|
<element name="UTF8CompareStrCollated.S1">
|
|
<short>First string for the comparison.</short>
|
|
</element>
|
|
<element name="UTF8CompareStrCollated.S2">
|
|
<short>Second string for the comparison.</short>
|
|
</element>
|
|
|
|
<element name="CompareStrListUTF8LowerCase">
|
|
<short>
|
|
Compares the specified lines of text in a TStringList.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>CompareStrListUTF8LowerCase</var> is an <var>Integer</var> function used
|
|
to compare the specified lines of text in the <var>TStringList</var>
|
|
argument. <var>Index1</var> and <var>Index2</var> contain the ordinal
|
|
positions for the respective lines of text. CompareStrListUTF8LowerCase calls
|
|
<var>UTF8CompareText</var> to perform a case-insensitive comparison between
|
|
the values.
|
|
</p>
|
|
<p>
|
|
The return value contains the relative sort order for the compared values, as
|
|
defined for <var>UTF8CompareText</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="UTF8CompareText"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="CompareStrListUTF8LowerCase.Result">
|
|
<short>Relative order for the compared values.</short>
|
|
</element>
|
|
<element name="CompareStrListUTF8LowerCase.List">
|
|
<short>TStringList with values for the comparison.</short>
|
|
</element>
|
|
<element name="CompareStrListUTF8LowerCase.Index1">
|
|
<short>Position of the first text line.</short>
|
|
</element>
|
|
<element name="CompareStrListUTF8LowerCase.Index2">
|
|
<short>Position of the second text line.</short>
|
|
</element>
|
|
|
|
<element name="TStringListUTF8Fast">
|
|
<short>
|
|
Implements a string list using fast ASCII comparison functions when its data is
|
|
pure ASCII.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
When data is Unicode, it switches to slower AnsiCompare functions. The switch
|
|
is managed by setting the UseLocale property/option and should not be changed
|
|
by the user.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="#rtl.classes.TStringList">TStringList</link>
|
|
</seealso>
|
|
</element>
|
|
|
|
<element name="TStringListUTF8Fast.InsertItem">
|
|
<short>
|
|
Ensures that the UseLocale property is enabled when a new line with non-ASCII
|
|
data is stored in the string list.
|
|
</short>
|
|
<descr/>
|
|
<seealso/>
|
|
</element>
|
|
<element name="TStringListUTF8Fast.InsertItem.Index">
|
|
<short>
|
|
Ordinal position in the string list where the value in S is stored.
|
|
</short>
|
|
</element>
|
|
<element name="TStringListUTF8Fast.InsertItem.S">
|
|
<short>
|
|
String value examined and stored in the method.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="TStringListUTF8Fast.Create">
|
|
<short>
|
|
Constructor for the class instance.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>Create</var> is the overridden constructor for the class instance. It
|
|
calls the inherited method on entry to set the default encoding and options
|
|
used in the class instance. Create ensures that the UseLocale property is set
|
|
to <b>False</b> to allow fast comparisons for ASCII data stored in the string
|
|
list.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="TStringListUTF8Fast.InsertItem"/>
|
|
<link id="#rtl.classes.TStrings.UseLocale">TStrings.UseLocale</link>
|
|
<link id="#rtl.classes.TStrings.DefaultEncoding">TStrings.DefaultEncoding</link>
|
|
</seealso>
|
|
</element>
|
|
|
|
<element name="TConvertResult">
|
|
<short>
|
|
Indicates the result from UTF-8 &lt;-&gt; UTF-16 conversions.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>TConvertResult</var> is an enumeration type with values that indicate
|
|
the result from <var>ConvertUTF8ToUTF16</var> and
|
|
<var>ConvertUTF16ToUTF8</var> function calls.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="TConvertResult.trNoError">
|
|
<short>No error in the conversion.</short>
|
|
</element>
|
|
<element name="TConvertResult.trNullSrc">
|
|
<short>Source value is null.</short>
|
|
</element>
|
|
<element name="TConvertResult.trNullDest">
|
|
<short>Destination value is null.</short>
|
|
</element>
|
|
<element name="TConvertResult.trDestExhausted">
|
|
<short>Destination value is too small for the converted value.</short>
|
|
</element>
|
|
<element name="TConvertResult.trInvalidChar">
|
|
<short>An invalid encoding was found in the source value.</short>
|
|
</element>
|
|
<element name="TConvertResult.trUnfinishedChar">
|
|
<short>An unfinished encoding was found in the source value.</short>
|
|
</element>
|
|
|
|
<element name="TConvertOption">
|
|
<short>
|
|
Indicates options enabled during UTF-8 &lt;-&gt; UTF-16 conversions.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>TConvertOption</var> is an enumeration type with values that indicate
|
|
options enabled during UTF-8 &lt;-&gt; UTF-16 conversions.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="TConvertOption.toInvalidCharError">
|
|
<short>Stop on invalid source char and report error.</short>
|
|
</element>
|
|
<element name="TConvertOption.toInvalidCharToSymbol">
|
|
<short>Replace invalid source chars with '?'.</short>
|
|
</element>
|
|
<element name="TConvertOption.toUnfinishedCharError">
|
|
<short>Stop on unfinished source char and report error.</short>
|
|
</element>
|
|
<element name="TConvertOption.toUnfinishedCharToSymbol">
|
|
<short>Replace unfinished source char with '?'.</short>
|
|
</element>
|
|
|
|
<element name="TConvertOptions">
|
|
<short>
|
|
Stores values from the TConvertOption enumeration.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Stores values from the <var>TConvertOption</var> enumeration. Passed as an
|
|
argument to <var>ConvertUTF8ToUTF16</var> and <var>ConvertUTF16ToUTF8</var>.
|
|
</p>
|
|
</descr>
|
|
<seealso>
|
|
<link id="TConvertOption"/>
|
|
<link id="ConvertUTF8ToUTF16"/>
|
|
<link id="ConvertUTF16ToUTF8"/>
|
|
</seealso>
|
|
</element>
|
|
|
|
<element name="ConvertUTF8ToUTF16">
|
|
<short>
|
|
Converts values from UTF-8 encoding to UTF-16 encoding.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>ConvertUTF8ToUTF16</var> is used to convert the specified UTF-8 encoded
|
|
string to UTF-16 encoded (system endian).
|
|
</p>
|
|
<p>
|
|
<var>Options</var> indicates the conversion options enabled in the function,
|
|
and can include the following values:
|
|
</p>
|
|
<dl>
|
|
<dt>toInvalidCharError</dt>
|
|
<dd>
|
|
Stop on invalid source char and report error
|
|
</dd>
|
|
<dt>toInvalidCharToSymbol</dt>
|
|
<dd>
|
|
Replace invalid source chars with '?'
|
|
</dd>
|
|
<dt>toUnfinishedCharError</dt>
|
|
<dd>
|
|
Stop on unfinished source char and report error
|
|
</dd>
|
|
<dt>toUnfinishedCharToSymbol</dt>
|
|
<dd>
|
|
Replace unfinished source char with '?'
|
|
</dd>
|
|
</dl>
|
|
<p>
|
|
The return value is a value from the <var>TConvertResult</var> enumeration,
|
|
including:
|
|
</p>
|
|
<dl>
|
|
<dt>
|
|
trNoError
|
|
</dt>
|
|
<dd>
|
|
The string was successfully converted without any error
|
|
</dd>
|
|
<dt>
|
|
trNullSrc
|
|
</dt>
|
|
<dd>
|
|
Pointer to source string is nil
|
|
</dd>
|
|
<dt>
|
|
trNullDest
|
|
</dt>
|
|
<dd>
|
|
Pointer to destination string is nil
|
|
</dd>
|
|
<dt>
|
|
trDestExhausted
|
|
</dt>
|
|
<dd>
|
|
Destination buffer size is not big enough to hold converted string
|
|
</dd>
|
|
<dt>
|
|
trInvalidChar
|
|
</dt>
|
|
<dd>
|
|
Invalid source char has occurred
|
|
</dd>
|
|
<dt>
|
|
trUnfinishedChar
|
|
</dt>
|
|
<dd>
|
|
Unfinished source char has occurred
|
|
</dd>
|
|
</dl>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.Result">
|
|
<short>TConvertResult value indicating the outcome of the conversion.</short>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.Dest">
|
|
<short>Pointer to destination string.</short>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.DestWideCharCount">
|
|
<short>Wide char count allocated in destination string.</short>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.Src">
|
|
<short>Pointer to source string.</short>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.SrcCharCount">
|
|
<short>Char count allocated in source string.</short>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.Options">
|
|
<short>
|
|
Conversion options. If none is set, both invalid and unfinished source chars
|
|
are skipped.
|
|
</short>
|
|
</element>
|
|
<element name="ConvertUTF8ToUTF16.ActualWideCharCount">
|
|
<short>Actual WideChar count used in the conversion.</short>
|
|
</element>
|
|
|
|
<element name="ConvertUTF16ToUTF8">
|
|
<short>Converts values from UTF-16 encoding to UTF-8 encoding.</short>
|
|
<descr>
|
|
<p>
|
|
Converts the specified UTF-16 encoded string (system endian) to its UTF-8
|
|
encoding.
|
|
</p>
|
|
<p>
|
|
<var>Options</var> indicates the conversion options enabled in the function,
|
|
and can include the following values:
|
|
</p>
|
|
<dl>
|
|
<dt>toInvalidCharError</dt>
|
|
<dd>
|
|
Stop on invalid source char and report error
|
|
</dd>
|
|
<dt>toInvalidCharToSymbol</dt>
|
|
<dd>
|
|
Replace invalid source chars with '?'
|
|
</dd>
|
|
<dt>toUnfinishedCharError</dt>
|
|
<dd>
|
|
Stop on unfinished source char and report error
|
|
</dd>
|
|
<dt>toUnfinishedCharToSymbol</dt>
|
|
<dd>
|
|
Replace unfinished source char with '?'
|
|
</dd>
|
|
</dl>
|
|
<p>
|
|
The return value is a value from the <var>TConvertResult</var> enumeration,
|
|
including:
|
|
</p>
|
|
<dl>
|
|
<dt>
|
|
trNoError
|
|
</dt>
|
|
<dd>
|
|
The string was successfully converted without any error
|
|
</dd>
|
|
<dt>
|
|
trNullSrc
|
|
</dt>
|
|
<dd>
|
|
Pointer to source string is nil
|
|
</dd>
|
|
<dt>
|
|
trNullDest
|
|
</dt>
|
|
<dd>
|
|
Pointer to destination string is nil
|
|
</dd>
|
|
<dt>
|
|
trDestExhausted
|
|
</dt>
|
|
<dd>
|
|
Destination buffer size is not big enough to hold converted string
|
|
</dd>
|
|
<dt>
|
|
trInvalidChar
|
|
</dt>
|
|
<dd>
|
|
Invalid source char has occurred
|
|
</dd>
|
|
<dt>
|
|
trUnfinishedChar
|
|
</dt>
|
|
<dd>
|
|
Unfinished source char has occurred
|
|
</dd>
|
|
</dl>
|
|
</descr>
|
|
<seealso>
|
|
<link id="TConvertOptions"/>
|
|
<link id="TConvertOption"/>
|
|
<link id="TConvertResult"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.Result">
|
|
<short>TConvertResult value indicating the outcome of the conversion.</short>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.Dest">
|
|
<short>Pointer to destination string.</short>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.DestCharCount">
|
|
<short>Char count allocated in destination string.</short>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.Src">
|
|
<short>Pointer to source string.</short>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.SrcWideCharCount">
|
|
<short>Wide char count allocated in source string.</short>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.Options">
|
|
<short>Conversion options. If none is set, both
|
|
invalid and unfinished source chars are skipped.</short>
|
|
</element>
|
|
<element name="ConvertUTF16ToUTF8.ActualCharCount">
|
|
<short>
|
|
Actual char count converted from source string to destination string.
|
|
</short>
|
|
</element>
|
|
|
|
<element name="UTF8ToUTF16">
|
|
<short>
|
|
Converts the UTF-8 encoded string to UTF-16 encoding (system endian).
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
Converts the UTF-8 encoded string to UTF-16 encoding (system endian).
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
|
|
<element name="UTF16ToUTF8">
|
|
<short>
|
|
Converts a UTF-16-encoded string (system endian) to UTF-8 encoding.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
UTF16ToUTF8 is a TConvertResult function used to convert the specified
|
|
UTF-16-encoded string (system endian) to UTF-8 encoding.
|
|
</p>
|
|
<p>
|
|
The return value is a <var>TConvertResult</var> enumeration value, and
|
|
includes:
|
|
</p>
|
|
<dl>
|
|
<dt>
|
|
trNoError
|
|
</dt>
|
|
<dd>
|
|
The string was successfully converted without any error
|
|
</dd>
|
|
<dt>
|
|
trNullSrc
|
|
</dt>
|
|
<dd>
|
|
Pointer to source string is Nil
|
|
</dd>
|
|
<dt>
|
|
trNullDest
|
|
</dt>
|
|
<dd>
|
|
Pointer to destination string is Nil
|
|
</dd>
|
|
<dt>
|
|
trDestExhausted
|
|
</dt>
|
|
<dd>
|
|
Destination buffer size is not big enough to hold converted string
|
|
</dd>
|
|
<dt>
|
|
trInvalidChar
|
|
</dt>
|
|
<dd>
|
|
Invalid source char has occurred
|
|
</dd>
|
|
<dt>
|
|
trUnfinishedChar
|
|
</dt>
|
|
<dd>
|
|
Unfinished source char has occurred
|
|
</dd>
|
|
</dl>
|
|
</descr>
|
|
<seealso>
|
|
<link id="TConvertResult"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="UTF16ToUTF8.Result">
|
|
<short>UTF-8-encoded string.</short>
|
|
</element>
|
|
<element name="UTF16ToUTF8.S">
|
|
<short>Source UTF-16 string (system endian).</short>
|
|
</element>
|
|
<element name="UTF16ToUTF8.P">
|
|
<short>Pointer to the Source UTF-16 string (system endian).</short>
|
|
</element>
|
|
<element name="UTF16ToUTF8.WideCnt">
|
|
<short>Number of WideChar values in the source string.</short>
|
|
</element>
|
|
|
|
<element name="LazGetLanguageIDs">
|
|
<short>
|
|
Deprecated. Use the GetLanguageID function from the
|
|
<file>translations.pas</file> unit instead.
|
|
</short>
|
|
<descr/>
|
|
<version>
|
|
Deprecated in LazUtils version 2.3.0.
|
|
</version>
|
|
<seealso>
|
|
<link id="#lazutils.translations.GetLanguageID"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="LazGetLanguageIDs.Lang"/>
|
|
<element name="LazGetLanguageIDs.FallbackLang"/>
|
|
|
|
<element name="LazGetShortLanguageID">
|
|
<short>
|
|
Deprecated. Use the GetLanguageID function from the
|
|
<file>translations.pas</file> unit instead.
|
|
</short>
|
|
<descr/>
|
|
<version>
|
|
Deprecated in LazUtils version 2.3.0.
|
|
</version>
|
|
<seealso>
|
|
<link id="#lazutils.translations.GetLanguageID"/>
|
|
</seealso>
|
|
</element>
|
|
<element name="LazGetShortLanguageID.Lang"/>
|
|
|
|
<element name="FPUpChars">
|
|
<short>
|
|
Contains uppercase characters for all values in the char type.
|
|
</short>
|
|
<descr>
|
|
<p>
|
|
<var>FPUpChars</var> is an array of char type and uses the Lower and Upper
|
|
bounds permitted for the char type. Values in FPUpChars are assigned in the
|
|
initialization section for the <file>lazutf8.pas</file> unit, and contain
|
|
the uppercase equivalent for all characters in the char type.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
|
|
<element name="UTF8GetStandardCodePage">
|
|
<short>Gets the default system code page for the wide string manager.</short>
|
|
<descr>
|
|
<p>
|
|
<var>UTF8GetStandardCodePage</var> is a <var>TSystemCodePage</var> function
|
|
used to get the default code page for strings in the Wide String manager.
|
|
UTF8GetStandardCodePage is implemented for Windows platforms that use a
|
|
UTF-8-enabled Run-time Library (RTL). It is assigned as the procedure used by
|
|
the wide string manager for the platform.
|
|
</p>
|
|
<p>
|
|
<var>stdcp</var> contains the <var>TStandardCodePageEnum</var> enumeration
|
|
value that identifies the default code page for the platform.
|
|
</p>
|
|
<p>
|
|
The return value is set to the <var>CP_UTF8</var> constant.
|
|
</p>
|
|
</descr>
|
|
<seealso/>
|
|
</element>
|
|
|
|
</module>
|
|
<!-- LazUTF8 -->
|
|
</package>
|
|
</fpdoc-descriptions>
|