From a56d94230fca12a4f93a4c20c35698941c5a8ddb Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Mon, 23 Feb 2015 22:48:11 +0000 Subject: [PATCH] + iosxwstr unit that uses CoreFoundation where possible for the widestring manager routines (in particular for locale-based routines, such as upper/ lowercase), and which falls back to cwstring for functionality not supported by CoreFoundation (such as converting an UTF-8 string with illegal sequences to a different encoding) o this unit is particularly useful on iOS 7 (and 8?), as Apple no longer includes command line locale information on that platform and hence locale-dependent libc routines only work with the "C" locale there) git-svn-id: trunk@29828 - --- .gitattributes | 1 + packages/iosxlocale/Makefile.fpc.fpcmake | 2 +- packages/iosxlocale/fpmake.pp | 1 + packages/iosxlocale/src/iosxwstr.pp | 665 +++++++++++++++++++++++ 4 files changed, 668 insertions(+), 1 deletion(-) create mode 100644 packages/iosxlocale/src/iosxwstr.pp diff --git a/.gitattributes b/.gitattributes index 9c310e6265..1eb0625ab8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4458,6 +4458,7 @@ packages/iosxlocale/Makefile.fpc svneol=native#text/plain packages/iosxlocale/Makefile.fpc.fpcmake svneol=native#text/plain packages/iosxlocale/fpmake.pp svneol=native#text/plain packages/iosxlocale/src/iosxlocale.pp svneol=native#text/plain +packages/iosxlocale/src/iosxwstr.pp svneol=native#text/plain packages/jni/Makefile svneol=native#text/plain packages/jni/Makefile.fpc svneol=native#text/plain packages/jni/fpmake.pp svneol=native#text/plain diff --git a/packages/iosxlocale/Makefile.fpc.fpcmake b/packages/iosxlocale/Makefile.fpc.fpcmake index 5f199c501c..ed461664d9 100644 --- a/packages/iosxlocale/Makefile.fpc.fpcmake +++ b/packages/iosxlocale/Makefile.fpc.fpcmake @@ -7,7 +7,7 @@ name=univint version=3.1.1 [target] -units=iosxlocale +units=iosxlocale iosxwstr [libs] libversion=2.0.0 diff --git a/packages/iosxlocale/fpmake.pp b/packages/iosxlocale/fpmake.pp index c21424bf30..0f656ecc6c 100644 --- a/packages/iosxlocale/fpmake.pp +++ b/packages/iosxlocale/fpmake.pp @@ -22,6 +22,7 @@ begin P.OSes:=[darwin,iphonesim]; T:=P.Targets.AddUnit('iosxlocale.pp'); + T:=P.Targets.AddUnit('iosxwstr.pp'); {$ifndef ALLPACKAGES} Run; diff --git a/packages/iosxlocale/src/iosxwstr.pp b/packages/iosxlocale/src/iosxwstr.pp new file mode 100644 index 0000000000..6b300dbe71 --- /dev/null +++ b/packages/iosxlocale/src/iosxwstr.pp @@ -0,0 +1,665 @@ +{ + This file is part of the Free Pascal run time library. + Copyright (c) 2015 by Jonas Maebe, + member of the Free Pascal development team. + + CoreFoundation-based wide string support + + See the file COPYING.FPC, included in this distribution, + for details about the copyright. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + **********************************************************************} + +{$mode objfpc} +{$implicitexceptions off} + +unit iosxwstr; + +interface + +{$linkframework CoreFoundation} + + procedure SetCFWidestringManager; + +implementation + + uses + unixcp, + { for access to libiconv-based routines } + cwstring, + MacTypes, + CFBase,CFString,CFStringEncodingExt,CFLocale; + + procedure fpc_rangeerror; [external name 'FPC_RANGEERROR']; + + var + CWStringWideStringManager: TUnicodeStringManager; + + procedure InitThread; + begin + { we don't need anything special, but since we may use cwstring itself, + call through to it } + CWStringWideStringManager.ThreadInitProc; + end; + + + procedure FiniThread; + begin + { we don't need anything special, but since we may use cwstring itself, + call through to it } + CWStringWideStringManager.ThreadFiniProc; + end; + + + function get_cfencoding_for_cp(cp: TSystemCodePage): CFStringEncoding; + var + defscp: TSystemCodePage; + begin + { translate placeholder code pages } + if (cp=CP_ACP) or + (cp=CP_OEMCP) then + cp:=DefaultSystemCodePage; + result:=CFStringConvertWindowsCodepageToEncoding(cp); + end; + + + procedure GetAnsiDataFromCFString(str: CFstringRef; cfcp: CFStringEncoding; estimated_length: SizeInt; var dest: RawByteString); + var + range: CFRange; + encodedlen,convertedchars: CFIndex; + strlen: SizeInt; + begin + { first rough estimate for the length } + setlength(dest,estimated_length); + { try to convert } + range.location:=0; + strlen:=CFStringGetLength(str); + range.length:=strlen; + convertedchars:=CFStringGetBytes(str,range,cfcp,ByteParameter('?'),false,UInt8Ptr(dest),estimated_length,encodedlen); + { failed -> bail out } + if convertedchars<0 then + begin + CFRelease(str); + runerror(231); + end + { if partially succesful, recreate with the required len } + else if convertedcharsstrlen then + begin + CFRelease(str); + runerror(231); + end; + end; + { truncate } + setlength(dest,encodedlen); + end; + + + function CFStringCreateFromAnsiData(data: pchar; len: SizeInt; cp: TSystemCodePage): CFStringRef; + var + strlen,encodedlen: CFIndex; + range: CFRange; + cfcp: CFStringEncoding; + begin + result:=nil; + { get source cf codepage } + cfcp:=get_cfencoding_for_cp(cp); + { unsupported encoding -> try libiconv instead } + if cfcp=kCFStringEncodingInvalidId then + exit; + { make a cfstring from the original data } + result:=CFStringCreateWithBytesNoCopy(nil,UnivPtr(data),len,cfcp,false,kCFAllocatorNull); + end; + + + function CFStringCreateFromAnsiDataOptionallyViaUnicodeString(data: pchar; len: SizeInt; cp: TSystemCodePage; out wtemp: UnicodeString): CFStringRef; + begin + result:=CFStringCreateFromAnsiData(data,len,cp); + { failed -> translate via libiconv and then create using the unicodestring + characters; since we use the no-copy constructor for performance + reasons, the unicodestring has to survive this routine } + if not assigned(result) then + begin + CWStringWideStringManager.Ansi2UnicodeMoveProc(data,cp,wtemp,len); + result:=CFStringCreateWithCharactersNoCopy(nil,UniCharPtr(wtemp),len,kCFAllocatorNull); + end; + end; + + + function CFStringCreateFromWideData(data: pwidechar; len: SizeInt): CFStringRef; inline; + begin + { make a cfstring from the utf-16 data } + result:=CFStringCreateWithCharactersNoCopy(nil,UniCharPtr(data),len,kCFAllocatorNull); + end; + + + function CFStringCreateFromWideDataOptionallyViaUUTF8String(data: pwidechar; len: SizeInt; out temp: RawByteString): CFStringRef; + begin + result:=CFStringCreateFromWideData(data,len); + { failed -> translate to UTF-8 via libiconv to filter out any + potentially invalid characters and then create using the unicodestring + characters; since we use the no-copy constructor for performance + reasons, the unicodestring has to survive this routine } + if not assigned(result) then + begin + CWStringWideStringManager.Unicode2AnsiMoveProc(data,temp,CP_UTF8,len); + result:=CFStringCreateWithBytesNoCopy(nil,UnivPtr(temp),length(temp),kCFStringEncodingUTF8,false,kCFAllocatorNull); + if not assigned(result) then + runerror(231) + end; + end; + + + procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt); + var + str: CFStringRef; + strlen,estimatedlen: CFIndex; + cfcp: CFStringEncoding; + begin + str:=nil; + { get target cf codepage } + cfcp:=get_cfencoding_for_cp(cp); + { unsupported encoding -> default move } + if cfcp<>kCFStringEncodingInvalidId then + { make a cfstring from the utf-16 data } + str:=CFStringCreateFromWideData(source,len); + { You cannot create a CFString for a sequence that contains an error :/ + We want to replace the error positions with '?' -> fall back to libiconv + } + if not assigned(str) then + begin + CWStringWideStringManager.Unicode2AnsiMoveProc(source,dest,cp,len); + exit; + end; + + GetAnsiDataFromCFString(str,cfcp,len*3,dest); + { set code page } + SetCodePage(dest,cp,false); + { release cfstring } + CFRelease(str); + end; + + + procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt); + var + str: CFStringRef; + strlen,encodedlen: CFIndex; + range: CFRange; + cfcp: CFStringEncoding; + begin + str:=CFStringCreateFromAnsiData(source,len,cp); + { You cannot create a CFString for a sequence that contains an error :/ + We want to replace the error positions with '?' -> fall back to libiconv + } + if not assigned(str) then + begin + CWStringWideStringManager.Ansi2UnicodeMoveProc(source,cp,dest,len); + exit; + end; + + { convert } + range.location:=0; + strlen:=CFStringGetLength(str); + range.length:=strlen; + setlength(dest,strlen); + CFStringGetCharacters(str,range,UniCharPtr(dest)); + { release cfstring } + CFRelease(str); + end; + + + function LowerWideString(const s : WideString) : WideString; + var + str: CFStringRef; + mstr: CFMutableStringRef; + range: CFRange; + encodedlen: CFIndex; + locale: CFLocaleRef; + temp: RawByteString; + begin + { empty string -> exit } + if s='' then + begin + result:=''; + exit; + end; + { create cfstring from the string data } + str:=CFStringCreateFromWideDataOptionallyViaUUTF8String(pwidechar(s),length(s),temp); + { convert to mutable cfstring } + mstr:=CFStringCreateMutableCopy(nil,0,str); + { lowercase } + locale:=CFLocaleCopyCurrent; + CFStringLowercase(mstr,CFLocaleCopyCurrent); + CFRelease(locale); + { extract the data again } + range.location:=0; + range.length:=CFStringGetLength(CFStringRef(mstr)); + setlength(result,range.length); + CFStringGetCharacters(mstr,range,UniCharPtr(result)); + CFRelease(mstr); + CFRelease(str); + end; + + + function UpperWideString(const s : WideString) : WideString; + var + str: CFStringRef; + mstr: CFMutableStringRef; + range: CFRange; + encodedlen: CFIndex; + locale: CFLocaleRef; + temp: RawByteString; + begin + { empty string -> exit } + if s='' then + begin + result:=''; + exit; + end; + { create cfstring from the string data } + str:=CFStringCreateFromWideDataOptionallyViaUUTF8String(pwidechar(s),length(s),temp); + { convert to mutable cfstring } + mstr:=CFStringCreateMutableCopy(nil,0,str); + { lowercase } + locale:=CFLocaleCopyCurrent; + CFStringUppercase(mstr,locale); + CFRelease(locale); + { extract the data again } + range.location:=0; + range.length:=CFStringGetLength(CFStringRef(mstr)); + setlength(result,range.length); + CFStringGetCharacters(mstr,range,UniCharPtr(result)); + CFRelease(mstr); + CFRelease(str); + end; + + + function UpperLowerAnsiString(const s: AnsiString; upper: boolean): AnsiString; + var + str: CFStringRef; + mstr: CFMutableStringRef; + cfcp: CFStringEncoding; + locale: CFLocaleRef; + wtemp: UnicodeString; + range: CFRange; + begin + if s='' then + begin + result:=''; + exit + end; + str:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(pchar(s),length(s),StringCodePage(s),wtemp); + { unsupported encoding for either CF or iconv -> return original string } + if not assigned(str) then + begin + result:=s; + exit; + end; + { convert to mutable cfstring } + mstr:=CFStringCreateMutableCopy(nil,0,str); + CFRelease(str); + { upper/lowercase } + locale:=CFLocaleCopyCurrent; + if upper then + CFStringUppercase(mstr,locale) + else + CFStringLowercase(mstr,locale); + CFRelease(locale); + { convert back to ansistring } + cfcp:=get_cfencoding_for_cp(StringCodePage(s)); + if cfcp<>kCFStringEncodingInvalidId then + begin + GetAnsiDataFromCFString(CFStringRef(mstr),cfcp,length(s),RawByteString(result)); + SetCodePage(RawByteString(result),StringCodePage(s),false); + end + else + begin + { unsupported encoding -> use libiconv instead via UTF-16 + intermediate } + range.location:=0; + range.length:=CFStringGetLength(mstr); + SetLength(wtemp,range.length); + CFStringGetCharacters(mstr,range,UniCharPtr(wtemp)); + CWStringWideStringManager.Wide2AnsiMoveProc(pwidechar(wtemp),RawByteString(result),StringCodePage(s),range.length); + end; + CFRelease(mstr); + end; + + + function LowerAnsiString(const s: AnsiString): AnsiString; + begin + result:=UpperLowerAnsiString(s,false); + end; + + + function UpperAnsiString(const s: AnsiString): AnsiString; + begin + result:=UpperLowerAnsiString(s,true); + end; + + + function CompareCFStrings(const s1, s2: CFStringRef; case_insensitive: boolean): longint; + var + flags: CFStringCompareFlags; + begin + flags:=0; + if case_insensitive then + flags:=flags or kCFCompareCaseInsensitive; + result:=CFStringCompare(s1,s2,flags) + end; + + + function CompareWideString(const s1, s2 : WideString) : PtrInt; + var + cfstr1, cfstr2: CFStringRef; + temp1, temp2: RawByteString; + begin + cfstr1:=CFStringCreateFromWideDataOptionallyViaUUTF8String(pwidechar(s1),length(s1),temp1); + cfstr2:=CFStringCreateFromWideDataOptionallyViaUUTF8String(pwidechar(s2),length(s2),temp2); + result:=CompareCFStrings(cfstr1,cfstr2,false); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + function CompareTextWideString(const s1, s2 : WideString): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + temp1, temp2: RawByteString; + begin + cfstr1:=CFStringCreateFromWideDataOptionallyViaUUTF8String(pwidechar(s1),length(s1),temp1); + cfstr2:=CFStringCreateFromWideDataOptionallyViaUUTF8String(pwidechar(s2),length(s2),temp2); + result:=CompareCFStrings(cfstr1,cfstr2,true); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + function InternalCodePointLength(const Str: PChar; cfcp: CFStringEncoding; maxlookahead: ptrint): PtrInt; + var + cfstr: CFStringRef; + begin + result:=0; + { try creating a string with the first 1, 2, ... bytes until we find a + valid one } + while (str[result]<>#0) and + (result skip } + if tmplen=-1 then + tmplen:=1; + inc(s,tmplen); + inc(result); + until s[0]=#0; + end; + + + function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt; + var + cfstr: CFStringRef; + cfcp: CFStringEncoding; + begin + result:=0; + if str[0]=#0 then + exit; + cfcp:=get_cfencoding_for_cp(DefaultSystemCodePage); + if cfcp=kCFStringEncodingInvalidId then + begin + { if we would return -1, then the caller would keep trying with + longer and longer sequences, but that wouldn't change anything } + result:=1; + exit + end; + result:=InternalCodePointLength(str,cfcp,maxlookahead); + end; + + + function CompareStrAnsiString(const s1, s2: ansistring): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + wtemp1, wtemp2: UnicodeString; + begin + cfstr1:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(pchar(s1),length(s1),StringCodePage(s1),wtemp1); + cfstr2:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(pchar(s2),length(s2),StringCodePage(s2),wtemp2); + result:=CompareCFStrings(cfstr1,cfstr2,false); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + function StrCompAnsi(s1,s2 : PChar): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + wtemp1, wtemp2: UnicodeString; + begin + cfstr1:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s1,strlen(s1),DefaultSystemCodePage,wtemp1); + cfstr2:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s2,strlen(s2),DefaultSystemCodePage,wtemp2); + result:=CompareCFStrings(cfstr1,cfstr2,false); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + function AnsiCompareText(const S1, S2: ansistring): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + wtemp1, wtemp2: UnicodeString; + begin + cfstr1:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(pchar(s1),length(s1),DefaultSystemCodePage,wtemp1); + cfstr2:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(pchar(s2),length(s2),DefaultSystemCodePage,wtemp2); + result:=CompareCFStrings(cfstr1,cfstr2,true); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + function AnsiStrIComp(S1, S2: PChar): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + wtemp1, wtemp2: UnicodeString; + begin + cfstr1:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s1,strlen(s1),DefaultSystemCodePage,wtemp1); + cfstr2:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s2,strlen(s2),DefaultSystemCodePage,wtemp2); + result:=CompareCFStrings(cfstr1,cfstr2,true); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + wtemp1, wtemp2: UnicodeString; + begin + cfstr1:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s1,MaxLen,StringCodePage(s1),wtemp1); + cfstr2:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s2,MaxLen,StringCodePage(s2),wtemp2); + result:=CompareCFStrings(cfstr1,cfstr2,false); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt; + var + cfstr1, cfstr2: CFStringRef; + wtemp1, wtemp2: UnicodeString; + begin + cfstr1:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s1,MaxLen,StringCodePage(s1),wtemp1); + cfstr2:=CFStringCreateFromAnsiDataOptionallyViaUnicodeString(s2,MaxLen,StringCodePage(s2),wtemp2); + result:=CompareCFStrings(cfstr1,cfstr2,true); + CFRelease(cfstr1); + CFRelease(cfstr2); + end; + + + procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar); + var + newlen: sizeint; + begin + newlen:=length(s); + if newlen>strlen(orgp) then + fpc_rangeerror; + p:=orgp; + if (newlen>0) then + move(s[1],p[0],newlen); + p[newlen]:=#0; + end; + + + function AnsiStrLower(Str: PChar): PChar; + var + temp: ansistring; + begin + temp:=loweransistring(str); + ansi2pchar(temp,str,result); + end; + + + function AnsiStrUpper(Str: PChar): PChar; + var + temp: ansistring; + begin + temp:=upperansistring(str); + ansi2pchar(temp,str,result); + end; + + + function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage; + var + langinfo: pchar; + begin + { don't use CFStringGetSystemEncoding, that one returns MacRoman on e.g. + an English system, which is definitely not what we want. Since there are + no "ansi" interfaces on OS X and all APIs support all characters, always + use UTF-8. Additionally,  Darwin always uses UTF-8 for file system + operations } + result:=CP_UTF8; + end; + + + procedure SetStdIOCodePage(var T: Text); inline; + begin + case TextRec(T).Mode of + fmInput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleInput); + fmOutput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleOutput); + end; + end; + + + procedure SetStdIOCodePages; inline; + begin + SetStdIOCodePage(Input); + SetStdIOCodePage(Output); + SetStdIOCodePage(ErrOutput); + SetStdIOCodePage(StdOut); + SetStdIOCodePage(StdErr); + end; + + + procedure SetCFWideStringManager; + var + CFWideStringManager : TUnicodeStringManager; + begin + GetUnicodeStringManager(CWStringWideStringManager); + CFWideStringManager:=CWStringWideStringManager; + with CFWideStringManager do + begin + Wide2AnsiMoveProc:=@Wide2AnsiMove; + Ansi2WideMoveProc:=@Ansi2WideMove; + + UpperWideStringProc:=@UpperWideString; + LowerWideStringProc:=@LowerWideString; + + CompareWideStringProc:=@CompareWideString; + CompareTextWideStringProc:=@CompareTextWideString; + + CharLengthPCharProc:=@CharLengthPChar; + CodePointLengthProc:=@CodePointLength; + + UpperAnsiStringProc:=@UpperAnsiString; + LowerAnsiStringProc:=@LowerAnsiString; + CompareStrAnsiStringProc:=@CompareStrAnsiString; + CompareTextAnsiStringProc:=@AnsiCompareText; + StrCompAnsiStringProc:=@StrCompAnsi; + StrICompAnsiStringProc:=@AnsiStrIComp; + StrLCompAnsiStringProc:=@AnsiStrLComp; + StrLICompAnsiStringProc:=@AnsiStrLIComp; + StrLowerAnsiStringProc:=@AnsiStrLower; + StrUpperAnsiStringProc:=@AnsiStrUpper; + ThreadInitProc:=@InitThread; + ThreadFiniProc:=@FiniThread; + { Unicode } + Unicode2AnsiMoveProc:=@Wide2AnsiMove; + Ansi2UnicodeMoveProc:=@Ansi2WideMove; + UpperUnicodeStringProc:=@UpperWideString; + LowerUnicodeStringProc:=@LowerWideString; + CompareUnicodeStringProc:=@CompareWideString; + CompareTextUnicodeStringProc:=@CompareTextWideString; + { CodePage } + GetStandardCodePageProc:=@GetStandardCodePage; + end; + SetUnicodeStringManager(CFWideStringManager); + end; + + +initialization + SetCFWideStringManager; + + { set the DefaultSystemCodePage } + DefaultSystemCodePage:=GetStandardCodePage(scpAnsi); + DefaultFileSystemCodePage:=GetStandardCodePage(scpFileSystemSingleByte); + DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage; + + SetStdIOCodePages; + + { don't call init, we don't need to do anything and the cwstring routine we + call through has already been called from the init code of cwstring itself + InitThread; + } +finalization + { don't call for the same reason as not calling FiniThread + FiniThread; + } + { restore previous widestring manager so that subsequent calls + into the widestring manager won't trigger the finalized functionality } + SetWideStringManager(CWStringWideStringManager); +end.