merge r17434 from cpstrnew branch by michael:

* Patch from Inoussa to fix constant strings with codepage

git-svn-id: trunk@19109 -
This commit is contained in:
paul 2011-09-17 13:19:59 +00:00
parent 4b9a082152
commit 1db610ecbd
20 changed files with 583 additions and 57 deletions

1
.gitattributes vendored
View File

@ -123,6 +123,7 @@ compiler/compinnr.inc svneol=native#text/plain
compiler/comprsrc.pas svneol=native#text/plain
compiler/constexp.pas svneol=native#text/x-pascal
compiler/cp1251.pas svneol=native#text/plain
compiler/cp1252.pp svneol=native#text/plain
compiler/cp437.pas svneol=native#text/plain
compiler/cp850.pas svneol=native#text/plain
compiler/cp866.pas svneol=native#text/plain

View File

@ -41,7 +41,8 @@ unit ccharset;
punicodemap = ^tunicodemap;
tunicodemap = record
cpname : string[20];
cpname : string[20];
cp : word;
map : punicodecharmapping;
lastchar : longint;
next : punicodemap;
@ -51,9 +52,10 @@ unit ccharset;
tcp2unicode = class(tcsconvert)
end;
function loadunicodemapping(const cpname,f : string) : punicodemap;
function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
procedure registermapping(p : punicodemap);
function getmap(const s : string) : punicodemap;
function getmap(const s : string) : punicodemap;
function getmap(cp : word) : punicodemap;
function mappingavailable(const s : string) : boolean;
function getunicode(c : char;p : punicodemap) : tunicodechar;
function getascii(c : tunicodechar;p : punicodemap) : string;
@ -63,7 +65,7 @@ unit ccharset;
var
mappings : punicodemap;
function loadunicodemapping(const cpname,f : string) : punicodemap;
function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
var
data : punicodecharmapping;
@ -158,6 +160,7 @@ unit ccharset;
new(p);
p^.lastchar:=lastchar;
p^.cpname:=cpname;
p^.cp:=cp;
p^.internalmap:=false;
p^.next:=nil;
p^.map:=data;
@ -199,7 +202,37 @@ unit ccharset;
hp:=hp^.next;
end;
getmap:=nil;
end;
end;
function getmap(cp : word) : punicodemap;
var
hp : punicodemap;
const
mapcache : word = 0;
mapcachep : punicodemap = nil;
begin
if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
begin
getmap:=mapcachep;
exit;
end;
hp:=mappings;
while assigned(hp) do
begin
if hp^.cp=cp then
begin
getmap:=hp;
mapcache:=cp;
mapcachep:=hp;
exit;
end;
hp:=hp^.next;
end;
getmap:=nil;
end;
function mappingavailable(const s : string) : boolean;

View File

@ -269,7 +269,8 @@ unit cp1251;
);
unicodemap : tunicodemap = (
cpname : 'cp1251';
cpname : 'cp1251';
cp : 1251;
map : @map;
lastchar : 255;
next : nil;

282
compiler/cp1252.pp Normal file
View File

@ -0,0 +1,282 @@
{ This is an automatically created file, so don't edit it }
unit CP1252;
interface
implementation
uses
charset;
const
map : array[0..255] of tunicodecharmapping = (
(unicode : 0; flag : umf_noinfo; reserved: 0),
(unicode : 1; flag : umf_noinfo; reserved: 0),
(unicode : 2; flag : umf_noinfo; reserved: 0),
(unicode : 3; flag : umf_noinfo; reserved: 0),
(unicode : 4; flag : umf_noinfo; reserved: 0),
(unicode : 5; flag : umf_noinfo; reserved: 0),
(unicode : 6; flag : umf_noinfo; reserved: 0),
(unicode : 7; flag : umf_noinfo; reserved: 0),
(unicode : 8; flag : umf_noinfo; reserved: 0),
(unicode : 9; flag : umf_noinfo; reserved: 0),
(unicode : 10; flag : umf_noinfo; reserved: 0),
(unicode : 11; flag : umf_noinfo; reserved: 0),
(unicode : 12; flag : umf_noinfo; reserved: 0),
(unicode : 13; flag : umf_noinfo; reserved: 0),
(unicode : 14; flag : umf_noinfo; reserved: 0),
(unicode : 15; flag : umf_noinfo; reserved: 0),
(unicode : 16; flag : umf_noinfo; reserved: 0),
(unicode : 17; flag : umf_noinfo; reserved: 0),
(unicode : 18; flag : umf_noinfo; reserved: 0),
(unicode : 19; flag : umf_noinfo; reserved: 0),
(unicode : 20; flag : umf_noinfo; reserved: 0),
(unicode : 21; flag : umf_noinfo; reserved: 0),
(unicode : 22; flag : umf_noinfo; reserved: 0),
(unicode : 23; flag : umf_noinfo; reserved: 0),
(unicode : 24; flag : umf_noinfo; reserved: 0),
(unicode : 25; flag : umf_noinfo; reserved: 0),
(unicode : 26; flag : umf_noinfo; reserved: 0),
(unicode : 27; flag : umf_noinfo; reserved: 0),
(unicode : 28; flag : umf_noinfo; reserved: 0),
(unicode : 29; flag : umf_noinfo; reserved: 0),
(unicode : 30; flag : umf_noinfo; reserved: 0),
(unicode : 31; flag : umf_noinfo; reserved: 0),
(unicode : 32; flag : umf_noinfo; reserved: 0),
(unicode : 33; flag : umf_noinfo; reserved: 0),
(unicode : 34; flag : umf_noinfo; reserved: 0),
(unicode : 35; flag : umf_noinfo; reserved: 0),
(unicode : 36; flag : umf_noinfo; reserved: 0),
(unicode : 37; flag : umf_noinfo; reserved: 0),
(unicode : 38; flag : umf_noinfo; reserved: 0),
(unicode : 39; flag : umf_noinfo; reserved: 0),
(unicode : 40; flag : umf_noinfo; reserved: 0),
(unicode : 41; flag : umf_noinfo; reserved: 0),
(unicode : 42; flag : umf_noinfo; reserved: 0),
(unicode : 43; flag : umf_noinfo; reserved: 0),
(unicode : 44; flag : umf_noinfo; reserved: 0),
(unicode : 45; flag : umf_noinfo; reserved: 0),
(unicode : 46; flag : umf_noinfo; reserved: 0),
(unicode : 47; flag : umf_noinfo; reserved: 0),
(unicode : 48; flag : umf_noinfo; reserved: 0),
(unicode : 49; flag : umf_noinfo; reserved: 0),
(unicode : 50; flag : umf_noinfo; reserved: 0),
(unicode : 51; flag : umf_noinfo; reserved: 0),
(unicode : 52; flag : umf_noinfo; reserved: 0),
(unicode : 53; flag : umf_noinfo; reserved: 0),
(unicode : 54; flag : umf_noinfo; reserved: 0),
(unicode : 55; flag : umf_noinfo; reserved: 0),
(unicode : 56; flag : umf_noinfo; reserved: 0),
(unicode : 57; flag : umf_noinfo; reserved: 0),
(unicode : 58; flag : umf_noinfo; reserved: 0),
(unicode : 59; flag : umf_noinfo; reserved: 0),
(unicode : 60; flag : umf_noinfo; reserved: 0),
(unicode : 61; flag : umf_noinfo; reserved: 0),
(unicode : 62; flag : umf_noinfo; reserved: 0),
(unicode : 63; flag : umf_noinfo; reserved: 0),
(unicode : 64; flag : umf_noinfo; reserved: 0),
(unicode : 65; flag : umf_noinfo; reserved: 0),
(unicode : 66; flag : umf_noinfo; reserved: 0),
(unicode : 67; flag : umf_noinfo; reserved: 0),
(unicode : 68; flag : umf_noinfo; reserved: 0),
(unicode : 69; flag : umf_noinfo; reserved: 0),
(unicode : 70; flag : umf_noinfo; reserved: 0),
(unicode : 71; flag : umf_noinfo; reserved: 0),
(unicode : 72; flag : umf_noinfo; reserved: 0),
(unicode : 73; flag : umf_noinfo; reserved: 0),
(unicode : 74; flag : umf_noinfo; reserved: 0),
(unicode : 75; flag : umf_noinfo; reserved: 0),
(unicode : 76; flag : umf_noinfo; reserved: 0),
(unicode : 77; flag : umf_noinfo; reserved: 0),
(unicode : 78; flag : umf_noinfo; reserved: 0),
(unicode : 79; flag : umf_noinfo; reserved: 0),
(unicode : 80; flag : umf_noinfo; reserved: 0),
(unicode : 81; flag : umf_noinfo; reserved: 0),
(unicode : 82; flag : umf_noinfo; reserved: 0),
(unicode : 83; flag : umf_noinfo; reserved: 0),
(unicode : 84; flag : umf_noinfo; reserved: 0),
(unicode : 85; flag : umf_noinfo; reserved: 0),
(unicode : 86; flag : umf_noinfo; reserved: 0),
(unicode : 87; flag : umf_noinfo; reserved: 0),
(unicode : 88; flag : umf_noinfo; reserved: 0),
(unicode : 89; flag : umf_noinfo; reserved: 0),
(unicode : 90; flag : umf_noinfo; reserved: 0),
(unicode : 91; flag : umf_noinfo; reserved: 0),
(unicode : 92; flag : umf_noinfo; reserved: 0),
(unicode : 93; flag : umf_noinfo; reserved: 0),
(unicode : 94; flag : umf_noinfo; reserved: 0),
(unicode : 95; flag : umf_noinfo; reserved: 0),
(unicode : 96; flag : umf_noinfo; reserved: 0),
(unicode : 97; flag : umf_noinfo; reserved: 0),
(unicode : 98; flag : umf_noinfo; reserved: 0),
(unicode : 99; flag : umf_noinfo; reserved: 0),
(unicode : 100; flag : umf_noinfo; reserved: 0),
(unicode : 101; flag : umf_noinfo; reserved: 0),
(unicode : 102; flag : umf_noinfo; reserved: 0),
(unicode : 103; flag : umf_noinfo; reserved: 0),
(unicode : 104; flag : umf_noinfo; reserved: 0),
(unicode : 105; flag : umf_noinfo; reserved: 0),
(unicode : 106; flag : umf_noinfo; reserved: 0),
(unicode : 107; flag : umf_noinfo; reserved: 0),
(unicode : 108; flag : umf_noinfo; reserved: 0),
(unicode : 109; flag : umf_noinfo; reserved: 0),
(unicode : 110; flag : umf_noinfo; reserved: 0),
(unicode : 111; flag : umf_noinfo; reserved: 0),
(unicode : 112; flag : umf_noinfo; reserved: 0),
(unicode : 113; flag : umf_noinfo; reserved: 0),
(unicode : 114; flag : umf_noinfo; reserved: 0),
(unicode : 115; flag : umf_noinfo; reserved: 0),
(unicode : 116; flag : umf_noinfo; reserved: 0),
(unicode : 117; flag : umf_noinfo; reserved: 0),
(unicode : 118; flag : umf_noinfo; reserved: 0),
(unicode : 119; flag : umf_noinfo; reserved: 0),
(unicode : 120; flag : umf_noinfo; reserved: 0),
(unicode : 121; flag : umf_noinfo; reserved: 0),
(unicode : 122; flag : umf_noinfo; reserved: 0),
(unicode : 123; flag : umf_noinfo; reserved: 0),
(unicode : 124; flag : umf_noinfo; reserved: 0),
(unicode : 125; flag : umf_noinfo; reserved: 0),
(unicode : 126; flag : umf_noinfo; reserved: 0),
(unicode : 127; flag : umf_noinfo; reserved: 0),
(unicode : 8364; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 8218; flag : umf_noinfo; reserved: 0),
(unicode : 402; flag : umf_noinfo; reserved: 0),
(unicode : 8222; flag : umf_noinfo; reserved: 0),
(unicode : 8230; flag : umf_noinfo; reserved: 0),
(unicode : 8224; flag : umf_noinfo; reserved: 0),
(unicode : 8225; flag : umf_noinfo; reserved: 0),
(unicode : 710; flag : umf_noinfo; reserved: 0),
(unicode : 8240; flag : umf_noinfo; reserved: 0),
(unicode : 352; flag : umf_noinfo; reserved: 0),
(unicode : 8249; flag : umf_noinfo; reserved: 0),
(unicode : 338; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 381; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 8216; flag : umf_noinfo; reserved: 0),
(unicode : 8217; flag : umf_noinfo; reserved: 0),
(unicode : 8220; flag : umf_noinfo; reserved: 0),
(unicode : 8221; flag : umf_noinfo; reserved: 0),
(unicode : 8226; flag : umf_noinfo; reserved: 0),
(unicode : 8211; flag : umf_noinfo; reserved: 0),
(unicode : 8212; flag : umf_noinfo; reserved: 0),
(unicode : 732; flag : umf_noinfo; reserved: 0),
(unicode : 8482; flag : umf_noinfo; reserved: 0),
(unicode : 353; flag : umf_noinfo; reserved: 0),
(unicode : 8250; flag : umf_noinfo; reserved: 0),
(unicode : 339; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 382; flag : umf_noinfo; reserved: 0),
(unicode : 376; flag : umf_noinfo; reserved: 0),
(unicode : 160; flag : umf_noinfo; reserved: 0),
(unicode : 161; flag : umf_noinfo; reserved: 0),
(unicode : 162; flag : umf_noinfo; reserved: 0),
(unicode : 163; flag : umf_noinfo; reserved: 0),
(unicode : 164; flag : umf_noinfo; reserved: 0),
(unicode : 165; flag : umf_noinfo; reserved: 0),
(unicode : 166; flag : umf_noinfo; reserved: 0),
(unicode : 167; flag : umf_noinfo; reserved: 0),
(unicode : 168; flag : umf_noinfo; reserved: 0),
(unicode : 169; flag : umf_noinfo; reserved: 0),
(unicode : 170; flag : umf_noinfo; reserved: 0),
(unicode : 171; flag : umf_noinfo; reserved: 0),
(unicode : 172; flag : umf_noinfo; reserved: 0),
(unicode : 173; flag : umf_noinfo; reserved: 0),
(unicode : 174; flag : umf_noinfo; reserved: 0),
(unicode : 175; flag : umf_noinfo; reserved: 0),
(unicode : 176; flag : umf_noinfo; reserved: 0),
(unicode : 177; flag : umf_noinfo; reserved: 0),
(unicode : 178; flag : umf_noinfo; reserved: 0),
(unicode : 179; flag : umf_noinfo; reserved: 0),
(unicode : 180; flag : umf_noinfo; reserved: 0),
(unicode : 181; flag : umf_noinfo; reserved: 0),
(unicode : 182; flag : umf_noinfo; reserved: 0),
(unicode : 183; flag : umf_noinfo; reserved: 0),
(unicode : 184; flag : umf_noinfo; reserved: 0),
(unicode : 185; flag : umf_noinfo; reserved: 0),
(unicode : 186; flag : umf_noinfo; reserved: 0),
(unicode : 187; flag : umf_noinfo; reserved: 0),
(unicode : 188; flag : umf_noinfo; reserved: 0),
(unicode : 189; flag : umf_noinfo; reserved: 0),
(unicode : 190; flag : umf_noinfo; reserved: 0),
(unicode : 191; flag : umf_noinfo; reserved: 0),
(unicode : 192; flag : umf_noinfo; reserved: 0),
(unicode : 193; flag : umf_noinfo; reserved: 0),
(unicode : 194; flag : umf_noinfo; reserved: 0),
(unicode : 195; flag : umf_noinfo; reserved: 0),
(unicode : 196; flag : umf_noinfo; reserved: 0),
(unicode : 197; flag : umf_noinfo; reserved: 0),
(unicode : 198; flag : umf_noinfo; reserved: 0),
(unicode : 199; flag : umf_noinfo; reserved: 0),
(unicode : 200; flag : umf_noinfo; reserved: 0),
(unicode : 201; flag : umf_noinfo; reserved: 0),
(unicode : 202; flag : umf_noinfo; reserved: 0),
(unicode : 203; flag : umf_noinfo; reserved: 0),
(unicode : 204; flag : umf_noinfo; reserved: 0),
(unicode : 205; flag : umf_noinfo; reserved: 0),
(unicode : 206; flag : umf_noinfo; reserved: 0),
(unicode : 207; flag : umf_noinfo; reserved: 0),
(unicode : 208; flag : umf_noinfo; reserved: 0),
(unicode : 209; flag : umf_noinfo; reserved: 0),
(unicode : 210; flag : umf_noinfo; reserved: 0),
(unicode : 211; flag : umf_noinfo; reserved: 0),
(unicode : 212; flag : umf_noinfo; reserved: 0),
(unicode : 213; flag : umf_noinfo; reserved: 0),
(unicode : 214; flag : umf_noinfo; reserved: 0),
(unicode : 215; flag : umf_noinfo; reserved: 0),
(unicode : 216; flag : umf_noinfo; reserved: 0),
(unicode : 217; flag : umf_noinfo; reserved: 0),
(unicode : 218; flag : umf_noinfo; reserved: 0),
(unicode : 219; flag : umf_noinfo; reserved: 0),
(unicode : 220; flag : umf_noinfo; reserved: 0),
(unicode : 221; flag : umf_noinfo; reserved: 0),
(unicode : 222; flag : umf_noinfo; reserved: 0),
(unicode : 223; flag : umf_noinfo; reserved: 0),
(unicode : 224; flag : umf_noinfo; reserved: 0),
(unicode : 225; flag : umf_noinfo; reserved: 0),
(unicode : 226; flag : umf_noinfo; reserved: 0),
(unicode : 227; flag : umf_noinfo; reserved: 0),
(unicode : 228; flag : umf_noinfo; reserved: 0),
(unicode : 229; flag : umf_noinfo; reserved: 0),
(unicode : 230; flag : umf_noinfo; reserved: 0),
(unicode : 231; flag : umf_noinfo; reserved: 0),
(unicode : 232; flag : umf_noinfo; reserved: 0),
(unicode : 233; flag : umf_noinfo; reserved: 0),
(unicode : 234; flag : umf_noinfo; reserved: 0),
(unicode : 235; flag : umf_noinfo; reserved: 0),
(unicode : 236; flag : umf_noinfo; reserved: 0),
(unicode : 237; flag : umf_noinfo; reserved: 0),
(unicode : 238; flag : umf_noinfo; reserved: 0),
(unicode : 239; flag : umf_noinfo; reserved: 0),
(unicode : 240; flag : umf_noinfo; reserved: 0),
(unicode : 241; flag : umf_noinfo; reserved: 0),
(unicode : 242; flag : umf_noinfo; reserved: 0),
(unicode : 243; flag : umf_noinfo; reserved: 0),
(unicode : 244; flag : umf_noinfo; reserved: 0),
(unicode : 245; flag : umf_noinfo; reserved: 0),
(unicode : 246; flag : umf_noinfo; reserved: 0),
(unicode : 247; flag : umf_noinfo; reserved: 0),
(unicode : 248; flag : umf_noinfo; reserved: 0),
(unicode : 249; flag : umf_noinfo; reserved: 0),
(unicode : 250; flag : umf_noinfo; reserved: 0),
(unicode : 251; flag : umf_noinfo; reserved: 0),
(unicode : 252; flag : umf_noinfo; reserved: 0),
(unicode : 253; flag : umf_noinfo; reserved: 0),
(unicode : 254; flag : umf_noinfo; reserved: 0),
(unicode : 255; flag : umf_noinfo; reserved: 0)
);
unicodemap : tunicodemap = (
cpname : 'CP1252';
cp : 1252;
map : @map;
lastchar : 255;
next : nil;
internalmap : true
);
begin
registermapping(@unicodemap)
end.

View File

@ -270,6 +270,7 @@ unit cp437;
unicodemap : tunicodemap = (
cpname : 'cp437';
cp : 437;
map : @map[0];
lastchar : 255;
next : nil;

View File

@ -269,7 +269,8 @@ unit cp850;
);
unicodemap : tunicodemap = (
cpname : 'cp850';
cpname : 'cp850';
cp : 850;
map : @map[0];
lastchar : 255;
next : nil;

View File

@ -269,7 +269,8 @@ unit cp866;
);
unicodemap : tunicodemap = (
cpname : 'cp866';
cpname : 'cp866';
cp : 866;
map : @map;
lastchar : 255;
next : nil;

View File

@ -269,8 +269,9 @@ unit cp8859_1;
);
unicodemap : tunicodemap = (
cpname : '8859-1';
map : @map[0];
cpname : '8859-1';
cp : 28591;
map : @map[0];
lastchar : 255;
next : nil;
internalmap : true

View File

@ -269,7 +269,8 @@ unit cp8859_5;
);
unicodemap : tunicodemap = (
cpname : '8859-5';
cpname : '8859-5';
cp : 28595;
map : @map;
lastchar : 255;
next : nil;

View File

@ -104,6 +104,7 @@ interface
{$endif}
CP_UTF8 = 65001;
CP_UTF16 = 1200;
CP_NONE = 65535;
type

View File

@ -62,6 +62,7 @@ interface
function typecheck_cord_to_pointer : tnode;
function typecheck_chararray_to_string : tnode;
function typecheck_string_to_chararray : tnode;
function typecheck_string_to_string : tnode;
function typecheck_char_to_string : tnode;
function typecheck_char_to_chararray : tnode;
function typecheck_int_to_real : tnode;
@ -1030,11 +1031,11 @@ implementation
{ we can't do widechar to ansichar conversions at compile time, since }
{ this maps all non-ascii chars to '?' -> loses information }
if (left.nodetype=ordconstn) and
if (left.nodetype=ordconstn) {and
((tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
(torddef(left.resultdef).ordtype=uchar) or
{ widechar >=128 is destroyed }
(tordconstnode(left).value.uvalue<128)) then
(tordconstnode(left).value.uvalue<128))} then
begin
if (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) then
begin
@ -1112,6 +1113,36 @@ implementation
end;
end;
function ttypeconvnode.typecheck_string_to_string : tnode;
begin
result:=nil;
if (left.nodetype=stringconstn) and
(tstringdef(resultdef).stringtype in [st_ansistring,st_shortstring]) and
(tstringdef(left.resultdef).stringtype in [st_unicodestring,st_widestring]) then
begin
tstringconstnode(left).changestringtype(resultdef);
Result:=left;
left:=nil;
end
else if (tstringdef(resultdef).stringtype=st_ansistring) and
(tstringdef(left.resultdef).stringtype=st_ansistring) and
(tstringdef(resultdef).encoding<>tstringdef(left.resultdef).encoding) then
begin
result:=ccallnode.createinternres(
'fpc_ansistr_to_ansistr',
ccallparanode.create(
cordconstnode.create(
tstringdef(resultdef).encoding,
u16inttype,
true
),
ccallparanode.create(left,nil)
),
resultdef
);
left:=nil;
end;
end;
function ttypeconvnode.typecheck_char_to_chararray : tnode;
begin
@ -1133,11 +1164,12 @@ implementation
hp : tordconstnode;
begin
result:=nil;
if (left.nodetype=ordconstn) and
if (left.nodetype=ordconstn)
{and
((torddef(resultdef).ordtype<>uchar) or
(torddef(left.resultdef).ordtype<>uwidechar) or
{ >= 128 is replaced by '?' currently -> loses information }
(tordconstnode(left).value.uvalue<128)) then
(tordconstnode(left).value.uvalue<128))} then
begin
if (torddef(resultdef).ordtype=uchar) and
(torddef(left.resultdef).ordtype=uwidechar) then
@ -1677,7 +1709,7 @@ implementation
{none} nil,
{equal} nil,
{not_possible} nil,
{ string_2_string } nil,
{ string_2_string } @ttypeconvnode.typecheck_string_to_string,
{ char_2_string } @ttypeconvnode.typecheck_char_to_string,
{ char_2_chararray } @ttypeconvnode.typecheck_char_to_chararray,
{ pchar_2_string } @ttypeconvnode.typecheck_pchar_to_string,
@ -2205,9 +2237,9 @@ implementation
(
((not is_widechararray(left.resultdef) and
not is_wide_or_unicode_string(left.resultdef)) or
(tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
(tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) {or
{ non-ascii chars would be replaced with '?' -> loses info }
not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str)))
not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str))})
) then
begin
tstringconstnode(left).changestringtype(resultdef);

View File

@ -982,6 +982,9 @@ implementation
var
pw : pcompilerwidestring;
pc : pchar;
cp1 : tstringencoding;
cp2 : tstringencoding;
l,l2 : longint;
begin
if def.typ<>stringdef then
internalerror(200510011);
@ -999,11 +1002,40 @@ implementation
if (cst_type in [cst_widestring,cst_unicodestring]) and
not(tstringdef(def).stringtype in [st_widestring,st_unicodestring]) then
begin
pw:=pcompilerwidestring(value_str);
getmem(pc,getlengthwidestring(pw)+1);
unicode2ascii(pw,pc);
donewidestring(pw);
value_str:=pc;
if (tstringdef(def).encoding=CP_UTF8) then
begin
pw:=pcompilerwidestring(value_str);
l:=(getlengthwidestring(pw)*4)+1;
getmem(pc,l);
l2:=UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),getlengthwidestring(pw));
if (l<>l2) then
begin
ReAllocMem(pc,l2);
len:=l2;
end;
donewidestring(pw);
value_str:=pc;
end
else
begin
pw:=pcompilerwidestring(value_str);
getmem(pc,getlengthwidestring(pw)+1);
unicode2ascii(pw,pc,tstringdef(def).encoding);
donewidestring(pw);
value_str:=pc;
end;
end
else
if (tstringdef(def).stringtype = st_ansistring) and
not(cst_type in [cst_widestring,cst_unicodestring]) then
begin
cp1:=tstringdef(def).encoding;
if (cst_type = cst_ansistring) then
cp2:=tstringdef(resultdef).encoding
else if (cst_type in [cst_shortstring,cst_conststring,cst_longstring]) then
cp2:=codepagebyname(current_settings.sourcecodepage);
if cpavailable(cp1) and cpavailable(cp2) then
changecodepage(value_str,len,cp1,value_str,cp2);
end;
cst_type:=st2cst[tstringdef(def).stringtype];
resultdef:=def;

View File

@ -903,9 +903,15 @@ begin
end;
'm' :
begin
unicodemapping:=loadunicodemapping(More,More+'.txt');
if assigned(unicodemapping) then
registermapping(unicodemapping)
s:=ExtractFileDir(more);
if TryStrToInt(ExtractFileName(more),j) then
begin
unicodemapping:=loadunicodemapping(More,More+'.txt',j);
if assigned(unicodemapping) then
registermapping(unicodemapping)
else
IllegalPara(opt);
end
else
IllegalPara(opt);
end;

View File

@ -176,6 +176,24 @@ implementation
p:=ctypeconvnode.create(p,cwidechartype);
do_typecheckpass(p);
end;
end
else
begin
if is_char(casedef) and is_widechar(p.resultdef) then
begin
if (p.nodetype=ordconstn) then
begin
p:=ctypeconvnode.create(p,cchartype);
do_typecheckpass(p);
end
else if (p.nodetype=rangen) then
begin
trangenode(p).left:=ctypeconvnode.create(trangenode(p).left,cchartype);
trangenode(p).right:=ctypeconvnode.create(trangenode(p).right,cchartype);
do_typecheckpass(trangenode(p).left);
do_typecheckpass(trangenode(p).right);
end;
end;
end;
hl1:=0;
hl2:=0;

View File

@ -231,6 +231,8 @@ implementation
end;
uchar :
begin
if is_constwidecharnode(n) then
inserttypeconv(n,cchartype);
if is_constcharnode(n) or
((m_delphi in current_settings.modeswitches) and
is_constwidecharnode(n) and

View File

@ -4171,6 +4171,19 @@ In case not, the value returned can be arbitrary.
break;
end;
until false;
//------------------
{ convert existing string to an utf-8 string }
if (not iswidestring) and
(current_settings.sourcecodepage<>default_settings.sourcecodepage) then
begin
if len>0 then
ascii2unicode(@cstringpattern[1],len,patternw)
else
ascii2unicode(nil,len,patternw);
iswidestring:=true;
len:=0;
end;
//-------------------
{ strings with length 1 become const chars }
if iswidestring then
begin

View File

@ -53,15 +53,21 @@ unit widestr;
function asciichar2unicode(c : char) : tcompilerwidechar;
function unicode2asciichar(c : tcompilerwidechar) : char;
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
procedure unicode2ascii(r : pcompilerwidestring;p : pchar);
procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
function hasnonasciichars(const p: pcompilerwidestring): boolean;
function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
function cpavailable(const s : string) : boolean;
function cpavailable(cp : word) : boolean;
procedure changecodepage(
s : pchar; l : SizeInt; scp : tstringencoding;
d : pchar; dcp : tstringencoding
);
function codepagebyname(const s : string) : tstringencoding;
implementation
uses
cp8859_1,cp850,cp437,
cp8859_1,cp850,cp437,cp1252,
{ cyrillic code pages }
cp1251,cp866,cp8859_5,
globals,cutils;
@ -173,11 +179,14 @@ unit widestr;
end;
function unicode2asciichar(c : tcompilerwidechar) : char;
begin
{begin
if word(c)<128 then
unicode2asciichar:=char(word(c))
else
unicode2asciichar:='?';
end;}
begin
Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
end;
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
@ -211,22 +220,28 @@ unit widestr;
end;
end;
procedure unicode2ascii(r : pcompilerwidestring;p:pchar);
(*
procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
var
m : punicodemap;
i : longint;
m : punicodemap;
source : tcompilerwidecharptr;
dest : pchar;
i : longint;
begin
m:=getmap(current_settings.sourcecodepage);
{ should be a very good estimation :) }
setlengthwidestring(r,length(s));
if (cp = 0) or (cp=CP_NONE) then
m:=getmap(current_settings.sourcecodepage)
else
m:=getmap(cp);
// !!!! MBCS
for i:=1 to length(s) do
begin
end;
source:=tcompilerwidecharptr(r^.data);
dest:=p;
for i:=1 to r^.len do
begin
dest^ := getascii(source^,m)[1];
inc(dest);
inc(source);
end;
end;
*)
(*
var
source : tcompilerwidecharptr;
dest : pchar;
@ -247,7 +262,7 @@ unit widestr;
inc(source);
end;
end;
*)
function hasnonasciichars(const p: pcompilerwidestring): boolean;
var
@ -269,6 +284,43 @@ unit widestr;
function cpavailable(const s : string) : boolean;
begin
cpavailable:=mappingavailable(lower(s));
end;
function cpavailable(cp : word) : boolean;
begin
cpavailable:=mappingavailable(cp);
end;
procedure changecodepage(
s : pchar; l : SizeInt; scp : tstringencoding;
d : pchar; dcp : tstringencoding
);
var
ms, md : punicodemap;
source : pchar;
dest : pchar;
i : longint;
begin
ms:=getmap(scp);
md:=getmap(dcp);
source:=s;
dest:=d;
for i:=1 to l do
begin
dest^ := getascii(getunicode(source^,ms),md)[1];
inc(dest);
inc(source);
end;
end;
function codepagebyname(const s : string) : tstringencoding;
var
p : punicodemap;
begin
Result:=0;
p:=getmap(s);
if (p<>nil) then
Result:=p^.cp;
end;
end.

View File

@ -39,6 +39,7 @@ unit charset;
punicodemap = ^tunicodemap;
tunicodemap = record
cpname : string[20];
cp : word;
map : punicodecharmapping;
lastchar : longint;
next : punicodemap;
@ -48,10 +49,12 @@ unit charset;
tcp2unicode = class(tcsconvert)
end;
function loadunicodemapping(const cpname,f : string) : punicodemap;
function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
procedure registermapping(p : punicodemap);
function getmap(const s : string) : punicodemap;
function getmap(const s : string) : punicodemap;
function getmap(cp : word) : punicodemap;
function mappingavailable(const s : string) : boolean;
function mappingavailable(cp :word) : boolean;
function getunicode(c : char;p : punicodemap) : tunicodechar;
function getascii(c : tunicodechar;p : punicodemap) : string;
@ -60,7 +63,7 @@ unit charset;
var
mappings : punicodemap;
function loadunicodemapping(const cpname,f : string) : punicodemap;
function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
var
data : punicodecharmapping;
@ -155,6 +158,7 @@ unit charset;
new(p);
p^.lastchar:=lastchar;
p^.cpname:=cpname;
p^.cp:=cp;
p^.internalmap:=false;
p^.next:=nil;
p^.map:=data;
@ -196,6 +200,36 @@ unit charset;
hp:=hp^.next;
end;
getmap:=nil;
end;////////
function getmap(cp : word) : punicodemap;
var
hp : punicodemap;
const
mapcache : word = 0;
mapcachep : punicodemap = nil;
begin
if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
begin
getmap:=mapcachep;
exit;
end;
hp:=mappings;
while assigned(hp) do
begin
if hp^.cp=cp then
begin
getmap:=hp;
mapcache:=cp;
mapcachep:=hp;
exit;
end;
hp:=hp^.next;
end;
getmap:=nil;
end;
function mappingavailable(const s : string) : boolean;
@ -204,6 +238,12 @@ unit charset;
mappingavailable:=getmap(s)<>nil;
end;
function mappingavailable(cp : word) : boolean;
begin
mappingavailable:=getmap(cp)<>nil;
end;
function getunicode(c : char;p : punicodemap) : tunicodechar;
begin
@ -219,8 +259,8 @@ unit charset;
i : longint;
begin
{ at least map to space }
getascii:=#32;
{ at least map to '?' }
getascii:=#63;
for i:=0 to p^.lastchar do
if p^.map[i].unicode=c then
begin

View File

@ -23,9 +23,8 @@
a punicodechar that points to :
@-8 : SizeInt for reference count;
@-4 : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply
with sizeof(UnicodeChar) to convert. This is needed to be compatible with Delphi and
Windows COM BSTR.
@-4 : SizeInt for size; size=number of chars. Multiply with
sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
@ : String + Terminating #0;
Punicodechar(Unicodestring) is a valid typecast.
So WS[i] is converted to the address @WS+i-1.
@ -810,7 +809,9 @@ var
begin
{$ifndef FPC_HAS_CPSTRING}
cp:=$ffff;
{$endif FPC_HAS_CPSTRING}
{$endif FPC_HAS_CPSTRING}
if cp=$ffff then
cp:=DefaultSystemCodePage;
widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
end;
@ -1552,10 +1553,10 @@ begin
exit;
if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
begin
L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len div sizeof(UnicodeChar);
L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
SNew:=NewUnicodeString (L);
Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L * sizeof(UnicodeChar);
PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L;
fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
pointer(S):=SNew;
pointer(result):=SNew;

View File

@ -20,8 +20,9 @@ program creumap;
procedure doerror;
begin
writeln('Usage: creumap <cpname>');
writeln('A mapping file called <cpname>.txt must be present');
writeln('Usage: creumap <cpname> <cpnumber>');
writeln('cpname : A mapping file called <cpname>.txt must be present');
writeln('cpnumber : the code page number');
halt(1);
end;
@ -29,11 +30,16 @@ program creumap;
p : punicodemap;
i : longint;
t : text;
e : word;
begin
if paramcount<>1 then
if paramcount<>2 then
doerror;
p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt');
Val(paramstr(2),i,e);
if e<>0 then
doerror;
p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt',i);
if p=nil then
doerror;
assign(t,paramstr(1)+'.pp');
@ -69,6 +75,7 @@ begin
writeln(t);
writeln(t,' unicodemap : tunicodemap = (');
writeln(t,' cpname : ''',p^.cpname,''';');
writeln(t,' cp : ',p^.cp,';');
writeln(t,' map : @map;');
writeln(t,' lastchar : ',p^.lastchar,';');
writeln(t,' next : nil;');