merge r17434 from cpstrnew branch by michael:

* Patch from Inoussa to fix constant strings with codepage

git-svn-id: trunk@19109 -
This commit is contained in:
paul 2011-09-17 13:19:59 +00:00
parent 4b9a082152
commit 1db610ecbd
20 changed files with 583 additions and 57 deletions

1
.gitattributes vendored
View File

@ -123,6 +123,7 @@ compiler/compinnr.inc svneol=native#text/plain
compiler/comprsrc.pas svneol=native#text/plain compiler/comprsrc.pas svneol=native#text/plain
compiler/constexp.pas svneol=native#text/x-pascal compiler/constexp.pas svneol=native#text/x-pascal
compiler/cp1251.pas svneol=native#text/plain compiler/cp1251.pas svneol=native#text/plain
compiler/cp1252.pp svneol=native#text/plain
compiler/cp437.pas svneol=native#text/plain compiler/cp437.pas svneol=native#text/plain
compiler/cp850.pas svneol=native#text/plain compiler/cp850.pas svneol=native#text/plain
compiler/cp866.pas svneol=native#text/plain compiler/cp866.pas svneol=native#text/plain

View File

@ -42,6 +42,7 @@ unit ccharset;
punicodemap = ^tunicodemap; punicodemap = ^tunicodemap;
tunicodemap = record tunicodemap = record
cpname : string[20]; cpname : string[20];
cp : word;
map : punicodecharmapping; map : punicodecharmapping;
lastchar : longint; lastchar : longint;
next : punicodemap; next : punicodemap;
@ -51,9 +52,10 @@ unit ccharset;
tcp2unicode = class(tcsconvert) tcp2unicode = class(tcsconvert)
end; end;
function loadunicodemapping(const cpname,f : string) : punicodemap; function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
procedure registermapping(p : punicodemap); procedure registermapping(p : punicodemap);
function getmap(const s : string) : punicodemap; function getmap(const s : string) : punicodemap;
function getmap(cp : word) : punicodemap;
function mappingavailable(const s : string) : boolean; function mappingavailable(const s : string) : boolean;
function getunicode(c : char;p : punicodemap) : tunicodechar; function getunicode(c : char;p : punicodemap) : tunicodechar;
function getascii(c : tunicodechar;p : punicodemap) : string; function getascii(c : tunicodechar;p : punicodemap) : string;
@ -63,7 +65,7 @@ unit ccharset;
var var
mappings : punicodemap; mappings : punicodemap;
function loadunicodemapping(const cpname,f : string) : punicodemap; function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
var var
data : punicodecharmapping; data : punicodecharmapping;
@ -158,6 +160,7 @@ unit ccharset;
new(p); new(p);
p^.lastchar:=lastchar; p^.lastchar:=lastchar;
p^.cpname:=cpname; p^.cpname:=cpname;
p^.cp:=cp;
p^.internalmap:=false; p^.internalmap:=false;
p^.next:=nil; p^.next:=nil;
p^.map:=data; p^.map:=data;
@ -201,6 +204,36 @@ unit ccharset;
getmap:=nil; getmap:=nil;
end; end;
function getmap(cp : word) : punicodemap;
var
hp : punicodemap;
const
mapcache : word = 0;
mapcachep : punicodemap = nil;
begin
if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
begin
getmap:=mapcachep;
exit;
end;
hp:=mappings;
while assigned(hp) do
begin
if hp^.cp=cp then
begin
getmap:=hp;
mapcache:=cp;
mapcachep:=hp;
exit;
end;
hp:=hp^.next;
end;
getmap:=nil;
end;
function mappingavailable(const s : string) : boolean; function mappingavailable(const s : string) : boolean;
begin begin

View File

@ -270,6 +270,7 @@ unit cp1251;
unicodemap : tunicodemap = ( unicodemap : tunicodemap = (
cpname : 'cp1251'; cpname : 'cp1251';
cp : 1251;
map : @map; map : @map;
lastchar : 255; lastchar : 255;
next : nil; next : nil;

282
compiler/cp1252.pp Normal file
View File

@ -0,0 +1,282 @@
{ This is an automatically created file, so don't edit it }
unit CP1252;
interface
implementation
uses
charset;
const
map : array[0..255] of tunicodecharmapping = (
(unicode : 0; flag : umf_noinfo; reserved: 0),
(unicode : 1; flag : umf_noinfo; reserved: 0),
(unicode : 2; flag : umf_noinfo; reserved: 0),
(unicode : 3; flag : umf_noinfo; reserved: 0),
(unicode : 4; flag : umf_noinfo; reserved: 0),
(unicode : 5; flag : umf_noinfo; reserved: 0),
(unicode : 6; flag : umf_noinfo; reserved: 0),
(unicode : 7; flag : umf_noinfo; reserved: 0),
(unicode : 8; flag : umf_noinfo; reserved: 0),
(unicode : 9; flag : umf_noinfo; reserved: 0),
(unicode : 10; flag : umf_noinfo; reserved: 0),
(unicode : 11; flag : umf_noinfo; reserved: 0),
(unicode : 12; flag : umf_noinfo; reserved: 0),
(unicode : 13; flag : umf_noinfo; reserved: 0),
(unicode : 14; flag : umf_noinfo; reserved: 0),
(unicode : 15; flag : umf_noinfo; reserved: 0),
(unicode : 16; flag : umf_noinfo; reserved: 0),
(unicode : 17; flag : umf_noinfo; reserved: 0),
(unicode : 18; flag : umf_noinfo; reserved: 0),
(unicode : 19; flag : umf_noinfo; reserved: 0),
(unicode : 20; flag : umf_noinfo; reserved: 0),
(unicode : 21; flag : umf_noinfo; reserved: 0),
(unicode : 22; flag : umf_noinfo; reserved: 0),
(unicode : 23; flag : umf_noinfo; reserved: 0),
(unicode : 24; flag : umf_noinfo; reserved: 0),
(unicode : 25; flag : umf_noinfo; reserved: 0),
(unicode : 26; flag : umf_noinfo; reserved: 0),
(unicode : 27; flag : umf_noinfo; reserved: 0),
(unicode : 28; flag : umf_noinfo; reserved: 0),
(unicode : 29; flag : umf_noinfo; reserved: 0),
(unicode : 30; flag : umf_noinfo; reserved: 0),
(unicode : 31; flag : umf_noinfo; reserved: 0),
(unicode : 32; flag : umf_noinfo; reserved: 0),
(unicode : 33; flag : umf_noinfo; reserved: 0),
(unicode : 34; flag : umf_noinfo; reserved: 0),
(unicode : 35; flag : umf_noinfo; reserved: 0),
(unicode : 36; flag : umf_noinfo; reserved: 0),
(unicode : 37; flag : umf_noinfo; reserved: 0),
(unicode : 38; flag : umf_noinfo; reserved: 0),
(unicode : 39; flag : umf_noinfo; reserved: 0),
(unicode : 40; flag : umf_noinfo; reserved: 0),
(unicode : 41; flag : umf_noinfo; reserved: 0),
(unicode : 42; flag : umf_noinfo; reserved: 0),
(unicode : 43; flag : umf_noinfo; reserved: 0),
(unicode : 44; flag : umf_noinfo; reserved: 0),
(unicode : 45; flag : umf_noinfo; reserved: 0),
(unicode : 46; flag : umf_noinfo; reserved: 0),
(unicode : 47; flag : umf_noinfo; reserved: 0),
(unicode : 48; flag : umf_noinfo; reserved: 0),
(unicode : 49; flag : umf_noinfo; reserved: 0),
(unicode : 50; flag : umf_noinfo; reserved: 0),
(unicode : 51; flag : umf_noinfo; reserved: 0),
(unicode : 52; flag : umf_noinfo; reserved: 0),
(unicode : 53; flag : umf_noinfo; reserved: 0),
(unicode : 54; flag : umf_noinfo; reserved: 0),
(unicode : 55; flag : umf_noinfo; reserved: 0),
(unicode : 56; flag : umf_noinfo; reserved: 0),
(unicode : 57; flag : umf_noinfo; reserved: 0),
(unicode : 58; flag : umf_noinfo; reserved: 0),
(unicode : 59; flag : umf_noinfo; reserved: 0),
(unicode : 60; flag : umf_noinfo; reserved: 0),
(unicode : 61; flag : umf_noinfo; reserved: 0),
(unicode : 62; flag : umf_noinfo; reserved: 0),
(unicode : 63; flag : umf_noinfo; reserved: 0),
(unicode : 64; flag : umf_noinfo; reserved: 0),
(unicode : 65; flag : umf_noinfo; reserved: 0),
(unicode : 66; flag : umf_noinfo; reserved: 0),
(unicode : 67; flag : umf_noinfo; reserved: 0),
(unicode : 68; flag : umf_noinfo; reserved: 0),
(unicode : 69; flag : umf_noinfo; reserved: 0),
(unicode : 70; flag : umf_noinfo; reserved: 0),
(unicode : 71; flag : umf_noinfo; reserved: 0),
(unicode : 72; flag : umf_noinfo; reserved: 0),
(unicode : 73; flag : umf_noinfo; reserved: 0),
(unicode : 74; flag : umf_noinfo; reserved: 0),
(unicode : 75; flag : umf_noinfo; reserved: 0),
(unicode : 76; flag : umf_noinfo; reserved: 0),
(unicode : 77; flag : umf_noinfo; reserved: 0),
(unicode : 78; flag : umf_noinfo; reserved: 0),
(unicode : 79; flag : umf_noinfo; reserved: 0),
(unicode : 80; flag : umf_noinfo; reserved: 0),
(unicode : 81; flag : umf_noinfo; reserved: 0),
(unicode : 82; flag : umf_noinfo; reserved: 0),
(unicode : 83; flag : umf_noinfo; reserved: 0),
(unicode : 84; flag : umf_noinfo; reserved: 0),
(unicode : 85; flag : umf_noinfo; reserved: 0),
(unicode : 86; flag : umf_noinfo; reserved: 0),
(unicode : 87; flag : umf_noinfo; reserved: 0),
(unicode : 88; flag : umf_noinfo; reserved: 0),
(unicode : 89; flag : umf_noinfo; reserved: 0),
(unicode : 90; flag : umf_noinfo; reserved: 0),
(unicode : 91; flag : umf_noinfo; reserved: 0),
(unicode : 92; flag : umf_noinfo; reserved: 0),
(unicode : 93; flag : umf_noinfo; reserved: 0),
(unicode : 94; flag : umf_noinfo; reserved: 0),
(unicode : 95; flag : umf_noinfo; reserved: 0),
(unicode : 96; flag : umf_noinfo; reserved: 0),
(unicode : 97; flag : umf_noinfo; reserved: 0),
(unicode : 98; flag : umf_noinfo; reserved: 0),
(unicode : 99; flag : umf_noinfo; reserved: 0),
(unicode : 100; flag : umf_noinfo; reserved: 0),
(unicode : 101; flag : umf_noinfo; reserved: 0),
(unicode : 102; flag : umf_noinfo; reserved: 0),
(unicode : 103; flag : umf_noinfo; reserved: 0),
(unicode : 104; flag : umf_noinfo; reserved: 0),
(unicode : 105; flag : umf_noinfo; reserved: 0),
(unicode : 106; flag : umf_noinfo; reserved: 0),
(unicode : 107; flag : umf_noinfo; reserved: 0),
(unicode : 108; flag : umf_noinfo; reserved: 0),
(unicode : 109; flag : umf_noinfo; reserved: 0),
(unicode : 110; flag : umf_noinfo; reserved: 0),
(unicode : 111; flag : umf_noinfo; reserved: 0),
(unicode : 112; flag : umf_noinfo; reserved: 0),
(unicode : 113; flag : umf_noinfo; reserved: 0),
(unicode : 114; flag : umf_noinfo; reserved: 0),
(unicode : 115; flag : umf_noinfo; reserved: 0),
(unicode : 116; flag : umf_noinfo; reserved: 0),
(unicode : 117; flag : umf_noinfo; reserved: 0),
(unicode : 118; flag : umf_noinfo; reserved: 0),
(unicode : 119; flag : umf_noinfo; reserved: 0),
(unicode : 120; flag : umf_noinfo; reserved: 0),
(unicode : 121; flag : umf_noinfo; reserved: 0),
(unicode : 122; flag : umf_noinfo; reserved: 0),
(unicode : 123; flag : umf_noinfo; reserved: 0),
(unicode : 124; flag : umf_noinfo; reserved: 0),
(unicode : 125; flag : umf_noinfo; reserved: 0),
(unicode : 126; flag : umf_noinfo; reserved: 0),
(unicode : 127; flag : umf_noinfo; reserved: 0),
(unicode : 8364; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 8218; flag : umf_noinfo; reserved: 0),
(unicode : 402; flag : umf_noinfo; reserved: 0),
(unicode : 8222; flag : umf_noinfo; reserved: 0),
(unicode : 8230; flag : umf_noinfo; reserved: 0),
(unicode : 8224; flag : umf_noinfo; reserved: 0),
(unicode : 8225; flag : umf_noinfo; reserved: 0),
(unicode : 710; flag : umf_noinfo; reserved: 0),
(unicode : 8240; flag : umf_noinfo; reserved: 0),
(unicode : 352; flag : umf_noinfo; reserved: 0),
(unicode : 8249; flag : umf_noinfo; reserved: 0),
(unicode : 338; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 381; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 8216; flag : umf_noinfo; reserved: 0),
(unicode : 8217; flag : umf_noinfo; reserved: 0),
(unicode : 8220; flag : umf_noinfo; reserved: 0),
(unicode : 8221; flag : umf_noinfo; reserved: 0),
(unicode : 8226; flag : umf_noinfo; reserved: 0),
(unicode : 8211; flag : umf_noinfo; reserved: 0),
(unicode : 8212; flag : umf_noinfo; reserved: 0),
(unicode : 732; flag : umf_noinfo; reserved: 0),
(unicode : 8482; flag : umf_noinfo; reserved: 0),
(unicode : 353; flag : umf_noinfo; reserved: 0),
(unicode : 8250; flag : umf_noinfo; reserved: 0),
(unicode : 339; flag : umf_noinfo; reserved: 0),
(unicode : 65535; flag : umf_unused; reserved: 0),
(unicode : 382; flag : umf_noinfo; reserved: 0),
(unicode : 376; flag : umf_noinfo; reserved: 0),
(unicode : 160; flag : umf_noinfo; reserved: 0),
(unicode : 161; flag : umf_noinfo; reserved: 0),
(unicode : 162; flag : umf_noinfo; reserved: 0),
(unicode : 163; flag : umf_noinfo; reserved: 0),
(unicode : 164; flag : umf_noinfo; reserved: 0),
(unicode : 165; flag : umf_noinfo; reserved: 0),
(unicode : 166; flag : umf_noinfo; reserved: 0),
(unicode : 167; flag : umf_noinfo; reserved: 0),
(unicode : 168; flag : umf_noinfo; reserved: 0),
(unicode : 169; flag : umf_noinfo; reserved: 0),
(unicode : 170; flag : umf_noinfo; reserved: 0),
(unicode : 171; flag : umf_noinfo; reserved: 0),
(unicode : 172; flag : umf_noinfo; reserved: 0),
(unicode : 173; flag : umf_noinfo; reserved: 0),
(unicode : 174; flag : umf_noinfo; reserved: 0),
(unicode : 175; flag : umf_noinfo; reserved: 0),
(unicode : 176; flag : umf_noinfo; reserved: 0),
(unicode : 177; flag : umf_noinfo; reserved: 0),
(unicode : 178; flag : umf_noinfo; reserved: 0),
(unicode : 179; flag : umf_noinfo; reserved: 0),
(unicode : 180; flag : umf_noinfo; reserved: 0),
(unicode : 181; flag : umf_noinfo; reserved: 0),
(unicode : 182; flag : umf_noinfo; reserved: 0),
(unicode : 183; flag : umf_noinfo; reserved: 0),
(unicode : 184; flag : umf_noinfo; reserved: 0),
(unicode : 185; flag : umf_noinfo; reserved: 0),
(unicode : 186; flag : umf_noinfo; reserved: 0),
(unicode : 187; flag : umf_noinfo; reserved: 0),
(unicode : 188; flag : umf_noinfo; reserved: 0),
(unicode : 189; flag : umf_noinfo; reserved: 0),
(unicode : 190; flag : umf_noinfo; reserved: 0),
(unicode : 191; flag : umf_noinfo; reserved: 0),
(unicode : 192; flag : umf_noinfo; reserved: 0),
(unicode : 193; flag : umf_noinfo; reserved: 0),
(unicode : 194; flag : umf_noinfo; reserved: 0),
(unicode : 195; flag : umf_noinfo; reserved: 0),
(unicode : 196; flag : umf_noinfo; reserved: 0),
(unicode : 197; flag : umf_noinfo; reserved: 0),
(unicode : 198; flag : umf_noinfo; reserved: 0),
(unicode : 199; flag : umf_noinfo; reserved: 0),
(unicode : 200; flag : umf_noinfo; reserved: 0),
(unicode : 201; flag : umf_noinfo; reserved: 0),
(unicode : 202; flag : umf_noinfo; reserved: 0),
(unicode : 203; flag : umf_noinfo; reserved: 0),
(unicode : 204; flag : umf_noinfo; reserved: 0),
(unicode : 205; flag : umf_noinfo; reserved: 0),
(unicode : 206; flag : umf_noinfo; reserved: 0),
(unicode : 207; flag : umf_noinfo; reserved: 0),
(unicode : 208; flag : umf_noinfo; reserved: 0),
(unicode : 209; flag : umf_noinfo; reserved: 0),
(unicode : 210; flag : umf_noinfo; reserved: 0),
(unicode : 211; flag : umf_noinfo; reserved: 0),
(unicode : 212; flag : umf_noinfo; reserved: 0),
(unicode : 213; flag : umf_noinfo; reserved: 0),
(unicode : 214; flag : umf_noinfo; reserved: 0),
(unicode : 215; flag : umf_noinfo; reserved: 0),
(unicode : 216; flag : umf_noinfo; reserved: 0),
(unicode : 217; flag : umf_noinfo; reserved: 0),
(unicode : 218; flag : umf_noinfo; reserved: 0),
(unicode : 219; flag : umf_noinfo; reserved: 0),
(unicode : 220; flag : umf_noinfo; reserved: 0),
(unicode : 221; flag : umf_noinfo; reserved: 0),
(unicode : 222; flag : umf_noinfo; reserved: 0),
(unicode : 223; flag : umf_noinfo; reserved: 0),
(unicode : 224; flag : umf_noinfo; reserved: 0),
(unicode : 225; flag : umf_noinfo; reserved: 0),
(unicode : 226; flag : umf_noinfo; reserved: 0),
(unicode : 227; flag : umf_noinfo; reserved: 0),
(unicode : 228; flag : umf_noinfo; reserved: 0),
(unicode : 229; flag : umf_noinfo; reserved: 0),
(unicode : 230; flag : umf_noinfo; reserved: 0),
(unicode : 231; flag : umf_noinfo; reserved: 0),
(unicode : 232; flag : umf_noinfo; reserved: 0),
(unicode : 233; flag : umf_noinfo; reserved: 0),
(unicode : 234; flag : umf_noinfo; reserved: 0),
(unicode : 235; flag : umf_noinfo; reserved: 0),
(unicode : 236; flag : umf_noinfo; reserved: 0),
(unicode : 237; flag : umf_noinfo; reserved: 0),
(unicode : 238; flag : umf_noinfo; reserved: 0),
(unicode : 239; flag : umf_noinfo; reserved: 0),
(unicode : 240; flag : umf_noinfo; reserved: 0),
(unicode : 241; flag : umf_noinfo; reserved: 0),
(unicode : 242; flag : umf_noinfo; reserved: 0),
(unicode : 243; flag : umf_noinfo; reserved: 0),
(unicode : 244; flag : umf_noinfo; reserved: 0),
(unicode : 245; flag : umf_noinfo; reserved: 0),
(unicode : 246; flag : umf_noinfo; reserved: 0),
(unicode : 247; flag : umf_noinfo; reserved: 0),
(unicode : 248; flag : umf_noinfo; reserved: 0),
(unicode : 249; flag : umf_noinfo; reserved: 0),
(unicode : 250; flag : umf_noinfo; reserved: 0),
(unicode : 251; flag : umf_noinfo; reserved: 0),
(unicode : 252; flag : umf_noinfo; reserved: 0),
(unicode : 253; flag : umf_noinfo; reserved: 0),
(unicode : 254; flag : umf_noinfo; reserved: 0),
(unicode : 255; flag : umf_noinfo; reserved: 0)
);
unicodemap : tunicodemap = (
cpname : 'CP1252';
cp : 1252;
map : @map;
lastchar : 255;
next : nil;
internalmap : true
);
begin
registermapping(@unicodemap)
end.

View File

@ -270,6 +270,7 @@ unit cp437;
unicodemap : tunicodemap = ( unicodemap : tunicodemap = (
cpname : 'cp437'; cpname : 'cp437';
cp : 437;
map : @map[0]; map : @map[0];
lastchar : 255; lastchar : 255;
next : nil; next : nil;

View File

@ -270,6 +270,7 @@ unit cp850;
unicodemap : tunicodemap = ( unicodemap : tunicodemap = (
cpname : 'cp850'; cpname : 'cp850';
cp : 850;
map : @map[0]; map : @map[0];
lastchar : 255; lastchar : 255;
next : nil; next : nil;

View File

@ -270,6 +270,7 @@ unit cp866;
unicodemap : tunicodemap = ( unicodemap : tunicodemap = (
cpname : 'cp866'; cpname : 'cp866';
cp : 866;
map : @map; map : @map;
lastchar : 255; lastchar : 255;
next : nil; next : nil;

View File

@ -270,6 +270,7 @@ unit cp8859_1;
unicodemap : tunicodemap = ( unicodemap : tunicodemap = (
cpname : '8859-1'; cpname : '8859-1';
cp : 28591;
map : @map[0]; map : @map[0];
lastchar : 255; lastchar : 255;
next : nil; next : nil;

View File

@ -270,6 +270,7 @@ unit cp8859_5;
unicodemap : tunicodemap = ( unicodemap : tunicodemap = (
cpname : '8859-5'; cpname : '8859-5';
cp : 28595;
map : @map; map : @map;
lastchar : 255; lastchar : 255;
next : nil; next : nil;

View File

@ -104,6 +104,7 @@ interface
{$endif} {$endif}
CP_UTF8 = 65001; CP_UTF8 = 65001;
CP_UTF16 = 1200; CP_UTF16 = 1200;
CP_NONE = 65535;
type type

View File

@ -62,6 +62,7 @@ interface
function typecheck_cord_to_pointer : tnode; function typecheck_cord_to_pointer : tnode;
function typecheck_chararray_to_string : tnode; function typecheck_chararray_to_string : tnode;
function typecheck_string_to_chararray : tnode; function typecheck_string_to_chararray : tnode;
function typecheck_string_to_string : tnode;
function typecheck_char_to_string : tnode; function typecheck_char_to_string : tnode;
function typecheck_char_to_chararray : tnode; function typecheck_char_to_chararray : tnode;
function typecheck_int_to_real : tnode; function typecheck_int_to_real : tnode;
@ -1030,11 +1031,11 @@ implementation
{ we can't do widechar to ansichar conversions at compile time, since } { we can't do widechar to ansichar conversions at compile time, since }
{ this maps all non-ascii chars to '?' -> loses information } { this maps all non-ascii chars to '?' -> loses information }
if (left.nodetype=ordconstn) and if (left.nodetype=ordconstn) {and
((tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or ((tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
(torddef(left.resultdef).ordtype=uchar) or (torddef(left.resultdef).ordtype=uchar) or
{ widechar >=128 is destroyed } { widechar >=128 is destroyed }
(tordconstnode(left).value.uvalue<128)) then (tordconstnode(left).value.uvalue<128))} then
begin begin
if (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) then if (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) then
begin begin
@ -1112,6 +1113,36 @@ implementation
end; end;
end; end;
function ttypeconvnode.typecheck_string_to_string : tnode;
begin
result:=nil;
if (left.nodetype=stringconstn) and
(tstringdef(resultdef).stringtype in [st_ansistring,st_shortstring]) and
(tstringdef(left.resultdef).stringtype in [st_unicodestring,st_widestring]) then
begin
tstringconstnode(left).changestringtype(resultdef);
Result:=left;
left:=nil;
end
else if (tstringdef(resultdef).stringtype=st_ansistring) and
(tstringdef(left.resultdef).stringtype=st_ansistring) and
(tstringdef(resultdef).encoding<>tstringdef(left.resultdef).encoding) then
begin
result:=ccallnode.createinternres(
'fpc_ansistr_to_ansistr',
ccallparanode.create(
cordconstnode.create(
tstringdef(resultdef).encoding,
u16inttype,
true
),
ccallparanode.create(left,nil)
),
resultdef
);
left:=nil;
end;
end;
function ttypeconvnode.typecheck_char_to_chararray : tnode; function ttypeconvnode.typecheck_char_to_chararray : tnode;
begin begin
@ -1133,11 +1164,12 @@ implementation
hp : tordconstnode; hp : tordconstnode;
begin begin
result:=nil; result:=nil;
if (left.nodetype=ordconstn) and if (left.nodetype=ordconstn)
{and
((torddef(resultdef).ordtype<>uchar) or ((torddef(resultdef).ordtype<>uchar) or
(torddef(left.resultdef).ordtype<>uwidechar) or (torddef(left.resultdef).ordtype<>uwidechar) or
{ >= 128 is replaced by '?' currently -> loses information } { >= 128 is replaced by '?' currently -> loses information }
(tordconstnode(left).value.uvalue<128)) then (tordconstnode(left).value.uvalue<128))} then
begin begin
if (torddef(resultdef).ordtype=uchar) and if (torddef(resultdef).ordtype=uchar) and
(torddef(left.resultdef).ordtype=uwidechar) then (torddef(left.resultdef).ordtype=uwidechar) then
@ -1677,7 +1709,7 @@ implementation
{none} nil, {none} nil,
{equal} nil, {equal} nil,
{not_possible} nil, {not_possible} nil,
{ string_2_string } nil, { string_2_string } @ttypeconvnode.typecheck_string_to_string,
{ char_2_string } @ttypeconvnode.typecheck_char_to_string, { char_2_string } @ttypeconvnode.typecheck_char_to_string,
{ char_2_chararray } @ttypeconvnode.typecheck_char_to_chararray, { char_2_chararray } @ttypeconvnode.typecheck_char_to_chararray,
{ pchar_2_string } @ttypeconvnode.typecheck_pchar_to_string, { pchar_2_string } @ttypeconvnode.typecheck_pchar_to_string,
@ -2205,9 +2237,9 @@ implementation
( (
((not is_widechararray(left.resultdef) and ((not is_widechararray(left.resultdef) and
not is_wide_or_unicode_string(left.resultdef)) or not is_wide_or_unicode_string(left.resultdef)) or
(tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) {or
{ non-ascii chars would be replaced with '?' -> loses info } { non-ascii chars would be replaced with '?' -> loses info }
not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str))) not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str))})
) then ) then
begin begin
tstringconstnode(left).changestringtype(resultdef); tstringconstnode(left).changestringtype(resultdef);

View File

@ -982,6 +982,9 @@ implementation
var var
pw : pcompilerwidestring; pw : pcompilerwidestring;
pc : pchar; pc : pchar;
cp1 : tstringencoding;
cp2 : tstringencoding;
l,l2 : longint;
begin begin
if def.typ<>stringdef then if def.typ<>stringdef then
internalerror(200510011); internalerror(200510011);
@ -999,11 +1002,40 @@ implementation
if (cst_type in [cst_widestring,cst_unicodestring]) and if (cst_type in [cst_widestring,cst_unicodestring]) and
not(tstringdef(def).stringtype in [st_widestring,st_unicodestring]) then not(tstringdef(def).stringtype in [st_widestring,st_unicodestring]) then
begin begin
pw:=pcompilerwidestring(value_str); if (tstringdef(def).encoding=CP_UTF8) then
getmem(pc,getlengthwidestring(pw)+1); begin
unicode2ascii(pw,pc); pw:=pcompilerwidestring(value_str);
donewidestring(pw); l:=(getlengthwidestring(pw)*4)+1;
value_str:=pc; getmem(pc,l);
l2:=UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),getlengthwidestring(pw));
if (l<>l2) then
begin
ReAllocMem(pc,l2);
len:=l2;
end;
donewidestring(pw);
value_str:=pc;
end
else
begin
pw:=pcompilerwidestring(value_str);
getmem(pc,getlengthwidestring(pw)+1);
unicode2ascii(pw,pc,tstringdef(def).encoding);
donewidestring(pw);
value_str:=pc;
end;
end
else
if (tstringdef(def).stringtype = st_ansistring) and
not(cst_type in [cst_widestring,cst_unicodestring]) then
begin
cp1:=tstringdef(def).encoding;
if (cst_type = cst_ansistring) then
cp2:=tstringdef(resultdef).encoding
else if (cst_type in [cst_shortstring,cst_conststring,cst_longstring]) then
cp2:=codepagebyname(current_settings.sourcecodepage);
if cpavailable(cp1) and cpavailable(cp2) then
changecodepage(value_str,len,cp1,value_str,cp2);
end; end;
cst_type:=st2cst[tstringdef(def).stringtype]; cst_type:=st2cst[tstringdef(def).stringtype];
resultdef:=def; resultdef:=def;

View File

@ -903,9 +903,15 @@ begin
end; end;
'm' : 'm' :
begin begin
unicodemapping:=loadunicodemapping(More,More+'.txt'); s:=ExtractFileDir(more);
if assigned(unicodemapping) then if TryStrToInt(ExtractFileName(more),j) then
registermapping(unicodemapping) begin
unicodemapping:=loadunicodemapping(More,More+'.txt',j);
if assigned(unicodemapping) then
registermapping(unicodemapping)
else
IllegalPara(opt);
end
else else
IllegalPara(opt); IllegalPara(opt);
end; end;

View File

@ -176,6 +176,24 @@ implementation
p:=ctypeconvnode.create(p,cwidechartype); p:=ctypeconvnode.create(p,cwidechartype);
do_typecheckpass(p); do_typecheckpass(p);
end; end;
end
else
begin
if is_char(casedef) and is_widechar(p.resultdef) then
begin
if (p.nodetype=ordconstn) then
begin
p:=ctypeconvnode.create(p,cchartype);
do_typecheckpass(p);
end
else if (p.nodetype=rangen) then
begin
trangenode(p).left:=ctypeconvnode.create(trangenode(p).left,cchartype);
trangenode(p).right:=ctypeconvnode.create(trangenode(p).right,cchartype);
do_typecheckpass(trangenode(p).left);
do_typecheckpass(trangenode(p).right);
end;
end;
end; end;
hl1:=0; hl1:=0;
hl2:=0; hl2:=0;

View File

@ -231,6 +231,8 @@ implementation
end; end;
uchar : uchar :
begin begin
if is_constwidecharnode(n) then
inserttypeconv(n,cchartype);
if is_constcharnode(n) or if is_constcharnode(n) or
((m_delphi in current_settings.modeswitches) and ((m_delphi in current_settings.modeswitches) and
is_constwidecharnode(n) and is_constwidecharnode(n) and

View File

@ -4171,6 +4171,19 @@ In case not, the value returned can be arbitrary.
break; break;
end; end;
until false; until false;
//------------------
{ convert existing string to an utf-8 string }
if (not iswidestring) and
(current_settings.sourcecodepage<>default_settings.sourcecodepage) then
begin
if len>0 then
ascii2unicode(@cstringpattern[1],len,patternw)
else
ascii2unicode(nil,len,patternw);
iswidestring:=true;
len:=0;
end;
//-------------------
{ strings with length 1 become const chars } { strings with length 1 become const chars }
if iswidestring then if iswidestring then
begin begin

View File

@ -53,15 +53,21 @@ unit widestr;
function asciichar2unicode(c : char) : tcompilerwidechar; function asciichar2unicode(c : char) : tcompilerwidechar;
function unicode2asciichar(c : tcompilerwidechar) : char; function unicode2asciichar(c : tcompilerwidechar) : char;
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring); procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
procedure unicode2ascii(r : pcompilerwidestring;p : pchar); procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
function hasnonasciichars(const p: pcompilerwidestring): boolean; function hasnonasciichars(const p: pcompilerwidestring): boolean;
function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar; function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
function cpavailable(const s : string) : boolean; function cpavailable(const s : string) : boolean;
function cpavailable(cp : word) : boolean;
procedure changecodepage(
s : pchar; l : SizeInt; scp : tstringencoding;
d : pchar; dcp : tstringencoding
);
function codepagebyname(const s : string) : tstringencoding;
implementation implementation
uses uses
cp8859_1,cp850,cp437, cp8859_1,cp850,cp437,cp1252,
{ cyrillic code pages } { cyrillic code pages }
cp1251,cp866,cp8859_5, cp1251,cp866,cp8859_5,
globals,cutils; globals,cutils;
@ -173,11 +179,14 @@ unit widestr;
end; end;
function unicode2asciichar(c : tcompilerwidechar) : char; function unicode2asciichar(c : tcompilerwidechar) : char;
begin {begin
if word(c)<128 then if word(c)<128 then
unicode2asciichar:=char(word(c)) unicode2asciichar:=char(word(c))
else else
unicode2asciichar:='?'; unicode2asciichar:='?';
end;}
begin
Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
end; end;
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring); procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
@ -211,22 +220,28 @@ unit widestr;
end; end;
end; end;
procedure unicode2ascii(r : pcompilerwidestring;p:pchar); procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
(*
var var
m : punicodemap; m : punicodemap;
i : longint; source : tcompilerwidecharptr;
dest : pchar;
i : longint;
begin begin
m:=getmap(current_settings.sourcecodepage); if (cp = 0) or (cp=CP_NONE) then
{ should be a very good estimation :) } m:=getmap(current_settings.sourcecodepage)
setlengthwidestring(r,length(s)); else
m:=getmap(cp);
// !!!! MBCS // !!!! MBCS
for i:=1 to length(s) do source:=tcompilerwidecharptr(r^.data);
begin dest:=p;
end; for i:=1 to r^.len do
begin
dest^ := getascii(source^,m)[1];
inc(dest);
inc(source);
end;
end; end;
*) (*
var var
source : tcompilerwidecharptr; source : tcompilerwidecharptr;
dest : pchar; dest : pchar;
@ -247,7 +262,7 @@ unit widestr;
inc(source); inc(source);
end; end;
end; end;
*)
function hasnonasciichars(const p: pcompilerwidestring): boolean; function hasnonasciichars(const p: pcompilerwidestring): boolean;
var var
@ -271,4 +286,41 @@ unit widestr;
cpavailable:=mappingavailable(lower(s)); cpavailable:=mappingavailable(lower(s));
end; end;
function cpavailable(cp : word) : boolean;
begin
cpavailable:=mappingavailable(cp);
end;
procedure changecodepage(
s : pchar; l : SizeInt; scp : tstringencoding;
d : pchar; dcp : tstringencoding
);
var
ms, md : punicodemap;
source : pchar;
dest : pchar;
i : longint;
begin
ms:=getmap(scp);
md:=getmap(dcp);
source:=s;
dest:=d;
for i:=1 to l do
begin
dest^ := getascii(getunicode(source^,ms),md)[1];
inc(dest);
inc(source);
end;
end;
function codepagebyname(const s : string) : tstringencoding;
var
p : punicodemap;
begin
Result:=0;
p:=getmap(s);
if (p<>nil) then
Result:=p^.cp;
end;
end. end.

View File

@ -39,6 +39,7 @@ unit charset;
punicodemap = ^tunicodemap; punicodemap = ^tunicodemap;
tunicodemap = record tunicodemap = record
cpname : string[20]; cpname : string[20];
cp : word;
map : punicodecharmapping; map : punicodecharmapping;
lastchar : longint; lastchar : longint;
next : punicodemap; next : punicodemap;
@ -48,10 +49,12 @@ unit charset;
tcp2unicode = class(tcsconvert) tcp2unicode = class(tcsconvert)
end; end;
function loadunicodemapping(const cpname,f : string) : punicodemap; function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
procedure registermapping(p : punicodemap); procedure registermapping(p : punicodemap);
function getmap(const s : string) : punicodemap; function getmap(const s : string) : punicodemap;
function getmap(cp : word) : punicodemap;
function mappingavailable(const s : string) : boolean; function mappingavailable(const s : string) : boolean;
function mappingavailable(cp :word) : boolean;
function getunicode(c : char;p : punicodemap) : tunicodechar; function getunicode(c : char;p : punicodemap) : tunicodechar;
function getascii(c : tunicodechar;p : punicodemap) : string; function getascii(c : tunicodechar;p : punicodemap) : string;
@ -60,7 +63,7 @@ unit charset;
var var
mappings : punicodemap; mappings : punicodemap;
function loadunicodemapping(const cpname,f : string) : punicodemap; function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
var var
data : punicodecharmapping; data : punicodecharmapping;
@ -155,6 +158,7 @@ unit charset;
new(p); new(p);
p^.lastchar:=lastchar; p^.lastchar:=lastchar;
p^.cpname:=cpname; p^.cpname:=cpname;
p^.cp:=cp;
p^.internalmap:=false; p^.internalmap:=false;
p^.next:=nil; p^.next:=nil;
p^.map:=data; p^.map:=data;
@ -196,6 +200,36 @@ unit charset;
hp:=hp^.next; hp:=hp^.next;
end; end;
getmap:=nil; getmap:=nil;
end;////////
function getmap(cp : word) : punicodemap;
var
hp : punicodemap;
const
mapcache : word = 0;
mapcachep : punicodemap = nil;
begin
if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
begin
getmap:=mapcachep;
exit;
end;
hp:=mappings;
while assigned(hp) do
begin
if hp^.cp=cp then
begin
getmap:=hp;
mapcache:=cp;
mapcachep:=hp;
exit;
end;
hp:=hp^.next;
end;
getmap:=nil;
end; end;
function mappingavailable(const s : string) : boolean; function mappingavailable(const s : string) : boolean;
@ -204,6 +238,12 @@ unit charset;
mappingavailable:=getmap(s)<>nil; mappingavailable:=getmap(s)<>nil;
end; end;
function mappingavailable(cp : word) : boolean;
begin
mappingavailable:=getmap(cp)<>nil;
end;
function getunicode(c : char;p : punicodemap) : tunicodechar; function getunicode(c : char;p : punicodemap) : tunicodechar;
begin begin
@ -219,8 +259,8 @@ unit charset;
i : longint; i : longint;
begin begin
{ at least map to space } { at least map to '?' }
getascii:=#32; getascii:=#63;
for i:=0 to p^.lastchar do for i:=0 to p^.lastchar do
if p^.map[i].unicode=c then if p^.map[i].unicode=c then
begin begin

View File

@ -23,9 +23,8 @@
a punicodechar that points to : a punicodechar that points to :
@-8 : SizeInt for reference count; @-8 : SizeInt for reference count;
@-4 : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply @-4 : SizeInt for size; size=number of chars. Multiply with
with sizeof(UnicodeChar) to convert. This is needed to be compatible with Delphi and sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
Windows COM BSTR.
@ : String + Terminating #0; @ : String + Terminating #0;
Punicodechar(Unicodestring) is a valid typecast. Punicodechar(Unicodestring) is a valid typecast.
So WS[i] is converted to the address @WS+i-1. So WS[i] is converted to the address @WS+i-1.
@ -811,6 +810,8 @@ begin
{$ifndef FPC_HAS_CPSTRING} {$ifndef FPC_HAS_CPSTRING}
cp:=$ffff; cp:=$ffff;
{$endif FPC_HAS_CPSTRING} {$endif FPC_HAS_CPSTRING}
if cp=$ffff then
cp:=DefaultSystemCodePage;
widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1); widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
end; end;
@ -1552,10 +1553,10 @@ begin
exit; exit;
if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
begin begin
L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len div sizeof(UnicodeChar); L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
SNew:=NewUnicodeString (L); SNew:=NewUnicodeString (L);
Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar)); Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L * sizeof(UnicodeChar); PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L;
fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe } fpc_unicodestr_decr_ref (Pointer(S)); { Thread safe }
pointer(S):=SNew; pointer(S):=SNew;
pointer(result):=SNew; pointer(result):=SNew;

View File

@ -20,8 +20,9 @@ program creumap;
procedure doerror; procedure doerror;
begin begin
writeln('Usage: creumap <cpname>'); writeln('Usage: creumap <cpname> <cpnumber>');
writeln('A mapping file called <cpname>.txt must be present'); writeln('cpname : A mapping file called <cpname>.txt must be present');
writeln('cpnumber : the code page number');
halt(1); halt(1);
end; end;
@ -29,11 +30,16 @@ program creumap;
p : punicodemap; p : punicodemap;
i : longint; i : longint;
t : text; t : text;
e : word;
begin begin
if paramcount<>1 then if paramcount<>2 then
doerror; doerror;
p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt'); Val(paramstr(2),i,e);
if e<>0 then
doerror;
p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt',i);
if p=nil then if p=nil then
doerror; doerror;
assign(t,paramstr(1)+'.pp'); assign(t,paramstr(1)+'.pp');
@ -69,6 +75,7 @@ begin
writeln(t); writeln(t);
writeln(t,' unicodemap : tunicodemap = ('); writeln(t,' unicodemap : tunicodemap = (');
writeln(t,' cpname : ''',p^.cpname,''';'); writeln(t,' cpname : ''',p^.cpname,''';');
writeln(t,' cp : ',p^.cp,';');
writeln(t,' map : @map;'); writeln(t,' map : @map;');
writeln(t,' lastchar : ',p^.lastchar,';'); writeln(t,' lastchar : ',p^.lastchar,';');
writeln(t,' next : nil;'); writeln(t,' next : nil;');