diff --git a/demo/wasienv/dom/WasiDomTest1.lpr b/demo/wasienv/dom/WasiDomTest1.lpr index 3eca457..174f72a 100644 --- a/demo/wasienv/dom/WasiDomTest1.lpr +++ b/demo/wasienv/dom/WasiDomTest1.lpr @@ -116,6 +116,17 @@ type procedure TestFuncResultUnicodeString; procedure TestFuncResultUTF8String; procedure TestFuncResultBird; + // todo procedure TestFuncResultVariant; + + // function args + // todo procedure TestFuncArgBoolean; + // todo procedure TestFuncArgInteger; + // todo procedure TestFuncArgDouble; + // todo procedure TestFuncArgUnicodeString; + // todo procedure TestFuncArgUTF8String; + // todo procedure TestFuncArgBird; + // todo procedure TestFuncArgMethod; + // todo procedure TestFuncArgVariant; end; @@ -150,7 +161,7 @@ begin TestFuncResultBoolean; TestFuncResultInteger; TestFuncResultDouble; - //TestFuncResultUnicodeString; + TestFuncResultUnicodeString; TestFuncResultUTF8String; TestFuncResultBird; diff --git a/demo/wasienv/dom/job_js.pas b/demo/wasienv/dom/job_js.pas index ef25452..b1511a9 100644 --- a/demo/wasienv/dom/job_js.pas +++ b/demo/wasienv/dom/job_js.pas @@ -994,6 +994,8 @@ function __job_get_global( function JOBCallback(const Func: TJOBCallback; Data, Code: Pointer; Args: PByte): PByte; function VarRecToJSValue(const V: TVarRec): TJOB_JSValue; +function UTF8CodepointToUnicode(p: PChar; out CodepointLen: integer): Cardinal; +function UTF8AsUTF16Len(p: PChar; l: NativeInt): NativeInt; implementation @@ -1082,7 +1084,7 @@ begin Result:=TJOB_Double.Create(V.VExtended^); {$endif} vtString: - Result:=TJOB_String.Create(UnicodeString(V.VString^)); + Result:=TJOB_String.Create(UTF8Decode(V.VString^)); vtPointer: begin p:=V.VPointer; @@ -1097,7 +1099,7 @@ begin begin CurLen:=strlen(V.VPChar); SetString(S,V.VPChar,CurLen); - Result:=TJOB_String.Create(UnicodeString(S)); + Result:=TJOB_String.Create(UTF8Decode(S)); end; vtObject: begin @@ -1118,7 +1120,7 @@ begin vtPWideChar: raise EJSArgParse.Create('VarRecToJSValue vtPWideChar not supported'); vtAnsiString: - Result:=TJOB_String.Create(UnicodeString(PAnsiString(V.VAnsiString)^)); + Result:=TJOB_String.Create(UTF8Decode(PAnsiString(V.VAnsiString)^)); vtCurrency: Result:=TJOB_Double.Create(V.VCurrency^); vtVariant: @@ -1141,6 +1143,120 @@ begin end; end; +function UTF8CodepointToUnicode(p: PChar; out CodepointLen: integer): Cardinal; +{ if p=nil then CodepointLen=0 otherwise CodepointLen>0 + If there is an encoding error the Result is 0 and CodepointLen=1. + It does not check if the codepoint is defined in the Unicode tables. +} +begin + if p<>nil then begin + if ord(p^)<%11000000 then begin + // regular single byte character (#0 is a normal char, this is pascal ;) + Result:=ord(p^); + CodepointLen:=1; + end + else if ((ord(p^) and %11100000) = %11000000) then begin + // starts with %110 => could be double byte character + if (ord(p[1]) and %11000000) = %10000000 then begin + CodepointLen:=2; + Result:=((ord(p^) and %00011111) shl 6) or (ord(p[1]) and %00111111); + if Result<(1 shl 7) then begin + // wrong encoded, could be an XSS attack + Result:=0; + end; + end else begin + Result:=ord(p^); + CodepointLen:=1; + end; + end + else if ((ord(p^) and %11110000) = %11100000) then begin + // starts with %1110 => could be triple byte character + if ((ord(p[1]) and %11000000) = %10000000) + and ((ord(p[2]) and %11000000) = %10000000) then begin + CodepointLen:=3; + Result:=((ord(p^) and %00011111) shl 12) + or ((ord(p[1]) and %00111111) shl 6) + or (ord(p[2]) and %00111111); + if Result<(1 shl 11) then begin + // wrong encoded, could be an XSS attack + Result:=0; + end; + end else begin + Result:=ord(p^); + CodepointLen:=1; + end; + end + else if ((ord(p^) and %11111000) = %11110000) then begin + // starts with %11110 => could be 4 byte character + if ((ord(p[1]) and %11000000) = %10000000) + and ((ord(p[2]) and %11000000) = %10000000) + and ((ord(p[3]) and %11000000) = %10000000) then begin + CodepointLen:=4; + Result:=((ord(p^) and %00001111) shl 18) + or ((ord(p[1]) and %00111111) shl 12) + or ((ord(p[2]) and %00111111) shl 6) + or (ord(p[3]) and %00111111); + if Result<(1 shl 16) then begin + // wrong encoded, could be an XSS attack + Result:=0; + end else if Result>$10FFFF then begin + // out of range + Result:=0; + end; + end else begin + Result:=ord(p^); + CodepointLen:=1; + end; + end + else begin + // invalid character + Result:=ord(p^); + CodepointLen:=1; + end; + end else begin + Result:=0; + CodepointLen:=0; + end; +end; + +function UTF8AsUTF16Len(p: PChar; l: NativeInt): NativeInt; +// l<0 means count til #0 +var + MaxP: PChar; + c: Char; + CodepointLen: integer; + CodePoint: Cardinal; +begin + Result:=0; + writeln('BBB1 UTF8AsUTF16Len ',p<>nil,' l=',l); + if (p=nil) or (l=0) then + exit + else if l>0 then + MaxP:=p+l; + repeat + c:=p^; + if (c=#0) and (l<0) then + // end at #0 + break + else if c<#192 then + begin + inc(Result); + inc(p); + end + else begin + CodePoint:=UTF8CodepointToUnicode(p,CodepointLen); + inc(p,CodepointLen); + case CodePoint of + 0..$D7FF: inc(Result); + $D800..$DFFF: raise EJSArgParse.Create('invalid UTF8'); + $E000..$10000: inc(Result); + else + inc(Result,2); + end; + end; + until (l>0) and (p>=MaxP); +end; + function JOBCallTJSPromiseResolver(const aMethod: TMethod; var H: TJOBCallbackHelper): PByte; var aValue: TJOB_JSValue; @@ -2146,7 +2262,7 @@ begin vtChar: CurName:=UnicodeString(Pairs[i].VChar); vtString: - CurName:=UnicodeString(Pairs[i].VString^); + CurName:=UTF8Decode(Pairs[i].VString^); vtPChar: begin CurLen:=strlen(Pairs[i].VPChar); @@ -2155,7 +2271,7 @@ begin vtWideChar: CurName:=Pairs[i].VWideChar; vtAnsiString: - CurName:=UnicodeString(PAnsiString(Pairs[i].VAnsiString)^); + CurName:=UTF8Decode(PAnsiString(Pairs[i].VAnsiString)^); vtUnicodeString: CurName:=PUnicodeString(Pairs[i].VUnicodeString)^; else @@ -2353,6 +2469,7 @@ function TJSObject.CreateInvokeJSArgs(const Args: array of const): PByte; var p: PByte; + Len: NativeInt; function SizeOfTJOB_JSValue(JSValue: TJOB_JSValue): integer; var @@ -2395,8 +2512,21 @@ var end; end; + procedure Grow(Need: NativeInt); + begin + inc(Need,p-Result); + if Need<=Len then exit; + Len:=Len*2; + if Len255 then raise EJSInvoke.Create('Invoke js: too many args'); - Len:=1; - for i:=0 to high(Args) do - begin - {$IFDEF VerboseInvokeJSArgs} - writeln('TJSObject.CreateInvokeJSArgs ',i,' VType=',Args[i].VType); - {$ENDIF} - case Args[i].VType of - vtInteger : inc(Len,5); - vtBoolean : inc(Len); - vtChar, - vtWideChar : inc(Len,3); - {$ifndef FPUNONE} - vtExtended: - begin - d:=double(Args[i].VExtended^); - if d=0 then ; - inc(Len,9); - end; - {$endif} - vtString : inc(Len,1+SizeOf(NativeInt)+SizeOf(PByte)); - vtPointer: - begin - p:=Args[i].VPointer; - if p=JOB_Undefined then - inc(Len) - else - inc(Len,1+SizeOf(PByte)); - end; - vtPChar: - begin - // check length - strlen(Args[i].VPChar); - inc(Len,1+SizeOf(NativeInt)+SizeOf(PByte)); - end; - vtObject: - begin - Obj:=Args[i].VObject; - if Obj=nil then - inc(Len,1) - else if Obj is TJSObject then - inc(Len,1+sizeof(TJOBObjectID)) - else if Obj is TJOB_JSValue then - begin - JSValue:=TJOB_JSValue(Obj); - inc(Len,SizeOfTJOB_JSValue(JSValue)); - end else - RaiseNotSupported('object'); - end; - vtClass : RaiseNotSupported('class'); - vtPWideChar: - begin - // check length - strlen(Args[i].VPWideChar); - inc(Len,1+SizeOf(NativeInt)+SizeOf(PByte)); - end; - vtAnsiString: - inc(Len,1+SizeOf(NativeInt)+SizeOf(PByte)); - vtCurrency : RaiseNotSupported('currency'); - {$ifdef FPC_HAS_FEATURE_VARIANTS} - vtVariant : RaiseNotSupported('variant'); - {$endif FPC_HAS_FEATURE_VARIANTS} - vtInterface: - begin - p:=Args[i].VInterface; - if p=nil then - inc(Len,1) - else if IInterface(p) is IJSObject then - inc(Len,1+sizeof(TJOBObjectID)) - else - RaiseNotSupported('interface'); - end; - vtWideString: - inc(Len,1+SizeOf(NativeInt)+SizeOf(PByte)); - vtInt64: - begin - i64:=Args[i].VInt64^; - if (i64MaxSafeIntDouble) then - RaiseRange; - if (i64>=low(longint)) and (i64<=high(longint)) then - inc(Len,5) - else - inc(Len,9); - end; - vtUnicodeString: - inc(Len,1+SizeOf(NativeInt)+SizeOf(PByte)); - vtQWord: - begin - qw:=Args[i].VQWord^; - if (qw>MaxSafeIntDouble) then - RaiseRange; - if (qw<=high(longint)) then - inc(Len,5) - else - inc(Len,9); - end; - else - RaiseNotSupported(IntToStr(Args[i].VType)); - end; - end; - + Len:=1+length(Args); Result:=GetMem(Len); - p:=Result; - p^:=length(Args); - inc(p); - for i:=0 to high(Args) do - begin - case Args[i].VType of - vtInteger: - AddLongInt(Args[i].VInteger); - vtBoolean: - AddBoolean(Args[i].VBoolean); - {$ifndef FPUNONE} - vtExtended: - AddDouble(double(Args[i].VExtended^)); - {$endif} - vtChar: - AddChar(ord(Args[i].VChar)); - vtWideChar: - AddChar(ord(Args[i].VWideChar)); - vtString: - begin - // shortstring - h:=PByte(Args[i].VString); - AddUTF8String(h+1,h^); - end; - vtPointer: - begin - h:=Args[i].VPointer; - if h=nil then + ok:=false; + try + p:=Result; + + p^:=length(Args); + inc(p); + for i:=0 to high(Args) do + begin + case Args[i].VType of + vtInteger: + AddLongInt(Args[i].VInteger); + vtBoolean: + AddBoolean(Args[i].VBoolean); + vtExtended: + AddDouble(double(Args[i].VExtended^)); + vtChar: + AddChar(ord(Args[i].VChar)); + vtWideChar: + AddChar(ord(Args[i].VWideChar)); + vtString: begin - p^:=JOBArgNil; - inc(p); - end else if h=JOB_Undefined then - begin - p^:=JOBArgUndefined; - inc(p); - end - else begin - p^:=JOBArgPointer; - inc(p); - PPointer(p)^:=h; - inc(p,sizeof(Pointer)); + // shortstring + h:=PByte(Args[i].VString); + AddUTF8String(h+1,h^); end; - end; - vtPChar: - begin - h:=PByte(Args[i].VPChar); - AddUTF8String(h,strlen(PChar(h))); - end; - vtObject: - begin - Obj:=Args[i].VObject; - if Obj=nil then + vtPointer: begin - p^:=JOBArgNil; - inc(p); - end else if Obj is TJSObject then - AddObjectID(TJSObject(Obj).JOBObjectID) - else if Obj is TJOB_JSValue then + h:=Args[i].VPointer; + if h=nil then + begin + Grow(1); + p^:=JOBArgNil; + inc(p); + end else if h=JOB_Undefined then + begin + Grow(1); + p^:=JOBArgUndefined; + inc(p); + end + else begin + Grow(1+SizeOf(Pointer)); + p^:=JOBArgPointer; + inc(p); + PPointer(p)^:=h; + inc(p,sizeof(Pointer)); + end; + end; + vtPChar: begin - JSValue:=TJOB_JSValue(Obj); - Add_TJOB_JSValue(JSValue); - end else - RaiseNotSupported(Obj.ClassName); - end; - vtClass: ; - vtPWideChar: - begin - h:=PByte(Args[i].VPWideChar); - AddUnicodeString(h,strlen(PWideChar(h))); - end; - vtAnsiString: - begin - h:=Args[i].VAnsiString; - s:=AnsiString(h); - AddUTF8String(h,length(s)); - end; - vtCurrency : ; - {$ifdef FPC_HAS_FEATURE_VARIANTS} - vtVariant : ; - {$endif FPC_HAS_FEATURE_VARIANTS} - vtInterface: - begin - h:=Args[i].VInterface; - AddIJSObject(IJSObject(h)); - end; - vtWideString: - begin - h:=Args[i].VWideString; - ws:=WideString(h); - AddUnicodeString(h,length(ws)); - end; - vtInt64: - begin - i64:=Args[i].VInt64^; - if (i64>=low(longint)) and (i64<=high(longint)) then - AddLongInt(i64) - else - AddDouble(i64); - end; - vtUnicodeString: - begin - h:=Args[i].VUnicodeString; - us:=UnicodeString(h); - AddUnicodeString(h,length(us)); - end; - vtQWord: - begin - qw:=Args[i].VQWord^; - if (qw<=high(longint)) then - AddLongInt(qw) - else - AddDouble(qw); + h:=PByte(Args[i].VPChar); + AddUTF8String(h,strlen(PChar(h))); + end; + vtObject: + begin + Obj:=Args[i].VObject; + if Obj=nil then + begin + Grow(1); + p^:=JOBArgNil; + inc(p); + end else if Obj is TJSObject then + AddObjectID(TJSObject(Obj).JOBObjectID) + else if Obj is TJOB_JSValue then + begin + JSValue:=TJOB_JSValue(Obj); + Add_TJOB_JSValue(JSValue); + end else + RaiseNotSupported(Obj.ClassName); + end; + vtClass: ; + vtPWideChar: + begin + h:=PByte(Args[i].VPWideChar); + AddUnicodeString(h,strlen(PWideChar(h))); + end; + vtAnsiString: + begin + h:=Args[i].VAnsiString; + s:=AnsiString(h); + AddUTF8String(h,length(s)); + end; + vtCurrency : RaiseNotSupported('currency'); + vtVariant : RaiseNotSupported('variant'); + vtInterface: + begin + h:=Args[i].VInterface; + AddIJSObject(IJSObject(h)); + end; + vtWideString: + begin + h:=Args[i].VWideString; + ws:=WideString(h); + AddUnicodeString(h,length(ws)); + end; + vtInt64: + begin + i64:=Args[i].VInt64^; + if (i64>=low(longint)) and (i64<=high(longint)) then + AddLongInt(i64) + else + AddDouble(i64); + end; + vtUnicodeString: + begin + h:=Args[i].VUnicodeString; + us:=UnicodeString(h); + AddUnicodeString(h,length(us)); + end; + vtQWord: + begin + qw:=Args[i].VQWord^; + if (qw<=high(longint)) then + AddLongInt(qw) + else + AddDouble(qw); + end; + else + RaiseNotSupported(IntToStr(Args[i].VType)); end; end; + Len:=p-Result; + ReAllocMem(Result,Len); + ok:=true; + finally + if not ok then + FreeMemAndNil(Result); end; {$IFDEF VerboseInvokeJSArgs} @@ -2945,7 +3028,7 @@ end; function TJSObject.InvokeJSUtf8StringResult(const aName: string; const args: array of const; Invoke: TJOBInvokeType): String; begin - Result:=String(InvokeJSUnicodeStringResult(aName,Args,Invoke)); + Result:=UTF8Encode(InvokeJSUnicodeStringResult(aName,Args,Invoke)); end; function TJSObject.InvokeJSLongIntResult(const aName: string; diff --git a/packages/job/job_browser.pp b/packages/job/job_browser.pp index aac00b9..2e9cf59 100644 --- a/packages/job/job_browser.pp +++ b/packages/job/job_browser.pp @@ -513,15 +513,14 @@ var end; end; - function ReadUtf8String: String; + function ReadString: String; var - Len, Ptr: TWasmNativeInt; - aBytes: TJSUint8Array; + Len: TWasmNativeInt; + aWords: TJSUint16Array; begin Len:=ReadWasmNativeInt; - Ptr:=ReadWasmNativeInt; - aBytes:=TJSUint8Array.New(View.buffer, Ptr,Len); - Result:=TypedArrayToString(aBytes); + aWords:=TJSUint16Array.New(View.buffer, p,Len); + Result:=TypedArrayToString(aWords); end; function ReadUnicodeString: String; @@ -610,8 +609,8 @@ var Result:=chr(View.getUint16(p,env.IsLittleEndian)); inc(p,2); end; - JOBArgUTF8String: - Result:=ReadUtf8String; + JOBArgString: + Result:=ReadString; JOBArgUnicodeString: Result:=ReadUnicodeString; JOBArgNil: diff --git a/packages/job/job_shared.pp b/packages/job/job_shared.pp index 2af146b..90d730e 100644 --- a/packages/job/job_shared.pp +++ b/packages/job/job_shared.pp @@ -70,7 +70,7 @@ const JOBArgTrue = 3; JOBArgFalse = 4; JOBArgChar = 5; // followed by a word - JOBArgUTF8String = 6; // followed by length and pointer + JOBArgString = 6; // followed by length and UTF-16 data JOBArgUnicodeString = 7; // followed by length and pointer JOBArgNil = 8; JOBArgPointer = 9;