From cc15c3c2de57d0039fbdcad6632349c26e6c78c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Van=20Canneyt?= Date: Tue, 10 Sep 2024 16:51:55 +0200 Subject: [PATCH] * Regexp for wasm (using browser API) --- packages/vcl-compat/fpmake.pp | 11 +- .../src/system.regularexpressionscore.pp | 30 +- packages/wasm-utils/fpmake.pp | 4 + packages/wasm-utils/src/wasm.pcrebridge.pas | 406 ++++++++++++++++++ 4 files changed, 439 insertions(+), 12 deletions(-) create mode 100644 packages/wasm-utils/src/wasm.pcrebridge.pas diff --git a/packages/vcl-compat/fpmake.pp b/packages/vcl-compat/fpmake.pp index b4a866c7f5..8d35ba7d42 100644 --- a/packages/vcl-compat/fpmake.pp +++ b/packages/vcl-compat/fpmake.pp @@ -37,6 +37,7 @@ begin P.Dependencies.Add('fcl-hash'); P.Dependencies.Add('hash'); P.Dependencies.Add('libpcre',[Win64,Linux,darwin]); + P.Dependencies.Add('wasm-utils',[wasi]); P.SourcePath.Add('src'); P.IncludePath.Add('src'); @@ -64,12 +65,12 @@ begin T.ResourceStrings := True; T:=P.Targets.AddUnit('system.credentials.pp'); T.ResourceStrings := True; - T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp',[Win64,Linux,darwin]); + T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp',[Win64,Linux,darwin,wasi]); T.ResourceStrings := True; - T:=P.Targets.AddUnit('system.regularexpressionscore.pp',[Win64,Linux,darwin]); - T.Dependencies.AddUnit('system.regularexpressionsconsts',[Win64,Linux,darwin]); - T:=P.Targets.AddUnit('system.regularexpressions.pp',[Win64,Linux,darwin]); - T.Dependencies.AddUnit('system.regularexpressionscore',[Win64,Linux,darwin]); + T:=P.Targets.AddUnit('system.regularexpressionscore.pp',[Win64,Linux,darwin,wasi]); + T.Dependencies.AddUnit('system.regularexpressionsconsts',[Win64,Linux,darwin,wasi]); + T:=P.Targets.AddUnit('system.regularexpressions.pp',[Win64,Linux,darwin,wasi]); + T.Dependencies.AddUnit('system.regularexpressionscore',[Win64,Linux,darwin,wasi]); T:=P.Targets.AddUnit('system.threading.pp',AllOSes-[go32v2,nativent,atari]); T.ResourceStrings := True; diff --git a/packages/vcl-compat/src/system.regularexpressionscore.pp b/packages/vcl-compat/src/system.regularexpressionscore.pp index 71153df96c..0756e9da47 100755 --- a/packages/vcl-compat/src/system.regularexpressionscore.pp +++ b/packages/vcl-compat/src/system.regularexpressionscore.pp @@ -37,9 +37,29 @@ interface uses {$IFDEF FPC_DOTTEDUNITS} - System.SysUtils, System.Classes, System.Contnrs, {$IFNDEF USEWIDESTRING} Api.PCRE2_8 {$ELSE} Api.PCRE2_16 {$ENDIF}, System.CTypes, System.RegularExpressionsConsts; + System.SysUtils, System.Classes, System.Contnrs, System.CTypes, + {$IFNDEF CPUWASM} + {$IFNDEF USEWIDESTRING} + Api.PCRE2_8 + {$ELSE} + Api.PCRE2_16 + {$ENDIF}, + {$ELSE} + wasm.pcrebridge, + {$ENDIF} + System.RegularExpressionsConsts; {$ELSE} - SysUtils, Classes, Contnrs, {$IFNDEF USEWIDESTRING} libpcre2_8 {$ELSE} libpcre2_16 {$ENDIF}, ctypes, System.RegularExpressionsConsts; + SysUtils, Classes, Contnrs,ctypes, + {$IFNDEF CPUWASM} + {$IFNDEF USEWIDESTRING} + libpcre2_8 + {$ELSE} + libpcre2_16 + {$ENDIF}, + {$ELSE} + wasm.pcrebridge, + {$ENDIF} + System.RegularExpressionsConsts; {$ENDIF} const @@ -642,7 +662,6 @@ begin if (FCode=nil) then raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]); FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil); - end; procedure TPerlRegEx.Study; @@ -705,8 +724,7 @@ end; function TPerlRegEx.GetNames(aIndex : Integer): TREString; var Ptr : PCRE2_SPTR; - N,I : Integer; - tblName : TREString; + I : Integer; begin Ptr:=FNameTable; @@ -715,10 +733,8 @@ begin for i:=0 to aIndex-1 do Inc(Ptr,FNameEntrySize); {$IFDEF USEWIDESTRING} - n:=ord(ptr[0]); Result:=GetStrLen((Ptr+1),FNameEntrySize-2); {$ELSE} - n:=(ord(ptr[0]) shl 8) or ord(ptr[1]); Result:=GetStrLen((Ptr+2),FNameEntrySize-3); {$ENDIF} end; diff --git a/packages/wasm-utils/fpmake.pp b/packages/wasm-utils/fpmake.pp index c95ac5fa85..6f763d5107 100644 --- a/packages/wasm-utils/fpmake.pp +++ b/packages/wasm-utils/fpmake.pp @@ -50,6 +50,10 @@ begin T:=P.Targets.AddUnit('wasm.regexp.objects.pas'); T.Dependencies.AddUnit('wasm.regexp.api'); T.Dependencies.AddUnit('wasm.regexp.shared'); + T:=P.Targets.AddUnit('wasm.pcrebridge.pas'); + T.Dependencies.AddUnit('wasm.regexp.api'); + T.Dependencies.AddUnit('wasm.regexp.shared'); + T.Dependencies.AddUnit('wasm.regexp.objects'); {$ifndef ALLPACKAGES} Run; diff --git a/packages/wasm-utils/src/wasm.pcrebridge.pas b/packages/wasm-utils/src/wasm.pcrebridge.pas new file mode 100644 index 0000000000..fa6de49a20 --- /dev/null +++ b/packages/wasm-utils/src/wasm.pcrebridge.pas @@ -0,0 +1,406 @@ +unit wasm.pcrebridge; + +{$mode ObjFPC}{$H+} + +interface + +uses +{$IFDEF FPC_DOTTEDUNITS} + System.CTypes, System.SysUtils, +{$ELSE} + ctypes, sysutils, +{$ENDIF} + wasm.regexp.objects; + +{$IF SIZEOF(CHAR)=2} +{$DEFINE STRING_IS_UNICODE} +{$ENDIF} + +const + PCRE2_NEWLINE_ANY = 1; + PCRE2_UTF = 1 shl 1; + PCRE2_CASELESS = 1 shl 2; + PCRE2_MULTILINE = 1 shl 3; + PCRE2_DOTALL = 1 shl 4; + PCRE2_EXTENDED = 1 shl 5; + PCRE2_ANCHORED = 1 shl 6; + PCRE2_UNGREEDY = 1 shl 7; + PCRE2_NO_AUTO_CAPTURE = 1 shl 8; + PCRE2_ALLOW_EMPTY_CLASS = 1 shl 9; + PCRE2_ALT_BSUX = 1 shl 10; + PCRE2_ALT_CIRCUMFLEX = 1 shl 11; + PCRE2_ALT_VERBNAMES = 1 shl 12; + PCRE2_DOLLAR_ENDONLY = 1 shl 13; + PCRE2_DUPNAMES = 1 shl 14; + PCRE2_ENDANCHORED = 1 shl 15; + PCRE2_FIRSTLINE = 1 shl 16; + PCRE2_LITERAL = 1 shl 17; + PCRE2_MATCH_INVALID_UTF = 1 shl 18; + PCRE2_MATCH_UNSET_BACKREF = 1 shl 19; + PCRE2_NEVER_BACKSLASH_C = 1 shl 20; + PCRE2_NO_AUTO_POSSESS = 1 shl 21; + PCRE2_NO_DOTSTAR_ANCHOR = 1 shl 22; + PCRE2_NO_START_OPTIMIZE = 1 shl 23; + PCRE2_NO_UTF_CHECK = 1 shl 24; + PCRE2_USE_OFFSET_LIMIT = 1 shl 25; + + PCRE2_ERROR_NOMATCH = -(1); + PCRE2_ERROR_PARTIAL = -(2); + PCRE2_ERROR_WASM = -(200); + + PCRE2_INFO_ALLOPTIONS = 0; + PCRE2_INFO_NAMECOUNT = 1; + PCRE2_INFO_NAMETABLE = 2; + PCRE2_INFO_NEWLINE = 3; + PCRE2_INFO_NAMEENTRYSIZE = 4; + + PCRE2_NEWLINE_CR = 1; + PCRE2_NEWLINE_LF = 2; + PCRE2_NEWLINE_CRLF = 3; + + PCRE2_NEWLINE_ANYCRLF = 5; + PCRE2_NEWLINE_NUL = 6; + PCRE2_BSR_UNICODE = 1; + PCRE2_BSR_ANYCRLF = 2; + + PCRE2_NOTEMPTY_ATSTART = 1; + + +Type + tsize_t = csize_t; + Psize_t = ^tsize_t; + PCRE2_SIZE = tsize_t; + PTsize_t = ^Tsize_t; + PPTsize_t = ^PTsize_t; + tuint32_t = cardinal; + Tint32_t = cuint32; + Tcint = cint; + + { TPCREWasmRegExp } + + TPCREWasmRegExp = Class(TWasmRegExp) + matchindex : Integer; + lastRes : TRegExpResult; + haveres : Boolean; + lasterror : String; + OVector : Array of Tsize_t; + FNamesTable : String; + FNamesTableEntrySize : Integer; + private + procedure CreateNamesTable; + function GetCaptureIndex(aStartIndex, aStopIndex: Integer): Integer; + function HandleExec(aOffset: TSize_t; const S: String): Tcint; + end; + + TPCRE2_SPTR8 = PAnsichar; + TPCRE2_SPTR16 = PWideChar; + Ppcre2_code_8 = TPCREWasmRegExp; + Ppcre2_code_16 = TPCREWasmRegExp; + PTpcre2_code_8 = Ppcre2_code_8; + PTpcre2_code_16 = Ppcre2_code_16; + PTpcre2_compile_context_8 = Pointer; + PTpcre2_match_data_8 = TPCREWasmRegExp; + PTpcre2_general_context_8 = Pointer; + PTpcre2_match_context_8 = Pointer; + ppcre2_match_data = PTpcre2_match_data_8; + {$IFDEF STRING_IS_UNICODE} + PCRE2_SPTR = PWideChar; + {$ELSE} + PCRE2_SPTR = PAnsiChar; + {$ENDIF} + PTPCRE2_UCHAR8 = PAnsiChar; + + + +function pcre2_compile(RegExp: TPCRE2_SPTR8; RegexLen: Tsize_t; Opts: Tuint32_t; ErrorNr: Pcint; ErrorPos: Psize_t; Context: PTpcre2_compile_context_8):Ppcre2_code_8; +procedure pcre2_code_free(Regexp:PTpcre2_code_8); + +function pcre2_match_w(RegExp:PTpcre2_code_8; aSubject: TPCRE2_SPTR16; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t; MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8):Tcint; +function pcre2_match(RegExp:PTpcre2_code_8; aSubject: TPCRE2_SPTR8; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t; MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8):Tcint; +function pcre2_match_data_create_from_pattern(RegExp: PTpcre2_code_8; aContext: PTpcre2_general_context_8):PTpcre2_match_data_8; +procedure pcre2_match_data_free(aMatchData:PTpcre2_match_data_8); + +function pcre2_pattern_info(RegExp:PTpcre2_code_8; Info: Tuint32_t; Res: pointer):Tcint; + +function pcre2_get_ovector_pointer (match: PTpcre2_match_data_8): Psize_t; +function pcre2_get_startchar(match: PTpcre2_match_data_8): Tsize_t; +function pcre2_get_error_message(ErrNo:Tcint; aString :PTPCRE2_UCHAR8; aStrlen:Tsize_t):Tcint; + +function libpcre28loaded : Boolean; +procedure Loadlibpcre28; + + +implementation + +var + gLastError : String; + +function OptsToFlags(Opts : Tuint32_t) : TRegexpFlags; + + Procedure Check(PerlFlag :Tuint32_t; RegexFlag :TRegexpFlag); + begin + if (Opts and PerlFlag)<>0 then + Include(Result,RegexFlag); + end; + +begin + Result:=[rfIndices,rfGlobal]; + Check(PCRE2_MULTILINE,rfMultiLine); + Check(PCRE2_CASELESS,rfIgnoreCase); + Check(PCRE2_UTF,rfUnicode); + Check(PCRE2_DOTALL,rfDotAll); + (* The rest is not supported *) +end; + +function pcre2_compile (RegExp:TPCRE2_SPTR8; RegexLen:Tsize_t; Opts :Tuint32_t; ErrorNr :Pcint; ErrorPos:Psize_t; Context :PTpcre2_compile_context_8):Ppcre2_code_8; + +var + F : TRegexpFlags ; + S : String; + +begin + Result:=Nil; + SetLength(S,RegexLen); + Move(Regexp^,S[1],SizeOf(Char)*RegexLen); + F:=OptsToFlags(Opts); + try + Result:=TPCREWasmRegExp.Create(S,F); + except + on E : Exception do + begin + ErrorNr^:=PCRE2_ERROR_WASM; // Does not exist (yet) + gLastError:=E.Message; + ErrorPos^:=0; + end; + end; +end; + +procedure pcre2_code_free(Regexp:PTpcre2_code_8); + +var + RE : TPCREWasmRegExp absolute Regexp; + +begin + RE.Free; +end; + +function pcre2_match_data_create_from_pattern(RegExp:PTpcre2_code_8; aContext :PTpcre2_general_context_8):PTpcre2_match_data_8; + +begin + Result:=Regexp; +end; + +function pcre2_pattern_info(RegExp: PTpcre2_code_8; Info: Tuint32_t; Res: pointer): Tcint; + +var + RE : TPCREWasmRegExp absolute Regexp; + +begin + result:=0; + case info of + PCRE2_INFO_NAMECOUNT : + Pcuint32(Res)^:=Length(RE.lastRes.Groups); + PCRE2_INFO_NAMETABLE : + PPChar(Res)^:=PChar(RE.FNamesTable); + PCRE2_INFO_NAMEENTRYSIZE : + Pcuint32(Res)^:=RE.FNamesTableEntrySize; + PCRE2_INFO_ALLOPTIONS : + Pcuint32(Res)^:=PCRE2_UTF; + PCRE2_INFO_NEWLINE : + Pcuint32(Res)^:=PCRE2_NEWLINE_ANY; + else + Result:=PCRE2_ERROR_WASM; + end; +end; + + +procedure pcre2_match_data_free(aMatchData: PTpcre2_match_data_8); +begin + // Do nothing +end; + +function TPCREWasmRegExp.GetCaptureIndex(aStartIndex,aStopIndex : Integer) : Integer; + +begin + Result:=Length(lastRes.Matches)-1; + While (Result>=0) and Not lastRes.Matches[Result].MatchPos(aStartIndex,aStopIndex) do + Dec(Result); +end; + +procedure TPCREWasmRegExp.CreateNamesTable; + +Const + ExtraSize = SizeOf(Word); + {$IFDEF STRING_IS_UNICODE} + CharOffset = 1; + {$ELSE} + CharOffset = 2; + {$ENDIF} + +var + i, tmp, entrylen,len : Integer; + N: UTF8String; + NS : String; + CaptureIdx : Word; + CaptureOffset,NameOffset : integer; + +begin + FNamesTableEntrySize:=0; + FNamesTable:=''; + Len:=Length(lastRes.Groups); + if Len=0 then exit; + entryLen:=Length(lastRes.Groups[0].Name); + For I:=1 to Len-1 do + begin + tmp:=Length(lastRes.Groups[i].Name); + if tmp>Entrylen then + Entrylen:=Tmp; + end; + EntryLen:=EntryLen+ExtraSize*2; + FNamesTableEntrySize:=EntryLen; + SetLength(FNamesTable,Len*EntryLen); + FillChar(FNamesTable[1],Length(FNamesTable)*SizeOf(Char),0); + For I:=0 to Len-1 do + begin + N:=lastRes.Groups[i].Name; + {$IFDEF STRING_IS_UNICODE} + NS:=UTF8Decode(N); + {$ELSE} + NS:=N; + {$ENDIF} + CaptureIdx:=GetCaptureIndex(lastRes.Groups[i].StartIndex,lastRes.Groups[i].StopIndex); + CaptureIdx:=NtoBE(CaptureIdx); + CaptureOffset:=1+(I*EntryLen); + NameOffset:=1+CharOffset+(I*EntryLen); + Move(CaptureIdx,FNamesTable[CaptureOffset],SizeOf(Word)); + Move(NS[1],FNamesTable[NameOffset],Length(NS)*SizeOf(Char)); + end; +end; + +function TPCREWasmRegExp.HandleExec(aOffset : TSize_t;Const S : String) : Tcint; + +var + i, len : Integer; + +begin + try + LastIndex:=aOffset; + lastRes:=Exec(S); + len:=Length(lastRes.Matches); + if len=0 then + Result:=PCRE2_ERROR_NOMATCH + else + Result:=len; + SetLength(OVector,Len*2); + For i:=0 to len-1 do + begin + OVector[I*2]:=LastRes.Matches[i].StartIndex-1; + OVector[(I*2)+1]:=LastRes.Matches[i].StopIndex-1; + end; + Len:=Length(lastRes.Groups); + if Len>0 then + CreateNamesTable; + except + Result:=PCRE2_ERROR_WASM; + end; +end; + +function pcre2_match_w(RegExp: PTpcre2_code_8; aSubject: TPCRE2_SPTR16; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t; + MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8): Tcint; + +var + RE : TPCREWasmRegExp absolute Regexp; + S : String; + {$IFNDEF STRING_IS_UNICODE} + US : UnicodeString; + {$ENDIF} + +begin + {$IFDEF STRING_IS_UNICODE} + S:=''; + SetLength(S,aSubjectLen); + if aSubjectLen>0 then + Move(aSubject^,S[1],aSubjectLen*SizeOf(Char)); + {$ELSE} + US:=''; + SetLength(US,aSubjectLen); + if aSubjectLen>0 then + Move(aSubject^,US[1],aSubjectLen* SizeOf(Char)); + S:=UTF8Encode(US); + {$ENDIF} + Result:=Re.HandleExec(aOffset,S); +end; + +function pcre2_match(RegExp: PTpcre2_code_8; aSubject: TPCRE2_SPTR8; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t; + MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8): Tcint; + +var + RE : TPCREWasmRegExp absolute Regexp; + {$IFDEF STRING_IS_UNICODE} + RS : UnicodeString; + {$ENDIF} + S : String; + +begin + {$IFDEF STRING_IS_UNICODE} + Rs:=''; + SetLength(RS,aSubjectLen); + if aSubjectLen>0 then + Move(aSubject^,RS[1],aSubjectLen* SizeOf(Char)); + S:=UTF8Decode(RS); + {$ELSE} + S:=''; + SetLength(S,aSubjectLen); + if aSubjectLen>0 then + Move(aSubject^,S[1],aSubjectLen*SizeOf(Char)); + {$ENDIF} + Result:=Re.HandleExec(aOffset,S); +end; + + +function pcre2_get_ovector_pointer(match: PTpcre2_match_data_8): Psize_t; +var + RE : TPCREWasmRegExp absolute match; +begin + Result:=PSize_t(Re.OVector); +end; + +function pcre2_get_startchar(match: PTpcre2_match_data_8): Tsize_t; +var + RE : TPCREWasmRegExp absolute match; +begin + if (Length(Re.lastRes.Matches)>0) then + Result:=Re.LastRes.Matches[0].StartIndex + else + Result:=Length(Re.lastRes.Input); +end; + +function pcre2_get_error_message(ErrNo: Tcint; aString: PTPCRE2_UCHAR8; aStrlen: Tsize_t): Tcint; + +var + Err : RawByteString; + +begin + Err:=Format('Unknown error %d',[ErrNo]); + Result:=Length(Err); + if (Result>aStrLen) then + Result:=aStrLen; + if Result>0 then + Move(Err[1],aString^,Result); +end; + +function libpcre28loaded : Boolean; + +begin + Result:=True; +end; + +procedure Loadlibpcre28; +begin + // +end; + + + +end. +