From 46a9fcc5f93074c9c37fc2539cc9b19d094286f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Van=20Canneyt?= Date: Sat, 7 Sep 2024 23:46:07 +0200 Subject: [PATCH] * Regexp support for wasm target --- packages/wasm-utils/demo/README.md | 5 + .../wasm-utils/demo/regexp/wasmregexpdemo.lpi | 66 ++++ .../wasm-utils/demo/regexp/wasmregexpdemo.pp | 50 +++ packages/wasm-utils/fpmake.pp | 8 + packages/wasm-utils/src/wasm.regexp.api.pas | 26 ++ .../wasm-utils/src/wasm.regexp.objects.pas | 306 ++++++++++++++++++ .../wasm-utils/src/wasm.regexp.shared.pas | 133 ++++++++ 7 files changed, 594 insertions(+) create mode 100644 packages/wasm-utils/demo/regexp/wasmregexpdemo.lpi create mode 100644 packages/wasm-utils/demo/regexp/wasmregexpdemo.pp create mode 100644 packages/wasm-utils/src/wasm.regexp.api.pas create mode 100644 packages/wasm-utils/src/wasm.regexp.objects.pas create mode 100644 packages/wasm-utils/src/wasm.regexp.shared.pas diff --git a/packages/wasm-utils/demo/README.md b/packages/wasm-utils/demo/README.md index bdb515fe12..2867863e88 100644 --- a/packages/wasm-utils/demo/README.md +++ b/packages/wasm-utils/demo/README.md @@ -22,3 +22,8 @@ packages/fcl-web/examples/websocket/server ``` is needed, since this is the websocket server that the demo program will connect to. + +For the regexp demo, you need the corresponding pas2js host program +``` +demos/wasienv/regexp +``` diff --git a/packages/wasm-utils/demo/regexp/wasmregexpdemo.lpi b/packages/wasm-utils/demo/regexp/wasmregexpdemo.lpi new file mode 100644 index 0000000000..f06c07e8e4 --- /dev/null +++ b/packages/wasm-utils/demo/regexp/wasmregexpdemo.lpi @@ -0,0 +1,66 @@ + + + + + + + + + + + + + <UseAppBundle Value="False"/> + <ResourceType Value="res"/> + </General> + <BuildModes> + <Item Name="Default" Default="True"/> + </BuildModes> + <PublishOptions> + <Version Value="2"/> + <UseFileFilters Value="True"/> + </PublishOptions> + <RunParams> + <FormatVersion Value="2"/> + </RunParams> + <Units> + <Unit> + <Filename Value="wasmregexpdemo.pp"/> + <IsPartOfProject Value="True"/> + </Unit> + </Units> + </ProjectOptions> + <CompilerOptions> + <Version Value="11"/> + <Target> + <Filename Value="wasmregexpdemo"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <OtherUnitFiles Value="../../src"/> + <UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <CodeGeneration> + <TargetCPU Value="wasm32"/> + <TargetOS Value="wasi"/> + </CodeGeneration> + <Linking> + <Debugging> + <GenerateDebugInfo Value="False"/> + </Debugging> + </Linking> + </CompilerOptions> + <Debugging> + <Exceptions> + <Item> + <Name Value="EAbort"/> + </Item> + <Item> + <Name Value="ECodetoolError"/> + </Item> + <Item> + <Name Value="EFOpenError"/> + </Item> + </Exceptions> + </Debugging> +</CONFIG> diff --git a/packages/wasm-utils/demo/regexp/wasmregexpdemo.pp b/packages/wasm-utils/demo/regexp/wasmregexpdemo.pp new file mode 100644 index 0000000000..019e5d2dc8 --- /dev/null +++ b/packages/wasm-utils/demo/regexp/wasmregexpdemo.pp @@ -0,0 +1,50 @@ +program wasmregexpdemo; + +uses sysutils, wasm.regexp.shared, wasm.regexp.api, wasm.regexp.objects; + +Const + SRegex = 'quick\s(?<color>brown).+?(jumps)'; + STest = 'The Quick Brown Fox Jumps Over The Lazy Dog'; + SFlags = 'dgi'; + +Var + Regex : TWasmRegExp; + Res : TRegExpResult; + I : Integer; + M : TRegExpMatch; + G : TRegExpGroup; + S : String; + +begin + Writeln('Regular expression: ',SRegex); + Writeln('Flags: ',SFlags); + Regex:=TWasmRegExp.Create(SRegex,SFlags); + Writeln('Test string: ',STest); + Res:=Regex.Exec(STest); + if Res.Index=0 then + Writeln('No match') + else + With Res do + begin + Writeln('Match at : ',Index); + I:=0; + For M in Matches do + begin + S:=Format('(%d) : "%s"',[I,M.Value]); + if (rfIndices in Regex.Flags) then + S:=S+Format(' [From pos %d to %d]',[M.StartIndex,M.StopIndex]); + Writeln(S); + Inc(I); + end; + Writeln('Named groups : ',Length(Groups)); + For G in Groups do + begin + S:=Format('(%d): "%s": "%s"',[I,G.Name,G.Value]); + if (rfIndices in Regex.Flags) then + S:=S+Format(' [From pos %d to %d]',[G.StartIndex,G.StopIndex]); + Writeln(S); + Inc(I); + end; + end; +end. + diff --git a/packages/wasm-utils/fpmake.pp b/packages/wasm-utils/fpmake.pp index 2c17c6449f..c95ac5fa85 100644 --- a/packages/wasm-utils/fpmake.pp +++ b/packages/wasm-utils/fpmake.pp @@ -43,6 +43,14 @@ begin T.Dependencies.AddUnit('wasm.websocket.api'); T.Dependencies.AddUnit('wasm.websocket.shared'); + T:=P.Targets.AddUnit('wasm.regexp.shared.pas'); + T:=P.Targets.AddUnit('wasm.regexp.api.pas'); + T.Dependencies.AddUnit('wasm.regexp.shared'); + + T:=P.Targets.AddUnit('wasm.regexp.objects.pas'); + T.Dependencies.AddUnit('wasm.regexp.api'); + T.Dependencies.AddUnit('wasm.regexp.shared'); + {$ifndef ALLPACKAGES} Run; end; diff --git a/packages/wasm-utils/src/wasm.regexp.api.pas b/packages/wasm-utils/src/wasm.regexp.api.pas new file mode 100644 index 0000000000..2beab03167 --- /dev/null +++ b/packages/wasm-utils/src/wasm.regexp.api.pas @@ -0,0 +1,26 @@ +unit wasm.regexp.api; + +{$mode ObjFPC}{$H+} + +interface + +uses wasm.regexp.shared; + +function __wasm_regexp_allocate(aExpr : PByte; aExprLen : longint; aFlags : Longint; aID : PWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_Allocate; +function __wasm_regexp_deallocate(aExprID : TWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_DeAllocate; +function __wasm_regexp_exec(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aIndex : PLongint; aResultCount : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Exec; +function __wasm_regexp_test(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aResult : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Test; +function __wasm_regexp_get_flags(aExprID : TWasmRegExpID; aFlags : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetFlags; +function __wasm_regexp_get_expression(aExprID : TWasmRegExpID; aExp : PByte; aExpLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetExpression; +function __wasm_regexp_get_last_index(aExprID : TWasmRegExpID; aLastIndex : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetLastIndex; +function __wasm_regexp_get_result_match(aExprID : TWasmRegExpID; aIndex : Longint; Res : PByte; ResLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetResultMatch; +function __wasm_regexp_get_group_count(aExprID : TWasmRegExpID; aCount: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupCount; +function __wasm_regexp_get_group_name(aExprID : TWasmRegExpID; aIndex : Longint; aName : PByte; aNameLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupName; +function __wasm_regexp_get_named_group(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Longint; aValue : PByte; aValueLen: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroup; +function __wasm_regexp_get_indexes(aExprID : TWasmRegExpID; aIndex : Longint; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetIndexes; +function __wasm_regexp_get_named_group_indexes(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Integer; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroupIndexes; + +implementation + +end. + diff --git a/packages/wasm-utils/src/wasm.regexp.objects.pas b/packages/wasm-utils/src/wasm.regexp.objects.pas new file mode 100644 index 0000000000..951626779b --- /dev/null +++ b/packages/wasm-utils/src/wasm.regexp.objects.pas @@ -0,0 +1,306 @@ +unit wasm.regexp.objects; + +{$mode ObjFPC}{$H+} +{$modeswitch typehelpers} + +interface + +uses +{$IFDEF FPC_DOTTEDUNITS} + System.SysUtils, +{$ELSE} + SysUtils, +{$ENDIF} + wasm.regexp.shared, + wasm.regexp.api; + +Type + EWasmRegExp = class(Exception); + + TRegexpFlag = (rfUnknown,rfDotAll,rfGlobal,rfIndices,rfIgnoreCase,rfMultiLine,rfSticky,rfUnicode,rfUnicodeSets); + TRegexpFlags = Set of TRegExpFlag; + + { TRegexpFlagHelper } + + TRegexpFlagHelper = type helper for TRegexpFlag + Public + function ToString: String; + Function AsFlag : Longint; + Property AsString : String Read ToString; + end; + + { TRegexpFlagsHelper } + + TRegexpFlagsHelper = type helper for TRegexpFlags + private + procedure SetAsFlags(const aValue: Longint); + public + function ToString: String; + Function ToFlags : Longint; + class function FromFlags(aFlags : Longint) : TRegExpFlags; static; + Property AsString : String Read ToString; + Property AsFlags : Longint Read ToFlags Write SetAsFlags; + end; + + TRegExpMatch = record + Value : UTF8String; + StartIndex, StopIndex : Integer; + end; + TRegExpMatchArray = array of TRegExpMatch; + + TRegExpGroup = record + Name,Value : UTF8String; + StartIndex, StopIndex : Integer; + end; + TRegExpGroupArray = array of TRegExpGroup; + + TRegExpResult = record + Matches : TRegExpMatchArray; + Input : UTF8String; + Index : Integer; + Groups : TRegExpGroupArray; + end; + + { TWasmRegExp } + + TWasmRegExp = Class(TObject) + private + FRegexpID: TWasmRegexpID; + FFlags : Longint; + function GetFlags: TRegexpFlags; + function GetGroups(aCount: Integer): TRegExpGroupArray; + function GetLastIndex: Longint; + function GetMatches(aCount: Integer): TRegExpMatchArray; + protected + function CheckRegExpResult(Res : TWasmRegexpResult; const aOperation : String; aRaise : Boolean = true) : Boolean; + Public + Constructor Create(const aExpression,aFlags : String); overload; + Constructor Create(const aExpression : String; aFlags : Longint); overload; + Constructor Create(const aExpression : String; aFlags : TRegexpFlags); overload; + destructor Destroy; override; + Function Exec(const aString : String) : TRegExpResult; + Function Test(const aString : String) : Boolean; + Property LastIndex : Longint Read GetLastIndex; + Property RegexpID : TWasmRegExpID Read FRegexpID; + Property FlagsAsInteger : Integer Read FFlags; + Property Flags : TRegexpFlags Read GetFlags; + end; + +implementation + +{ TRegexpFlagHelper } + +function TRegexpFlagHelper.ToString: String; +begin + +end; + +function TRegexpFlagHelper.AsFlag: Longint; + +Const + FlagValues : Array[TRegexpFlag] of longint + = (0, + WASMRE_FLAG_DOTALL, + WASMRE_FLAG_GLOBAL, + WASMRE_FLAG_INDICES, + WASMRE_FLAG_IGNORECASE, + WASMRE_FLAG_MULTILINE, + WASMRE_FLAG_STICKY, + WASMRE_FLAG_UNICODE, + WASMRE_FLAG_UNICODESETS); + + + +begin + Result:=FlagValues[Self]; +end; + +{ TRegexpFlagsHelper } + +procedure TRegexpFlagsHelper.SetAsFlags(const aValue: Longint); + +var + F : TRegexpFlag; + Res : TRegexpFlags; + +begin + Res:=[]; + for F in TRegexpFlag do + if (F.AsFlag and aValue)<>0 then + Include(Res,F); + Self:=Res; +end; + +function TRegexpFlagsHelper.ToString: String; + +begin + Result:=FlagsToString(AsFlags); +end; + +function TRegexpFlagsHelper.ToFlags: Longint; +var + F : TRegexpFlag; +begin + Result:=0; + For F in Self do + Result:=Result or F.AsFlag; +end; + +class function TRegexpFlagsHelper.FromFlags(aFlags: Longint): TRegExpFlags; +begin + Result.AsFlags:=aFlags; +end; + + +{ TWasmRegExp } + + +function TWasmRegExp.GetLastIndex: Longint; +begin + CheckRegExpResult(__wasm_regexp_get_last_index(FRegexpID,@Result),'get_last_index'); +end; + +function TWasmRegExp.GetFlags: TRegexpFlags; +begin + Result:=TRegExpFlags.FromFlags(FFlags) +end; + +function TWasmRegExp.CheckRegExpResult(Res: TWasmRegexpResult; const aOperation: String; aRaise: Boolean): Boolean; +begin + Result:=Res=WASMRE_RESULT_SUCCESS; + if (not Result) and aRaise then + Raise EWasmRegExp.CreateFmt('Error %d occured during "%s"',[Res,aOperation]); +end; + +constructor TWasmRegExp.Create(const aExpression, aFlags: String); +begin + Create(aExpression,StringToFlags(aFlags,False)); +end; + +constructor TWasmRegExp.Create(const aExpression: String; aFlags: Longint); + +var + R : RawByteString; + +begin + R:=UTF8Encode(aExpression); + FFlags:=aFlags; + CheckRegexpResult(__wasm_regexp_allocate(PByte(R),Length(R),aFlags,@FRegexpID),regexpFN_Allocate); +end; + +constructor TWasmRegExp.Create(const aExpression: String; aFlags: TRegexpFlags); +begin + Create(aExpression,aFlags.AsFlags); +end; + +destructor TWasmRegExp.Destroy; +begin + CheckRegExpResult(__wasm_regexp_deallocate(FRegexpID),regexpFN_Allocate,false); + inherited Destroy; +end; + +function TWasmRegExp.GetMatches(aCount: Integer): TRegExpMatchArray; + +var + I : Integer; + Len,lStart,lStop : Longint; + Res : TWasmRegexpResult; + S : RawByteString; + lGetIndexes : Boolean; + +begin + SetLength(Result,aCount); + lGetindexes:=rfIndices in Flags; + For I:=0 to aCount-1 do + begin + Len:=256; + Repeat + SetLength(S,Len); + Res:=__wasm_regexp_get_result_match(FRegexpID,I,Pbyte(S),@Len); + Until (Res<>WASMRE_RESULT_NO_MEM); + SetLength(S,Len); + CheckRegExpResult(Res,regexpFN_GetResultMatch); + Result[i].Value:=S; + S:=''; + if lGetIndexes then + CheckRegExpResult(__wasm_regexp_get_Indexes(FRegexpID,I,@lStart,@lStop),regexpFN_GetIndexes); + Result[i].StartIndex:=lStart+1; + Result[i].StopIndex:=lStop+1; + end; +end; + +function TWasmRegExp.GetGroups(aCount: Integer): TRegExpGroupArray; + +var + I : Integer; + Len,lStart,lStop : Longint; + Res : TWasmRegexpResult; + N,V : RawByteString; + lGetIndexes : Boolean; + +begin + N:=''; + V:=''; + SetLength(Result,aCount); + lGetindexes:=rfIndices in Flags; + For I:=0 to aCount-1 do + begin + Len:=256; + Repeat + SetLength(N,Len); + Res:=__wasm_regexp_get_group_name(FRegexpID,I,Pbyte(N),@Len); + Until (Res<>WASMRE_RESULT_NO_MEM); + CheckRegExpResult(Res,regexpFN_GetGroupName); + SetLength(N,Len); + Result[i].Name:=N; + Len:=256; + Repeat + SetLength(V,Len); + Res:=__wasm_regexp_get_named_group(FRegexpID,PByte(N),Length(N),Pbyte(V),@Len); + Until (Res<>WASMRE_RESULT_NO_MEM); + CheckRegExpResult(Res,regexpFN_GetNamedGroup); + SetLength(V,Len); + Result[I].Value:=V; + if lGetIndexes then + CheckRegExpResult(__wasm_regexp_get_named_group_indexes(FRegexpID,PByte(N),Length(N),@lStart,@lStop),regexpFN_GetNamedGroup); + Result[i].StartIndex:=lStart+1; + Result[i].StopIndex:=lStop+1; + end; +end; + + +function TWasmRegExp.Exec(const aString: String): TRegExpResult; + +var + lGroupCount, lIndex, lMatchCount : longint; + R : RawByteString; + +begin + Result:=Default(TRegExpResult); + R:=UTF8Encode(aString); + Result.Input:=R; + CheckRegexpResult(__wasm_regexp_exec(FRegexpID,Pbyte(R),Length(R),@lIndex,@lMatchCount),regexpFN_exec); + If lMatchCount=0 then + exit; + Result.Index:=lIndex+1; + Result.Matches:=GetMatches(lMatchCount); + CheckRegExpResult(__wasm_regexp_get_group_count(FRegexpID,@lGroupCount),regexpFN_GetGroupCount); + if lGroupCount>0 then + Result.Groups:=GetGroups(lGroupCount); +end; + + +function TWasmRegExp.Test(const aString: String): Boolean; + +var + R : RawByteString; + lRes : Longint; + +begin + R:=UTF8Encode(aString); + CheckRegexpResult(__wasm_regexp_test(FRegexpID,Pbyte(R),Length(R),@lRes),regexpFN_test); + Result:=(lRes<>0); +end; + +end. + diff --git a/packages/wasm-utils/src/wasm.regexp.shared.pas b/packages/wasm-utils/src/wasm.regexp.shared.pas new file mode 100644 index 0000000000..253bd7e206 --- /dev/null +++ b/packages/wasm-utils/src/wasm.regexp.shared.pas @@ -0,0 +1,133 @@ +unit wasm.regexp.shared; + +{$mode ObjFPC}{$H+} + +interface + +Type + TWasmRegexpID = Longint; + TWasmRegexpResult = Longint; + {$IFNDEF PAS2JS} + TWasmPointer = Pointer; + PWasmRegexpID = ^TWasmRegexpID; + {$ELSE} + TWasmPointer = Longint; + PWasmRegexpID = TWasmPointer; + {$ENDIF} + +Const + + WASMRE_RESULT_SUCCESS = 0; + WASMRE_RESULT_ERROR = -1; + WASMRE_RESULT_INVALIDID = -2; + WASMRE_RESULT_NO_MEM = -3; + WASMRE_RESULT_NO_REGEXP = -4; + WASMRE_RESULT_INVALIDIDX = -5; + WASMRE_RESULT_NOINDEXES = -6; + + WASMRE_FLAG_DOTALL = 1; + WASMRE_FLAG_GLOBAL = 2; + WASMRE_FLAG_INDICES = 4; + WASMRE_FLAG_IGNORECASE = 8; + WASMRE_FLAG_MULTILINE = 16; + WASMRE_FLAG_STICKY = 32; + WASMRE_FLAG_UNICODE = 64; + WASMRE_FLAG_UNICODESETS = 128; + + // Aliases that correspond to the letters used when creating a regexp + WASMRE_FLAG_S = WASMRE_FLAG_DOTALL; + WASMRE_FLAG_G = WASMRE_FLAG_GLOBAL; + WASMRE_FLAG_D = WASMRE_FLAG_INDICES; + WASMRE_FLAG_I = WASMRE_FLAG_IGNORECASE; + WASMRE_FLAG_M = WASMRE_FLAG_MULTILINE; + WASMRE_FLAG_Y = WASMRE_FLAG_STICKY; + WASMRE_FLAG_U = WASMRE_FLAG_UNICODE; + WASMRE_FLAG_V = WASMRE_FLAG_UNICODESETS; + + regexpExportName = 'regexp'; + regexpFN_Allocate = 'allocate'; + regexpFN_DeAllocate = 'deallocate'; + regexpFN_Exec = 'exec'; + regexpFN_Test = 'test'; + regexpFN_GetFlags = 'get_flags'; + regexpFN_GetExpression = 'get_expression'; + regexpFN_GetLastIndex = 'get_last_index'; + regexpFN_GetResultMatch = 'get_result_match'; + regexpFN_GetGroupCount = 'get_group_count'; + regexpFN_GetGroupName = 'get_group_name'; + regexpFN_GetNamedGroup = 'get_named_group'; + regexpFN_GetIndexes = 'get_indexes'; + regexpFN_GetNamedGroupIndexes = 'get_named_group_indexes'; + +Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint; +Function FlagsToString(S : Longint) : String; + +implementation + +uses +{$IFDEF FPC_DOTTEDUNITS} + System.SysUtils; +{$ELSE} + SysUtils; +{$ENDIF} + + +Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint; + +var + C : Char; + Flag : Longint; + +begin + Result:=0; + for C in S do + begin + case C of + 's': Flag:=WASMRE_FLAG_S; + 'g': Flag:=WASMRE_FLAG_G; + 'd': Flag:=WASMRE_FLAG_D; + 'i': Flag:=WASMRE_FLAG_I; + 'm': Flag:=WASMRE_FLAG_M; + 'y': Flag:=WASMRE_FLAG_Y; + 'u': Flag:=WASMRE_FLAG_U; + 'v': Flag:=WASMRE_FLAG_V; + else + if not IgnoreUnknown then + Raise EConvertError.CreateFmt('Unknown regexp flag: %s',[C]); + Flag:=0; + end; + Result:=Result or Flag; + end; +end; + +Function FlagsToString(S : Longint) : String; + +var + C,I : Longint; + Flag : Char; + +begin + Result:=''; + for I:=0 to 7 do + begin + C:=S and (1 shl i); + case C of + WASMRE_FLAG_S : Flag:='s'; + WASMRE_FLAG_G : Flag:='g'; + WASMRE_FLAG_D : Flag:='d'; + WASMRE_FLAG_I : Flag:='i'; + WASMRE_FLAG_M : Flag:='m'; + WASMRE_FLAG_Y : Flag:='y'; + WASMRE_FLAG_U : Flag:='u'; + WASMRE_FLAG_V : Flag:='v'; + else + Flag:='0'; + end; + if Flag<>'0' then + Result:=Result + Flag; + end; +end; + + +end. +