* Regexp support for wasm target

This commit is contained in:
Michaël Van Canneyt 2024-09-07 23:46:07 +02:00
parent 424686ce34
commit 46a9fcc5f9
7 changed files with 594 additions and 0 deletions

View File

@ -22,3 +22,8 @@ packages/fcl-web/examples/websocket/server
``` ```
is needed, since this is the websocket server that the demo program will is needed, since this is the websocket server that the demo program will
connect to. connect to.
For the regexp demo, you need the corresponding pas2js host program
```
demos/wasienv/regexp
```

View File

@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<ProjectOptions>
<Version Value="12"/>
<General>
<Flags>
<MainUnitHasCreateFormStatements Value="False"/>
<MainUnitHasTitleStatement Value="False"/>
<MainUnitHasScaledStatement Value="False"/>
</Flags>
<SessionStorage Value="InProjectDir"/>
<Title Value="wasmregexpdemo"/>
<UseAppBundle Value="False"/>
<ResourceType Value="res"/>
</General>
<BuildModes>
<Item Name="Default" Default="True"/>
</BuildModes>
<PublishOptions>
<Version Value="2"/>
<UseFileFilters Value="True"/>
</PublishOptions>
<RunParams>
<FormatVersion Value="2"/>
</RunParams>
<Units>
<Unit>
<Filename Value="wasmregexpdemo.pp"/>
<IsPartOfProject Value="True"/>
</Unit>
</Units>
</ProjectOptions>
<CompilerOptions>
<Version Value="11"/>
<Target>
<Filename Value="wasmregexpdemo"/>
</Target>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
<OtherUnitFiles Value="../../src"/>
<UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
</SearchPaths>
<CodeGeneration>
<TargetCPU Value="wasm32"/>
<TargetOS Value="wasi"/>
</CodeGeneration>
<Linking>
<Debugging>
<GenerateDebugInfo Value="False"/>
</Debugging>
</Linking>
</CompilerOptions>
<Debugging>
<Exceptions>
<Item>
<Name Value="EAbort"/>
</Item>
<Item>
<Name Value="ECodetoolError"/>
</Item>
<Item>
<Name Value="EFOpenError"/>
</Item>
</Exceptions>
</Debugging>
</CONFIG>

View File

@ -0,0 +1,50 @@
program wasmregexpdemo;
uses sysutils, wasm.regexp.shared, wasm.regexp.api, wasm.regexp.objects;
Const
SRegex = 'quick\s(?<color>brown).+?(jumps)';
STest = 'The Quick Brown Fox Jumps Over The Lazy Dog';
SFlags = 'dgi';
Var
Regex : TWasmRegExp;
Res : TRegExpResult;
I : Integer;
M : TRegExpMatch;
G : TRegExpGroup;
S : String;
begin
Writeln('Regular expression: ',SRegex);
Writeln('Flags: ',SFlags);
Regex:=TWasmRegExp.Create(SRegex,SFlags);
Writeln('Test string: ',STest);
Res:=Regex.Exec(STest);
if Res.Index=0 then
Writeln('No match')
else
With Res do
begin
Writeln('Match at : ',Index);
I:=0;
For M in Matches do
begin
S:=Format('(%d) : "%s"',[I,M.Value]);
if (rfIndices in Regex.Flags) then
S:=S+Format(' [From pos %d to %d]',[M.StartIndex,M.StopIndex]);
Writeln(S);
Inc(I);
end;
Writeln('Named groups : ',Length(Groups));
For G in Groups do
begin
S:=Format('(%d): "%s": "%s"',[I,G.Name,G.Value]);
if (rfIndices in Regex.Flags) then
S:=S+Format(' [From pos %d to %d]',[G.StartIndex,G.StopIndex]);
Writeln(S);
Inc(I);
end;
end;
end.

View File

@ -43,6 +43,14 @@ begin
T.Dependencies.AddUnit('wasm.websocket.api'); T.Dependencies.AddUnit('wasm.websocket.api');
T.Dependencies.AddUnit('wasm.websocket.shared'); T.Dependencies.AddUnit('wasm.websocket.shared');
T:=P.Targets.AddUnit('wasm.regexp.shared.pas');
T:=P.Targets.AddUnit('wasm.regexp.api.pas');
T.Dependencies.AddUnit('wasm.regexp.shared');
T:=P.Targets.AddUnit('wasm.regexp.objects.pas');
T.Dependencies.AddUnit('wasm.regexp.api');
T.Dependencies.AddUnit('wasm.regexp.shared');
{$ifndef ALLPACKAGES} {$ifndef ALLPACKAGES}
Run; Run;
end; end;

View File

@ -0,0 +1,26 @@
unit wasm.regexp.api;
{$mode ObjFPC}{$H+}
interface
uses wasm.regexp.shared;
function __wasm_regexp_allocate(aExpr : PByte; aExprLen : longint; aFlags : Longint; aID : PWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_Allocate;
function __wasm_regexp_deallocate(aExprID : TWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_DeAllocate;
function __wasm_regexp_exec(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aIndex : PLongint; aResultCount : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Exec;
function __wasm_regexp_test(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aResult : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Test;
function __wasm_regexp_get_flags(aExprID : TWasmRegExpID; aFlags : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetFlags;
function __wasm_regexp_get_expression(aExprID : TWasmRegExpID; aExp : PByte; aExpLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetExpression;
function __wasm_regexp_get_last_index(aExprID : TWasmRegExpID; aLastIndex : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetLastIndex;
function __wasm_regexp_get_result_match(aExprID : TWasmRegExpID; aIndex : Longint; Res : PByte; ResLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetResultMatch;
function __wasm_regexp_get_group_count(aExprID : TWasmRegExpID; aCount: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupCount;
function __wasm_regexp_get_group_name(aExprID : TWasmRegExpID; aIndex : Longint; aName : PByte; aNameLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupName;
function __wasm_regexp_get_named_group(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Longint; aValue : PByte; aValueLen: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroup;
function __wasm_regexp_get_indexes(aExprID : TWasmRegExpID; aIndex : Longint; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetIndexes;
function __wasm_regexp_get_named_group_indexes(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Integer; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroupIndexes;
implementation
end.

View File

@ -0,0 +1,306 @@
unit wasm.regexp.objects;
{$mode ObjFPC}{$H+}
{$modeswitch typehelpers}
interface
uses
{$IFDEF FPC_DOTTEDUNITS}
System.SysUtils,
{$ELSE}
SysUtils,
{$ENDIF}
wasm.regexp.shared,
wasm.regexp.api;
Type
EWasmRegExp = class(Exception);
TRegexpFlag = (rfUnknown,rfDotAll,rfGlobal,rfIndices,rfIgnoreCase,rfMultiLine,rfSticky,rfUnicode,rfUnicodeSets);
TRegexpFlags = Set of TRegExpFlag;
{ TRegexpFlagHelper }
TRegexpFlagHelper = type helper for TRegexpFlag
Public
function ToString: String;
Function AsFlag : Longint;
Property AsString : String Read ToString;
end;
{ TRegexpFlagsHelper }
TRegexpFlagsHelper = type helper for TRegexpFlags
private
procedure SetAsFlags(const aValue: Longint);
public
function ToString: String;
Function ToFlags : Longint;
class function FromFlags(aFlags : Longint) : TRegExpFlags; static;
Property AsString : String Read ToString;
Property AsFlags : Longint Read ToFlags Write SetAsFlags;
end;
TRegExpMatch = record
Value : UTF8String;
StartIndex, StopIndex : Integer;
end;
TRegExpMatchArray = array of TRegExpMatch;
TRegExpGroup = record
Name,Value : UTF8String;
StartIndex, StopIndex : Integer;
end;
TRegExpGroupArray = array of TRegExpGroup;
TRegExpResult = record
Matches : TRegExpMatchArray;
Input : UTF8String;
Index : Integer;
Groups : TRegExpGroupArray;
end;
{ TWasmRegExp }
TWasmRegExp = Class(TObject)
private
FRegexpID: TWasmRegexpID;
FFlags : Longint;
function GetFlags: TRegexpFlags;
function GetGroups(aCount: Integer): TRegExpGroupArray;
function GetLastIndex: Longint;
function GetMatches(aCount: Integer): TRegExpMatchArray;
protected
function CheckRegExpResult(Res : TWasmRegexpResult; const aOperation : String; aRaise : Boolean = true) : Boolean;
Public
Constructor Create(const aExpression,aFlags : String); overload;
Constructor Create(const aExpression : String; aFlags : Longint); overload;
Constructor Create(const aExpression : String; aFlags : TRegexpFlags); overload;
destructor Destroy; override;
Function Exec(const aString : String) : TRegExpResult;
Function Test(const aString : String) : Boolean;
Property LastIndex : Longint Read GetLastIndex;
Property RegexpID : TWasmRegExpID Read FRegexpID;
Property FlagsAsInteger : Integer Read FFlags;
Property Flags : TRegexpFlags Read GetFlags;
end;
implementation
{ TRegexpFlagHelper }
function TRegexpFlagHelper.ToString: String;
begin
end;
function TRegexpFlagHelper.AsFlag: Longint;
Const
FlagValues : Array[TRegexpFlag] of longint
= (0,
WASMRE_FLAG_DOTALL,
WASMRE_FLAG_GLOBAL,
WASMRE_FLAG_INDICES,
WASMRE_FLAG_IGNORECASE,
WASMRE_FLAG_MULTILINE,
WASMRE_FLAG_STICKY,
WASMRE_FLAG_UNICODE,
WASMRE_FLAG_UNICODESETS);
begin
Result:=FlagValues[Self];
end;
{ TRegexpFlagsHelper }
procedure TRegexpFlagsHelper.SetAsFlags(const aValue: Longint);
var
F : TRegexpFlag;
Res : TRegexpFlags;
begin
Res:=[];
for F in TRegexpFlag do
if (F.AsFlag and aValue)<>0 then
Include(Res,F);
Self:=Res;
end;
function TRegexpFlagsHelper.ToString: String;
begin
Result:=FlagsToString(AsFlags);
end;
function TRegexpFlagsHelper.ToFlags: Longint;
var
F : TRegexpFlag;
begin
Result:=0;
For F in Self do
Result:=Result or F.AsFlag;
end;
class function TRegexpFlagsHelper.FromFlags(aFlags: Longint): TRegExpFlags;
begin
Result.AsFlags:=aFlags;
end;
{ TWasmRegExp }
function TWasmRegExp.GetLastIndex: Longint;
begin
CheckRegExpResult(__wasm_regexp_get_last_index(FRegexpID,@Result),'get_last_index');
end;
function TWasmRegExp.GetFlags: TRegexpFlags;
begin
Result:=TRegExpFlags.FromFlags(FFlags)
end;
function TWasmRegExp.CheckRegExpResult(Res: TWasmRegexpResult; const aOperation: String; aRaise: Boolean): Boolean;
begin
Result:=Res=WASMRE_RESULT_SUCCESS;
if (not Result) and aRaise then
Raise EWasmRegExp.CreateFmt('Error %d occured during "%s"',[Res,aOperation]);
end;
constructor TWasmRegExp.Create(const aExpression, aFlags: String);
begin
Create(aExpression,StringToFlags(aFlags,False));
end;
constructor TWasmRegExp.Create(const aExpression: String; aFlags: Longint);
var
R : RawByteString;
begin
R:=UTF8Encode(aExpression);
FFlags:=aFlags;
CheckRegexpResult(__wasm_regexp_allocate(PByte(R),Length(R),aFlags,@FRegexpID),regexpFN_Allocate);
end;
constructor TWasmRegExp.Create(const aExpression: String; aFlags: TRegexpFlags);
begin
Create(aExpression,aFlags.AsFlags);
end;
destructor TWasmRegExp.Destroy;
begin
CheckRegExpResult(__wasm_regexp_deallocate(FRegexpID),regexpFN_Allocate,false);
inherited Destroy;
end;
function TWasmRegExp.GetMatches(aCount: Integer): TRegExpMatchArray;
var
I : Integer;
Len,lStart,lStop : Longint;
Res : TWasmRegexpResult;
S : RawByteString;
lGetIndexes : Boolean;
begin
SetLength(Result,aCount);
lGetindexes:=rfIndices in Flags;
For I:=0 to aCount-1 do
begin
Len:=256;
Repeat
SetLength(S,Len);
Res:=__wasm_regexp_get_result_match(FRegexpID,I,Pbyte(S),@Len);
Until (Res<>WASMRE_RESULT_NO_MEM);
SetLength(S,Len);
CheckRegExpResult(Res,regexpFN_GetResultMatch);
Result[i].Value:=S;
S:='';
if lGetIndexes then
CheckRegExpResult(__wasm_regexp_get_Indexes(FRegexpID,I,@lStart,@lStop),regexpFN_GetIndexes);
Result[i].StartIndex:=lStart+1;
Result[i].StopIndex:=lStop+1;
end;
end;
function TWasmRegExp.GetGroups(aCount: Integer): TRegExpGroupArray;
var
I : Integer;
Len,lStart,lStop : Longint;
Res : TWasmRegexpResult;
N,V : RawByteString;
lGetIndexes : Boolean;
begin
N:='';
V:='';
SetLength(Result,aCount);
lGetindexes:=rfIndices in Flags;
For I:=0 to aCount-1 do
begin
Len:=256;
Repeat
SetLength(N,Len);
Res:=__wasm_regexp_get_group_name(FRegexpID,I,Pbyte(N),@Len);
Until (Res<>WASMRE_RESULT_NO_MEM);
CheckRegExpResult(Res,regexpFN_GetGroupName);
SetLength(N,Len);
Result[i].Name:=N;
Len:=256;
Repeat
SetLength(V,Len);
Res:=__wasm_regexp_get_named_group(FRegexpID,PByte(N),Length(N),Pbyte(V),@Len);
Until (Res<>WASMRE_RESULT_NO_MEM);
CheckRegExpResult(Res,regexpFN_GetNamedGroup);
SetLength(V,Len);
Result[I].Value:=V;
if lGetIndexes then
CheckRegExpResult(__wasm_regexp_get_named_group_indexes(FRegexpID,PByte(N),Length(N),@lStart,@lStop),regexpFN_GetNamedGroup);
Result[i].StartIndex:=lStart+1;
Result[i].StopIndex:=lStop+1;
end;
end;
function TWasmRegExp.Exec(const aString: String): TRegExpResult;
var
lGroupCount, lIndex, lMatchCount : longint;
R : RawByteString;
begin
Result:=Default(TRegExpResult);
R:=UTF8Encode(aString);
Result.Input:=R;
CheckRegexpResult(__wasm_regexp_exec(FRegexpID,Pbyte(R),Length(R),@lIndex,@lMatchCount),regexpFN_exec);
If lMatchCount=0 then
exit;
Result.Index:=lIndex+1;
Result.Matches:=GetMatches(lMatchCount);
CheckRegExpResult(__wasm_regexp_get_group_count(FRegexpID,@lGroupCount),regexpFN_GetGroupCount);
if lGroupCount>0 then
Result.Groups:=GetGroups(lGroupCount);
end;
function TWasmRegExp.Test(const aString: String): Boolean;
var
R : RawByteString;
lRes : Longint;
begin
R:=UTF8Encode(aString);
CheckRegexpResult(__wasm_regexp_test(FRegexpID,Pbyte(R),Length(R),@lRes),regexpFN_test);
Result:=(lRes<>0);
end;
end.

View File

@ -0,0 +1,133 @@
unit wasm.regexp.shared;
{$mode ObjFPC}{$H+}
interface
Type
TWasmRegexpID = Longint;
TWasmRegexpResult = Longint;
{$IFNDEF PAS2JS}
TWasmPointer = Pointer;
PWasmRegexpID = ^TWasmRegexpID;
{$ELSE}
TWasmPointer = Longint;
PWasmRegexpID = TWasmPointer;
{$ENDIF}
Const
WASMRE_RESULT_SUCCESS = 0;
WASMRE_RESULT_ERROR = -1;
WASMRE_RESULT_INVALIDID = -2;
WASMRE_RESULT_NO_MEM = -3;
WASMRE_RESULT_NO_REGEXP = -4;
WASMRE_RESULT_INVALIDIDX = -5;
WASMRE_RESULT_NOINDEXES = -6;
WASMRE_FLAG_DOTALL = 1;
WASMRE_FLAG_GLOBAL = 2;
WASMRE_FLAG_INDICES = 4;
WASMRE_FLAG_IGNORECASE = 8;
WASMRE_FLAG_MULTILINE = 16;
WASMRE_FLAG_STICKY = 32;
WASMRE_FLAG_UNICODE = 64;
WASMRE_FLAG_UNICODESETS = 128;
// Aliases that correspond to the letters used when creating a regexp
WASMRE_FLAG_S = WASMRE_FLAG_DOTALL;
WASMRE_FLAG_G = WASMRE_FLAG_GLOBAL;
WASMRE_FLAG_D = WASMRE_FLAG_INDICES;
WASMRE_FLAG_I = WASMRE_FLAG_IGNORECASE;
WASMRE_FLAG_M = WASMRE_FLAG_MULTILINE;
WASMRE_FLAG_Y = WASMRE_FLAG_STICKY;
WASMRE_FLAG_U = WASMRE_FLAG_UNICODE;
WASMRE_FLAG_V = WASMRE_FLAG_UNICODESETS;
regexpExportName = 'regexp';
regexpFN_Allocate = 'allocate';
regexpFN_DeAllocate = 'deallocate';
regexpFN_Exec = 'exec';
regexpFN_Test = 'test';
regexpFN_GetFlags = 'get_flags';
regexpFN_GetExpression = 'get_expression';
regexpFN_GetLastIndex = 'get_last_index';
regexpFN_GetResultMatch = 'get_result_match';
regexpFN_GetGroupCount = 'get_group_count';
regexpFN_GetGroupName = 'get_group_name';
regexpFN_GetNamedGroup = 'get_named_group';
regexpFN_GetIndexes = 'get_indexes';
regexpFN_GetNamedGroupIndexes = 'get_named_group_indexes';
Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint;
Function FlagsToString(S : Longint) : String;
implementation
uses
{$IFDEF FPC_DOTTEDUNITS}
System.SysUtils;
{$ELSE}
SysUtils;
{$ENDIF}
Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint;
var
C : Char;
Flag : Longint;
begin
Result:=0;
for C in S do
begin
case C of
's': Flag:=WASMRE_FLAG_S;
'g': Flag:=WASMRE_FLAG_G;
'd': Flag:=WASMRE_FLAG_D;
'i': Flag:=WASMRE_FLAG_I;
'm': Flag:=WASMRE_FLAG_M;
'y': Flag:=WASMRE_FLAG_Y;
'u': Flag:=WASMRE_FLAG_U;
'v': Flag:=WASMRE_FLAG_V;
else
if not IgnoreUnknown then
Raise EConvertError.CreateFmt('Unknown regexp flag: %s',[C]);
Flag:=0;
end;
Result:=Result or Flag;
end;
end;
Function FlagsToString(S : Longint) : String;
var
C,I : Longint;
Flag : Char;
begin
Result:='';
for I:=0 to 7 do
begin
C:=S and (1 shl i);
case C of
WASMRE_FLAG_S : Flag:='s';
WASMRE_FLAG_G : Flag:='g';
WASMRE_FLAG_D : Flag:='d';
WASMRE_FLAG_I : Flag:='i';
WASMRE_FLAG_M : Flag:='m';
WASMRE_FLAG_Y : Flag:='y';
WASMRE_FLAG_U : Flag:='u';
WASMRE_FLAG_V : Flag:='v';
else
Flag:='0';
end;
if Flag<>'0' then
Result:=Result + Flag;
end;
end;
end.