* Regexp support for wasm target

This commit is contained in:
Michaël Van Canneyt 2024-09-07 23:46:07 +02:00
parent 424686ce34
commit 46a9fcc5f9
7 changed files with 594 additions and 0 deletions

View File

@ -22,3 +22,8 @@ packages/fcl-web/examples/websocket/server
```
is needed, since this is the websocket server that the demo program will
connect to.
For the regexp demo, you need the corresponding pas2js host program
```
demos/wasienv/regexp
```

View File

@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<ProjectOptions>
<Version Value="12"/>
<General>
<Flags>
<MainUnitHasCreateFormStatements Value="False"/>
<MainUnitHasTitleStatement Value="False"/>
<MainUnitHasScaledStatement Value="False"/>
</Flags>
<SessionStorage Value="InProjectDir"/>
<Title Value="wasmregexpdemo"/>
<UseAppBundle Value="False"/>
<ResourceType Value="res"/>
</General>
<BuildModes>
<Item Name="Default" Default="True"/>
</BuildModes>
<PublishOptions>
<Version Value="2"/>
<UseFileFilters Value="True"/>
</PublishOptions>
<RunParams>
<FormatVersion Value="2"/>
</RunParams>
<Units>
<Unit>
<Filename Value="wasmregexpdemo.pp"/>
<IsPartOfProject Value="True"/>
</Unit>
</Units>
</ProjectOptions>
<CompilerOptions>
<Version Value="11"/>
<Target>
<Filename Value="wasmregexpdemo"/>
</Target>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
<OtherUnitFiles Value="../../src"/>
<UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
</SearchPaths>
<CodeGeneration>
<TargetCPU Value="wasm32"/>
<TargetOS Value="wasi"/>
</CodeGeneration>
<Linking>
<Debugging>
<GenerateDebugInfo Value="False"/>
</Debugging>
</Linking>
</CompilerOptions>
<Debugging>
<Exceptions>
<Item>
<Name Value="EAbort"/>
</Item>
<Item>
<Name Value="ECodetoolError"/>
</Item>
<Item>
<Name Value="EFOpenError"/>
</Item>
</Exceptions>
</Debugging>
</CONFIG>

View File

@ -0,0 +1,50 @@
program wasmregexpdemo;
uses sysutils, wasm.regexp.shared, wasm.regexp.api, wasm.regexp.objects;
Const
SRegex = 'quick\s(?<color>brown).+?(jumps)';
STest = 'The Quick Brown Fox Jumps Over The Lazy Dog';
SFlags = 'dgi';
Var
Regex : TWasmRegExp;
Res : TRegExpResult;
I : Integer;
M : TRegExpMatch;
G : TRegExpGroup;
S : String;
begin
Writeln('Regular expression: ',SRegex);
Writeln('Flags: ',SFlags);
Regex:=TWasmRegExp.Create(SRegex,SFlags);
Writeln('Test string: ',STest);
Res:=Regex.Exec(STest);
if Res.Index=0 then
Writeln('No match')
else
With Res do
begin
Writeln('Match at : ',Index);
I:=0;
For M in Matches do
begin
S:=Format('(%d) : "%s"',[I,M.Value]);
if (rfIndices in Regex.Flags) then
S:=S+Format(' [From pos %d to %d]',[M.StartIndex,M.StopIndex]);
Writeln(S);
Inc(I);
end;
Writeln('Named groups : ',Length(Groups));
For G in Groups do
begin
S:=Format('(%d): "%s": "%s"',[I,G.Name,G.Value]);
if (rfIndices in Regex.Flags) then
S:=S+Format(' [From pos %d to %d]',[G.StartIndex,G.StopIndex]);
Writeln(S);
Inc(I);
end;
end;
end.

View File

@ -43,6 +43,14 @@ begin
T.Dependencies.AddUnit('wasm.websocket.api');
T.Dependencies.AddUnit('wasm.websocket.shared');
T:=P.Targets.AddUnit('wasm.regexp.shared.pas');
T:=P.Targets.AddUnit('wasm.regexp.api.pas');
T.Dependencies.AddUnit('wasm.regexp.shared');
T:=P.Targets.AddUnit('wasm.regexp.objects.pas');
T.Dependencies.AddUnit('wasm.regexp.api');
T.Dependencies.AddUnit('wasm.regexp.shared');
{$ifndef ALLPACKAGES}
Run;
end;

View File

@ -0,0 +1,26 @@
unit wasm.regexp.api;
{$mode ObjFPC}{$H+}
interface
uses wasm.regexp.shared;
function __wasm_regexp_allocate(aExpr : PByte; aExprLen : longint; aFlags : Longint; aID : PWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_Allocate;
function __wasm_regexp_deallocate(aExprID : TWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_DeAllocate;
function __wasm_regexp_exec(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aIndex : PLongint; aResultCount : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Exec;
function __wasm_regexp_test(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aResult : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Test;
function __wasm_regexp_get_flags(aExprID : TWasmRegExpID; aFlags : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetFlags;
function __wasm_regexp_get_expression(aExprID : TWasmRegExpID; aExp : PByte; aExpLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetExpression;
function __wasm_regexp_get_last_index(aExprID : TWasmRegExpID; aLastIndex : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetLastIndex;
function __wasm_regexp_get_result_match(aExprID : TWasmRegExpID; aIndex : Longint; Res : PByte; ResLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetResultMatch;
function __wasm_regexp_get_group_count(aExprID : TWasmRegExpID; aCount: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupCount;
function __wasm_regexp_get_group_name(aExprID : TWasmRegExpID; aIndex : Longint; aName : PByte; aNameLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupName;
function __wasm_regexp_get_named_group(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Longint; aValue : PByte; aValueLen: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroup;
function __wasm_regexp_get_indexes(aExprID : TWasmRegExpID; aIndex : Longint; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetIndexes;
function __wasm_regexp_get_named_group_indexes(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Integer; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroupIndexes;
implementation
end.

View File

@ -0,0 +1,306 @@
unit wasm.regexp.objects;
{$mode ObjFPC}{$H+}
{$modeswitch typehelpers}
interface
uses
{$IFDEF FPC_DOTTEDUNITS}
System.SysUtils,
{$ELSE}
SysUtils,
{$ENDIF}
wasm.regexp.shared,
wasm.regexp.api;
Type
EWasmRegExp = class(Exception);
TRegexpFlag = (rfUnknown,rfDotAll,rfGlobal,rfIndices,rfIgnoreCase,rfMultiLine,rfSticky,rfUnicode,rfUnicodeSets);
TRegexpFlags = Set of TRegExpFlag;
{ TRegexpFlagHelper }
TRegexpFlagHelper = type helper for TRegexpFlag
Public
function ToString: String;
Function AsFlag : Longint;
Property AsString : String Read ToString;
end;
{ TRegexpFlagsHelper }
TRegexpFlagsHelper = type helper for TRegexpFlags
private
procedure SetAsFlags(const aValue: Longint);
public
function ToString: String;
Function ToFlags : Longint;
class function FromFlags(aFlags : Longint) : TRegExpFlags; static;
Property AsString : String Read ToString;
Property AsFlags : Longint Read ToFlags Write SetAsFlags;
end;
TRegExpMatch = record
Value : UTF8String;
StartIndex, StopIndex : Integer;
end;
TRegExpMatchArray = array of TRegExpMatch;
TRegExpGroup = record
Name,Value : UTF8String;
StartIndex, StopIndex : Integer;
end;
TRegExpGroupArray = array of TRegExpGroup;
TRegExpResult = record
Matches : TRegExpMatchArray;
Input : UTF8String;
Index : Integer;
Groups : TRegExpGroupArray;
end;
{ TWasmRegExp }
TWasmRegExp = Class(TObject)
private
FRegexpID: TWasmRegexpID;
FFlags : Longint;
function GetFlags: TRegexpFlags;
function GetGroups(aCount: Integer): TRegExpGroupArray;
function GetLastIndex: Longint;
function GetMatches(aCount: Integer): TRegExpMatchArray;
protected
function CheckRegExpResult(Res : TWasmRegexpResult; const aOperation : String; aRaise : Boolean = true) : Boolean;
Public
Constructor Create(const aExpression,aFlags : String); overload;
Constructor Create(const aExpression : String; aFlags : Longint); overload;
Constructor Create(const aExpression : String; aFlags : TRegexpFlags); overload;
destructor Destroy; override;
Function Exec(const aString : String) : TRegExpResult;
Function Test(const aString : String) : Boolean;
Property LastIndex : Longint Read GetLastIndex;
Property RegexpID : TWasmRegExpID Read FRegexpID;
Property FlagsAsInteger : Integer Read FFlags;
Property Flags : TRegexpFlags Read GetFlags;
end;
implementation
{ TRegexpFlagHelper }
function TRegexpFlagHelper.ToString: String;
begin
end;
function TRegexpFlagHelper.AsFlag: Longint;
Const
FlagValues : Array[TRegexpFlag] of longint
= (0,
WASMRE_FLAG_DOTALL,
WASMRE_FLAG_GLOBAL,
WASMRE_FLAG_INDICES,
WASMRE_FLAG_IGNORECASE,
WASMRE_FLAG_MULTILINE,
WASMRE_FLAG_STICKY,
WASMRE_FLAG_UNICODE,
WASMRE_FLAG_UNICODESETS);
begin
Result:=FlagValues[Self];
end;
{ TRegexpFlagsHelper }
procedure TRegexpFlagsHelper.SetAsFlags(const aValue: Longint);
var
F : TRegexpFlag;
Res : TRegexpFlags;
begin
Res:=[];
for F in TRegexpFlag do
if (F.AsFlag and aValue)<>0 then
Include(Res,F);
Self:=Res;
end;
function TRegexpFlagsHelper.ToString: String;
begin
Result:=FlagsToString(AsFlags);
end;
function TRegexpFlagsHelper.ToFlags: Longint;
var
F : TRegexpFlag;
begin
Result:=0;
For F in Self do
Result:=Result or F.AsFlag;
end;
class function TRegexpFlagsHelper.FromFlags(aFlags: Longint): TRegExpFlags;
begin
Result.AsFlags:=aFlags;
end;
{ TWasmRegExp }
function TWasmRegExp.GetLastIndex: Longint;
begin
CheckRegExpResult(__wasm_regexp_get_last_index(FRegexpID,@Result),'get_last_index');
end;
function TWasmRegExp.GetFlags: TRegexpFlags;
begin
Result:=TRegExpFlags.FromFlags(FFlags)
end;
function TWasmRegExp.CheckRegExpResult(Res: TWasmRegexpResult; const aOperation: String; aRaise: Boolean): Boolean;
begin
Result:=Res=WASMRE_RESULT_SUCCESS;
if (not Result) and aRaise then
Raise EWasmRegExp.CreateFmt('Error %d occured during "%s"',[Res,aOperation]);
end;
constructor TWasmRegExp.Create(const aExpression, aFlags: String);
begin
Create(aExpression,StringToFlags(aFlags,False));
end;
constructor TWasmRegExp.Create(const aExpression: String; aFlags: Longint);
var
R : RawByteString;
begin
R:=UTF8Encode(aExpression);
FFlags:=aFlags;
CheckRegexpResult(__wasm_regexp_allocate(PByte(R),Length(R),aFlags,@FRegexpID),regexpFN_Allocate);
end;
constructor TWasmRegExp.Create(const aExpression: String; aFlags: TRegexpFlags);
begin
Create(aExpression,aFlags.AsFlags);
end;
destructor TWasmRegExp.Destroy;
begin
CheckRegExpResult(__wasm_regexp_deallocate(FRegexpID),regexpFN_Allocate,false);
inherited Destroy;
end;
function TWasmRegExp.GetMatches(aCount: Integer): TRegExpMatchArray;
var
I : Integer;
Len,lStart,lStop : Longint;
Res : TWasmRegexpResult;
S : RawByteString;
lGetIndexes : Boolean;
begin
SetLength(Result,aCount);
lGetindexes:=rfIndices in Flags;
For I:=0 to aCount-1 do
begin
Len:=256;
Repeat
SetLength(S,Len);
Res:=__wasm_regexp_get_result_match(FRegexpID,I,Pbyte(S),@Len);
Until (Res<>WASMRE_RESULT_NO_MEM);
SetLength(S,Len);
CheckRegExpResult(Res,regexpFN_GetResultMatch);
Result[i].Value:=S;
S:='';
if lGetIndexes then
CheckRegExpResult(__wasm_regexp_get_Indexes(FRegexpID,I,@lStart,@lStop),regexpFN_GetIndexes);
Result[i].StartIndex:=lStart+1;
Result[i].StopIndex:=lStop+1;
end;
end;
function TWasmRegExp.GetGroups(aCount: Integer): TRegExpGroupArray;
var
I : Integer;
Len,lStart,lStop : Longint;
Res : TWasmRegexpResult;
N,V : RawByteString;
lGetIndexes : Boolean;
begin
N:='';
V:='';
SetLength(Result,aCount);
lGetindexes:=rfIndices in Flags;
For I:=0 to aCount-1 do
begin
Len:=256;
Repeat
SetLength(N,Len);
Res:=__wasm_regexp_get_group_name(FRegexpID,I,Pbyte(N),@Len);
Until (Res<>WASMRE_RESULT_NO_MEM);
CheckRegExpResult(Res,regexpFN_GetGroupName);
SetLength(N,Len);
Result[i].Name:=N;
Len:=256;
Repeat
SetLength(V,Len);
Res:=__wasm_regexp_get_named_group(FRegexpID,PByte(N),Length(N),Pbyte(V),@Len);
Until (Res<>WASMRE_RESULT_NO_MEM);
CheckRegExpResult(Res,regexpFN_GetNamedGroup);
SetLength(V,Len);
Result[I].Value:=V;
if lGetIndexes then
CheckRegExpResult(__wasm_regexp_get_named_group_indexes(FRegexpID,PByte(N),Length(N),@lStart,@lStop),regexpFN_GetNamedGroup);
Result[i].StartIndex:=lStart+1;
Result[i].StopIndex:=lStop+1;
end;
end;
function TWasmRegExp.Exec(const aString: String): TRegExpResult;
var
lGroupCount, lIndex, lMatchCount : longint;
R : RawByteString;
begin
Result:=Default(TRegExpResult);
R:=UTF8Encode(aString);
Result.Input:=R;
CheckRegexpResult(__wasm_regexp_exec(FRegexpID,Pbyte(R),Length(R),@lIndex,@lMatchCount),regexpFN_exec);
If lMatchCount=0 then
exit;
Result.Index:=lIndex+1;
Result.Matches:=GetMatches(lMatchCount);
CheckRegExpResult(__wasm_regexp_get_group_count(FRegexpID,@lGroupCount),regexpFN_GetGroupCount);
if lGroupCount>0 then
Result.Groups:=GetGroups(lGroupCount);
end;
function TWasmRegExp.Test(const aString: String): Boolean;
var
R : RawByteString;
lRes : Longint;
begin
R:=UTF8Encode(aString);
CheckRegexpResult(__wasm_regexp_test(FRegexpID,Pbyte(R),Length(R),@lRes),regexpFN_test);
Result:=(lRes<>0);
end;
end.

View File

@ -0,0 +1,133 @@
unit wasm.regexp.shared;
{$mode ObjFPC}{$H+}
interface
Type
TWasmRegexpID = Longint;
TWasmRegexpResult = Longint;
{$IFNDEF PAS2JS}
TWasmPointer = Pointer;
PWasmRegexpID = ^TWasmRegexpID;
{$ELSE}
TWasmPointer = Longint;
PWasmRegexpID = TWasmPointer;
{$ENDIF}
Const
WASMRE_RESULT_SUCCESS = 0;
WASMRE_RESULT_ERROR = -1;
WASMRE_RESULT_INVALIDID = -2;
WASMRE_RESULT_NO_MEM = -3;
WASMRE_RESULT_NO_REGEXP = -4;
WASMRE_RESULT_INVALIDIDX = -5;
WASMRE_RESULT_NOINDEXES = -6;
WASMRE_FLAG_DOTALL = 1;
WASMRE_FLAG_GLOBAL = 2;
WASMRE_FLAG_INDICES = 4;
WASMRE_FLAG_IGNORECASE = 8;
WASMRE_FLAG_MULTILINE = 16;
WASMRE_FLAG_STICKY = 32;
WASMRE_FLAG_UNICODE = 64;
WASMRE_FLAG_UNICODESETS = 128;
// Aliases that correspond to the letters used when creating a regexp
WASMRE_FLAG_S = WASMRE_FLAG_DOTALL;
WASMRE_FLAG_G = WASMRE_FLAG_GLOBAL;
WASMRE_FLAG_D = WASMRE_FLAG_INDICES;
WASMRE_FLAG_I = WASMRE_FLAG_IGNORECASE;
WASMRE_FLAG_M = WASMRE_FLAG_MULTILINE;
WASMRE_FLAG_Y = WASMRE_FLAG_STICKY;
WASMRE_FLAG_U = WASMRE_FLAG_UNICODE;
WASMRE_FLAG_V = WASMRE_FLAG_UNICODESETS;
regexpExportName = 'regexp';
regexpFN_Allocate = 'allocate';
regexpFN_DeAllocate = 'deallocate';
regexpFN_Exec = 'exec';
regexpFN_Test = 'test';
regexpFN_GetFlags = 'get_flags';
regexpFN_GetExpression = 'get_expression';
regexpFN_GetLastIndex = 'get_last_index';
regexpFN_GetResultMatch = 'get_result_match';
regexpFN_GetGroupCount = 'get_group_count';
regexpFN_GetGroupName = 'get_group_name';
regexpFN_GetNamedGroup = 'get_named_group';
regexpFN_GetIndexes = 'get_indexes';
regexpFN_GetNamedGroupIndexes = 'get_named_group_indexes';
Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint;
Function FlagsToString(S : Longint) : String;
implementation
uses
{$IFDEF FPC_DOTTEDUNITS}
System.SysUtils;
{$ELSE}
SysUtils;
{$ENDIF}
Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint;
var
C : Char;
Flag : Longint;
begin
Result:=0;
for C in S do
begin
case C of
's': Flag:=WASMRE_FLAG_S;
'g': Flag:=WASMRE_FLAG_G;
'd': Flag:=WASMRE_FLAG_D;
'i': Flag:=WASMRE_FLAG_I;
'm': Flag:=WASMRE_FLAG_M;
'y': Flag:=WASMRE_FLAG_Y;
'u': Flag:=WASMRE_FLAG_U;
'v': Flag:=WASMRE_FLAG_V;
else
if not IgnoreUnknown then
Raise EConvertError.CreateFmt('Unknown regexp flag: %s',[C]);
Flag:=0;
end;
Result:=Result or Flag;
end;
end;
Function FlagsToString(S : Longint) : String;
var
C,I : Longint;
Flag : Char;
begin
Result:='';
for I:=0 to 7 do
begin
C:=S and (1 shl i);
case C of
WASMRE_FLAG_S : Flag:='s';
WASMRE_FLAG_G : Flag:='g';
WASMRE_FLAG_D : Flag:='d';
WASMRE_FLAG_I : Flag:='i';
WASMRE_FLAG_M : Flag:='m';
WASMRE_FLAG_Y : Flag:='y';
WASMRE_FLAG_U : Flag:='u';
WASMRE_FLAG_V : Flag:='v';
else
Flag:='0';
end;
if Flag<>'0' then
Result:=Result + Flag;
end;
end;
end.