lazutils: moved MergeSort to lazutilities, laz2_xmlcfg: sort nodes to find non existing and out of order nodes faster

git-svn-id: trunk@47164 -
This commit is contained in:
mattias 2014-12-10 02:16:39 +00:00
parent b6b7d60218
commit 4b3c76e18a
7 changed files with 258 additions and 161 deletions

1
.gitattributes vendored
View File

@ -2729,6 +2729,7 @@ components/lazutils/lazutf16.pas svneol=native#text/pascal
components/lazutils/lazutf8.pas svneol=native#text/pascal
components/lazutils/lazutf8classes.pas svneol=native#text/pascal
components/lazutils/lazutf8sysutils.pas svneol=native#text/pascal
components/lazutils/lazutilities.pas svneol=native#text/plain
components/lazutils/lazutils.lpk svneol=native#text/plain
components/lazutils/lazutils.pas svneol=native#text/pascal
components/lazutils/lazutilsstrconsts.pas svneol=native#text/pascal

View File

@ -39,8 +39,8 @@ uses
{$IFDEF Windows}
Windows,
{$ENDIF}
Classes, SysUtils, LazUTF8, LazDbgLog, LazFileCache, LazFileUtils,
LazUTF8Classes, LazLogger, AVL_Tree, CodeToolsStrConsts;
Classes, SysUtils, LazUtilities, LazUTF8, LazDbgLog, LazFileCache,
LazFileUtils, LazUTF8Classes, LazLogger, AVL_Tree, CodeToolsStrConsts;
type
TFPCStreamSeekType = int64;
@ -224,16 +224,16 @@ function GetEnvironmentVariableUTF8(const EnvVar: String): String; inline;
procedure InvalidateFileStateCache(const Filename: string = ''); inline;
// basic utility -> should go to RTL
// basic utility -> moved to LazUtilities
function ComparePointers(p1, p2: Pointer): integer; inline;
procedure MergeSort(List: PPointer; ListLength: PtrInt;
Compare: TListSortCompare);
const Compare: TListSortCompare); inline;
function GetNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer): string;
var Position: integer): string; inline;
function HasDelimitedItem(const List: string; Delimiter: char; FindItem: string
): boolean;
): boolean; inline;
function FindNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer; FindItem: string): string;
var Position: integer; FindItem: string): string; inline;
function AVLTreeHasDoubles(Tree: TAVLTree): TAVLTreeNode;
const DateAsCfgStrFormat='YYYYMMDD';
@ -1815,112 +1815,31 @@ end;
function ComparePointers(p1, p2: Pointer): integer;
begin
if p1>p2 then
Result:=1
else if p1<p2 then
Result:=-1
else
Result:=0;
Result:=LazUtilities.ComparePointers(p1,p2);
end;
procedure MergeSort(List: PPointer; ListLength: PtrInt;
Compare: TListSortCompare);
var
MergeList: PPointer;
procedure Merge(Pos1, Pos2, Pos3: PtrInt);
// merge two sorted arrays
// the first array ranges Pos1..Pos2-1, the second ranges Pos2..Pos3
var Src1Pos,Src2Pos,DestPos,cmp,i:PtrInt;
begin
while (Pos3>=Pos2) and (Compare(List[Pos2-1],List[Pos3])<=0) do
dec(Pos3);
if (Pos1>=Pos2) or (Pos2>Pos3) then exit;
Src1Pos:=Pos2-1;
Src2Pos:=Pos3;
DestPos:=Pos3;
while (Src2Pos>=Pos2) and (Src1Pos>=Pos1) do begin
cmp:=Compare(List[Src1Pos],List[Src2Pos]);
if cmp>0 then begin
MergeList[DestPos]:=List[Src1Pos];
dec(Src1Pos);
end else begin
MergeList[DestPos]:=List[Src2Pos];
dec(Src2Pos);
end;
dec(DestPos);
end;
while Src2Pos>=Pos2 do begin
MergeList[DestPos]:=List[Src2Pos];
dec(Src2Pos);
dec(DestPos);
end;
for i:=DestPos+1 to Pos3 do
List[i]:=MergeList[i];
end;
procedure Sort(const Pos1, Pos2: PtrInt);
// sort List from Pos1 to Pos2, usig MergeList as temporary buffer
var cmp, mid: PtrInt;
begin
if Pos1>=Pos2 then begin
// one element is always sorted -> nothing to do
end else if Pos1+1=Pos2 then begin
// two elements can be sorted easily
cmp:=Compare(List[Pos1],List[Pos2]);
if cmp>0 then begin
MergeList[Pos1]:=List[Pos1];
List[Pos1]:=List[Pos2];
List[Pos2]:=MergeList[Pos1];
end;
end else begin
mid:=(Pos1+Pos2) shr 1;
Sort(Pos1,mid);
Sort(mid+1,Pos2);
Merge(Pos1,mid+1,Pos2);
end;
end;
// sort ascending
const Compare: TListSortCompare);
begin
if ListLength<=1 then exit;
GetMem(MergeList,SizeOf(Pointer)*ListLength);
try
Sort(0,ListLength-1);
finally
FreeMem(MergeList);
end;
LazUtilities.MergeSort(List,ListLength,Compare);
end;
function GetNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer): string;
var
StartPos: LongInt;
begin
StartPos:=Position;
while (Position<=length(List)) and (List[Position]<>Delimiter) do
inc(Position);
Result:=copy(List,StartPos,Position-StartPos);
if Position<=length(List) then inc(Position); // skip Delimiter
Result:=LazUtilities.GetNextDelimitedItem(List,Delimiter,Position);
end;
function HasDelimitedItem(const List: string; Delimiter: char; FindItem: string
): boolean;
var
p: Integer;
begin
p:=1;
Result:=FindNextDelimitedItem(List,Delimiter,p,FindItem)<>'';
Result:=LazUtilities.HasDelimitedItem(List,Delimiter,FindItem);
end;
function FindNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer; FindItem: string): string;
begin
while Position<=length(List) do begin
Result:=GetNextDelimitedItem(List,Delimiter,Position);
if Result=FindItem then exit;
end;
Result:='';
Result:=LazUtilities.FindNextDelimitedItem(List,Delimiter,Position,FindItem);
end;
function AVLTreeHasDoubles(Tree: TAVLTree): TAVLTreeNode;
@ -2403,13 +2322,13 @@ end;
function CompareCTLineInfoCacheItems(Data1, Data2: Pointer): integer;
begin
Result:=ComparePointers(PCTLineInfoCacheItem(Data1)^.Addr,
Result:=LazUtilities.ComparePointers(PCTLineInfoCacheItem(Data1)^.Addr,
PCTLineInfoCacheItem(Data2)^.Addr);
end;
function CompareAddrWithCTLineInfoCacheItem(Addr, Item: Pointer): integer;
begin
Result:=ComparePointers(Addr,PCTLineInfoCacheItem(Item)^.Addr);
Result:=LazUtilities.ComparePointers(Addr,PCTLineInfoCacheItem(Item)^.Addr);
end;
function FileAgeToStr(aFileAge: longint): string;

View File

@ -17,7 +17,7 @@ unit FPCAdds;
{$mode objfpc}{$H+}{$inline on}
{$IF defined(EnableUTF8RTL) and (FPC_FULLVERSION<20701)}
{$error UTF8 RTl requires fpc 2.7.1+}
{$error UTF8 RTL requires fpc 2.7.1+}
{$ENDIF}
interface
@ -25,11 +25,10 @@ interface
uses
Classes, SysUtils;
// current TStream calculates in int64, old in longint
type
TStreamSeekType = int64;
TMemStreamSeekType = integer;
TCompareMemSize = integer;
TMemStreamSeekType = PtrInt;
TCompareMemSize = PtrUInt;
PHandle = ^THandle;
function StrToWord(const s: string): word;

View File

@ -25,7 +25,7 @@ interface
uses
{$IFDEF MEM_CHECK}MemCheck,{$ENDIF}
Classes, sysutils, LazFileCache,
Laz2_DOM, Laz2_XMLRead, Laz2_XMLWrite,
Laz2_DOM, Laz2_XMLRead, Laz2_XMLWrite, LazUtilities,
typinfo;
type
@ -44,26 +44,33 @@ type
FWriteFlags: TXMLWriterFlags;
procedure CreateConfigNode;
procedure SetFilename(const AFilename: String);
protected
type
TNodeCache = record
Node: TDomNode;
ChildrenValid: boolean;
Children: array of TDomNode; // nodes with NodeName<>'' and sorted
end;
protected
doc: TXMLDocument;
FModified: Boolean;
fDoNotLoadFromFile: boolean;
fAutoLoadFromSource: string;
fPathCache: string;
fPathNodeCache: array of TDomNode; // starting with doc.DocumentElement, then first child node of first sub path
fPathNodeCache: array of TNodeCache; // starting with doc.DocumentElement, then first child node of first sub path
procedure Loaded; override;
function ExtendedToStr(const e: extended): string;
function StrToExtended(const s: string; const ADefault: extended): extended;
procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); virtual;
procedure WriteXMLFile(ADoc: TXMLDocument; const AFileName: String); virtual;
procedure FreeDoc; virtual;
procedure SetPathNodeCache(Index: integer; Node: TDomNode);
function GetPathNodeCache(Index: integer): TDomNode; inline;
procedure SetPathNodeCache(Index: integer; aNode: TDomNode);
function GetCachedPathNode(Index: integer): TDomNode; inline;
procedure InvalidateCacheTilEnd(StartIndex: integer);
function InternalFindNode(const APath: String; PathLen: integer;
CreateNodes: boolean = false): TDomNode;
procedure InternalCleanNode(Node: TDomNode);
function FindChildNode(Parent, StartChild: TDomNode; const aName: string): TDomNode;
function FindChildNode(PathIndex: integer; const aName: string): TDomNode;
public
constructor Create(AOwner: TComponent); override; overload;
constructor Create(const AFilename: String); overload; // create and load
@ -123,13 +130,23 @@ type
// ===================================================================
function CompareDomNodeNames(DOMNode1, DOMNode2: Pointer): integer;
implementation
function CompareDomNodeNames(DOMNode1, DOMNode2: Pointer): integer;
var
Node1: TDOMNode absolute DomNode1;
Node2: TDOMNode absolute DomNode2;
begin
Result:=CompareStr(Node1.NodeName,Node2.NodeName);
end;
// inline
function TXMLConfig.GetPathNodeCache(Index: integer): TDomNode;
function TXMLConfig.GetCachedPathNode(Index: integer): TDomNode;
begin
if Index<length(fPathNodeCache) then
Result:=fPathNodeCache[Index]
Result:=fPathNodeCache[Index].Node
else
Result:=nil;
end;
@ -460,24 +477,24 @@ begin
FreeAndNil(doc);
end;
procedure TXMLConfig.SetPathNodeCache(Index: integer; Node: TDomNode);
procedure TXMLConfig.SetPathNodeCache(Index: integer; aNode: TDomNode);
var
OldLength: Integer;
i: LongInt;
NewSize: Integer;
OldLength, NewLength: Integer;
begin
OldLength:=length(fPathNodeCache);
if OldLength<=Index then begin
if OldLength<8 then
NewSize:=8
NewLength:=8
else
NewSize:=OldLength*2;
if NewSize<Index then NewSize:=Index;
SetLength(fPathNodeCache,NewSize);
for i:=OldLength to length(fPathNodeCache)-1 do
fPathNodeCache[i]:=nil;
NewLength:=OldLength*2;
if NewLength<Index then NewLength:=Index;
SetLength(fPathNodeCache,NewLength);
FillByte(fPathNodeCache[OldLength],SizeOf(TNodeCache)*(NewLength-OldLength),0);
end;
with fPathNodeCache[Index] do begin
Node:=aNode;
ChildrenValid:=false;
end;
fPathNodeCache[Index]:=Node;
end;
procedure TXMLConfig.InvalidateCacheTilEnd(StartIndex: integer);
@ -485,8 +502,11 @@ var
i: LongInt;
begin
for i:=StartIndex to length(fPathNodeCache)-1 do begin
if fPathNodeCache[i]=nil then break;
fPathNodeCache[i]:=nil;
with fPathNodeCache[i] do begin
if Node=nil then break;
Node:=nil;
ChildrenValid:=false;
end;
end;
end;
@ -502,7 +522,7 @@ var
begin
//debugln(['TXMLConfig.InternalFindNode APath="',copy(APath,1,PathLen),'" CreateNodes=',CreateNodes]);
PathIndex:=0;
Result:=GetPathNodeCache(PathIndex);
Result:=GetCachedPathNode(PathIndex);
if (Result=nil) and (doc<>nil) then begin
Result:=TDOMElement(doc.FindNode('CONFIG'));
SetPathNodeCache(PathIndex,Result);
@ -516,17 +536,14 @@ begin
if NameLen=0 then break;
inc(PathIndex);
Parent:=Result;
if PathIndex<length(fPathNodeCache) then // GetPathNodeCache inlined
Result:=fPathNodeCache[PathIndex]
else
Result:=nil;
Result:=GetCachedPathNode(PathIndex);
if Result<>nil then
NdName:=Result.NodeName;
if (Result=nil) or (length(NdName)<>NameLen)
or not CompareMem(PChar(NdName),@APath[StartPos],NameLen) then begin
// different path => search
NodePath:=copy(APath,StartPos,NameLen);
Result:=FindChildNode(Parent,Result,NodePath);
Result:=FindChildNode(PathIndex-1,NodePath);
InvalidateCacheTilEnd(PathIndex);
if Result=nil then begin
if not CreateNodes then exit;
@ -559,40 +576,59 @@ begin
end;
end;
function TXMLConfig.FindChildNode(Parent, StartChild: TDomNode;
const aName: string): TDomNode;
function TXMLConfig.FindChildNode(PathIndex: integer; const aName: string
): TDomNode;
var
Start: boolean;
PrevChild: TDOMNode;
NextChild: TDOMNode;
aParent, aChild: TDOMNode;
aCount: Integer;
NewLength: Integer;
l, r, m: Integer;
cmp: Integer;
begin
if StartChild<>nil then begin
// usually the config is written in a similar order as it is read
// => the next config node is probably near the last node (StartChild)
// -> search beginning at StartChild in both directions
if StartChild.NodeName=aName then
exit(StartChild);
Start:=true;
repeat
if Start then
NextChild:=StartChild.NextSibling;
if (NextChild<>nil) then begin
if (NextChild.NodeName=aName) then
exit(NextChild);
NextChild:=NextChild.NextSibling;
Result:=fPathNodeCache[PathIndex].Node.FindNode(aName);
exit;
with fPathNodeCache[PathIndex] do begin
if not ChildrenValid then begin
// collect all children and sort
aParent:=Node;
aCount:=0;
aChild:=aParent.FirstChild;
while aChild<>nil do begin
if aChild.NodeName<>'' then begin
if aCount=length(Children) then begin
NewLength:=length(Children);
if NewLength<8 then
NewLength:=8
else
NewLength:=NewLength*2;
SetLength(Children,NewLength);
end;
Children[aCount]:=aChild;
inc(aCount);
end;
aChild:=aChild.NextSibling;
end;
if Start then
PrevChild:=StartChild.PreviousSibling;
if (PrevChild<>nil) then begin
if (PrevChild.NodeName=aName) then
exit(PrevChild);
PrevChild:=PrevChild.PreviousSibling;
end;
Start:=false;
until (PrevChild=nil) and (NextChild=nil);
SetLength(Children,aCount);
if aCount>1 then
MergeSort(@Children[0],aCount,@CompareDomNodeNames); // soet ascending [0]<[1]
ChildrenValid:=true;
end;
// binary search
l:=0;
r:=length(Children)-1;
while l<=r do begin
m:=(l+r) shr 1;
cmp:=CompareStr(aName,Children[m].NodeName);
if cmp<0 then
r:=m-1
else if cmp>0 then
l:=m+1
else
exit(Children[m]);
end;
Result:=nil;
end else
Result:=Parent.FindNode(aName);
end;
end;
constructor TXMLConfig.Create(AOwner: TComponent);

View File

@ -0,0 +1,138 @@
unit LazUtilities;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils;
function ComparePointers(p1, p2: Pointer): integer; inline;
procedure MergeSort(List: PPointer; ListLength: PtrInt;
const Compare: TListSortCompare); // sort ascending, e.g. Compare(List[0],List[1])<0
function GetNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer): string;
function HasDelimitedItem(const List: string; Delimiter: char; FindItem: string
): boolean;
function FindNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer; FindItem: string): string;
implementation
function ComparePointers(p1, p2: Pointer): integer;
begin
if p1>p2 then
Result:=1
else if p1<p2 then
Result:=-1
else
Result:=0;
end;
procedure MergeSort(List: PPointer; ListLength: PtrInt;
const Compare: TListSortCompare);
var
MergeList: PPointer;
procedure Merge(Pos1, Pos2, Pos3: PtrInt);
// merge two sorted arrays
// the first array ranges Pos1..Pos2-1, the second ranges Pos2..Pos3
var
Src1Pos, Src2Pos, DestPos, cmp, i: PtrInt;
begin
while (Pos3>=Pos2) and (Compare(List[Pos2-1],List[Pos3])<=0) do
dec(Pos3);
if (Pos1>=Pos2) or (Pos2>Pos3) then exit;
Src1Pos:=Pos2-1;
Src2Pos:=Pos3;
DestPos:=Pos3;
while (Src2Pos>=Pos2) and (Src1Pos>=Pos1) do begin
cmp:=Compare(List[Src1Pos],List[Src2Pos]);
if cmp>0 then begin
MergeList[DestPos]:=List[Src1Pos];
dec(Src1Pos);
end else begin
MergeList[DestPos]:=List[Src2Pos];
dec(Src2Pos);
end;
dec(DestPos);
end;
while Src2Pos>=Pos2 do begin
MergeList[DestPos]:=List[Src2Pos];
dec(Src2Pos);
dec(DestPos);
end;
for i:=DestPos+1 to Pos3 do
List[i]:=MergeList[i];
end;
procedure Sort(const Pos1, Pos2: PtrInt);
// sort List from Pos1 to Pos2, using MergeList as temporary buffer
var
cmp, mid: PtrInt;
p: Pointer;
begin
if Pos1>=Pos2 then begin
// one element is always sorted -> nothing to do
end else if Pos1+1=Pos2 then begin
// two elements can be sorted easily
cmp:=Compare(List[Pos1],List[Pos2]);
if cmp>0 then begin
p:=List[Pos1];
List[Pos1]:=List[Pos2];
List[Pos2]:=p;
end;
end else begin
mid:=(Pos1+Pos2) shr 1;
Sort(Pos1,mid);
Sort(mid+1,Pos2);
Merge(Pos1,mid+1,Pos2);
end;
end;
// sort ascending
begin
if ListLength<=1 then exit;
GetMem(MergeList,SizeOf(Pointer)*ListLength);
try
Sort(0,ListLength-1);
finally
FreeMem(MergeList);
end;
end;
function GetNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer): string;
var
StartPos: LongInt;
begin
StartPos:=Position;
while (Position<=length(List)) and (List[Position]<>Delimiter) do
inc(Position);
Result:=copy(List,StartPos,Position-StartPos);
if Position<=length(List) then inc(Position); // skip Delimiter
end;
function HasDelimitedItem(const List: string; Delimiter: char; FindItem: string
): boolean;
var
p: Integer;
begin
p:=1;
Result:=FindNextDelimitedItem(List,Delimiter,p,FindItem)<>'';
end;
function FindNextDelimitedItem(const List: string; Delimiter: char;
var Position: integer; FindItem: string): string;
begin
while Position<=length(List) do begin
Result:=GetNextDelimitedItem(List,Delimiter,Position);
if Result=FindItem then exit;
end;
Result:='';
end;
end.

View File

@ -2,6 +2,7 @@
<CONFIG>
<Package Version="4">
<Name Value="LazUtils"/>
<Type Value="RunAndDesignTime"/>
<Author Value="Lazarus Team"/>
<CompilerOptions>
<Version Value="11"/>
@ -15,7 +16,7 @@
<Description Value="Useful units for Lazarus packages."/>
<License Value="Modified LGPL-2"/>
<Version Major="1"/>
<Files Count="74">
<Files Count="75">
<Item1>
<Filename Value="laz2_dom.pas"/>
<UnitName Value="Laz2_DOM"/>
@ -90,7 +91,7 @@
</Item18>
<Item19>
<Filename Value="paswstring.pas"/>
<UnitName Value="paswstring"/>
<UnitName Value="PasWString"/>
</Item19>
<Item20>
<Filename Value="fileutil.pas"/>
@ -313,13 +314,16 @@
<Filename Value="fpcadds.pas"/>
<UnitName Value="FPCAdds"/>
</Item74>
<Item75>
<Filename Value="lazutilities.pas"/>
<UnitName Value="lazutilities"/>
</Item75>
</Files>
<LazDoc Paths="../../docs/xml/lazutils"/>
<i18n>
<EnableI18N Value="True"/>
<OutDir Value="languages"/>
</i18n>
<Type Value="RunAndDesignTime"/>
<RequiredPkgs Count="1">
<Item1>
<PackageName Value="FCL"/>

View File

@ -9,14 +9,14 @@ interface
uses
Laz2_DOM, Laz2_XMLCfg, laz2_XMLRead, laz2_xmlutils, laz2_XMLWrite, Laz_DOM,
Laz_XMLCfg, Laz_XMLRead, Laz_XMLStreaming, Laz_XMLWrite, LazFileUtils,
LazFileCache, LazUTF8, LazDbgLog, paswstring, FileUtil, LazUTF8Classes,
LazFileCache, LazUTF8, LazDbgLog, PasWString, FileUtil, LazUTF8Classes,
Masks, LazUtilsStrConsts, LConvEncoding, lazutf16, lazutf8sysutils,
LazMethodList, AvgLvlTree, LazLogger, LazFreeType, TTCache, TTCalc, TTCMap,
TTDebug, TTError, TTFile, TTGLoad, TTInterp, TTLoad, TTMemory, TTObjs,
TTProfile, TTRASTER, TTTables, TTTypes, EasyLazFreeType, LazLoggerBase,
LazLoggerDummy, LazClasses, LazFreeTypeFontCollection, LazConfigStorage,
UTF8Process, laz2_xpath, DictionaryStringList, LazLoggerProfiling, FPCAdds,
LazarusPackageIntf;
LazUtilities, LazarusPackageIntf;
implementation