mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-22 06:49:27 +02:00
* sitemaps now initially scanned, recursive scanning fixed.
git-svn-id: trunk@15649 -
This commit is contained in:
parent
b1f29d4473
commit
4b063ea245
@ -25,7 +25,7 @@ unit chmfilewriter;
|
||||
interface
|
||||
|
||||
uses
|
||||
Classes, SysUtils, chmwriter, inifiles, contnrs,
|
||||
Strings, Classes, SysUtils, chmwriter, inifiles, contnrs, chmsitemap, avl_tree,
|
||||
{for html scanning } dom,SAX_HTML,dom_html;
|
||||
|
||||
type
|
||||
@ -59,11 +59,17 @@ type
|
||||
fScanHtmlContents : Boolean;
|
||||
fOtherFiles : TStrings; // Files found in a scan.
|
||||
fAllowedExtensions: TStringList;
|
||||
fTotalFileList : TAvlTree;
|
||||
FSpareString : TStringIndex;
|
||||
FBasePath : String; // location of the .hhp file. Needed to resolve relative paths
|
||||
protected
|
||||
function GetData(const DataName: String; out PathInChm: String; out FileName: String; var Stream: TStream): Boolean;
|
||||
procedure LastFileAdded(Sender: TObject);
|
||||
procedure readIniOptions(keyvaluepairs:tstringlist);
|
||||
procedure ScanHtml;
|
||||
procedure ScanList(toscan,newfiles:TStrings;recursion:boolean);
|
||||
procedure ScanSitemap(sitemap:TChmSiteMap;newfiles:TStrings;recursion:boolean);
|
||||
function FileInTotalList(const s:String):boolean;
|
||||
public
|
||||
constructor Create; virtual;
|
||||
destructor Destroy; override;
|
||||
@ -111,7 +117,7 @@ Const
|
||||
|
||||
implementation
|
||||
|
||||
uses XmlCfg, chmsitemap, CHMTypes;
|
||||
uses XmlCfg, CHMTypes;
|
||||
|
||||
{ TChmProject }
|
||||
|
||||
@ -185,6 +191,8 @@ begin
|
||||
FWindows:=TObjectList.Create(True);
|
||||
FMergeFiles:=TStringlist.Create;
|
||||
ScanHtmlContents:=False;
|
||||
FTotalFileList:=TAvlTree.Create(@CompareStrings);
|
||||
FSparestring :=TStringIndex.Create;
|
||||
end;
|
||||
|
||||
destructor TChmProject.Destroy;
|
||||
@ -196,10 +204,12 @@ begin
|
||||
FFiles.Free;
|
||||
FOtherFiles.Free;
|
||||
FWindows.Free;
|
||||
FSpareString.Free;
|
||||
FTotalFileList.FreeAndClear;
|
||||
FTotalFileList.Free;
|
||||
inherited Destroy;
|
||||
end;
|
||||
|
||||
|
||||
Type
|
||||
TSectionEnum = (secOptions,secWindows,secFiles,secMergeFiles,secAlias,secMap,secInfoTypes,secTextPopups,secUnknown);
|
||||
TOptionEnum = (OPTAUTO_INDEX,OPTAUTO_TOC,OPTBINARY_INDEX,OPTBINARY_TOC,OPTCITATION,
|
||||
@ -221,8 +231,6 @@ Const
|
||||
'FULL-TEXT SEARCH STOP LIST','FULL-TEXT SEARCH','IGNORE','INDEX FILE','LANGUAGE','PREFIX',
|
||||
'SAMPLE STAGING PATH','SAMPLE LIST FILE','TMPDIR','TITLE','CUSTOM TAB','UNKNOWN');
|
||||
|
||||
|
||||
|
||||
function FindSectionName (const name:string):TSectionEnum;
|
||||
|
||||
begin
|
||||
@ -303,6 +311,7 @@ begin
|
||||
Cfg := TXMLConfig.Create(nil);
|
||||
Cfg.Filename := AFileName;
|
||||
FileName := AFileName;
|
||||
FBasePath:=extractfilepath(expandfilename(afilename));
|
||||
|
||||
Files.Clear;
|
||||
FileCount := Cfg.GetValue('Files/Count/Value', 0);
|
||||
@ -312,7 +321,7 @@ begin
|
||||
nd.urlname:=Cfg.GetValue('Files/FileName'+IntToStr(I)+'/Value','');
|
||||
nd.contextnumber:=Cfg.GetValue('Files/FileName'+IntToStr(I)+'/ContextNumber',0);
|
||||
nd.contextname:=Cfg.GetValue('Files/FileName'+IntToStr(I)+'/ContextName','');
|
||||
Files.AddObject(nd.urlname,nd);
|
||||
Files.AddObject(nd.URLNAME,nd);
|
||||
end;
|
||||
|
||||
FileCount := Cfg.GetValue('OtherFiles/Count/Value', 0);
|
||||
@ -526,7 +535,7 @@ var
|
||||
begin
|
||||
{ Defaults other than global }
|
||||
MakeBinaryIndex:=True;
|
||||
|
||||
FBasePath:=extractfilepath(expandfilename(afilename));
|
||||
Fini:=TMeminiFile.Create(AFileName);
|
||||
secs := TStringList.create;
|
||||
strs := TStringList.create;
|
||||
@ -679,25 +688,75 @@ begin
|
||||
OnError(self,errorkind,msg,detaillevel);
|
||||
end;
|
||||
|
||||
procedure TChmProject.ScanHtml;
|
||||
const
|
||||
protocols : array[0..2] of string = ('HTTP:','FTP:','MS-ITS:');
|
||||
protocollen : array[0..2] of integer= ( 5 ,4 ,7);
|
||||
|
||||
function sanitizeurl(const basepath,instring:string;var outstring:String):Boolean;
|
||||
var i,j,len : integer;
|
||||
begin
|
||||
result:=true; outstring:='';
|
||||
if instring='' then
|
||||
exit(false);
|
||||
|
||||
len:=length(instring);
|
||||
if len=0 then
|
||||
exit(false);
|
||||
i:=0;
|
||||
while (i<=high(protocols)) do
|
||||
begin
|
||||
if strlicomp(@protocols[i][1],@instring[1],protocollen[i])=0 then
|
||||
exit(false);
|
||||
inc(i);
|
||||
end;
|
||||
outstring:=instring;
|
||||
|
||||
i:=pos('#',outstring);
|
||||
if i<>0 then
|
||||
delete(outstring,i,length(outstring)-i+1);
|
||||
|
||||
outstring:=expandfilename(StringReplace(outstring,'%20',' ',[rfReplaceAll]));// expandfilename(instring));
|
||||
|
||||
outstring:=extractrelativepath(basepath,outstring);
|
||||
outstring:=StringReplace(outstring,'\','/',[rfReplaceAll]);
|
||||
end;
|
||||
|
||||
function TChmProject.FileInTotalList(const s:String):boolean;
|
||||
|
||||
begin
|
||||
FSpareString.TheString:=S;
|
||||
result:=assigned(fTotalFileList.FindKey(FSpareString,@CompareStrings));
|
||||
end;
|
||||
|
||||
procedure TChmProject.ScanList(toscan,newfiles:TStrings;recursion:boolean);
|
||||
// toscan, list to search for htmlfiles to scan.
|
||||
// newfiles, the resulting list of files.
|
||||
// totalfilelist, the list that contains all found and specified files to check against.
|
||||
// localfilelist (local var), files found in this file.
|
||||
var
|
||||
localpath : string;
|
||||
|
||||
procedure checkattributes(node:TDomNode;attributename:string;filelist :TStringList);
|
||||
var
|
||||
Attributes: TDOMNamedNodeMap;
|
||||
atnode : TDomNode;
|
||||
fn : String;
|
||||
n : integer;
|
||||
begin
|
||||
if assigned(node) then
|
||||
begin
|
||||
Attributes:=node.Attributes;
|
||||
if assigned(attributes) then
|
||||
begin
|
||||
atnode :=attributes.GetNamedItem(attributename);
|
||||
if assigned(atnode) then
|
||||
for n:=0 to attributes.length-1 do
|
||||
begin
|
||||
fn:=atnode.nodevalue;
|
||||
if (fn<>'') then
|
||||
filelist.add(fn);
|
||||
atnode :=attributes[n];
|
||||
if assigned(atnode) and (uppercase(atnode.nodename)=attributename) then
|
||||
begin
|
||||
if sanitizeurl(fbasepath,localpath+atnode.nodevalue,fn) then
|
||||
if not FileInTotalList(uppercase(fn)) then
|
||||
filelist.add(fn);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
@ -707,6 +766,7 @@ end;
|
||||
function scantags(prnt:TDomNode;filelist:TStringlist):TDomNode;
|
||||
// Seach first matching tag in siblings
|
||||
var chld: TDomNode;
|
||||
s : ansistring;
|
||||
begin
|
||||
result:=nil;
|
||||
if assigned(prnt ) then
|
||||
@ -717,18 +777,22 @@ begin
|
||||
scantags(chld,filelist); // depth first.
|
||||
if (chld is TDomElement) then
|
||||
begin
|
||||
// writeln(tdomelement(chld).tagname,' ',chld.classname );
|
||||
if tdomelement(chld).tagname='link'then
|
||||
s:=uppercase(tdomelement(chld).tagname);
|
||||
if s='LINK' then
|
||||
begin
|
||||
//printattributes(chld,'');
|
||||
checkattributes(chld,'href',filelist);
|
||||
checkattributes(chld,'HREF',filelist);
|
||||
end;
|
||||
if tdomelement(chld).tagname='img'then
|
||||
if s='IMG'then
|
||||
begin
|
||||
//printattributes(chld,'');
|
||||
checkattributes(chld,'src',filelist);
|
||||
checkattributes(chld,'SRC',filelist);
|
||||
end;
|
||||
if s='A'then
|
||||
begin
|
||||
//printattributes(chld,'');
|
||||
checkattributes(chld,'HREF',filelist);
|
||||
end;
|
||||
|
||||
end;
|
||||
chld:=chld.nextsibling;
|
||||
end;
|
||||
@ -736,18 +800,44 @@ begin
|
||||
end;
|
||||
|
||||
var
|
||||
filelist, localfilelist: TStringList;
|
||||
localfilelist: TStringList;
|
||||
domdoc : THTMLDocument;
|
||||
i,j : Integer;
|
||||
fn,s : string;
|
||||
ext : String;
|
||||
begin
|
||||
filelist:= TStringList.create;
|
||||
localfilelist:= TStringList.create;
|
||||
i,j : Integer;
|
||||
fn,s : string;
|
||||
ext : String;
|
||||
tmplst : Tstringlist;
|
||||
strrec : TStringIndex;
|
||||
//localpath : string;
|
||||
|
||||
for j:=0 to Files.count-1 do
|
||||
function trypath(const vn:string):boolean;
|
||||
var vn2: String;
|
||||
strrec : TStringIndex;
|
||||
begin
|
||||
vn2:=uppercase(vn);
|
||||
if FileInTotalList(vn2) then
|
||||
begin
|
||||
fn:=files[j];
|
||||
Error(ChmNote,'Found duplicate file '+vn+' while scanning '+fn,1);
|
||||
exit(false);
|
||||
end;
|
||||
|
||||
result:=false;
|
||||
if fileexists(vn) then // correct for relative path .html file?
|
||||
begin
|
||||
result:=true;
|
||||
StrRec:=TStringIndex.Create;
|
||||
StrRec.TheString:=vn2;
|
||||
StrRec.Strid :=0;
|
||||
fTotalFileList.Add(StrRec);
|
||||
newfiles.add(vn);
|
||||
Error(ChmNote,'Found file '+vn+' while scanning '+fn,1);
|
||||
end;
|
||||
end;
|
||||
|
||||
begin
|
||||
localfilelist:=TStringList.Create;
|
||||
for j:=0 to toscan.count-1 do
|
||||
begin
|
||||
fn:=toscan[j];
|
||||
localfilelist.clear;
|
||||
if (FAllowedExtensions.Indexof(uppercase(extractfileext(fn)))<>-1) then
|
||||
begin
|
||||
@ -757,20 +847,16 @@ begin
|
||||
try
|
||||
Error(chmnote,'Scanning file '+fn+'.',5);
|
||||
ReadHtmlFile(domdoc,fn);
|
||||
|
||||
localpath:=extractfilepath(fn);
|
||||
if (length(localpath)>0) and not (localpath[length(localpath)] in ['/','\']) then
|
||||
localpath:=localpath+pathsep;
|
||||
scantags(domdoc,localfilelist);
|
||||
for i:=0 to localFilelist.count-1 do
|
||||
begin
|
||||
s:=localfilelist[i];
|
||||
if fileexists(s) then // correct for relative path .html file?
|
||||
begin
|
||||
filelist.add(s);
|
||||
Error(ChmNote,'Found file '+s+' while scanning '+fn,1);
|
||||
end
|
||||
else
|
||||
begin
|
||||
if not trypath(s) then
|
||||
// if not trypath(localpath+s) then
|
||||
Error(ChmWarning,'Found file '+s+' while scanning '+fn+', but couldn''t find it on disk',2);
|
||||
end
|
||||
end;
|
||||
except
|
||||
on e:exception do
|
||||
@ -786,21 +872,148 @@ begin
|
||||
else
|
||||
Error(chmnote,'Not scanning file because of unknown extension '+fn,5);
|
||||
end;
|
||||
if filelist.count>0 then
|
||||
for i:=0 to filelist.count-1 do
|
||||
begin
|
||||
if otherfiles.indexof(filelist[i])=-1 then
|
||||
begin
|
||||
otherfiles.add(filelist[i]);
|
||||
Error(chmnote,'Added media file '+filelist[i],5);
|
||||
end
|
||||
else
|
||||
Error(chmnote,'Ignored duplicate found file '+filelist[i],5);
|
||||
end;
|
||||
filelist.free;
|
||||
localfilelist.free;
|
||||
if (newfiles.count>0) and recursion then
|
||||
begin
|
||||
tmplst:=TStringList.Create;
|
||||
scanlist(newfiles,tmplst,true);
|
||||
newfiles.addstrings(tmplst);
|
||||
tmplst.free;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure TChmProject.ScanSitemap(sitemap:TChmSiteMap;newfiles:TStrings;recursion:boolean);
|
||||
|
||||
procedure scanitems(it:TChmSiteMapItems);
|
||||
|
||||
var i : integer;
|
||||
x : TChmSiteMapItem;
|
||||
s : string;
|
||||
strrec : TStringIndex;
|
||||
|
||||
begin
|
||||
for i:=0 to it.count -1 do
|
||||
begin
|
||||
x:=it.item[i];
|
||||
if sanitizeurl(fbasepath,x.local,S) then // sanitize, remove stuff etc.
|
||||
begin
|
||||
if not FileInTotalList(uppercase(s)) then
|
||||
begin
|
||||
if fileexists(s) then
|
||||
begin
|
||||
Error(chmnote,'Good url: '+s+'.',5);
|
||||
StrRec:=TStringIndex.Create;
|
||||
StrRec.TheString:=uppercase(s);
|
||||
StrRec.Strid :=0;
|
||||
fTotalFileList.Add(StrRec);
|
||||
newfiles.add(s);
|
||||
end
|
||||
else
|
||||
Error(chmnote,'duplicate url: '+s+'.',5);
|
||||
end
|
||||
else
|
||||
Error(chmnote,'duplicate url: '+s+'.',5);
|
||||
end
|
||||
else
|
||||
Error(chmnote,'Bad url: '+s+'.',5);
|
||||
|
||||
if assigned(x.children) and (x.children.count>0) then
|
||||
scanitems(x.children);
|
||||
end;
|
||||
end;
|
||||
|
||||
var i : integer;
|
||||
localfilelist: TStringList;
|
||||
|
||||
begin
|
||||
localfilelist:=TStringList.Create;
|
||||
scanitems(sitemap.items);
|
||||
scanlist(newfiles,localfilelist,true);
|
||||
newfiles.addstrings(localfilelist);
|
||||
localfilelist.free;
|
||||
end;
|
||||
|
||||
procedure TChmProject.ScanHtml;
|
||||
var
|
||||
helplist,
|
||||
localfilelist: TStringList;
|
||||
i : integer;
|
||||
x : TChmSiteMap;
|
||||
strrec : TStringIndex;
|
||||
begin
|
||||
|
||||
for i:=0 to otherfiles.count-1 do
|
||||
begin
|
||||
StrRec:=TStringIndex.Create;
|
||||
StrRec.TheString:=uppercase(otherfiles[i]);
|
||||
StrRec.Strid :=0;
|
||||
fTotalFileList.Add(StrRec);
|
||||
end;
|
||||
|
||||
for i:=0 to files.count-1 do
|
||||
begin
|
||||
StrRec:=TStringIndex.Create;
|
||||
StrRec.TheString:=uppercase(files[i]);
|
||||
StrRec.Strid :=0;
|
||||
fTotalFileList.Add(StrRec);
|
||||
end;
|
||||
|
||||
localfilelist:= TStringList.create;
|
||||
scanlist(ffiles,localfilelist,true);
|
||||
otherfiles.addstrings(localfilelist);
|
||||
localfilelist.clear;
|
||||
if (FDefaultpage<>'') and (not FileInTotalList(uppercase(fdefaultpage))) then
|
||||
begin
|
||||
Error(chmnote,'Scanning default file : '+fdefaultpage+'.',3);
|
||||
helplist:=TStringlist.Create;
|
||||
helplist.add(fdefaultpage);
|
||||
scanlist(helplist,localfilelist,true);
|
||||
otherfiles.addstrings(localfilelist);
|
||||
localfilelist.clear;
|
||||
end;
|
||||
if FTableOfContentsFileName<>'' then
|
||||
begin
|
||||
if fileexists(FTableOfContentsFileName) then
|
||||
begin
|
||||
Error(chmnote,'Scanning TOC file : '+FTableOfContentsFileName+'.',3);
|
||||
x:=TChmSiteMap.Create(sttoc);
|
||||
try
|
||||
x.loadfromfile(FTableOfcontentsFilename);
|
||||
scansitemap(x,localfilelist,true);
|
||||
otherfiles.addstrings(localfilelist);
|
||||
except
|
||||
on e: Exception do
|
||||
error(chmerror,'Error loading TOC file '+FTableOfContentsFileName);
|
||||
end;
|
||||
x.free;
|
||||
end
|
||||
else
|
||||
error(chmerror,'Can''t find TOC file'+FTableOfContentsFileName);
|
||||
end;
|
||||
LocalFileList.clear;
|
||||
if FIndexFileName<>'' then
|
||||
begin
|
||||
if fileexists(FIndexFileName) then
|
||||
begin
|
||||
Error(chmnote,'Scanning Index file : '+FIndexFileName+'.',3);
|
||||
x:=TChmSiteMap.Create(stindex);
|
||||
try
|
||||
x.loadfromfile(FIndexFileName);
|
||||
scansitemap(x,localfilelist,true);
|
||||
otherfiles.addstrings(localfilelist);
|
||||
except
|
||||
on e: Exception do
|
||||
error(chmerror,'Error loading index file '+FIndexFileName);
|
||||
end;
|
||||
x.free;
|
||||
end
|
||||
else
|
||||
error(chmerror,'Can''t find TOC index file '+FIndexFileName);
|
||||
end;
|
||||
localfilelist.free;
|
||||
end;
|
||||
|
||||
|
||||
procedure TChmProject.WriteChm(AOutStream: TStream);
|
||||
var
|
||||
Writer : TChmWriter;
|
||||
|
@ -38,7 +38,7 @@ Type
|
||||
// Stream : the file opened with DataName should be written to this stream
|
||||
|
||||
Type
|
||||
TStringIndex = Class // AVLTree needs wrapping in non automated reference type
|
||||
TStringIndex = Class // AVLTree needs wrapping in non automated reference type also used in filewriter.
|
||||
TheString : String;
|
||||
StrId : Integer;
|
||||
end;
|
||||
@ -205,6 +205,8 @@ Type
|
||||
property DefaultWindow : string read fdefaultwindow write fdefaultwindow;
|
||||
end;
|
||||
|
||||
Function CompareStrings(Node1, Node2: Pointer): integer; // also used in filewriter
|
||||
|
||||
implementation
|
||||
uses dateutils, sysutils, paslzxcomp, chmFiftiMain;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user