mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-29 19:10:25 +02:00
wiki: started option to download recently changed pages
git-svn-id: trunk@35616 -
This commit is contained in:
parent
ec110f77c0
commit
61a12a5cc6
@ -39,6 +39,20 @@ uses
|
|||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
WikiParser, WikiFormat;
|
WikiParser, WikiFormat;
|
||||||
|
|
||||||
|
const
  // Page-name prefixes that are never downloaded. A page is ignored when its
  // name starts with one of these strings (the comparison is case sensitive).
  // Each prefix is listed exactly once; consumers should iterate with
  // low(IgnorePrefixes)..high(IgnorePrefixes) instead of hard-coding bounds.
  IgnorePrefixes: array[1..10] of string = (
    'Special:',
    'Help:',
    'Random:',
    'User:',
    'http:',
    'https:',
    'doc:',
    'Category:',
    'User_talk:',
    'index.php'
    );
|
||||||
type
|
type
|
||||||
|
|
||||||
{ TFetchWikiPage }
|
{ TFetchWikiPage }
|
||||||
@ -54,6 +68,7 @@ type
|
|||||||
private
|
private
|
||||||
FBaseURL: string;
|
FBaseURL: string;
|
||||||
FFirstPage: string;
|
FFirstPage: string;
|
||||||
|
FIgnoreFilesYoungerThanMin: integer;
|
||||||
FImagesDir: string;
|
FImagesDir: string;
|
||||||
FNoWrite: boolean;
|
FNoWrite: boolean;
|
||||||
FOutputDir: string;
|
FOutputDir: string;
|
||||||
@ -63,6 +78,7 @@ type
|
|||||||
protected
|
protected
|
||||||
procedure DoRun; override;
|
procedure DoRun; override;
|
||||||
procedure GetAll;
|
procedure GetAll;
|
||||||
|
procedure GetRecent(Days: integer);
|
||||||
procedure DownloadPage(Page: string);
|
procedure DownloadPage(Page: string);
|
||||||
procedure DownloadFirstNeededPage;
|
procedure DownloadFirstNeededPage;
|
||||||
procedure CheckNotUsedPages(Show, Delete: boolean);
|
procedure CheckNotUsedPages(Show, Delete: boolean);
|
||||||
@ -76,6 +92,7 @@ type
|
|||||||
function PageToFilename(Page: string; IsInternalLink: boolean): string;
|
function PageToFilename(Page: string; IsInternalLink: boolean): string;
|
||||||
function ImageToFilename(Image: string; IsInternalLink, KeepScheme: boolean): string;
|
function ImageToFilename(Image: string; IsInternalLink, KeepScheme: boolean): string;
|
||||||
function EscapeDocumentName(aName: string): string;
|
function EscapeDocumentName(aName: string): string;
|
||||||
|
function IsIgnoredPage(Page: string): boolean;
|
||||||
procedure Test;
|
procedure Test;
|
||||||
public
|
public
|
||||||
constructor Create(TheOwner: TComponent); override;
|
constructor Create(TheOwner: TComponent); override;
|
||||||
@ -85,6 +102,7 @@ type
|
|||||||
property ImagesDir: string read FImagesDir;
|
property ImagesDir: string read FImagesDir;
|
||||||
property BaseURL: string read FBaseURL;
|
property BaseURL: string read FBaseURL;
|
||||||
property NoWrite: boolean read FNoWrite;
|
property NoWrite: boolean read FNoWrite;
|
||||||
|
property IgnoreFilesYoungerThanMin: integer read FIgnoreFilesYoungerThanMin;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TWikiGet }
|
{ TWikiGet }
|
||||||
@ -109,10 +127,12 @@ var
|
|||||||
ErrorMsg: String;
|
ErrorMsg: String;
|
||||||
i: Integer;
|
i: Integer;
|
||||||
Param: String;
|
Param: String;
|
||||||
|
NeedSinglePage: Boolean;
|
||||||
|
RecentDays: Integer;
|
||||||
begin
|
begin
|
||||||
//Test;
|
//Test;
|
||||||
// quick check parameters
|
// quick check parameters
|
||||||
ErrorMsg:=CheckOptions('h','help dir: images: baseurl: page: allmissing nowrite'
|
ErrorMsg:=CheckOptions('h','help dir: images: baseurl: page: allmissing recent: ignore-recent: nowrite'
|
||||||
+' shownotusedpages deletenotusedpages'
|
+' shownotusedpages deletenotusedpages'
|
||||||
+' shownotusedimages deletenotusedimages');
|
+' shownotusedimages deletenotusedimages');
|
||||||
if ErrorMsg<>'' then
|
if ErrorMsg<>'' then
|
||||||
@ -151,19 +171,34 @@ begin
|
|||||||
if copy(BaseURL,1,7)<>'http://' then
|
if copy(BaseURL,1,7)<>'http://' then
|
||||||
E('invalid baseurl "'+BaseURL+'"');
|
E('invalid baseurl "'+BaseURL+'"');
|
||||||
|
|
||||||
if HasOption('allmissing') then begin
|
if HasOption('ignore-recent') then begin
|
||||||
GetAll;
|
fIgnoreFilesYoungerThanMin:=StrToIntDef(GetOptionValue('ignore-recent'),-1);
|
||||||
end else begin
|
if IgnoreFilesYoungerThanMin<0 then
|
||||||
for i:=1 to GetParamCount do begin
|
E('invalid --ignore-recent value "'+GetOptionValue('ignore-recent')+'"');
|
||||||
Param:=GetParams(i);
|
|
||||||
//writeln('TWikiGet.DoRun Param="',Param,'"');
|
|
||||||
if copy(Param,1,length(pPage))=pPage then
|
|
||||||
NeedWikiPage(WikiInternalLinkToPage(copy(Param,length(pPage)+1,length(Param))));
|
|
||||||
end;
|
|
||||||
if FNeededPages.Tree.Count=0 then
|
|
||||||
E('nothing to do',true);
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
NeedSinglePage:=true;
|
||||||
|
if HasOption('allmissing') or HasOption('recent') then begin
|
||||||
|
NeedSinglePage:=false;
|
||||||
|
RecentDays:=-1;
|
||||||
|
if HasOption('recent') then begin
|
||||||
|
RecentDays:=StrToIntDef(GetOptionValue('recent'),-1);
|
||||||
|
if RecentDays<1 then
|
||||||
|
E('invalid --recent value "'+GetOptionValue('recent')+'"');
|
||||||
|
end;
|
||||||
|
GetAll;
|
||||||
|
if RecentDays>0 then
|
||||||
|
GetRecent(RecentDays);
|
||||||
|
end;
|
||||||
|
for i:=1 to GetParamCount do begin
|
||||||
|
Param:=GetParams(i);
|
||||||
|
//writeln('TWikiGet.DoRun Param="',Param,'"');
|
||||||
|
if copy(Param,1,length(pPage))=pPage then
|
||||||
|
NeedWikiPage(WikiInternalLinkToPage(copy(Param,length(pPage)+1,length(Param))));
|
||||||
|
end;
|
||||||
|
if (NeedSinglePage) and (FNeededPages.Tree.Count=0) then
|
||||||
|
E('nothing to do',true);
|
||||||
|
|
||||||
while FNeededPages.Tree.Count>0 do
|
while FNeededPages.Tree.Count>0 do
|
||||||
DownloadFirstNeededPage;
|
DownloadFirstNeededPage;
|
||||||
|
|
||||||
@ -177,18 +212,6 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TWikiGet.GetAll;
|
procedure TWikiGet.GetAll;
|
||||||
const
|
|
||||||
IgnorePrefixes: array[1..9] of string = (
|
|
||||||
'Special:',
|
|
||||||
'Help:',
|
|
||||||
'Random:',
|
|
||||||
'User:',
|
|
||||||
'http:',
|
|
||||||
'https:',
|
|
||||||
'doc:',
|
|
||||||
'Category:',
|
|
||||||
'index.php'
|
|
||||||
);
|
|
||||||
var
|
var
|
||||||
Client: TFPHTTPClient;
|
Client: TFPHTTPClient;
|
||||||
Response: TMemoryStream;
|
Response: TMemoryStream;
|
||||||
@ -199,7 +222,6 @@ var
|
|||||||
StartPos: SizeInt;
|
StartPos: SizeInt;
|
||||||
URLs: TStringList;
|
URLs: TStringList;
|
||||||
i: Integer;
|
i: Integer;
|
||||||
j: Integer;
|
|
||||||
Page: String;
|
Page: String;
|
||||||
SaveTOC: Boolean;
|
SaveTOC: Boolean;
|
||||||
begin
|
begin
|
||||||
@ -270,21 +292,13 @@ begin
|
|||||||
Page:=copy(s,StartPos,p-StartPos);
|
Page:=copy(s,StartPos,p-StartPos);
|
||||||
while (Page<>'') and (Page[1]='/') do
|
while (Page<>'') and (Page[1]='/') do
|
||||||
System.Delete(Page,1,1);
|
System.Delete(Page,1,1);
|
||||||
if (Page<>'') then begin;
|
if (Page<>'') and (not IsIgnoredPage(Page)) then begin;
|
||||||
j:=low(IgnorePrefixes);
|
//writeln('TWikiGet.GetAll Page="',Page,'"');
|
||||||
while j<=high(IgnorePrefixes) do begin
|
Filename:=PageToFilename(Page,false);
|
||||||
if copy(Page,1,length(IgnorePrefixes[j]))=IgnorePrefixes[j] then
|
AddWikiPage(Page);
|
||||||
break;
|
if not FileExistsUTF8(Filename) then begin
|
||||||
inc(j);
|
writeln('TWikiGet.GetAll missing Page="',Page,'"');
|
||||||
end;
|
NeedWikiPage(Page);
|
||||||
if j>high(IgnorePrefixes) then begin
|
|
||||||
//writeln('TWikiGet.GetAll Page="',Page,'"');
|
|
||||||
Filename:=PageToFilename(Page,false);
|
|
||||||
AddWikiPage(Page);
|
|
||||||
if not FileExistsUTF8(Filename) then begin
|
|
||||||
writeln('TWikiGet.GetAll missing Page="',Page,'"');
|
|
||||||
NeedWikiPage(Page);
|
|
||||||
end;
|
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
System.Delete(s,1,p);
|
System.Delete(s,1,p);
|
||||||
@ -298,6 +312,80 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure TWikiGet.GetRecent(Days: integer);
{ Fetches the wiki's "Special:Recentchanges" listing for the last Days days and
  calls NeedWikiPage for every changed page that should be downloaded again.

  A link counts as a change when its href contains '&diff='. A changed page is
  skipped when
    - it was already handled during this call,
    - IsIgnoredPage returns true for it,
    - it is already in FNeededPages, or
    - its local file exists and is younger than IgnoreFilesYoungerThanMin
      minutes.

  Exceptions raised by the HTTP client are not caught here; all local objects
  are released in the finally block. }
const
  linksstart = '<a href="/index.php?title=';
var
  Client: TFPHTTPClient;
  Response: TMemoryStream;
  URL: String;
  s: string;
  Page: String;
  href: String;
  p: SizeInt;
  Filename: String;
  FileDate: LongInt;
  NowTime: TDateTime;
  AgeInMin: Int64;
  CheckedPages: TStringToStringTree;
begin
  //writeln('TWikiGet.GetRecent Days=',Days);
  // Both objects are nil-initialized before the try, so that the finally block
  // never frees an uninitialized reference if a constructor raises.
  Client:=nil;
  Response:=nil;
  CheckedPages:=TStringToStringTree.Create(true);
  try
    Client:=TFPHTTPClient.Create(nil);
    Response:=TMemoryStream.Create;
    URL:=BaseURL+'index.php?title=Special:Recentchanges&days='+IntToStr(Days)+'&limit=500';
    writeln('getting page "',URL,'" ...');
    Client.Get(URL,Response);
    //Client.ResponseHeaders.SaveToFile('responseheaders.txt');
    //Response.SaveToFile('test.html');
    NowTime:=Now;
    if Response.Size>0 then begin
      SetLength(s,Response.Size);
      Response.Position:=0;
      Response.Read(s[1],length(s));
      repeat
        // find next a href tag
        p:=Pos(linksstart,s);
        if p<1 then break;
        System.Delete(s,1,p+length(linksstart)-1);
        // get href attribute: everything up to the closing quote
        p:=Pos('"',s);
        if p<1 then break;
        href:=LeftStr(s,p-1);
        //writeln('TWikiGet.GetRecent href="'+href+'"');
        System.Delete(s,1,p);
        if Pos('&diff=',href)<1 then begin
          // this is not a change
          continue;
        end;
        // a change: the page title is the text before the first '&'
        Page:=LeftStr(href,Pos('&',href)-1);
        //writeln('TWikiGet.GetRecent page="'+Page+'"');
        if Page='' then continue; // malformed link without a title
        if CheckedPages.Contains(Page) then continue;
        if IsIgnoredPage(Page) then continue;
        if FNeededPages.Contains(Page) then continue;
        CheckedPages[Page]:='1';
        Filename:=PageToFilename(Page,false);
        //writeln('TWikiGet.GetRecent recent diff page="'+Page+'" File="',Filename,'"');
        if FileExistsUTF8(Filename) then begin
          FileDate:=FileAgeUTF8(Filename);
          // File dates are in an OS-dependent format, so subtracting two of
          // them does not give seconds on every platform. Convert to TDateTime
          // first and compute the age in minutes from that.
          // A negative FileDate means the age is unknown: download the page.
          if FileDate>=0 then begin
            AgeInMin:=Trunc((NowTime-FileDateToDateTime(FileDate))*24*60);
            //writeln('TWikiGet.GetRecent FileAge=',AgeInMin,' Ignore=',IgnoreFilesYoungerThanMin,' File="',Filename,'"');
            if AgeInMin<IgnoreFilesYoungerThanMin then continue;
          end;
        end;
        writeln('  recently changed: "',Page,'" File="',Filename,'"');
        NeedWikiPage(Page);
      until false;
    end;

  finally
    CheckedPages.Free;
    Client.Free;
    Response.Free;
  end;
end;
|
||||||
|
|
||||||
procedure TWikiGet.DownloadPage(Page: string);
|
procedure TWikiGet.DownloadPage(Page: string);
|
||||||
var
|
var
|
||||||
Response: TMemoryStream;
|
Response: TMemoryStream;
|
||||||
@ -594,6 +682,17 @@ begin
|
|||||||
Delete(Result,1,1);
|
Delete(Result,1,1);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function TWikiGet.IsIgnoredPage(Page: string): boolean;
{ Returns true if Page starts with one of the entries of IgnorePrefixes
  (case sensitive comparison), false otherwise. }
var
  Idx: Integer;
  Prefix: String;
begin
  // assume a match; only a completed scan without any hit resets the result
  Result:=true;
  for Idx:=low(IgnorePrefixes) to high(IgnorePrefixes) do begin
    Prefix:=IgnorePrefixes[Idx];
    if copy(Page,1,length(Prefix))=Prefix then
      exit;
  end;
  Result:=false;
end;
|
||||||
|
|
||||||
procedure TWikiGet.Test;
|
procedure TWikiGet.Test;
|
||||||
|
|
||||||
procedure w(URL: string);
|
procedure w(URL: string);
|
||||||
@ -628,6 +727,7 @@ begin
|
|||||||
FAllPages:=TStringToPointerTree.Create(true);
|
FAllPages:=TStringToPointerTree.Create(true);
|
||||||
FNeededPages:=TStringToPointerTree.Create(true);
|
FNeededPages:=TStringToPointerTree.Create(true);
|
||||||
FAllImages:=TStringToStringTree.Create(true);
|
FAllImages:=TStringToStringTree.Create(true);
|
||||||
|
FIgnoreFilesYoungerThanMin:=60;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
destructor TWikiGet.Destroy;
|
destructor TWikiGet.Destroy;
|
||||||
@ -647,14 +747,22 @@ begin
|
|||||||
writeln('--baseurl=<URL> : URL of the wiki. Default: ',BaseURL);
|
writeln('--baseurl=<URL> : URL of the wiki. Default: ',BaseURL);
|
||||||
writeln('--page=<pagename> : download this wiki page. Can be given multiple times.');
|
writeln('--page=<pagename> : download this wiki page. Can be given multiple times.');
|
||||||
writeln('--allmissing : download all wiki pages, if file not already there.');
|
writeln('--allmissing : download all wiki pages, if file not already there.');
|
||||||
|
writeln('--recent=<days> : download pages again if changed in the last days on the site.');
|
||||||
|
writeln(' includes --allmissing.');
|
||||||
|
writeln('--ignore-recent=<minutes> : do not download again files younger than this on disk.');
|
||||||
|
writeln(' combine with --recent. Default: ',IgnoreFilesYoungerThanMin);
|
||||||
writeln('--shownotusedpages : show not used files in the output directory.');
|
writeln('--shownotusedpages : show not used files in the output directory.');
|
||||||
writeln('--deletenotusedpages : delete the files in the output directory that are not used.');
|
writeln('--deletenotusedpages : delete the files in the output directory that are not used.');
|
||||||
writeln('--shownotusedimages : show not used files in the images directory.');
|
writeln('--shownotusedimages : show not used files in the images directory.');
|
||||||
writeln('--deletenotusedimages : delete the files in the images directory that are not used.');
|
writeln('--deletenotusedimages : delete the files in the images directory that are not used.');
|
||||||
writeln('--nowrite : do not write files, just print what would be written.');
|
writeln('--nowrite : do not write files, just print what would be written.');
|
||||||
writeln;
|
writeln;
|
||||||
writeln('Example:');
|
writeln('Example: download one page');
|
||||||
writeln(' ',ExeName,' --dir=. --images=images --page=Install_Packages');
|
writeln(' ',ExeName,' --dir=html --images=images --page=Install_Packages');
|
||||||
|
writeln('Example: download the whole wiki');
|
||||||
|
writeln(' ',ExeName,' --allmissing');
|
||||||
|
writeln('Example: call this to download new files once per week');
|
||||||
|
writeln(' ',ExeName,' --recent=8');
|
||||||
end;
|
end;
|
||||||
|
|
||||||
var
|
var
|
||||||
|
Loading…
Reference in New Issue
Block a user