From 59aa0329866f1c310ef9b9311737ccf7741267e2 Mon Sep 17 00:00:00 2001
From: marco <marco@freepascal.org>
Date: Thu, 26 Dec 2013 18:36:55 +0000
Subject: [PATCH]  * Mantis #25276, improvements for CHMCMD anchor handling,
 specially combined    with errorhandling.

git-svn-id: trunk@26289 -
---
 packages/chm/src/chmcmd.lpr        |   6 +-
 packages/chm/src/chmfilewriter.pas | 122 ++++++++++++++++++++++-------
 2 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/packages/chm/src/chmcmd.lpr b/packages/chm/src/chmcmd.lpr
index 2fe2879f9f..53f81ace10 100644
--- a/packages/chm/src/chmcmd.lpr
+++ b/packages/chm/src/chmcmd.lpr
@@ -26,7 +26,7 @@ uses
   Classes, Sysutils, chmfilewriter, GetOpts;
 
 Const
-  CHMCMDVersion = '2.6.0';
+  CHMCMDVersion = '2.6.1';
 
 Procedure Usage;
 
@@ -101,7 +101,7 @@ var
 
 procedure OnError (Project: TChmProject;errorkind:TChmProjectErrorKind;msg:String;detailevel:integer=0);
 begin
-  if detailevel<=alloweddetaillevel then
+  if (detailevel<=alloweddetaillevel) or (errorkind < chmnote) then
     if errorkind<>chmnone then
       writeln(ChmErrorKindText[errorkind],': ',msg)
     else
@@ -150,6 +150,8 @@ begin
     end;
   OutStream := TFileStream.Create(Project.OutputFileName, fmCreate, fmOpenWrite);
   Project.WriteChm(OutStream);
+  if Project.ScanHtmlContents then
+    Project.ShowUndefinedAnchors;
   if ishhp and GenerateXMLForHHP then
     begin
       Writeln('Generating XML ',xmlname,'.');
diff --git a/packages/chm/src/chmfilewriter.pas b/packages/chm/src/chmfilewriter.pas
index 33d6a85e48..106b201305 100644
--- a/packages/chm/src/chmfilewriter.pas
+++ b/packages/chm/src/chmfilewriter.pas
@@ -60,6 +60,7 @@ type
     fOtherFiles : TStrings; // Files found in a scan.
     fAllowedExtensions: TStringList;
     fTotalFileList : TAvlTree;
+    fAnchorList    : TStringList;
     FSpareString   : TStringIndex;
     FBasePath      : String;     // location of the .hhp file. Needed to resolve relative paths
     FReadmeMessage : String;     // readme message
@@ -75,6 +76,7 @@ type
     procedure ScanList(toscan,newfiles:TStrings;recursion:boolean);
     procedure ScanSitemap(sitemap:TChmSiteMap;newfiles:TStrings;recursion:boolean);
     function  FileInTotalList(const s:String):boolean;
+    function  SanitizeURL(const basepath, instring, localpath, localname:string; var outstring:String):Boolean;
   public
     constructor Create; virtual;
     destructor Destroy; override;
@@ -82,6 +84,7 @@ type
     procedure LoadFromhhp (AFileName:String;LeaveInclude:Boolean); virtual;
     procedure SaveToFile(AFileName: String); virtual;
     procedure WriteChm(AOutStream: TStream); virtual;
+    procedure ShowUndefinedAnchors;
     function ProjectDir: String;
     procedure LoadSitemaps;
     procedure AddFileWithContext(contextid:integer;filename:ansistring;contextname:ansistring='');
@@ -126,6 +129,23 @@ implementation
 
 uses XmlCfg, CHMTypes;
 
+type
+
+  { TFirstReference }
+
+  TFirstReference = class
+  public
+   Location: string;
+   constructor Create(Const ALocation: string);
+  end;
+
+{ TFirstReference }
+
+constructor TFirstReference.Create(const ALocation: string);
+begin
+  Location := ALocation;
+end;
+
 { TChmProject }
 
 function TChmProject.GetData(const DataName: String; out PathInChm: String; out
@@ -187,6 +207,9 @@ begin
   ScanHtmlContents:=False;
   FTotalFileList:=TAvlTree.Create(@CompareStrings);
   FSparestring  :=TStringIndex.Create;
+  fAnchorList := TStringList.Create;
+  fAnchorList.Sorted := True;
+  fAnchorList.OwnsObjects := True;
 end;
 
 destructor TChmProject.Destroy;
@@ -206,6 +229,7 @@ begin
   FIndex.free;
   FTocStream.Free;
   FIndexStream.Free;
+  fAnchorList.Free;
   inherited Destroy;
 end;
 
@@ -690,11 +714,12 @@ begin
 end;
 
 const
-   protocols   : array[0..2] of string = ('HTTP:','FTP:','MS-ITS:');
-   protocollen : array[0..2] of integer= ( 5 ,4 ,7);
+   protocols   : array[0..3] of string = ('HTTP:','FTP:','MS-ITS:', 'MAILTO:');
+   protocollen : array[0..3] of integer= ( 5 ,4 ,7, 7);
 
-function sanitizeurl(const basepath,instring:string;var outstring:String):Boolean;
+function TChmProject.SanitizeURL(const basepath,instring,localpath,localname:string;var outstring:String):Boolean;
 var i,j,len : integer;
+  Anchor: String;
 begin
   result:=true; outstring:='';
   if instring='' then
@@ -703,6 +728,7 @@ begin
   len:=length(instring);
   if len=0 then
     exit(false);
+  { Check for protocols before adding local path }
   i:=0;
   while (i<=high(protocols)) do
     begin
@@ -710,11 +736,22 @@ begin
         exit(false);
       inc(i);
     end;
-   outstring:=instring;
+   outstring:=localpath+instring;
 
    i:=pos('#',outstring);
-   if i<>0 then
+   if i<>0 then begin
+     if i > 1 then
+       Anchor := outstring
+     else
+       Anchor := localname+outstring;
+     j := fAnchorList.IndexOf(Anchor);
+     if j < 0 then begin
+       fAnchorList.AddObject(Anchor,TFirstReference.Create(localname));
+       Anchor := '(new) '+Anchor;
+     end;
+     Error(CHMNote, 'Anchor found '+Anchor+' while scanning '+localname,1);
      delete(outstring,i,length(outstring)-i+1);
+   end;
 
   outstring:=expandfilename(StringReplace(outstring,'%20',' ',[rfReplaceAll]));// expandfilename(instring));
 
@@ -737,37 +774,44 @@ procedure TChmProject.ScanList(toscan,newfiles:TStrings;recursion:boolean);
 var
   localpath : string;
 
-procedure checkattributes(node:TDomNode;attributename:string;filelist :TStringList);
+function findattribute(node:TDomNode;attributename:string):String;
 var
     Attributes: TDOMNamedNodeMap;
     atnode    : TDomNode;
-    fn        : String;
     n         : integer;
 begin
+  Result := '';
   if assigned(node) then
     begin
       Attributes:=node.Attributes;
       if assigned(attributes) then
-         begin
-           for n:=0 to attributes.length-1 do
-             begin
-               atnode :=attributes[n];
-               if assigned(atnode) and (uppercase(atnode.nodename)=attributename) then
-                 begin
-                   if sanitizeurl(fbasepath,localpath+atnode.nodevalue,fn) then
-                    if not FileInTotalList(uppercase(fn)) then
-                      filelist.add(fn);
-                 end;
-             end;
-         end;
+        for n:=0 to attributes.length-1 do
+          begin
+            atnode :=attributes[n];
+            if assigned(atnode) and (uppercase(atnode.nodename)=attributename) then
+              exit(atnode.nodevalue);
+          end;
     end;
 end;
 
+procedure checkattributes(node:TDomNode;attributename:string; const localname: string; filelist :TStringList);
+var
+    fn  : String;
+    val : String;
+begin
+  val := findattribute(node,attributename);
+  if sanitizeurl(fbasepath,val,localpath,localname,fn) then
+    if (Length(fn) > 0) { Skip links to self using named anchors }
+        and not FileInTotalList(uppercase(fn)) then
+      filelist.add(fn);
+end;
 
-function scantags(prnt:TDomNode;filelist:TStringlist):TDomNode;
+
+function scantags(prnt:TDomNode; const localname: string; filelist:TStringlist):TDomNode;
 // Seach first matching tag in siblings
 var chld: TDomNode;
     s   : ansistring;
+    i   : Integer;
 begin
   result:=nil;
   if assigned(prnt )  then
@@ -775,24 +819,39 @@ begin
       chld:=prnt.firstchild;
       while assigned(chld) do
         begin
-          scantags(chld,filelist);  // depth first.
+          scantags(chld, localname, filelist);  // depth first.
           if (chld is TDomElement) then
             begin
               s:=uppercase(tdomelement(chld).tagname);
               if s='LINK' then
                 begin
                   //printattributes(chld,'');
-                  checkattributes(chld,'HREF',filelist);
+                  checkattributes(chld,'HREF',localname,filelist);
                 end;
              if s='IMG'then
                begin
                   //printattributes(chld,'');
-                  checkattributes(chld,'SRC',filelist);
+                  checkattributes(chld,'SRC',localname,filelist);
                end;
              if s='A'then
                begin
                   //printattributes(chld,'');
-                  checkattributes(chld,'HREF',filelist);
+                  checkattributes(chld,'HREF',localname,filelist);
+                  s := findattribute(chld,'NAME');
+                  if s <> '' then
+                    begin
+                      i := fAnchorList.IndexOf(localname+'#'+s);
+                      if i < 0 then begin
+                        fAnchorList.Add(localname+'#'+s);
+                        Error(ChmNote,'New Anchor with name '+s+' found while scanning '+localname,1);
+                      end else if fAnchorList.Objects[i] = nil then
+                        Error(chmwarning,'Duplicate anchor definitions with name '+s+' found while scanning '+localname,1)
+                      else begin
+                        fAnchorList.Objects[i].Free;
+                        fAnchorList.Objects[i] := nil;
+                        Error(ChmNote,'Anchor with name '+s+' defined while scanning '+localname,1);
+                      end;
+                    end;
                 end;
             end;
           chld:=chld.nextsibling;
@@ -818,7 +877,7 @@ begin
   if FileInTotalList(vn2) then
    begin
      Error(ChmNote,'Found duplicate file '+vn+' while scanning '+fn,1);
-     exit(false);
+     exit(true);
    end;
 
   result:=false;
@@ -851,7 +910,7 @@ begin
                localpath:=extractfilepath(fn);
                if (length(localpath)>0) and not (localpath[length(localpath)] in ['/','\']) then
                  localpath:=localpath+pathsep;
-               scantags(domdoc,localfilelist);
+               scantags(domdoc,extractfilename(fn),localfilelist);
                for i:=0 to localFilelist.count-1 do
                  begin
                    s:=localfilelist[i];
@@ -896,7 +955,7 @@ begin
   for i:=0 to it.count -1 do
     begin
       x:=it.item[i];
-      if sanitizeurl(fbasepath,x.local,S) then   // sanitize, remove stuff etc.
+      if sanitizeurl(fbasepath,x.local,'','Site Map for '+x.Text,S) then   // sanitize, remove stuff etc.
         begin
           if not FileInTotalList(uppercase(s)) then
             begin
@@ -1066,6 +1125,15 @@ begin
   Writer.Free;
 end;
 
+procedure TChmProject.ShowUndefinedAnchors;
+var
+  i:Integer;
+begin
+  for i := 0 to fAnchorList.Count-1 do
+    if fAnchorList.Objects[i] <> nil then
+      Error(chmerror,'Anchor '+fAnchorList[i]+' undefined; first use '+TFirstReference(fAnchorList.Objects[i]).Location);
+end;
+
 procedure TChmProject.LoadSitemaps;
 // #IDXHDR (merged files) goes into the system file, and need to keep  TOC sitemap around
 begin