+ Added new code to better handle HTML files generated by HTeX
  * Trying to get tables to format better

git-svn-id: trunk@5884 -
This commit is contained in:
pierre 2007-01-11 12:18:35 +00:00
parent ceec409cff
commit d2711a7013
3 changed files with 272 additions and 38 deletions

View File

@ -22,6 +22,7 @@ type
PTextFile = ^TTextFile;
TTextFile = object(TObject)
function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
function GetFileName : string; virtual;
end;
PMemoryTextFile = ^TMemoryTextFile;
@ -29,6 +30,7 @@ type
constructor Init;
procedure AddLine(const S: string); virtual;
function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
function GetFileName : string; virtual;
destructor Done; virtual;
private
Lines : PUnsortedStrCollection;
@ -37,6 +39,9 @@ type
PDOSTextFile = ^TDOSTextFile;
TDOSTextFile = object(TMemoryTextFile)
constructor Init(AFileName: string);
function GetFileName : string; virtual;
private
DosFileName : string;
end;
PSGMLParser = ^TSGMLParser;
@ -48,6 +53,7 @@ type
public
Line,LinePos: sw_integer;
procedure DocSoftBreak; virtual;
function GetFileName : string;
function DocAddTextChar(C: char): boolean; virtual;
procedure DocAddText(S: string); virtual;
procedure DocProcessTag(Tag: string); virtual;
@ -55,6 +61,7 @@ type
function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
private
CurTag: string;
FileName : string;
InTag,InComment,InString: boolean;
end;
@ -68,6 +75,7 @@ type
function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
public
TagName,TagParams: string;
DisableCrossIndexing : boolean;
procedure DocUnknownTag; virtual;
procedure DocTYPE; virtual;
procedure DocHTML(Entered: boolean); virtual;
@ -91,6 +99,8 @@ type
procedure DocStrong(Entered: boolean); virtual;
procedure DocTeleType(Entered: boolean); virtual;
procedure DocVariable(Entered: boolean); virtual;
procedure DocSpan(Entered: boolean); virtual;
procedure DocDiv(Entered: boolean); virtual;
procedure DocList(Entered: boolean); virtual;
procedure DocOrderedList(Entered: boolean); virtual;
procedure DocListItem; virtual;
@ -106,7 +116,8 @@ type
implementation
uses WUtils;
uses
WUtils;
function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
begin
@ -114,12 +125,23 @@ begin
GetLine:=false;
end;
{ Base implementation of the virtual GetFileName method.
  The abstract text file has no backing file, so it reports the
  placeholder name 'unknown'; descendants may override this. }
function TTextFile.GetFileName : string;
begin
GetFileName:='unknown';
end;
{ Constructs an empty in-memory text file: runs the inherited constructor
  and then allocates the Lines string collection that AddLine fills. }
constructor TMemoryTextFile.Init;
begin
inherited Init;
{ NOTE(review): 500,500 are presumably the collection's initial limit and
  growth delta - confirm against the collection's Init declaration }
New(Lines, Init(500,500));
end;
{ A purely in-memory text file has no file name of its own, so this
  returns the same placeholder 'unknown' as TTextFile.GetFileName. }
function TMemoryTextFile.GetFileName : string;
begin
GetFileName:='unknown';
end;
procedure TMemoryTextFile.AddLine(const S: string);
begin
Lines^.Insert(NewStr(S));
@ -140,8 +162,10 @@ end;
destructor TMemoryTextFile.Done;
begin
if Lines<>nil then
Dispose(Lines, Done);
Lines:=nil;
inherited Done;
if Lines<>nil then Dispose(Lines, Done); Lines:=nil;
end;
constructor TDOSTextFile.Init(AFileName: string);
@ -167,28 +191,43 @@ constructor TDOSTextFile.Init(AFileName: string);
s[0]:=chr(i);
end;
{$endif}*)
var f: text;
var f: file;
linecomplete,hasCR: boolean;
S: string;
begin
inherited Init;
if AFileName='' then Fail;
{$I-}
Assign(f,AFileName);
Reset(f);
Reset(f,1);
if IOResult<>0 then Fail;
DosFileName:=AFileName;
Dispose(Lines,Done);
New(Lines, Init(500,2000));
while (Eof(f)=false) and (IOResult=0) do
begin
readln(f,S); { this is the one in WUTILS.PAS }
ReadlnFromFile(f,S,linecomplete,hasCR,true);
AddLine(S);
end;
Close(f);
{$I+}
end;
{ Returns the name of the disk file this object was loaded from, i.e. the
  DosFileName field that TDOSTextFile.Init stores once the file has been
  opened successfully. }
function TDosTextFile.GetFileName : string;
begin
GetFileName:=DosFileName;
end;
{ Constructs the parser: runs the inherited constructor and clears FileName.
  FileName stays empty until Process assigns the name of the file being
  parsed. }
constructor TSGMLParser.Init;
begin
inherited Init;
FileName:='';
end;
{ Returns the FileName field: empty after Init, and set by Process to the
  name reported by the text file being parsed. }
function TSGMLParser.GetFileName : string;
begin
GetFileName:=FileName;
end;
function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
@ -198,12 +237,13 @@ begin
if HTMLFile=nil then Exit;
InTag:=false; InComment:=false; InString:=false; CurTag:='';
Line:=0; OK:=true;
FileName:=HTMLFile^.GetFileName;
repeat
LineOK:=HTMLFile^.GetLine(Line,S);
if LineOK then
begin
OK:=ProcessLine(S);
Inc(Line);
OK:=ProcessLine(S);
end;
until (LineOK=false) or (OK=false);
Process:=OK;
@ -295,9 +335,13 @@ begin
end;
{ whtml does not depend on whelp,
so I can not use hscLineBreak here. PM }
if InTag and InString then
CurTag:=CurTag+#0
else if WasThereAnyText then DocSoftBreak;
if InTag then
begin
if InString then
CurTag:=CurTag+#0;
end
else if WasThereAnyText then
DocSoftBreak;
ProcessLine:=true;
end;
@ -355,7 +399,7 @@ var Found: boolean;
Code: word;
CC: word;
begin
Found:=true; Code:=-1;
Found:=true; Code:=$ffff;
Name:=LowCaseStr(Name);
if copy(Name,1,1)='#' then
begin
@ -363,12 +407,23 @@ begin
Val('$'+copy(Name,3,255),Code,CC)
else
Val(copy(Name,2,255),Code,CC);
if CC<>0 then Code:=-1;
if CC<>0 then
begin
{$ifdef DEBUG}
DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
{$endif DEBUG}
Code:=$ffff;
end;
end;
if (Code=$22) or (Name='quot') then E:='"' else { double quote sign }
if (Code=$26) or (Name='amp') then E:='&' else { ampersand }
if (Code=$3C) or (Name='lt') then E:='<' else { less-than sign }
if (Code=$3E) or (Name='gt') then E:='>' else { greater-than sign }
if (Code=$26) or (Name='amp') then E:='&' else { ampersand }
if (Code=$22) or (Name='quot') then E:='"' else { double quote sign }
if (Code=$5B) then E:='[' else { [ }
if (Code=$5C) then E:='\' else { \ }
if (Code=$5D) then E:=']' else { ] }
if (Code=$5E) then E:='^' else { ^ }
if (Code=$5F) then E:='_' else { _ }
if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
if (Code=161) or (Name='iexcl') then E:='­' else { inverted excalamation mark }
if (Code=162) or (Name='cent') then E:='' else { cent sign }
@ -465,13 +520,24 @@ begin
if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
(* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
{ Special codes appearing in TeXH generated files }
if (Code=8217) then E:='''' else { acute accent as generated by TeXH }
if (code=$2c6) then E:='^' else { Modifier Letter Circumflex Accent }
if (code=$2013) then E:='-' else { En dash }
if (code=$2014) then E:='--' else { Em dash }
if (code=$201D) then E:='``' else { right double quotation marks }
if (Code=$FB00) then E:='ff' else { ff together }
if (Code=$FB01) then E:='fi' else { fi together }
if (Code=$FB02) then E:='fl' else { fl together }
if (Code=$FB03) then E:='ffi' else { ffi together }
Found:=false;
DocDecodeNamedEntity:=Found;
{$ifdef DEBUG}
if (Code<>$ffff) and not found then
begin
DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
end;
{$endif DEBUG}
end;
procedure THTMLParser.DocProcessTag(Tag: string);
@ -516,6 +582,8 @@ begin
if (ETagName='STRONG') then DocStrong(NotEndTag) else
if (ETagName='TT') then DocTeleType(NotEndTag) else
if (ETagName='VAR') then DocVariable(NotEndTag) else
if (ETagName='SPAN') then DocSpan(NotEndTag) else
if (ETagName='DIV') then DocDiv(NotEndTag) else
{ Unordered & ordered lists }
if (ETagName='UL') then DocList(NotEndTag) else
if (ETagName='OL') then DocOrderedList(NotEndTag) else
@ -680,6 +748,37 @@ procedure THTMLParser.DocVariable(Entered: boolean);
begin
end;
{ Hook invoked by DocProcessTag for SPAN tags.
  Intentionally empty here; it is declared virtual so that descendants
  can override it. }
procedure THTMLParser.DocSpan(Entered: boolean);
begin
end;
{ Hook invoked by DocProcessTag for DIV tags.
  Entered=true  : opening tag; when its CLASS parameter equals 'crosslinks'
                  the DisableCrossIndexing flag is raised.
  Entered=false : closing tag; the DisableCrossIndexing flag is lowered.
  NOTE(review): the flag is lowered on every closing DIV, not only on the one
  matching the 'crosslinks' opener, so a DIV nested inside a crosslinks DIV
  ends the disabled region early - confirm this is acceptable for the
  generated input. }
procedure THTMLParser.DocDiv(Entered: boolean);
var
  ClassAttr: String;
begin
  { Closing tag: report (debug builds only) and always clear the flag }
  if not Entered then
  begin
{$ifdef DEBUG}
    if DisableCrossIndexing then
    begin
      DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
    end;
{$endif DEBUG}
    DisableCrossIndexing:=false;
    Exit;
  end;

  { Opening tag: only a DIV of class 'crosslinks' switches indexing off }
  if DocGetTagParam('CLASS',ClassAttr) and (ClassAttr='crosslinks') then
  begin
    DisableCrossIndexing:=true;
{$ifdef DEBUG}
    DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
{$endif DEBUG}
  end;
end;
{ Hook invoked by DocProcessTag for UL (unordered list) tags.
  Intentionally empty here; it is declared virtual so that descendants
  can override it. }
procedure THTMLParser.DocList(Entered: boolean);
begin
end;

View File

@ -39,7 +39,7 @@ type
PTableElement = ^TTableElement;
TTableElement = object(Tobject)
TextBegin,TextEnd : sw_word;
TextBegin,TextEnd, TextLength, NumNL : sw_word;
Alignment : TParagraphAlign;
NextEl : PTableElement;
constructor init(AAlignment : TParagraphAlign);
@ -101,6 +101,7 @@ type
procedure DocStrong(Entered: boolean); virtual;
procedure DocTeleType(Entered: boolean); virtual;
procedure DocVariable(Entered: boolean); virtual;
procedure DocSpan(Entered: boolean); virtual;
procedure DocList(Entered: boolean); virtual;
procedure DocOrderedList(Entered: boolean); virtual;
procedure DocListItem; virtual;
@ -138,6 +139,8 @@ type
procedure AddChar(C: char);
procedure AddCharAt(C: char;AtPtr : sw_word);
function AddTextAt(const S: string;AtPtr : sw_word) : sw_word;
function ComputeTextLength(TStart,TEnd : sw_word) : sw_word;
end;
PCustomHTMLHelpFile = ^TCustomHTMLHelpFile;
@ -198,7 +201,8 @@ procedure RegisterHelpType;
implementation
uses Views,WConsts,WUtils,WViews,WHTMLScn;
uses
Views,WConsts,WUtils,WViews,WHTMLScn;
@ -281,6 +285,8 @@ procedure TTable.TextInsert(Pos : sw_word;const S : string);
var
i : sw_word;
begin
if S='' then
exit;
i:=Renderer^.AddTextAt(S,Pos+GlobalOffset);
GlobalOffset:=GlobalOffset+i;
end;
@ -293,26 +299,48 @@ type
PLengthArray = ^TLengthArray;
var
ColLengthArray : PLengthArray;
RowSizeArray : PLengthArray;
CurLine : PTableLine;
CurEl : PTableElement;
Align : TParagraphAlign;
TextBegin,TextEnd : sw_word;
i,j,Length : sw_word;
i,j,k,Length : sw_word;
begin
GetMem(ColLengthArray,Sizeof(sw_word)*NumCols);
FillChar(ColLengthArray^,Sizeof(sw_word)*NumCols,#0);
GetMem(RowSizeArray,Sizeof(sw_word)*NumLines);
FillChar(RowSizeArray^,Sizeof(sw_word)*NumLines,#0);
{ Compute the largest cell }
CurLine:=FirstLine;
For i:=1 to NumLines do
begin
CurEl:=CurLine^.FirstEl;
RowSizeArray^[i]:=1;
For j:=1 to NumCols do
begin
if not assigned(CurEl) then
break;
Length:=CurEl^.TextEnd-CurEl^.TextBegin;
Length:=CurEl^.TextLength;
if assigned(CurEl^.NextEl) and
(CurEl^.NextEl^.TextBegin>CurEl^.TextEnd) then
Inc(Length,Renderer^.ComputeTextLength(
CurEl^.NextEl^.TextBegin+GlobalOffset,
CurEl^.TextBegin+GlobalOffset));
if Length>ColLengthArray^[j] then
ColLengthArray^[j]:=Length;
{ We need to handle multiline cells... }
if CurEl^.NumNL>=RowSizeArray^[i] then
RowSizeArray^[i]:=CurEl^.NumNL;
{ We don't handle multiline cells yet... }
if CurEl^.NumNL>=1 then
begin
for k:=CurEl^.TextBegin+GlobalOffset to
CurEl^.TextEnd+GlobalOffset do
if Renderer^.Topic^.Text^[k]=ord(hscLineBreak) then
Renderer^.Topic^.Text^[k]:=ord(' ');
end;
CurEl:=CurEl^.NextEl;
end;
CurLine:=CurLine^.NextLine;
@ -346,14 +374,16 @@ begin
begin
TextBegin:=CurEl^.TextBegin;
TextEnd:=CurEl^.TextEnd;
While (TextEnd>TextBegin) and
{While (TextEnd>TextBegin) and
(Renderer^.Topic^.Text^[TextEnd+GlobalOffset]=ord(hscLineBreak)) do
dec(TextEnd);
Length:=TextEnd-TextBegin;
dec(TextEnd); }
Length:=CurEl^.TextLength;
Align:=CurEl^.Alignment;
end;
if WithBorder then
TextInsert(TextBegin,#179);
TextInsert(TextBegin,#179)
else
TextInsert(TextBegin,' ');
if Length<ColLengthArray^[j] then
begin
case Align of
@ -373,6 +403,7 @@ begin
end;
if WithBorder then
TextInsert(TextEnd,#179);
//TextInsert(TextEnd,hscLineBreak);
CurLine:=CurLine^.NextLine;
end;
If (NumLines>0) and WithBorder then
@ -389,6 +420,8 @@ begin
TextInsert(TextEnd,hscLineBreak);
End;
FreeMem(ColLengthArray,Sizeof(sw_word)*NumCols);
FreeMem(RowSizeArray,Sizeof(sw_word)*NumLines);
end;
destructor TTable.Done;
@ -555,7 +588,7 @@ end;
procedure THTMLTopicRenderer.DocSoftBreak;
begin
if InPreformatted then DocBreak else
if AnyCharsInLine then
if AnyCharsInLine and not assigned(CurrentTable) then
begin
AddChar(' ');
LastTextChar:=' ';
@ -609,16 +642,20 @@ begin
begin
if DocGetTagParam('HREF',HRef)=false then HRef:='';
if DocGetTagParam('NAME',Name)=false then Name:='';
if (HRef='') and (Name='') then
if DocGetTagParam('ID',Name)=false then
Name:='';
if Name<>'' then
begin
Topic^.NamedMarks^.InsertStr(Name);
AddChar(hscNamedMark);
end;
if (HRef<>'') then
if (HRef<>'')then
begin
InAnchor:=true;
AddChar(hscLink);
if LinkPtr<MaxTopicLinks then
if (LinkPtr<MaxTopicLinks){and
not DisableCrossIndexing} then
begin
HRef:=CompleteURL(URL,HRef);
LinkIndexes[LinkPtr]:=TopicLinks^.AddItem(HRef);
@ -825,6 +862,10 @@ procedure THTMLTopicRenderer.DocVariable(Entered: boolean);
begin
end;
{ SPAN tags produce no output in the rendered topic: this implementation
  of the virtual DocSpan method is deliberately empty. }
procedure THTMLTopicRenderer.DocSpan(Entered: boolean);
begin
end;
procedure THTMLTopicRenderer.DocList(Entered: boolean);
begin
if Entered then
@ -892,7 +933,8 @@ begin
CurrentTable:=ATable;
CurrentTable^.Renderer:=@Self;
if DocGetTagParam('BORDER',border) then
CurrentTable^.WithBorder:=true;
if Border<>'0' then
CurrentTable^.WithBorder:=true;
end
else
begin
@ -907,7 +949,9 @@ procedure THTMLTopicRenderer.DocTableRow(Entered: boolean);
var
ATableLine : PTableLine;
begin
if AnyCharsInLine then
if AnyCharsInLine or
(assigned(CurrentTable) and
assigned(CurrentTable^.FirstLine)) then
begin
AddChar(hscLineBreak);
AnyCharsInLine:=false;
@ -924,6 +968,7 @@ end;
procedure THTMLTopicRenderer.DocTableItem(Entered: boolean);
var
Align : String;
i : sw_word;
NewEl : PTableElement;
PAlignEl : TParagraphAlign;
begin
@ -934,6 +979,9 @@ begin
begin
NewEl:=CurrentTable^.LastLine^.LastEl;
NewEl^.TextEnd:=TextPtr;
NewEl^.TextLength:=ComputeTextLength(
NewEl^.TextBegin+CurrentTable^.GlobalOffset,
TextPtr+CurrentTable^.GlobalOffset);
end;
PAlignEl:=paLeft;
if DocGetTagParam('ALIGN',Align) then
@ -948,6 +996,15 @@ begin
begin
NewEl:=CurrentTable^.LastLine^.LastEl;
NewEl^.TextEnd:=TextPtr;
NewEl^.TextLength:=ComputeTextLength(
NewEl^.TextBegin+CurrentTable^.GlobalOffset,
TextPtr+CurrentTable^.GlobalOffset);
NewEl^.NumNL:=0;
for i:=NewEl^.TextBegin to TextPtr do
begin
if Topic^.Text^[i]=ord(hscLineBreak) then
inc(NewEl^.NumNL);
end;
end;
end;
@ -991,6 +1048,36 @@ begin
AddChar(S[I]);
end;
{ Returns the number of counted characters held in Topic^.Text from index
  TStart up to, but not including, index TEnd.
  Bytes are counted as follows:
    - hscLink, hscCode, hscCenter, hscRight, hscNamedMark, hscNormText
      contribute nothing;
    - hscDirect and the byte that follows it count together as one character;
    - hscTextAttr / hscTextColor and the byte that follows them contribute
      nothing;
    - every other byte counts as one character.
  The result is 0 when TStart is not below TEnd. }
function THTMLTopicRenderer.ComputeTextLength(TStart,TEnd : sw_word) : sw_word;
var
  ScanIdx,ShownChars: sw_integer;
begin
  ShownChars:=0;
  ScanIdx:=TStart;
  while ScanIdx<=TEnd-1 do
  begin
    case chr(Topic^.Text^[ScanIdx]) of
      hscLink,hscCode,
      hscCenter,hscRight,
      hscNamedMark,hscNormText :
        ; { single control byte, nothing to count }
      hscDirect:
        begin
          { marker plus the following byte count as one character }
          Inc(ShownChars);
          Inc(ScanIdx);
        end;
      hscTextAttr,
      hscTextColor:
        Inc(ScanIdx); { marker and its argument byte are both skipped }
    else
      Inc(ShownChars);
    end;
    Inc(ScanIdx);
  end;
  ComputeTextLength:=ShownChars;
end;
function THTMLTopicRenderer.AddTextAt(const S: String;AtPtr : sw_word) : sw_word;
var
i,slen,len : sw_word;
@ -1010,6 +1097,7 @@ begin
begin
Topic^.Text^[AtPtr]:=ord(S[i]);
Inc(TextPtr);
inc(AtPtr);
if (TextPtr=MaxBytes) then Exit;
end;
AddTextAt:=slen;
@ -1125,10 +1213,18 @@ begin
OK:=T<>nil;
if OK then
begin
if T^.HelpCtx=0 then Name:=DefaultFileName else
if T^.HelpCtx=0 then
begin
Name:=DefaultFileName;
P:=0;
end
else
begin
Link:=TopicLinks^.At((T^.HelpCtx and $ffff)-1)^;
Link:=FormatPath(Link);
{$ifdef DEBUG_WHTMLHLP}
DebugMessage(Link,' looking for',1,1);
{$endif DEBUG_WHTMLHLP}
P:=Pos('#',Link);
if P>0 then
begin
@ -1139,9 +1235,29 @@ begin
Name:=CompletePath(CurFileName,Link);}
Name:=Link;
end;
HTMLFile:=New(PDOSTextFile, Init(Name));
if HTMLFile=nil then
HTMLFile:=nil;
if Name<>'' then
HTMLFile:=New(PDOSTextFile, Init(Name));
if (HTMLFile=nil)and (CurFileName<>'') then
begin
Name:=CurFileName;
HTMLFile:=New(PDOSTextFile, Init(Name));
end;
if (HTMLFile=nil) then
begin
{$ifdef DEBUG}
DebugMessage(Link,' filename not known :(',1,1);
{$endif DEBUG}
end;
if (p>1) and (HTMLFile=nil) then
begin
{$ifdef DEBUG}
if p>0 then
DebugMessage(Name,Link+'#'+Bookmark+' not found',1,1)
else
DebugMessage(Name,Link+' not found',1,1);
{$endif DEBUG}
New(HTMLFile, Init);
HTMLFile^.AddLine('<HEAD><TITLE>'+msg_pagenotavailable+'</TITLE></HEAD>');
HTMLFile^.AddLine(
@ -1150,7 +1266,17 @@ begin
'</BODY>');
end;
OK:=Renderer^.BuildTopic(T,Name,HTMLFile,TopicLinks);
if OK then CurFileName:=Name;
if OK then
CurFileName:=Name
else
begin
{$ifdef DEBUG}
if p>0 then
DebugMessage(Name,Link+'#'+Bookmark+' not found',1,1)
else
DebugMessage(Name,Link+' not found',1,1);
{$endif DEBUG}
end;
if HTMLFile<>nil then Dispose(HTMLFile, Done);
if BookMark='' then
T^.StartNamedMark:=0
@ -1229,7 +1355,8 @@ begin
TLI:=TopicLinks^.AddItem(LS^.GetDocumentURL(I));
TLI:=EncodeHTMLCtx(ID,TLI+1);
for J:=0 to LS^.GetDocumentAliasCount(I)-1 do
IndexEntries^.Insert(NewIndexEntry(FormatAlias(LS^.GetDocumentAlias(I,J)),ID,TLI));
IndexEntries^.Insert(NewIndexEntry(
FormatAlias(LS^.GetDocumentAlias(I,J)),ID,TLI));
end;
Dispose(LS, Done);
end;

View File

@ -136,7 +136,8 @@ procedure RegisterWHTMLScan;
implementation
uses WUtils;
uses
WUtils;
const
RHTMLLinkScanDocument: TStreamRec = (
@ -183,7 +184,8 @@ begin
else
begin
CurLinkText:=Trim(CurLinkText);
if CheckURL(CurURL) and CheckText(CurLinkText) or InNameAnchor then
if InNameAnchor or
(CheckURL(CurURL) and CheckText(CurLinkText)and not DisableCrossIndexing) then
AddLink(CurLinkText,CurURL);
InNameAnchor:=false;
end;
@ -586,11 +588,17 @@ begin
CurDoc:=Doc^.GetDocumentURL;
New(F, Init(Doc^.GetDocumentURL));
if Assigned(F) then
begin
CurBaseURL:=CompleteURL(Doc^.GetDocumentURL,'');
Process(F);
Dispose(F, Done);
end;
begin
CurBaseURL:=CompleteURL(Doc^.GetDocumentURL,'');
Process(F);
Dispose(F, Done);
end
else
begin
{$ifdef DEBUG}
DebugMessage(CurDoc,'file not found',1,1);
{$endif DEBUG}
end;
Doc^.State:=ssScanned;
CurDoc:='';
end;