IDE: TSimpleHTMLControl.HTMLToCaption: skip header

git-svn-id: trunk@30445 -
This commit is contained in:
mattias 2011-04-24 08:36:36 +00:00
parent 1c02717748
commit 0f6f5b9c53
2 changed files with 47 additions and 15 deletions

View File

@ -35,7 +35,7 @@ interface
uses uses
// FCL+LCL // FCL+LCL
Classes, SysUtils, AVL_Tree, LCLProc, LCLIntf, LCLType, Forms, Controls, Buttons, Classes, SysUtils, AVL_Tree, LCLProc, LCLIntf, LCLType, Forms, Controls, Buttons,
StdCtrls, Dialogs, ExtCtrls, FileProcs, Graphics, ButtonPanel, StdCtrls, Dialogs, ExtCtrls, FileProcs, Graphics, ButtonPanel, LConvEncoding,
// CodeTools // CodeTools
BasicCodeTools, CodeToolManager, CodeAtom, CodeCache, CustomCodeTool, CodeTree, BasicCodeTools, CodeToolManager, CodeAtom, CodeCache, CustomCodeTool, CodeTree,
PascalParserTool, FindDeclarationTool, PascalParserTool, FindDeclarationTool,
@ -265,19 +265,29 @@ function TSimpleHTMLControl.HTMLToCaption(const s: string; MaxLines: integer
var var
p: Integer; p: Integer;
EndPos: Integer; EndPos: Integer;
CurTag: String;
NewTag: String; NewTag: String;
Line: Integer; Line: Integer;
sp: LongInt; sp: LongInt;
InHeader: Boolean;
CurTagName: String;
begin begin
Result:=s; Result:=s;
//debugln(['TSimpleHTMLControl.HTMLToCaption HTML="',Result,'"']); //debugln(['TSimpleHTMLControl.HTMLToCaption HTML="',Result,'"']);
Line:=1; Line:=1;
p:=1; p:=1;
// remove UTF8 BOM
if copy(Result,1,3)=UTF8BOM then
Result:=copy(s,4,length(Result));
InHeader:=false; // it could be a snippet
while p<=length(Result) do begin while p<=length(Result) do begin
if Result[p]='<' then begin if Result[p]='<' then begin
// removes html tags // removes html tags
EndPos:=p+1; EndPos:=p+1;
if (EndPos<=length(Result)) and (Result[EndPos]='/') then inc(EndPos);
while (EndPos<=length(Result))
and (not (Result[EndPos] in [' ','>','"','/',#9,#10,#13])) do
inc(EndPos);
CurTagName:=UpperCase(copy(Result,p+1,EndPos-p-1));
while (EndPos<=length(Result)) do begin while (EndPos<=length(Result)) do begin
if Result[EndPos]='"' then begin if Result[EndPos]='"' then begin
// skip " tag // skip " tag
@ -291,26 +301,47 @@ begin
end; end;
inc(EndPos); inc(EndPos);
end; end;
CurTag:=copy(Result,p,EndPos-p);
if ((SysUtils.CompareText(CurTag,'<P>')=0) if CurTagName='HTML' then
or (SysUtils.CompareText(CurTag,'</P>')=0)) begin
then begin // it's a whole page
InHeader:=true;
end;
if CurTagName='BODY' then
begin
// start of body => ignore header
InHeader:=false;
Result:=copy(Result,EndPos,length(Result));
p:=1;
EndPos:=1;
Line:=1;
end;
if CurTagName='/BODY' then
begin
// end of body
Result:=copy(Result,1,p-1);
break;
end;
if (CurTagName='P') or (CurTagName='/P') then begin
// add a line break if there is not already one // add a line break if there is not already one
sp:=p; sp:=p;
while (sp>1) and (Result[sp-1] in [' ',#9]) do dec(sp); while (sp>1) and (Result[sp-1] in [' ',#9]) do dec(sp);
if (sp>1) and (not (Result[sp-1] in [#10,#13])) then if (sp>1) and (not (Result[sp-1] in [#10,#13])) then
CurTag:='<BR>'; CurTagName:='BR';
end;
if (CurTagName='DIV') or (CurTagName='/DIV')
then begin
// add a line break if not in first line
if Line>1 then
CurTagName:='BR';
end; end;
if (p>1) if CurTagName='BR' then
and ((SysUtils.CompareText(CurTag,'<BR>')=0) begin
or (SysUtils.CompareText(CurTag,'<DIV>')=0)
or (SysUtils.CompareText(copy(CurTag,1,5),'<DIV ')=0)
or (SysUtils.CompareText(CurTag,'</DIV>')=0))
then begin
NewTag:=LineEnding; NewTag:=LineEnding;
inc(Line); if not InHeader then
inc(Line);
if Line>MaxLines then begin if Line>MaxLines then begin
Result:=copy(Result,1,p)+LineEnding+'...'; Result:=copy(Result,1,p)+LineEnding+'...';
break; break;

View File

@ -31,6 +31,7 @@ const
EncodingUTF8BOM = 'utf8bom'; // UTF-8 with byte order mark EncodingUTF8BOM = 'utf8bom'; // UTF-8 with byte order mark
EncodingUCS2LE = 'ucs2le'; // UCS 2 byte little endian EncodingUCS2LE = 'ucs2le'; // UCS 2 byte little endian
EncodingUCS2BE = 'ucs2be'; // UCS 2 byte big endian EncodingUCS2BE = 'ucs2be'; // UCS 2 byte big endian
UTF8BOM = #$EF#$BB#$BF;
function GuessEncoding(const s: string): string; function GuessEncoding(const s: string): string;
@ -5842,7 +5843,7 @@ begin
end; end;
// try UTF-8 BOM (Byte Order Mark) // try UTF-8 BOM (Byte Order Mark)
if CompareI(@s[1],#$EF#$BB#$BF,3) then begin if CompareI(@s[1],UTF8BOM,3) then begin
Result:=EncodingUTF8BOM; Result:=EncodingUTF8BOM;
exit; exit;
end; end;