
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@5319 8e941d3f-bd1b-0410-a28a-d453659cc2b4
1192 lines
38 KiB
ObjectPascal
1192 lines
38 KiB
ObjectPascal
unit fpsHTMLUtils;
|
||
|
||
{$mode objfpc}{$H+}
|
||
|
||
interface
|
||
|
||
uses
|
||
Classes, SysUtils, contnrs, fpstypes, fpspreadsheet;
|
||
|
||
type
|
||
TsHTMLEntity = record
|
||
E: String;
|
||
Ch: String;
|
||
N: Word;
|
||
end;
|
||
|
||
function CleanHTMLString(AText: String): String;
|
||
function IsHTMLEntity(AText: PChar; out AEntity: TsHTMLEntity): Boolean;
|
||
function RemoveHTMLEntities(const AText: String): String;
|
||
|
||
type
|
||
TsHTMLAttr = class
|
||
Name: String;
|
||
Value: String;
|
||
constructor Create(AName, AValue: String);
|
||
end;
|
||
|
||
TsHTMLAttrList = class(TObjectList)
|
||
private
|
||
function GetItem(AIndex: Integer): TsHTMLAttr;
|
||
procedure SetItem(AIndex: Integer; AValue: TsHTMLAttr);
|
||
protected
|
||
procedure ParseStyle(AStyle: String);
|
||
public
|
||
function IndexOfName(AName: String): Integer;
|
||
procedure Parse(AHTML: String);
|
||
property Items[AIndex: Integer]: TsHTMLAttr read GetItem write SetItem; default;
|
||
end;
|
||
|
||
TsTagCase = (tcLowercase, tcUppercase, tcProperCase);
|
||
|
||
procedure HTMLToRichText(AWorkbook: TsWorkbook; AFont: TsFont;
|
||
const AHTMLText: String; out APlainText: String;
|
||
out ARichTextParams: TsRichTextParams);
|
||
|
||
procedure RichTextToHTML(AWorkbook: TsWorkbook; AFont: TsFont;
|
||
const APlainText: String; const ARichTextParams: TsRichTextParams;
|
||
out AHTMLText: String; APrefix:String = ''; ATagCase: TsTagCase = tcLowercase);
|
||
|
||
|
||
implementation
|
||
|
||
uses
|
||
math, lazUtf8, fasthtmlparser,
|
||
fpsUtils, fpsClasses;
|
||
|
||
const
|
||
// http://unicode.e-workers.de/entities.php
|
||
// http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
|
||
HTMLEntities: array[0..250] of TsHTMLEntity = (
|
||
// A
|
||
(E: 'Acirc'; Ch: 'Â'; N: 194), // 0
|
||
(E: 'acirc'; Ch: 'â'; N: 226),
|
||
(E: 'acute'; Ch: '´'; N: 180),
|
||
(E: 'AElig'; Ch: 'Æ'; N: 198),
|
||
(E: 'aelig'; Ch: 'æ'; N: 230),
|
||
(E: 'Agrave'; Ch: 'À'; N: 192),
|
||
(E: 'agrave'; Ch: 'à'; N: 224),
|
||
(E: 'alefsym';Ch: #$E2#$84#$B5; N: 8501),
|
||
(E: 'Alpha'; Ch: 'Α'; N: 913),
|
||
(E: 'alpha'; Ch: 'α'; N: 945),
|
||
(E: 'amp'; Ch: '&'; N: 38), // 10
|
||
(E: 'and'; Ch: '∧'; N: 8743),
|
||
(E: 'ang'; Ch: '∠'; N: 8736),
|
||
(E: 'apos'; Ch: ''''; N: 39),
|
||
(E: 'Aring'; Ch: 'Å'; N: 197),
|
||
(E: 'aring'; Ch: 'å'; N: 229),
|
||
(E: 'asymp'; Ch: '≈'; N: 2248),
|
||
(E: 'Atilde'; Ch: 'Ã'; N: 195),
|
||
(E: 'atilde'; Ch: 'ã'; N: 227),
|
||
(E: 'Auml'; Ch: 'Ä'; N: 196),
|
||
(E: 'auml'; Ch: 'ä'; N: 228), // 20
|
||
// B
|
||
(E: 'bdquo'; Ch: '„'; N: 8222), // 21
|
||
(E: 'Beta'; Ch: 'Β'; N: 914),
|
||
(E: 'beta'; Ch: 'β'; N: 946),
|
||
(E: 'brvbar'; Ch: '¦'; N: 166),
|
||
(E: 'bull'; Ch: '•'; N: 8226),
|
||
// C
|
||
(E: 'cap'; Ch: '∩'; N: 8745), // 26
|
||
(E: 'Ccedil'; Ch: 'Ç'; N: 199),
|
||
(E: 'ccedil'; Ch: 'ç'; N: 231),
|
||
(E: 'cedil'; Ch: '¸'; N: 184),
|
||
(E: 'cent'; Ch: '¢'; N: 162), // 30
|
||
(E: 'Chi'; Ch: 'Χ'; N: 935),
|
||
(E: 'chi'; Ch: 'χ'; N: 967),
|
||
(E: 'circ'; Ch: 'ˆ'; N: 710),
|
||
(E: 'clubs'; Ch: '♣'; N: 9827),
|
||
(E: 'cong'; Ch: #$E2#$89#$85; N: 8773), // approximately equal
|
||
(E: 'copy'; Ch: '©'; N: 169),
|
||
(E: 'crarr'; Ch: #$E2#$86#$B5; N: 8629), // carriage return
|
||
(E: 'cup'; Ch: '∪'; N: 8746),
|
||
(E: 'curren'; Ch: '¤'; N: 164),
|
||
// D
|
||
(E: 'Dagger'; Ch: '‡'; N: 8225), // 40
|
||
(E: 'dagger'; Ch: '†'; N: 8224),
|
||
(E: 'dArr'; Ch: #$E2#$87#$93; N: 8659), // wide down-arrow
|
||
(E: 'darr'; Ch: '↓'; N: 8595), // narrow down-arrow
|
||
(E: 'deg'; Ch: '°'; N: 176),
|
||
(E: 'Delta'; Ch: 'Δ'; N: 916),
|
||
(E: 'delta'; Ch: 'δ'; N: 948),
|
||
(E: 'diams'; Ch: '♦'; N: 9830),
|
||
(E: 'divide'; Ch: '÷'; N: 247),
|
||
// E
|
||
(E: 'Eacute'; Ch: 'É'; N: 201), // 49
|
||
(E: 'eacute'; Ch: 'é'; N: 233), // 50
|
||
(E: 'Ecirc'; Ch: 'Ê'; N: 202),
|
||
(E: 'ecirc'; Ch: 'ê'; N: 234),
|
||
(E: 'Egrave'; Ch: 'È'; N: 200),
|
||
(E: 'egrave'; Ch: 'è'; N: 232),
|
||
(E: 'empty'; Ch: #$e2#$88#$85; N: 8709),
|
||
(E: 'emsp'; Ch: #$E2#$80#$83; N: 8195), // Space character width of "m"
|
||
(E: 'ensp'; Ch: #$E2#$80#$82; N: 8194), // Space character width of "n"
|
||
(E: 'Epsilon';Ch: 'Ε'; N: 917), // capital epsilon
|
||
(E: 'epsilon';Ch: 'ε'; N: 949),
|
||
(E: 'equiv'; Ch: '≡'; N: 8801), // 60
|
||
(E: 'Eta'; Ch: 'Η'; N: 919),
|
||
(E: 'eta'; Ch: 'η'; N: 951),
|
||
(E: 'ETH'; Ch: 'Ð'; N: 208),
|
||
(E: 'eth'; Ch: 'ð'; N: 240),
|
||
(E: 'Euml'; Ch: 'Ë'; N: 203),
|
||
(E: 'euml'; Ch: 'ë'; N: 235),
|
||
(E: 'euro'; Ch: '€'; N: 8364),
|
||
(E: 'exist'; Ch: '∃'; N: 8707),
|
||
// F
|
||
(E: 'fnof'; Ch: 'ƒ'; N: 402), // 70
|
||
(E: 'forall'; Ch: '∀'; N: 8704),
|
||
(E: 'frac12'; Ch: '½'; N: 189),
|
||
(E: 'frac14'; Ch: '¼'; N: 188),
|
||
(E: 'frac34'; Ch: '¾'; N: 190),
|
||
(E: 'frasl'; Ch: '⁄'; N: 8260),
|
||
// G
|
||
(E: 'Gamma'; Ch: 'Γ'; N: 915),
|
||
(E: 'gamma'; Ch: 'γ'; N: 947),
|
||
(E: 'ge'; Ch: '≥'; N: 8805),
|
||
(E: 'gt'; Ch: '>'; N: 62),
|
||
// H
|
||
(E: 'hArr'; Ch: '⇔'; N: 8660), // 80, wide horizontal double arrow
|
||
(E: 'harr'; Ch: '↔'; N: 8596), // narrow horizontal double arrow
|
||
(E: 'hearts'; Ch: '♥'; N: 9829),
|
||
(E: 'hellip'; Ch: '…'; N: 8230),
|
||
// I
|
||
(E: 'Iacute'; Ch: 'Í'; N: 205),
|
||
(E: 'iacute'; Ch: 'í'; N: 237),
|
||
(E: 'Icirc'; Ch: 'Î'; N: 206),
|
||
(E: 'icirc'; Ch: 'î'; N: 238),
|
||
(E: 'iexcl'; Ch: '¡'; N: 161),
|
||
(E: 'Igrave'; Ch: 'Ì'; N: 204),
|
||
(E: 'igrave'; Ch: 'ì'; N: 236), // 90
|
||
(E: 'image'; Ch: #$E2#$84#$91; N: 2465), // I in factura
|
||
(E: 'infin'; Ch: '∞'; N: 8734),
|
||
(E: 'int'; Ch: '∫'; N: 8747),
|
||
(E: 'Iota'; Ch: 'Ι'; N: 921),
|
||
(E: 'iota'; Ch: 'ι'; N: 953),
|
||
(E: 'iquest'; Ch: '¿'; N: 191),
|
||
(E: 'isin'; Ch: '∈'; N: 8712),
|
||
(E: 'Iuml'; Ch: 'Ï'; N: 207),
|
||
(E: 'iuml'; Ch: 'ï'; N: 239),
|
||
// K
|
||
(E: 'Kappa'; Ch: 'Κ'; N: 922), // 100
|
||
(E: 'kappa'; Ch: 'κ'; N: 254),
|
||
// L
|
||
(E: 'Lambda'; Ch: 'Λ'; N: 923),
|
||
(E: 'lambda'; Ch: 'λ'; N: 955),
|
||
(E: 'lang'; Ch: '⟨'; N: 9001), // Left-pointing angle bracket
|
||
(E: 'laquo'; Ch: '«'; N: 171),
|
||
(E: 'lArr'; Ch: #$E2#$87#$90; N: 8656), // Left-pointing wide arrow
|
||
(E: 'larr'; Ch: '←'; N: 8592),
|
||
(E: 'lceil'; Ch: '⌈'; N: 8968), // Left ceiling
|
||
(E: 'ldquo'; Ch: '“'; N: 8220),
|
||
(E: 'le'; Ch: '≤'; N: 8804), // 110
|
||
(E: 'lfloor'; Ch: '⌊'; N: 8970), // Left floor
|
||
(E: 'lowast'; Ch: #$e2#$88#$97; N: 8727), // Low asterisk
|
||
(E: 'loz'; Ch: '◊'; N: 9674),
|
||
(E: 'lrm'; Ch: ''; N: 8206), // Left-to-right mark
|
||
(E: 'lsaquo'; Ch: '‹'; N: 8249),
|
||
(E: 'lsquo'; Ch: '‘'; N: 8216),
|
||
(E: 'lt'; Ch: '<'; N: 60),
|
||
// M
|
||
(E: 'macr'; Ch: '¯'; N: 175),
|
||
(E: 'mdash'; Ch: '—'; N: 8212),
|
||
(E: 'micro'; Ch: 'µ'; N: 181), // 120
|
||
(E: 'middot'; Ch: '·'; N: 183),
|
||
(E: 'minus'; Ch: '−'; N: 8722),
|
||
(E: 'Mu'; Ch: 'Μ'; N: 924),
|
||
(E: 'mu'; Ch: 'μ'; N: 956),
|
||
// N
|
||
(E: 'nabla'; Ch: '∇'; N: 8711),
|
||
(E: 'nbsp'; Ch: ' '; N: 160), // 126
|
||
(E: 'ndash'; Ch: '–'; N: 8211),
|
||
(E: 'ne'; Ch: '≠'; N: 8800),
|
||
(E: 'ni'; Ch: '∋'; N: 8715),
|
||
(E: 'not'; Ch: '¬'; N: 172), // 130
|
||
(E: 'notin'; Ch: #$e2#$88#$89; N: 8713), // math: "not in"
|
||
(E: 'nsub'; Ch: #$e2#$8a#$84; N: 8836), // math: "not a subset of"
|
||
(E: 'Ntilde'; Ch: 'Ñ'; N: 209),
|
||
(E: 'ntilde'; Ch: 'ñ'; N: 241),
|
||
(E: 'Nu'; Ch: 'Ν'; N: 925),
|
||
(E: 'nu'; Ch: 'ν'; N: 957),
|
||
// O
|
||
(E: 'Oacute'; Ch: 'Ó'; N: 211),
|
||
(E: 'oacute'; Ch: 'ó'; N: 243),
|
||
(E: 'Ocirc'; Ch: 'Ô'; N: 212),
|
||
(E: 'ocirc'; Ch: 'ô'; N: 244),
|
||
(E: 'OElig'; Ch: 'Œ'; N: 338),
|
||
(E: 'oelig'; Ch: 'œ'; N: 339),
|
||
(E: 'Ograve'; Ch: 'Ò'; N: 210),
|
||
(E: 'ograve'; Ch: 'ò'; N: 242),
|
||
(E: 'oline'; Ch: '‾'; N: 8254),
|
||
(E: 'Omega'; Ch: 'Ω'; N: 937),
|
||
(E: 'omega'; Ch: 'ω'; N: 969),
|
||
(E: 'Omicron';Ch: 'Ο'; N: 927),
|
||
(E: 'omicron';Ch: 'ο'; N: 959),
|
||
(E: 'oplus'; Ch: #$e2#$8a#$95; N: 8853), // Circled plus
|
||
(E: 'or'; Ch: '∨'; N: 8744),
|
||
(E: 'ordf'; Ch: 'ª'; N: 170),
|
||
(E: 'ordm'; Ch: 'º'; N: 186),
|
||
(E: 'Oslash'; Ch: 'Ø'; N: 216),
|
||
(E: 'oslash'; Ch: 'ø'; N: 248),
|
||
(E: 'Otilde'; Ch: 'Õ'; N: 213),
|
||
(E: 'otilde'; Ch: 'õ'; N: 245),
|
||
(E: 'otimes'; Ch: #$E2#$8A#$97; N: 8855), // Circled times
|
||
(E: 'Ouml'; Ch: 'Ö'; N: 214),
|
||
(E: 'ouml'; Ch: 'ö'; N: 246),
|
||
// P
|
||
(E: 'para'; Ch: '¶'; N: 182),
|
||
(E: 'part'; Ch: '∂'; N: 8706),
|
||
(E: 'permil'; Ch: '‰'; N: 8240),
|
||
(E: 'perp'; Ch: '⊥'; N: 8869),
|
||
(E: 'Phi'; Ch: 'Φ'; N: 934),
|
||
(E: 'phi'; Ch: 'φ'; N: 966),
|
||
(E: 'Pi'; Ch: 'Π'; N: 928),
|
||
(E: 'pi'; Ch: 'π'; N: 960), // lower-case pi
|
||
(E: 'piv'; Ch: 'ϖ'; N: 982),
|
||
(E: 'plusmn'; Ch: '±'; N: 177),
|
||
(E: 'pound'; Ch: '£'; N: 163),
|
||
(E: 'Prime'; Ch: '″'; N: 8243),
|
||
(E: 'prime'; Ch: '′'; N: 8242),
|
||
(E: 'prod'; Ch: '∏'; N: 8719),
|
||
(E: 'prop'; Ch: '∝'; N: 8733),
|
||
(E: 'Psi'; Ch: 'Ψ'; N: 936),
|
||
(E: 'psi'; Ch: 'ψ'; N: 968),
|
||
// Q
|
||
(E: 'quot'; Ch: '"'; N: 34),
|
||
// R
|
||
(E: 'radic'; Ch: '√'; N: 8730),
|
||
(E: 'rang'; Ch: '⟩'; N: 9002), // right-pointing angle bracket
|
||
(E: 'raquo'; Ch: '»'; N: 187),
|
||
(E: 'rArr'; Ch: '⇒'; N: 8658),
|
||
(E: 'rarr'; Ch: '→'; N: 8594),
|
||
(E: 'rceil'; Ch: '⌉'; N: 8969), // right ceiling
|
||
(E: 'rdquo'; Ch: '”'; N: 8221),
|
||
(E: 'real'; Ch: #$E2#$84#$9C; N: 8476), // R in factura
|
||
(E: 'reg'; Ch: '®'; N: 174),
|
||
(E: 'rfloor'; Ch: '⌋'; N: 8971), // Right floor
|
||
(E: 'Rho'; Ch: 'Ρ'; N: 929),
|
||
(E: 'rho'; Ch: 'ρ'; N: 961),
|
||
(E: 'rlm'; Ch: ''; N: 8207), // right-to-left mark
|
||
(E: 'rsaquo'; Ch: '›'; N: 8250),
|
||
(E: 'rsquo'; Ch: '’'; N: 8217),
|
||
|
||
// S
|
||
(E: 'sbquo'; Ch: '‚'; N: 8218),
|
||
(E: 'Scaron'; Ch: 'Š'; N: 352),
|
||
(E: 'scaron'; Ch: 'š'; N: 353),
|
||
(E: 'sdot'; Ch: #$E2#$8B#$85; N: 8901), // math: dot operator
|
||
(E: 'sect'; Ch: '§'; N: 167),
|
||
(E: 'shy'; Ch: #$C2#$AD; N: 173), // conditional hyphen
|
||
(E: 'Sigma'; Ch: 'Σ'; N: 931),
|
||
(E: 'sigma'; Ch: 'σ'; N: 963),
|
||
(E: 'sigmaf'; Ch: 'ς'; N: 962),
|
||
(E: 'sim'; Ch: #$E2#$88#$BC; N: 8764), // similar
|
||
(E: 'spades'; Ch: '♠'; N: 9824),
|
||
(E: 'sub'; Ch: '⊂'; N: 8834),
|
||
(E: 'sube'; Ch: '⊆'; N: 8838),
|
||
(E: 'sum'; Ch: '∑'; N: 8721),
|
||
(E: 'sup'; Ch: '⊃'; N: 8835),
|
||
(E: 'sup1'; Ch: '¹'; N: 185),
|
||
(E: 'sup2'; Ch: '²'; N: 178),
|
||
(E: 'sup3'; Ch: '³'; N: 179),
|
||
(E: 'supe'; Ch: '⊇'; N: 8839),
|
||
(E: 'szlig'; Ch: 'ß'; N: 223),
|
||
//T
|
||
(E: 'Tau'; Ch: 'Τ'; N: 932),
|
||
(E: 'tau'; Ch: 'τ'; N: 964),
|
||
(E: 'there4'; Ch: '∴'; N: 8756),
|
||
(E: 'Theta'; Ch: 'Θ'; N: 920),
|
||
(E: 'theta'; Ch: 'θ'; N: 952),
|
||
(E: 'thetasym';Ch: 'ϑ'; N: 977),
|
||
(E: 'thinsp'; Ch: #$E2#$80#$89; N: 8201), // thin space
|
||
(E: 'THORN'; Ch: 'Þ'; N: 222),
|
||
(E: 'thorn'; Ch: 'þ'; N: 254),
|
||
(E: 'tilde'; Ch: '˜'; N: 732),
|
||
(E: 'times'; Ch: '×'; N: 215),
|
||
(E: 'trade'; Ch: '™'; N: 8482),
|
||
// U
|
||
(E: 'Uacute'; Ch: 'Ú'; N: 218),
|
||
(E: 'uacute'; Ch: 'ú'; N: 250),
|
||
(E: 'uArr'; Ch: #$E2#$87#$91; N: 8657), // wide up-arrow
|
||
(E: 'uarr'; Ch: '↑'; N: 8593),
|
||
(E: 'Ucirc'; Ch: 'Û'; N: 219),
|
||
(E: 'ucirc'; Ch: 'û'; N: 251),
|
||
(E: 'Ugrave'; Ch: 'Ù'; N: 217),
|
||
(E: 'ugrave'; Ch: 'ù'; N: 249),
|
||
(E: 'uml'; Ch: '¨'; N: 168),
|
||
(E: 'upsih'; Ch: 'ϒ'; N: 978),
|
||
(E: 'Upsilon';Ch: 'Υ'; N: 933),
|
||
(E: 'upsilon';Ch: 'υ'; N: 965),
|
||
(E: 'Uuml'; Ch: 'Ü'; N: 220),
|
||
(E: 'uuml'; Ch: 'ü'; N: 252),
|
||
// W
|
||
(E: 'weierp'; Ch: #$E2#$84#$98; N: 8472), // Script Capital P; Weierstrass Elliptic Function
|
||
// X
|
||
(E: 'Xi'; Ch: 'Ξ'; N: 926),
|
||
(E: 'xi'; Ch: 'ξ'; N: 958),
|
||
// Y
|
||
(E: 'Yacute'; Ch: 'Ý'; N: 221),
|
||
(E: 'yacute'; Ch: 'ý'; N: 253),
|
||
(E: 'yen'; Ch: '¥'; N: 165),
|
||
(E: 'Yuml'; Ch: 'Ÿ'; N: 376),
|
||
(E: 'yuml'; Ch: 'ÿ'; N: 255),
|
||
// Z
|
||
(E: 'Zeta'; Ch: 'Ζ'; N: 918),
|
||
(E: 'zeta'; Ch: 'ζ'; N: 950),
|
||
(E: 'zwj'; Ch: ''; N: 8205), // Zero-width joiner
|
||
(E: 'zwnj'; Ch: ''; N: 8204) // Zero-width non-joiner
|
||
);
|
||
|
||
function IsHTMLEntity(AText: PChar; out AEntity: TsHTMLEntity): Boolean;
|
||
|
||
function Compare(s: String): Boolean;
|
||
var
|
||
j: Integer;
|
||
begin
|
||
Result := false;
|
||
for j:=1 to Length(s) do
|
||
if s[j] <> PChar(AText)[j-1] then
|
||
exit;
|
||
if PChar(AText)[Length(s)] <> ';' then
|
||
exit;
|
||
Result := true;
|
||
end;
|
||
|
||
var
|
||
k: Integer;
|
||
equ: Boolean;
|
||
ch1: Char;
|
||
P: PChar;
|
||
|
||
begin
|
||
Result := false;
|
||
for k:=0 to High(HTMLEntities) do
|
||
begin
|
||
equ := Compare(HTMLEntities[k].E);
|
||
if not equ then
|
||
begin
|
||
P := AText;
|
||
ch1 := P^;
|
||
if ch1 = '#' then
|
||
begin
|
||
inc(P);
|
||
if ch1 = 'x' then
|
||
equ := Compare(Format('#x%x', [HTMLEntities[k].N]))
|
||
else
|
||
equ := Compare(Format('#%d', [HTMLEntities[k].N]));
|
||
end;
|
||
end;
|
||
if equ then
|
||
begin
|
||
AEntity := HTMLEntities[k];
|
||
Result := true;
|
||
exit;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function CleanHTMLString(AText: String): String;
|
||
var
|
||
ent: TsHTMLEntity;
|
||
P: PChar;
|
||
ch: Char;
|
||
hasStartSpace, hasEndSpace: Boolean;
|
||
begin
|
||
Result := '';
|
||
|
||
// Remove leading and trailing spaces and line endings coming from formatted
|
||
// source lines. Retain 1 single space, at the end even without spaces found.
|
||
// No idea if this is 100% correct - at least, looks good.
|
||
hasStartSpace := false;
|
||
while (Length(AText) > 0) and (AText[1] in [#9, #13, #10, ' ']) do
|
||
begin
|
||
if AText[1] = ' ' then hasStartSpace := true; // A leading space will be added later
|
||
Delete(AText, 1, 1);
|
||
end;
|
||
|
||
hasEndSpace := false;
|
||
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
|
||
begin
|
||
hasEndSpace := true; // A trailing space will be added later
|
||
Delete(AText, Length(AText), 1);
|
||
end;
|
||
|
||
if AText = '' then
|
||
begin
|
||
if hasStartSpace or hasEndSpace then Result := ' ';
|
||
exit;
|
||
end;
|
||
|
||
// Replace HTML entities by their counter part UTF8 characters
|
||
P := @AText[1];
|
||
while (P^ <> #0) do begin
|
||
ch := P^;
|
||
case ch of
|
||
' ': begin
|
||
// collapse multiple spaces to a single space (HTML spec)
|
||
// http://stackoverflow.com/questions/24615355/browser-white-space-rendering
|
||
Result := Result + ' ';
|
||
inc(P);
|
||
while (P^ = ' ') do inc(P);
|
||
dec(P);
|
||
end;
|
||
'&': begin
|
||
inc(P);
|
||
if (P <> nil) and IsHTMLEntity(P, ent) then
|
||
begin
|
||
Result := Result + ent.Ch;
|
||
inc(P, Length(ent.E));
|
||
end else
|
||
begin
|
||
Result := Result + '&';
|
||
Continue;
|
||
end;
|
||
end;
|
||
else
|
||
Result := Result + ch;
|
||
end;
|
||
inc(P);
|
||
end;
|
||
|
||
// Add leading and trailing spaces from above.
|
||
if hasStartSpace then Result := ' ' + Result;
|
||
if hasEndSpace then Result := Result + ' ';
|
||
end;
|
||
|
||
function RemoveHTMLEntities(const AText: String): String;
|
||
var
|
||
ent: TsHTMLEntity;
|
||
P: PChar;
|
||
ch: AnsiChar;
|
||
begin
|
||
Result := '';
|
||
P := @AText[1];
|
||
while (P^ <> #0) do begin
|
||
ch := P^;
|
||
case ch of
|
||
'&': begin
|
||
inc(P);
|
||
if (P <> nil) and IsHTMLEntity(P, ent) then
|
||
begin
|
||
Result := Result + ent.Ch;
|
||
inc(P, Length(ent.E));
|
||
end else
|
||
begin
|
||
Result := Result + '&';
|
||
Continue;
|
||
end;
|
||
end;
|
||
else Result := Result + ch;
|
||
end;
|
||
inc(P);
|
||
end;
|
||
end;
|
||
|
||
{==============================================================================}
|
||
{ TsHTMLAttr }
|
||
{==============================================================================}
|
||
|
||
constructor TsHTMLAttr.Create(AName, AValue: String);
|
||
begin
|
||
Name := AName;
|
||
Value := AValue;
|
||
end;
|
||
|
||
|
||
{==============================================================================}
|
||
{ TsHTMLAttrList }
|
||
{==============================================================================}
|
||
|
||
function TsHTMLAttrList.GetItem(AIndex: Integer): TsHTMLAttr;
|
||
begin
|
||
Result := TsHTMLAttr(inherited GetItem(AIndex));
|
||
end;
|
||
|
||
function TsHTMLAttrList.IndexOfName(AName: String): Integer;
|
||
begin
|
||
AName := Lowercase(AName);
|
||
for Result := 0 to Count-1 do
|
||
if GetItem(Result).Name = AName then
|
||
exit;
|
||
Result := -1;
|
||
end;
|
||
|
||
{ AHTML is a HTML string beginning with a < tag. Seeks the first space to split
|
||
off the HTML tag. Then seeks for = and " characters to extract the attributes
|
||
which are split into name/value pairs at the = character. The value part is
|
||
unquoted. }
|
||
procedure TsHTMLAttrList.Parse(AHTML: String);
|
||
var
|
||
i: Integer;
|
||
len: Integer;
|
||
value, nam: String;
|
||
begin
|
||
Clear;
|
||
if (AHTML[1] <> '<') then // just for simplification
|
||
raise Exception.Create('[THTMLAttrList.Parse] HTML tags expected.');
|
||
|
||
// Find first space
|
||
i := 1;
|
||
len := Length(AHTML);
|
||
while (i <= len) and (AHTML[i] <> ' ') do inc(i);
|
||
|
||
// Parse attribute string
|
||
nam := '';
|
||
while (i <= len) do
|
||
begin
|
||
case AHTML[i] of
|
||
'=': begin
|
||
inc(i);
|
||
value := '';
|
||
if AHTML[i] = '"' then
|
||
begin
|
||
inc(i); // skip the initial '"'
|
||
while AHTML[i] <> '"' do
|
||
begin
|
||
value := value + AHTML[i];
|
||
inc(i);
|
||
end;
|
||
inc(i); // skip the final '"'
|
||
end else
|
||
while not (AHTML[i] in [' ', '>', '/']) do
|
||
begin
|
||
value := value + AHTML[i];
|
||
inc(i);
|
||
end;
|
||
Add(TsHTMLAttr.Create(lowercase(trim(nam)), trim(value)));
|
||
nam := '';
|
||
end;
|
||
' ', '/', '>': ;
|
||
else nam := nam + AHTML[i];
|
||
end;
|
||
inc(i);
|
||
end;
|
||
|
||
i := IndexOfName('style');
|
||
if i > -1 then ParseStyle(Items[i].Value);
|
||
end;
|
||
|
||
{ AStyle is the value part of a 'style="...."' HTML string. Splits the into
|
||
individual records at the semicolons (;) and into name-value pairs at the
|
||
colon (:). Adds the name-value pairs to the list. }
|
||
procedure TsHTMLAttrList.ParseStyle(AStyle: String);
|
||
var
|
||
i, len: Integer;
|
||
value, nam: String;
|
||
begin
|
||
i := 1;
|
||
len := Length(AStyle);
|
||
|
||
// skip white space
|
||
while (i <= len) and (AStyle[i] = ' ') do inc(i);
|
||
|
||
// iterate through string
|
||
nam := '';
|
||
while (i <= len) do
|
||
begin
|
||
case AStyle[i] of
|
||
':': begin // name-value separator
|
||
inc(i); // skip ':' ...
|
||
while (i <= len) and (AStyle[i] = ' ') do inc(i); // ... and white space
|
||
value := '';
|
||
while (i <= len) and (AStyle[i] <> ';') do
|
||
begin
|
||
value := value + AStyle[i];
|
||
inc(i);
|
||
end;
|
||
Add(TsHTMLAttr.Create(lowercase(trim(nam)), UnquoteStr(trim(value))));
|
||
nam := '';
|
||
end;
|
||
' ': ; // skip white space
|
||
else nam := nam + AStyle[i];
|
||
end;
|
||
inc(i);
|
||
end;
|
||
end;
|
||
|
||
procedure TsHTMLAttrList.SetItem(AIndex: Integer; AValue: TsHTMLAttr);
|
||
begin
|
||
inherited SetItem(AIndex, AValue);
|
||
end;
|
||
|
||
|
||
{==============================================================================}
|
||
{ HTML-to-Rich-text conversion }
|
||
{==============================================================================}
|
||
type
|
||
TsHTMLAnalyzer = class(THTMLParser)
|
||
private
|
||
FWorkbook: TsWorkbook;
|
||
FPlainText: String;
|
||
FRichTextParams: TsRichTextParams;
|
||
FAttrList: TsHTMLAttrList;
|
||
FFontStack: TsIntegerStack;
|
||
FCurrFont: TsFont;
|
||
FPointSeparatorSettings: TFormatSettings;
|
||
FPreserveSpaces: Boolean;
|
||
function AddFont(AFont: TsFont): Integer;
|
||
procedure AddRichTextParam(AFont: TsFont; AHyperlinkIndex: Integer = -1);
|
||
procedure ProcessFontRestore;
|
||
procedure ReadFont(AFont: TsFont);
|
||
procedure TagFoundHandler(NoCaseTag, ActualTag: string);
|
||
procedure TextFoundHandler(AText: string);
|
||
public
|
||
constructor Create(AWorkbook: TsWorkbook; AFont: TsFont; AText: String);
|
||
destructor Destroy; override;
|
||
property PlainText: String read FPlainText;
|
||
property RichTextParams: TsRichTextParams read FRichTextParams;
|
||
property PreserveSpaces: Boolean read FPreserveSpaces write FPreserveSpaces;
|
||
end;
|
||
|
||
constructor TsHTMLAnalyzer.Create(AWorkbook: TsWorkbook; AFont: TsFont;
|
||
AText: String);
|
||
begin
|
||
if AWorkbook = nil then
|
||
raise Exception.Create('[TsHTMLAnalyzer.Create] Workbook required.');
|
||
if AFont = nil then
|
||
raise Exception.Create('[TsHTMLAnalyzer.Create] Font required.');
|
||
|
||
inherited Create(AText);
|
||
FWorkbook := AWorkbook;
|
||
|
||
OnFoundTag := @TagFoundHandler;
|
||
OnFoundText := @TextFoundHandler;
|
||
|
||
FPlainText := '';
|
||
SetLength(FRichTextParams, 0);
|
||
|
||
FAttrList := TsHTMLAttrList.Create;
|
||
FCurrFont := TsFont.Create;
|
||
FCurrFont.CopyOf(AFont);
|
||
|
||
FFontStack := TsIntegerStack.Create;
|
||
|
||
FPointSeparatorSettings := DefaultFormatSettings;
|
||
FPointSeparatorSettings.DecimalSeparator := '.';
|
||
end;
|
||
|
||
destructor TsHTMLAnalyzer.Destroy;
|
||
begin
|
||
FreeAndNil(FFontStack);
|
||
FreeAndNil(FCurrFont);
|
||
FreeAndNil(FAttrList);
|
||
inherited Destroy;
|
||
end;
|
||
|
||
{ Stores a font in the workbook's font list. Does not allow duplicates. }
|
||
function TsHTMLAnalyzer.AddFont(AFont: TsFont): Integer;
|
||
var
|
||
fnt: TsFont;
|
||
begin
|
||
// Is the font already stored in the workbook's font list?
|
||
Result := FWorkbook.FindFont(AFont.FontName, AFont.Size, AFont.Style, AFont.Color, AFont.Position);
|
||
if Result = -1 then
|
||
begin
|
||
// No. Create a new font, add it to the list, and return the new index.
|
||
fnt := TsFont.Create;
|
||
fnt.CopyOf(AFont);
|
||
Result := FWorkbook.AddFont(fnt);
|
||
end;
|
||
end;
|
||
|
||
procedure TsHTMLAnalyzer.AddRichTextParam(AFont: TsFont;
|
||
AHyperlinkIndex: Integer = -1);
|
||
var
|
||
len: Integer;
|
||
fntIndex: Integer;
|
||
n: Integer;
|
||
begin
|
||
n := Length(FRichTextParams);
|
||
len := UTF8Length(FPlainText);
|
||
fntIndex := AddFont(AFont);
|
||
if (n > 0) and (FRichTextparams[n-1].FirstIndex = len+1) then
|
||
begin
|
||
// Avoid adding another rich-text parameter for the same text location:
|
||
// Update the previous one
|
||
FRichTextParams[n-1].FontIndex := fntIndex;
|
||
FRichTextParams[n-1].HyperlinkIndex := AHyperlinkIndex;
|
||
end else
|
||
begin
|
||
// Add a new rich-text parameter
|
||
SetLength(FRichTextParams, n+1);
|
||
FRichTextParams[n].FirstIndex := len + 1;
|
||
FRichTextParams[n].FontIndex := fntIndex;
|
||
FRichTextParams[n].HyperlinkIndex := AHyperlinkIndex;
|
||
end;
|
||
end;
|
||
|
||
procedure TsHTMLAnalyzer.ProcessFontRestore;
|
||
var
|
||
fntIndex: Integer;
|
||
begin
|
||
fntIndex := FFontStack.Pop;
|
||
if fntIndex > -1 then
|
||
begin
|
||
FCurrFont.CopyOf(FWorkbook.GetFont(fntIndex));
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
end;
|
||
|
||
procedure TsHTMLAnalyzer.ReadFont(AFont: TsFont);
|
||
const
|
||
FACTOR = 1.2;
|
||
MIN_FONTSIZE = 6;
|
||
var
|
||
idx: Integer;
|
||
L: TStringList;
|
||
i, ip, im: Integer;
|
||
s: String;
|
||
f: Double;
|
||
defFntSize: Single;
|
||
begin
|
||
idx := FAttrList.IndexOfName('font-family'); // style tag
|
||
if idx = -1 then
|
||
idx := FAttrList.IndexOfName('face'); // html tag
|
||
if idx > -1 then begin
|
||
L := TStringList.Create;
|
||
try
|
||
L.StrictDelimiter := true;
|
||
L.DelimitedText := FAttrList[idx].Value;
|
||
AFont.FontName := L[0];
|
||
finally
|
||
L.Free;
|
||
end;
|
||
end;
|
||
|
||
idx := FAttrList.IndexOfName('font-size');
|
||
if idx = -1 then
|
||
idx := FAttrList.IndexOfName('size');
|
||
if idx > -1 then begin
|
||
defFntSize := FWorkbook.GetDefaultFont.Size;
|
||
s := FAttrList[idx].Value;
|
||
case s of
|
||
'medium', '3' : AFont.Size := defFntSize;
|
||
'large', '4' : AFont.Size := defFntSize*FACTOR;
|
||
'x-large', '5' : AFont.Size := defFntSize*FACTOR*FACTOR;
|
||
'xx-large', '6' : AFont.Size := defFntSize*FACTOR*FACTOR*FACTOR;
|
||
'small', '2' : AFont.Size := Max(MIN_FONTSIZE, defFntSize/FACTOR);
|
||
'x-small' : AFont.Size := Max(MIN_FONTSIZE, defFntSize/FACTOR/FACTOR);
|
||
'xx-small', '1' : AFont.Size := Max(MIN_FONTSIZE, defFntSize/FACTOR/FACTOR/FACTOR);
|
||
'larger' : AFont.Size := AFont.Size * FACTOR;
|
||
'smaller' : AFont.Size := Max(MIN_FONTSIZE, AFont.Size / FACTOR);
|
||
else
|
||
if s[1] in ['+', '-'] then
|
||
begin
|
||
TryStrToInt(s, i);
|
||
AFont.Size := defFntSize * IntPower(FACTOR, i);
|
||
end else
|
||
begin
|
||
i := 0;
|
||
im := 0;
|
||
ip := pos('%', s);
|
||
if ip = 0 then begin
|
||
im := pos('rem', s);
|
||
if im = 0 then
|
||
im := pos('em', s);
|
||
end;
|
||
if (ip > 0) then i := ip else
|
||
if (im > 0) then i := im;
|
||
if i > 0 then
|
||
begin
|
||
s := copy(s, 1, i-1);
|
||
if TryStrToFloat(s, f, FPointSeparatorSettings) then
|
||
begin
|
||
if ip > 0 then f := f * 0.01;
|
||
AFont.Size := Max(MIN_FONTSIZE, abs(f) * defFntSize);
|
||
end;
|
||
end else
|
||
AFont.Size := Max(MIN_FONTSIZE, HTMLLengthStrToPts(s));
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
idx := FAttrList.IndexOfName('font-style');
|
||
if idx > -1 then
|
||
case FAttrList[idx].Value of
|
||
'normal' : Exclude(AFont.Style, fssItalic);
|
||
'italic' : Include(AFont.Style, fssItalic);
|
||
'oblique' : Include(AFont.Style, fssItalic);
|
||
end;
|
||
|
||
idx := FAttrList.IndexOfName('font-weight');
|
||
if idx > -1 then
|
||
begin
|
||
s := FAttrList[idx].Value;
|
||
if TryStrToInt(s, i) and (i >= 700) then Include(AFont.Style, fssBold);
|
||
end;
|
||
|
||
idx := FAttrList.IndexOfName('text-decoration');
|
||
if idx > -1 then
|
||
begin
|
||
s := FAttrList[idx].Value;
|
||
if pos('underline', s) <> 0 then Include(AFont.Style, fssUnderline);
|
||
if pos('line-through', s) <> 0 then Include(AFont.Style, fssStrikeout);
|
||
end;
|
||
|
||
idx := FAttrList.IndexOfName('color');
|
||
if idx > -1 then
|
||
AFont.Color := HTMLColorStrToColor(FAttrList[idx].Value);
|
||
end;
|
||
|
||
procedure TsHTMLAnalyzer.TagFoundHandler(NoCaseTag, ActualTag: String);
|
||
begin
|
||
case NoCaseTag[2] of
|
||
'B': case NoCaseTag of
|
||
'<B>' : begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssBold);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'<BR>',
|
||
'<BR/>': FPlainText := FPlainText + FPS_LINE_ENDING;
|
||
else if (pos('<BR ', NoCaseTag) = 1) then
|
||
FPlainText := FPlainText + FPS_LINE_ENDING;
|
||
end;
|
||
'D': if (NoCaseTag = '<DEL>') then
|
||
begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssStrikeout);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'E': if (NoCaseTag = '<EM>') then
|
||
begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssItalic);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'F': if (pos('<FONT ', NoCaseTag) = 1) then
|
||
begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
FAttrList.Parse(ActualTag);
|
||
ReadFont(FCurrFont);
|
||
AddRichTextparam(FCurrFont);
|
||
end;
|
||
'I': case NoCaseTag of
|
||
'<I>' : begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssItalic);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'<INS>': begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssUnderline);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
end;
|
||
'S': case NoCaseTag of
|
||
'<S>' : begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssStrikeout);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'<STRONG>':begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssBold);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'<SUB>': begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
FCurrFont.Position := fpSubscript;
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'<SUP>': begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
FCurrFont.Position := fpSuperscript;
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
end;
|
||
'U': if (NoCaseTag = '<U>') then
|
||
begin
|
||
FFontStack.Push(AddFont(FCurrFont));
|
||
Include(FCurrFont.Style, fssUnderline);
|
||
AddRichTextParam(FCurrFont);
|
||
end;
|
||
'/': case NoCaseTag[3] of
|
||
'B': if (NoCaseTag) = '</B>' then ProcessFontRestore;
|
||
'D': if (NoCaseTag) = '</DEL>' then ProcessFontRestore;
|
||
'E': if (NoCaseTag) = '</EM>' then ProcessFontRestore;
|
||
'F': if (NoCaseTag) = '</FONT>' then ProcessFontRestore;
|
||
'I': if (NoCaseTag = '</I>') or (NoCaseTag = '</INS>') then ProcessFontRestore;
|
||
'S': if (NoCaseTag = '</S>') or (NoCaseTag = '</STRONG>') or
|
||
(NoCaseTag = '</SUB>') or (NoCaseTag = '</SUP>') then ProcessFontRestore;
|
||
'U': if (NoCaseTag = '</U>') then ProcessFontRestore;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
procedure TsHTMLAnalyzer.TextFoundHandler(AText: String);
|
||
begin
|
||
if not FPreserveSpaces then
|
||
AText := CleanHTMLString(AText) else
|
||
AText := RemoveHTMLEntities(AText);
|
||
if AText <> '' then
|
||
begin
|
||
if FPlainText = '' then
|
||
FPlainText := AText
|
||
else
|
||
FPlainText := FPlainText + AText;
|
||
end;
|
||
end;
|
||
|
||
|
||
{@@ ----------------------------------------------------------------------------
|
||
Extracts rich-text parameters out of an html-formatted string and returns the
|
||
plain text
|
||
-------------------------------------------------------------------------------}
|
||
procedure HTMLToRichText(AWorkbook: TsWorkbook; AFont: TsFont;
|
||
const AHTMLText: String; out APlainText: String;
|
||
out ARichTextParams: TsRichTextParams);
|
||
var
|
||
analyzer: TsHTMLAnalyzer;
|
||
j: Integer;
|
||
begin
|
||
analyzer := TsHTMLAnalyzer.Create(AWorkbook, AFont, AHTMLText + '<end>');
|
||
try
|
||
analyzer.PreserveSpaces := true;
|
||
analyzer.Exec;
|
||
APlainText := analyzer.PlainText;
|
||
SetLength(ARichTextParams, Length(analyzer.RichTextParams));
|
||
for j:=0 to High(ARichTextParams) do
|
||
ARichTextParams[j] := analyzer.RichTextParams[j];
|
||
finally
|
||
analyzer.Free;
|
||
end;
|
||
end;
|
||
|
||
|
||
{==============================================================================}
|
||
{ Rich-text-to-HTML conversion }
|
||
{==============================================================================}
|
||
|
||
type
|
||
TsHTMLComposer = class
|
||
private
|
||
FPointSeparatorSettings: TFormatSettings;
|
||
FWorkbook: TsWorkbook;
|
||
FBaseFont: TsFont;
|
||
FPlainText: String;
|
||
FRichTextParams: TsRichTextParams;
|
||
FPrefix: String;
|
||
FTagCase: TsTagCase;
|
||
procedure GetFontsFromWorkbook(out AFonts: TsFontArray);
|
||
function GetTextOfRichTextParam(AIndex: Integer): String;
|
||
protected
|
||
function FixTagCase(ATag: String): String;
|
||
public
|
||
constructor Create(AWorkbook: TsWorkbook; AFont: TsFont; APrefix: String = '';
|
||
ATagCase: TsTagCase = tcLowercase);
|
||
function Exec(const APlainText: String; const ARichTextParams: TsRichTextParams): String;
|
||
end;
|
||
|
||
constructor TsHTMLComposer.Create(AWorkbook: TsWorkbook; AFont: TsFont;
|
||
APrefix: String = ''; ATagCase: TsTagCase = tcLowercase);
|
||
begin
|
||
FPointSeparatorSettings := DefaultFormatSettings;
|
||
FPointSeparatorSettings.DecimalSeparator := '.';
|
||
FWorkbook := AWorkbook;
|
||
FBaseFont := AFont;
|
||
FPrefix := APrefix;
|
||
FTagCase := ATagCase;
|
||
end;
|
||
|
||
function TsHTMLComposer.Exec(const APlainText: String;
|
||
const ARichTextParams: TsRichTextParams): String;
|
||
type
|
||
TChangeFlag = (cfFontName, cfFontSize, cfFontColor);
|
||
const
|
||
EPS = 1E-3;
|
||
var
|
||
i: Integer;
|
||
prevFnt, currFnt: TsFont;
|
||
chgFlags: set of TChangeFlag;
|
||
openingTag, closingTag: String;
|
||
fonts: TsFontArray;
|
||
tag: String;
|
||
begin
|
||
if Length(ARichTextParams) = 0 then
|
||
begin
|
||
Result := FPlainText;
|
||
exit;
|
||
end;
|
||
|
||
FRichTextParams := ARichTextParams;
|
||
FPlainText := APlainText;
|
||
|
||
prevFnt := TsFont.Create;
|
||
prevFnt.CopyOf(FBaseFont);
|
||
|
||
if FRichTextParams[0].FirstIndex > 1 then
|
||
Result := GetTextOfRichTextParam(-1) else
|
||
Result := '';
|
||
|
||
GetFontsFromWorkbook(fonts);
|
||
for i:=0 to High(FRichTextParams) do
|
||
begin
|
||
currFnt := fonts[i];
|
||
openingTag := '';
|
||
closingTag := '';
|
||
if not SameFont(currFnt, prevFnt) then
|
||
begin
|
||
chgFlags := [];
|
||
if not SameText(prevFnt.FontName, currFnt.FontName) then
|
||
Include(chgFlags, cfFontName);
|
||
if not SameValue(currFnt.Size, prevFnt.Size, EPS) then
|
||
Include(chgFlags, cfFontSize);
|
||
if currFnt.Color <> prevFnt.Color then
|
||
Include(chgFlags, cfFontColor);
|
||
|
||
if [cfFontName, cfFontSize, cfFontColor] * chgFlags <> [] then
|
||
begin
|
||
tag := FixTagCase('font');
|
||
openingTag := '<' + tag;
|
||
if cfFontName in chgFlags then
|
||
begin
|
||
openingTag := openingTag + ' ' + FPrefix + FixTagCase('face') +
|
||
'="' + UnquoteStr(currFnt.FontName) + '"';
|
||
prevFnt.FontName := currFnt.FontName;
|
||
end;
|
||
if cfFontSize in chgFlags then
|
||
begin
|
||
openingTag := openingTag + ' ' + FPrefix + FixTagCase('size') +
|
||
'="' + Format('%.gpt', [currFnt.Size], FPointSeparatorSettings) + '"';
|
||
prevFnt.Size := currFnt.Size;
|
||
end;
|
||
if cfFontColor in chgFlags then
|
||
begin
|
||
openingTag := openingTag + ' ' + FPrefix + FixTagCase('color') +
|
||
'="' + ColorToHTMLColorStr(currFnt.Color) + '"';
|
||
prevFnt.Size := currFnt.Color;
|
||
end;
|
||
openingTag := openingTag + '>';
|
||
closingTag :='</' + tag + '>' + closingTag;
|
||
end;
|
||
|
||
if (fssBold in currFnt.Style) then
|
||
begin
|
||
tag := FixTagCase('b');
|
||
openingTag := openingTag + '<' + tag + '>';
|
||
closingTag := '</' + tag + '>' + closingTag;
|
||
prevFnt.Style := prevFnt.Style + [fssBold];
|
||
end else
|
||
prevFnt.Style := prevFnt.Style - [fssBold];;
|
||
|
||
if (fssItalic in currFnt.Style) then
|
||
begin
|
||
tag := FixTagCase('i');
|
||
openingTag := openingTag + '<' + tag + '>';
|
||
closingTag := '</' + tag + '>' + closingTag;
|
||
prevFnt.Style := prevFnt.Style + [fssItalic];
|
||
end else
|
||
prevFnt.Style := prevFnt.Style - [fssItalic];
|
||
|
||
if (fssUnderline in currFnt.Style) then
|
||
begin
|
||
tag := FixTagCase('u');
|
||
openingTag := openingTag + '<' + tag + '>';
|
||
closingTag := '</' + tag + '>' + closingTag;
|
||
prevFnt.Style := prevFnt.Style + [fssUnderline];
|
||
end else
|
||
prevFnt.Style := prevFnt.Style - [fssUnderline];
|
||
|
||
if (fssStrikeout in currFnt.Style) then
|
||
begin
|
||
tag := FixTagCase('s');
|
||
openingTag := openingTag + '<' + tag + '>';
|
||
closingTag := '</' + tag + '>' + closingTag;
|
||
prevFnt.Style := prevFnt.Style + [fssStrikeout];
|
||
end else
|
||
prevFnt.Style := prevFnt.Style - [fssStrikeout];
|
||
|
||
if currFnt.Position <> prevFnt.Position then
|
||
begin
|
||
if currFnt.Position = fpSuperscript then
|
||
begin
|
||
tag := FixTagCase('sup');
|
||
openingTag := openingTag + '<' + tag + '>';
|
||
closingTag := '</' + tag + '>' + closingTag;
|
||
currFnt.Position := fpSuperscript;
|
||
end else
|
||
if currFnt.Position = fpSubscript then
|
||
begin
|
||
tag := FixTagCase('sub');
|
||
openingTag := openingTag + '<' + tag + '>';
|
||
closingTag := '</' + tag + '>' + closingTag;
|
||
currFnt.Position := fpSubscript;
|
||
end else
|
||
currFnt.Position := fpNormal;
|
||
end;
|
||
end;
|
||
|
||
// Add the node text with opening and closing tags (reverse order as opening!)
|
||
Result := Result + openingTag + GetTextOfRichTextParam(i) + closingTag;
|
||
end; // for
|
||
end;
|
||
|
||
function TsHTMLComposer.FixTagCase(ATag: String): String;
|
||
begin
|
||
case FTagCase of
|
||
tcLowercase:
|
||
Result := Lowercase(ATag);
|
||
tcUppercase:
|
||
Result := Uppercase(ATag);
|
||
tcProperCase:
|
||
begin
|
||
Result := Lowercase(ATag);
|
||
Result[1] := UpCase(Result[1]);
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
procedure TsHTMLComposer.GetFontsFromWorkbook(out AFonts: TsFontArray);
|
||
var
|
||
i: Integer;
|
||
begin
|
||
SetLength(AFonts, Length(FRichTextParams));
|
||
for i:=0 to High(AFonts) do
|
||
AFonts[i] := FWorkbook.GetFont(FRichTextParams[i].FontIndex);
|
||
end;
|
||
|
||
function TsHTMLComposer.GetTextOfRichTextParam(AIndex: Integer): String;
|
||
var
|
||
p1, p2: Integer;
|
||
begin
|
||
if AIndex = -1 then
|
||
Result := UTF8Copy(FPlainText, 1, FRichTextParams[0].FirstIndex-1)
|
||
else
|
||
if AIndex <= High(FRichTextParams) then
|
||
begin
|
||
p1 := FRichTextParams[AIndex].FirstIndex;
|
||
if AIndex < High(FRichTextparams) then
|
||
p2 := FRichTextParams[AIndex+1].FirstIndex else
|
||
p2 := UTF8Length(FPlainText) + 1;
|
||
Result := UTF8Copy(FPlaiNText, p1, p2-p1);
|
||
end else
|
||
Result := '';
|
||
end;
|
||
|
||
|
||
{@@ ----------------------------------------------------------------------------
|
||
Constructs a html-coded string from a plain text string and
|
||
rich-text parameters
|
||
-------------------------------------------------------------------------------}
|
||
procedure RichTextToHTML(AWorkbook: TsWorkbook; AFont: TsFont;
|
||
const APlainText: String; const ARichTextParams: TsRichTextParams;
|
||
out AHTMLText: String; APrefix: String = ''; ATagCase: TsTagCase = tcLowercase);
|
||
var
|
||
composer: TsHTMLComposer;
|
||
begin
|
||
if Length(ARichTextParams) = 0 then
|
||
AHTMLText := APlainText
|
||
else
|
||
begin
|
||
composer := TsHTMLComposer.Create(AWorkbook, AFont, APrefix, ATagCase);
|
||
try
|
||
AHTMLText := composer.Exec(APlainText, ARichTextParams);
|
||
finally
|
||
composer.Free;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
end.
|
||
|