fpc/tests/test/cg/tcase3.pp
florian e8f7c9dfdd + test for large case statements
git-svn-id: trunk@35646 -
2017-03-23 17:57:31 +00:00

234 lines
8.2 KiB
ObjectPascal
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{$codepage utf8}
{$mode objfpc} {$h+} {$coperators on}
uses
SysUtils, DateUtils;
const
Sample = widestring('Examples' + LineEnding +
'Appointments: Аппойнтменты (Appojntmenty)[2]' + LineEnding +
'Iced Coffee: Аисд кофе (Aisd kofe)[2]' + LineEnding +
'Would you like that sliced or in one piece?: Вам наслайсовать или писом? (Vam naslajsovatj ili pisom?)[4]' + LineEnding +
'Driving upstate on the highways: Драйвуем в апстейт по хайвеям (Drajvujem v apstejt po hajwejam)[5]' + LineEnding +
'Sliced Cheese: Слайсающий чиз (Slajsajuçij čiz)[5]' + LineEnding +
'To merge branches: Смержить бранчи (Smeržitj branči)[2]' + LineEnding +
'To manage: Сменеджить (Smenedžitj)' + LineEnding +
'I sent you message with attached request: Я засендил тебе месседж с приаттаченым реквестом (Ya zasendil tebe messedž s priattachenim rekvestom)');
Iterations = 880000;
type
CodepointFlag = (Letter, Vowel, Consonant, Digit, Whitespace, Newline, Punctuation, Cyrillic, Latin, Diacritic);
CodepointClassification = set of CodepointFlag;
function Classify_Case(const cp: widechar): CodepointClassification;
begin
case cp of
' ', #$9 {tab}, #$a0 {nbsp}, ' ' {ideographic space}: result := [Whitespace];
#$a {lf}, #$d {cr}: result := [Whitespace, Newline];
'0' .. '9': result := [Digit];
'a', 'e', 'i', 'o', 'u', 'y', 'A', 'E', 'I', 'O', 'U', 'Y': result := [Letter, Latin, Vowel];
'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z',
'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z': result := [Letter, Latin, Consonant];
'.', '!', ',', '?', ':', ';', '-', '—': result := [Punctuation];
'А', 'Е', 'И', 'О', 'У', 'Ы', 'Э', 'Ю', 'Я', 'а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я', 'Ё', 'ё': result := [Letter, Cyrillic, Vowel];
'б', 'в', 'г', 'д', 'ж', 'з', 'й', 'к', 'л', 'м', 'н', 'п', 'р', 'с', 'т', 'ф', 'х', 'ц', 'ч', 'ш', 'щ',
'Б', 'В', 'Г', 'Д', 'Ж', 'З', 'Й', 'К', 'Л', 'М', 'Н', 'П', 'Р', 'С', 'Т', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ': result := [Letter, Cyrillic, Consonant];
'ъ', 'ь', 'Ъ', 'Ь': result := [Letter, Cyrillic];
#$300 {first diacritic} .. #$36f {last diacritic}: result := [Diacritic];
else result := [];
end;
end;
// I have built this tree from sorted starts of subsequences:
// (1) 9 tab
// (2) A lf
// (3) D cr
// (4) 20 space
// (5) 21 !
// (6) 2C ,
// (7) 2D -
// (8) 2E .
// (9) 3039 0-9
// (10) 3A :
// (11) 3B ;
// (12) 3F ?
// (13) 41-5a A-Z
// (14) 61-7a a-z
// (15) A0 nbsp
// (16) 30036f diacritics
// (17) 401 Ё
// (18) 41044f А
// (19) 451 ё
// (20) 2014 — (em dash)
// (21) 3000 ideographic space
function Classify_Tree(const cp: widechar): CodepointClassification;
begin
if cp < #$3F then // (1) ~ (11)
if cp < #$2D then // (1) ~ (6) — small set without subsequences, resort to predictor-friendly :) case..of
case cp of
' ', #$9 {tab}: exit([Whitespace]);
#$a {lf}, #$d {cr}: exit([Whitespace, Newline]);
'!', ',': exit([Punctuation]);
end
else // (7) ~ (11)
if cp < #$3A then // (7) ~ (9)
if cp < #$30 then // (7) ~ (8)
case cp of
'-', '.': exit([Punctuation]);
end
else // (9)
case cp of
'0' .. '9': exit([Digit]);
end
else // (10) ~ (11)
case cp of
':', ';': exit([Punctuation]);
end
else // (12) ~ (21)
if cp < #$401 then // (12) ~ (16)
if cp < #$A0 then // (12) ~ (14)
if cp < #$61 then // (12) ~ (13)
if cp < #$41 then // (12)
case cp of
'?': exit([Punctuation]);
end
else // (13)
case cp of
'A', 'E', 'I', 'O', 'U', 'Y': exit([Letter, Latin, Vowel]);
'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z': exit([Letter, Latin, Consonant]);
end
else //(14)
case cp of
'a', 'e', 'i', 'o', 'u', 'y': exit([Letter, Latin, Vowel]);
'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z': exit([Letter, Latin, Consonant]);
end
else // (15) ~ (16)
if cp < #$300 then // (15)
case cp of
#$a0 {nbsp}: exit([Whitespace]);
end
else // (16)
case cp of
#$300 {first diacritic} .. #$36f {last diacritic}: exit([Diacritic]);
end
else // (17) ~ (21)
if cp < #$2014 then // (17) ~ (19)
if cp < #$451 then // (17) ~ (18)
if cp < #$410 then // (17)
case cp of
'Ё': exit([Letter, Cyrillic, Vowel]);
end
else // (18)
case cp of
'А', 'Е', 'И', 'О', 'У', 'Ы', 'Э', 'Ю', 'Я', 'а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я': exit([Letter, Cyrillic, Vowel]);
'б', 'в', 'г', 'д', 'ж', 'з', 'й', 'к', 'л', 'м', 'н', 'п', 'р', 'с', 'т', 'ф', 'х', 'ц', 'ч', 'ш', 'щ',
'Б', 'В', 'Г', 'Д', 'Ж', 'З', 'Й', 'К', 'Л', 'М', 'Н', 'П', 'Р', 'С', 'Т', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ': exit([Letter, Cyrillic, Consonant]);
'ъ', 'ь', 'Ъ', 'Ь': exit([Letter, Cyrillic]);
end
else // (19)
case cp of
'ё': exit([Letter, Cyrillic, Vowel]);
end
else // (20) ~ (21)
case cp of
'—': exit([Punctuation]);
' ': exit([Whitespace]);
end;
// 'else'
result := [];
end;
procedure Test;
function ToString(const cls: CodepointClassification): string;
var
f: CodepointFlag;
begin
result := '';
for f in CodepointFlag do
if f in cls then
begin
if result <> '' then result += ', ';
WriteStr(result, f);
end;
result := '[' + result + ']';
end;
var
cp: widechar;
begin
write('Sanity check... ');
for cp := #0 to #$3001 do
if Classify_Case(cp) <> Classify_Tree(cp) then
begin
writeln(
'Failed. Classify_Case isn''t equivalent to Classify_Tree.', LineEnding,
'Symbol code: $', HexStr(cardinal(cp), bitsizeof(widechar) div 4), LineEnding,
'Classify_Case: ', ToString(Classify_Case(cp)), LineEnding,
'Classify_Tree: ', ToString(Classify_Tree(cp)), LineEnding);
readln;
halt(1);
end;
writeln('Passed. Classify_Case and Classify_Tree are equivalent.',
LineEnding);
end;
procedure Benchmark;
var
caseTime, treeTime: TDateTime;
vow, lat: cardinal;
iteration: cardinal;
i: SizeInt;
procedure ResetWork(out vow, lat: cardinal);
begin
vow := 0; lat := 0;
end;
procedure Note(const cls: CodepointClassification);
begin
if Vowel in cls then inc(vow);
if Latin in cls then inc(lat);
end;
begin
write('Benchmarking Classify_Tree... ');
ResetWork(vow, lat);
treeTime := Now;
for iteration := 1 to Iterations do
begin
for i := 1 to length(Sample) do
Note(Classify_Tree(Sample[i]));
if iteration < Iterations then ResetWork(vow, lat);
end;
treeTime := SecondSpan(Now, treeTime);
writeln(treeTime:0:1, ' s (', treeTime/Iterations*1e6:0:1, ' mcs/it), vow. ', vow, ', lat. ', lat, '.');
write('Benchmarking Classify_Case... ');
ResetWork(vow, lat);
caseTime := Now;
for iteration := 1 to Iterations do
begin
for i := 1 to length(Sample) do
Note(Classify_Case(Sample[i]));
if iteration < Iterations then ResetWork(vow, lat);
end;
caseTime := SecondSpan(Now, caseTime);
writeln(caseTime:0:1, ' s (', caseTime/Iterations*1e6:0:1, ' mcs/it), vow. ', vow, ', lat. ', lat, '.');
if (treeTime < 0.5) or (caseTime < 0.5) then
writeln('Could not deduce anything useful.')
else
if (treeTime <= 1.05*caseTime) and (caseTime <= 1.05*treeTime) then
writeln('There was no observable difference.')
else
if treeTime > caseTime then
writeln('Classify_Case was faster by ', (treeTime / caseTime - 1)*100:0:0, '% (', treeTime/caseTime:0:1, 'x).')
else
writeln('Classify_Tree was faster by ', (caseTime / treeTime - 1)*100:0:0, '% (', caseTime/treeTime:0:1, 'x).')
end;
begin
Test;
// Benchmark;
end.