fpc/tests/bench/stream.pp
daniel d6e6e39d16 * Minor changes.
git-svn-id: trunk@7580 -
2007-06-05 07:53:55 +00:00

432 lines
11 KiB
ObjectPascal

program stream;
{$ifdef unix}
uses baseunix,unix;
{$endif}
{$ifdef windows}
uses windows;
{$endif}
{-----------------------------------------------------------------------}
{ Original code developed by John D. McCalpin }
{ Programmers: John D. McCalpin }
{ Joe R. Zagar }
{ Pascal conversion: Daniel Mantione }
{ }
{ This program measures memory transfer rates in MB/s for simple }
{ computational kernels coded in Pascal. }
{-----------------------------------------------------------------------}
{ Copyright 1991-2005: John D. McCalpin }
{-----------------------------------------------------------------------}
{ License: }
{ 1. You are free to use this program and/or to redistribute }
{ this program. }
{ 2. You are free to modify this program for your own use, }
{ including commercial use, subject to the publication }
{ restrictions in item 3. }
{ 3. You are free to publish results obtained from running this }
{ program, or from works that you derive from this program, }
{ with the following limitations: }
{ 3a. In order to be referred to as "STREAM benchmark results", }
{ published results must be in conformance to the STREAM }
{ Run Rules, (briefly reviewed below) published at }
{ http://www.cs.virginia.edu/stream/ref.html }
{ and incorporated herein by reference. }
{ As the copyright holder, John McCalpin retains the }
{ right to determine conformity with the Run Rules. }
{ 3b. Results based on modified source code or on runs not in }
{ accordance with the STREAM Run Rules must be clearly }
{ labelled whenever they are published. Examples of }
{ proper labelling include: }
{ "tuned STREAM benchmark results" }
{ "based on a variant of the STREAM benchmark code" }
{ Other comparable, clear and reasonable labelling is }
{ acceptable. }
{ 3c. Submission of results to the STREAM benchmark web site }
{ is encouraged, but not required. }
{ 4. Use of this program or creation of derived works based on this }
{ program constitutes acceptance of these licensing restrictions. }
{ 5. Absolutely no warranty is expressed or implied. }
{-----------------------------------------------------------------------}
{ INSTRUCTIONS:
*
* 1) Stream requires a good bit of memory to run. Adjust the
* value of 'N' (below) to give a 'timing calibration' of
* at least 20 clock-ticks. This will provide rate estimates
* that should be good to about 5% precision.
}
const N = 2000000;
NTIMES = 10;
OFFSET = 0;
{
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
*
* Try compiling with:
* cc -O stream_omp.c -o stream_omp
*
* This is known to work on Cray, SGI, IBM, and Sun machines.
*
*
* 4) Mail the results to mccalpin@cs.virginia.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
* Thanks!
*
}
const HLINE = '-------------------------------------------------------------';
inf = 1/0;
var a,b,c:array[0..N+OFFSET-1] of double;
avgtime:array[0..3] of double = (0,0,0,0);
maxtime:array[0..3] of double = (0,0,0,0);
mintime:array[0..3] of double = (inf,inf,inf,inf);
labels:array[0..3] of string[16]= ('Copy:',
'Scale:',
'Add:',
'Triad:');
bytes:array[0..3] of cardinal = (
2 * sizeof(double) * N,
2 * sizeof(double) * N,
3 * sizeof(double) * N,
3 * sizeof(double) * N
);
const M=20;
function min(a,b:longint):longint;inline;
begin
if a>b then
min:=b
else
min:=a;
end;
function max(a,b:longint):longint;inline;
begin
if a>b then
max:=a
else
max:=b;
end;
function min(a,b:double):double;inline;
begin
if a>b then
min:=b
else
min:=a;
end;
function max(a,b:double):double;inline;
begin
if a>b then
max:=a
else
max:=b;
end;
procedure tuned_STREAM_Copy;
var j:longint;
begin
for j:=0 to N-1 do
c[j]:=a[j];
end;
procedure tuned_STREAM_Scale(scalar:double);
var j:longint;
begin
for j:=0 to N-1 do
b[j]:=scalar*c[j];
end;
procedure tuned_STREAM_Add;
var j:longint;
begin
for j:=0 to N-1 do
c[j]:=a[j]+b[j];
end;
procedure tuned_STREAM_Triad(scalar:double);
var j:longint;
begin
for j:=0 to N-1 do
a[j]:=b[j]+scalar*c[j];
end;
{$ifdef unix}
{$define have_mysecond}
function mysecond:double;
var tp:timeval;
tzp:timezone;
begin
fpgettimeofday(@tp,@tzp);
mysecond:=double(tp.tv_sec)+double(tp.tv_usec)*1e-6;
end;
{$endif}
{$ifdef windows}
{$define have_mysecond}
function mysecond:double;
begin
mysecond:=gettickcount*1e-3;
end;
{$endif}
{$ifndef have_mysecond}
{$error Please implement a mysecond for your platform.}
{$endif}
function checktick:longint;
var i,minDelta,Delta:longint;
t1,t2:double;
timesfound:array[0..M-1] of double;
begin
{ Collect a sequence of M unique time values from the system. }
for i:=0 to M-1 do
begin
t1:=mysecond;
t2:=t1;
while t2-t1<1E-6 do
t2:=mysecond;
t1:=t2;
timesfound[i]:=t1;
end;
{
* Determine the minimum difference between these M values.
* This result will be our estimate (in microseconds) for the
* clock granularity.
}
minDelta:=1000000;
for i:=1 to M-1 do
begin
Delta:=trunc(1E6*(timesfound[i]-timesfound[i-1]));
minDelta:=MIN(minDelta,MAX(Delta,0));
end;
checktick:=minDelta;
end;
procedure checkSTREAMresults;
var aj,bj,cj,scalar:double;
asum,bsum,csum:double;
epsilon:double;
j,k:longint;
begin
{ reproduce initialization }
aj:=1;
bj:=2;
cj:=0;
{ a[] is modified during timing check }
aj:=2*aj;
{ now execute timing loop }
scalar:=3;
for k:=0 to NTIMES-1 do
begin
cj:=aj;
bj:=scalar*cj;
cj:=aj+bj;
aj:=bj+scalar*cj;
end;
aj:=aj*N;
bj:=bj*N;
cj:=cj*N;
asum:=0;
bsum:=0;
csum:=0;
for j:=0 to N-1 do
begin
asum:=asum+a[j];
bsum:=bsum+b[j];
csum:=csum+c[j];
end;
{$ifdef VERBOSE}
writeln('Results Comparison: ');
writeln(' Expected : ',aj,' ',bj,' ',cj);
writeln(' Observed : ',asum,' ',bsum,' ',csum);
{$endif}
epsilon:=1e-8;
if abs(aj-asum)/asum>epsilon then
begin
writeln('Failed Validation on array a');
writeln(' Expected : ',aj);
writeln(' Observed : ',asum);
end
else if abs(bj-bsum)/bsum>epsilon then
begin
writeln('Failed Validation on array b');
writeln(' Expected : ',bj);
writeln(' Observed : ',bsum);
end
else if abs(cj-csum)/csum>epsilon then
begin
writeln('Failed Validation on array c');
writeln(' Expected : ',cj);
writeln(' Observed : ',csum);
end
else
writeln('Solution Validates');
end;
var quantum:longint;
BytesPerWord:longint;
j,k:longint;
scalar,t:double;
times:array[0..3,0..NTIMES-1] of double;
begin
{ --- SETUP --- determine precision and check timing --- }
writeln(HLINE);
writeln('STREAM version Revision: 5.6');
writeln(HLINE);
BytesPerWord:=sizeof(double);
writeln('This system uses ',BytesPerWord,' bytes per DOUBLE PRECISION word.');
writeln(HLINE);
writeln('Array size = ',N,', Offset = ',OFFSET);
writeln('Total memory required = ',3*BytesPerWord*(N/1048576),' MB.');
writeln('Each test is run ',NTIMES,' times, but only');
writeln('the *best* time for each is used.');
writeln(HLINE);
writeln('writelning one line per active thread....');
{ Get initial value for system clock. }
for j:=0 to N-1 do
begin
a[j]:=1;
b[j]:=2;
c[j]:=0;
end;
writeln(HLINE);
quantum:=checktick;
if quantum>=1 then
writeln('Your clock granularity/precision appears to be ',quantum,
' microseconds.')
else
writeln('Your clock granularity appears to be '+
'less than one microsecond.');
t:=mysecond;
for j:=0 to N-1 do
a[j]:=2*a[j];
t:=1E6*(mysecond-t);
writeln('Each test below will take on the order of ',t,
' microseconds.');
writeln(' (= ',t/quantum,' clock ticks)');
writeln('Increase the size of the arrays if this shows that');
writeln('you are not getting at least 20 clock ticks per test.');
writeln(HLINE);
writeln('WARNING -- The above is only a rough guideline.');
writeln('For best results, please be sure you know the');
writeln('precision of your system timer.');
writeln(HLINE);
{ --- MAIN LOOP --- repeat test cases NTIMES times --- }
scalar:=3;
for k:=0 to NTIMES-1 do
begin
times[0,k]:=mysecond();
{$ifdef TUNED}
tuned_STREAM_Copy();
{$else}
for j:=0 to N-1 do
c[j]:=a[j];
{$endif}
times[0,k]:=mysecond-times[0,k];
times[1,k]:=mysecond;
{$ifdef TUNED}
tuned_STREAM_Scale(scalar);
{$else}
for j:=0 to N-1 do
b[j]:=scalar*c[j];
{$endif}
times[1,k]:=mysecond-times[1,k];
times[2,k]:=mysecond;
{$ifdef TUNED}
tuned_STREAM_Add();
{$else}
for j:=0 to N-1 do
c[j]:=a[j]+b[j];
{$endif}
times[2,k]:=mysecond-times[2,k];
times[3,k]:=mysecond;
{$ifdef TUNED}
tuned_STREAM_Triad(scalar);
{$else}
for j:=0 to N-1 do
a[j]:=b[j]+scalar*c[j];
{$endif}
times[3,k]:=mysecond-times[3,k];
end;
{ --- SUMMARY --- }
for k:=1 to NTIMES-1 do { note -- skip first iteration }
for j:=0 to 3 do
begin
avgtime[j]:=avgtime[j] + times[j,k];
mintime[j]:=MIN(mintime[j], times[j,k]);
maxtime[j]:=MAX(maxtime[j], times[j,k]);
end;
writeln('Function Rate (MB/s) Avg time Min time Max time');
for j:=0 to 3 do
begin
avgtime[j]:=avgtime[j]/(NTIMES-1);
writeln(labels[j]:11,
1E-6*bytes[j]/mintime[j]:11:4,
avgtime[j]:11:4,
mintime[j]:11:4,
maxtime[j]:11:4);
end;
writeln(HLINE);
{ --- Check Results --- }
checkSTREAMresults;
writeln(HLINE);
end.