mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 23:07:55 +02:00
432 lines
11 KiB
ObjectPascal
432 lines
11 KiB
ObjectPascal
program stream;
|
|
|
|
{$ifdef unix}
|
|
uses baseunix,unix;
|
|
{$endif}
|
|
|
|
{$ifdef windows}
|
|
uses windows;
|
|
{$endif}
|
|
|
|
{-----------------------------------------------------------------------}
|
|
{ Original code developed by John D. McCalpin }
|
|
{ Programmers: John D. McCalpin }
|
|
{ Joe R. Zagar }
|
|
{ Pascal conversion: Daniel Mantione }
|
|
{ }
|
|
{ This program measures memory transfer rates in MB/s for simple }
|
|
{ computational kernels coded in Pascal. }
|
|
{-----------------------------------------------------------------------}
|
|
{ Copyright 1991-2005: John D. McCalpin }
|
|
{-----------------------------------------------------------------------}
|
|
{ License: }
|
|
{ 1. You are free to use this program and/or to redistribute }
|
|
{ this program. }
|
|
{ 2. You are free to modify this program for your own use, }
|
|
{ including commercial use, subject to the publication }
|
|
{ restrictions in item 3. }
|
|
{ 3. You are free to publish results obtained from running this }
|
|
{ program, or from works that you derive from this program, }
|
|
{ with the following limitations: }
|
|
{ 3a. In order to be referred to as "STREAM benchmark results", }
|
|
{ published results must be in conformance to the STREAM }
|
|
{ Run Rules, (briefly reviewed below) published at }
|
|
{ http://www.cs.virginia.edu/stream/ref.html }
|
|
{ and incorporated herein by reference. }
|
|
{ As the copyright holder, John McCalpin retains the }
|
|
{ right to determine conformity with the Run Rules. }
|
|
{ 3b. Results based on modified source code or on runs not in }
|
|
{ accordance with the STREAM Run Rules must be clearly }
|
|
{ labelled whenever they are published. Examples of }
|
|
{ proper labelling include: }
|
|
{ "tuned STREAM benchmark results" }
|
|
{ "based on a variant of the STREAM benchmark code" }
|
|
{ Other comparable, clear and reasonable labelling is }
|
|
{ acceptable. }
|
|
{ 3c. Submission of results to the STREAM benchmark web site }
|
|
{ is encouraged, but not required. }
|
|
{ 4. Use of this program or creation of derived works based on this }
|
|
{ program constitutes acceptance of these licensing restrictions. }
|
|
{ 5. Absolutely no warranty is expressed or implied. }
|
|
{-----------------------------------------------------------------------}
|
|
|
|
{ INSTRUCTIONS:
|
|
*
|
|
* 1) Stream requires a good bit of memory to run. Adjust the
|
|
* value of 'N' (below) to give a 'timing calibration' of
|
|
* at least 20 clock-ticks. This will provide rate estimates
|
|
* that should be good to about 5% precision.
|
|
}
|
|
|
|
const N = 2000000;
|
|
NTIMES = 10;
|
|
OFFSET = 0;
|
|
|
|
{
|
|
* 3) Compile the code with full optimization. Many compilers
|
|
* generate unreasonably bad code before the optimizer tightens
|
|
* things up. If the results are unreasonably good, on the
|
|
* other hand, the optimizer might be too smart for me!
|
|
*
|
|
* Try compiling with:
|
|
* cc -O stream_omp.c -o stream_omp
|
|
*
|
|
* This is known to work on Cray, SGI, IBM, and Sun machines.
|
|
*
|
|
*
|
|
* 4) Mail the results to mccalpin@cs.virginia.edu
|
|
* Be sure to include:
|
|
* a) computer hardware model number and software revision
|
|
* b) the compiler flags
|
|
* c) all of the output from the test case.
|
|
* Thanks!
|
|
*
|
|
}
|
|
|
|
const HLINE = '-------------------------------------------------------------';
|
|
|
|
inf = 1/0;
|
|
|
|
var a,b,c:array[0..N+OFFSET-1] of double;
|
|
|
|
avgtime:array[0..3] of double = (0,0,0,0);
|
|
maxtime:array[0..3] of double = (0,0,0,0);
|
|
mintime:array[0..3] of double = (inf,inf,inf,inf);
|
|
|
|
labels:array[0..3] of string[16]= ('Copy:',
|
|
'Scale:',
|
|
'Add:',
|
|
'Triad:');
|
|
|
|
bytes:array[0..3] of cardinal = (
|
|
2 * sizeof(double) * N,
|
|
2 * sizeof(double) * N,
|
|
3 * sizeof(double) * N,
|
|
3 * sizeof(double) * N
|
|
);
|
|
|
|
const M=20;
|
|
|
|
function min(a,b:longint):longint;inline;
|
|
|
|
begin
|
|
if a>b then
|
|
min:=b
|
|
else
|
|
min:=a;
|
|
end;
|
|
|
|
function max(a,b:longint):longint;inline;
|
|
|
|
begin
|
|
if a>b then
|
|
max:=a
|
|
else
|
|
max:=b;
|
|
end;
|
|
|
|
function min(a,b:double):double;inline;
|
|
|
|
begin
|
|
if a>b then
|
|
min:=b
|
|
else
|
|
min:=a;
|
|
end;
|
|
|
|
function max(a,b:double):double;inline;
|
|
|
|
begin
|
|
if a>b then
|
|
max:=a
|
|
else
|
|
max:=b;
|
|
end;
|
|
|
|
procedure tuned_STREAM_Copy;
|
|
|
|
var j:longint;
|
|
|
|
begin
|
|
for j:=0 to N-1 do
|
|
c[j]:=a[j];
|
|
end;
|
|
|
|
procedure tuned_STREAM_Scale(scalar:double);
|
|
|
|
var j:longint;
|
|
|
|
begin
|
|
for j:=0 to N-1 do
|
|
b[j]:=scalar*c[j];
|
|
end;
|
|
|
|
procedure tuned_STREAM_Add;
|
|
|
|
var j:longint;
|
|
|
|
begin
|
|
for j:=0 to N-1 do
|
|
c[j]:=a[j]+b[j];
|
|
end;
|
|
|
|
procedure tuned_STREAM_Triad(scalar:double);
|
|
|
|
var j:longint;
|
|
|
|
begin
|
|
for j:=0 to N-1 do
|
|
a[j]:=b[j]+scalar*c[j];
|
|
end;
|
|
|
|
{$ifdef unix}
|
|
{$define have_mysecond}
|
|
function mysecond:double;
|
|
|
|
var tp:timeval;
|
|
tzp:timezone;
|
|
|
|
begin
|
|
fpgettimeofday(@tp,@tzp);
|
|
mysecond:=double(tp.tv_sec)+double(tp.tv_usec)*1e-6;
|
|
end;
|
|
{$endif}
|
|
|
|
{$ifdef windows}
|
|
{$define have_mysecond}
|
|
function mysecond:double;
|
|
|
|
begin
|
|
mysecond:=gettickcount*1e-3;
|
|
end;
|
|
{$endif}
|
|
|
|
{$ifndef have_mysecond}
|
|
{$error Please implement a mysecond for your platform.}
|
|
{$endif}
|
|
|
|
function checktick:longint;
|
|
|
|
var i,minDelta,Delta:longint;
|
|
t1,t2:double;
|
|
timesfound:array[0..M-1] of double;
|
|
|
|
begin
|
|
{ Collect a sequence of M unique time values from the system. }
|
|
for i:=0 to M-1 do
|
|
begin
|
|
t1:=mysecond;
|
|
t2:=t1;
|
|
while t2-t1<1E-6 do
|
|
t2:=mysecond;
|
|
t1:=t2;
|
|
timesfound[i]:=t1;
|
|
end;
|
|
|
|
{
|
|
* Determine the minimum difference between these M values.
|
|
* This result will be our estimate (in microseconds) for the
|
|
* clock granularity.
|
|
}
|
|
|
|
minDelta:=1000000;
|
|
for i:=1 to M-1 do
|
|
begin
|
|
Delta:=trunc(1E6*(timesfound[i]-timesfound[i-1]));
|
|
minDelta:=MIN(minDelta,MAX(Delta,0));
|
|
end;
|
|
|
|
checktick:=minDelta;
|
|
end;
|
|
|
|
procedure checkSTREAMresults;
|
|
|
|
var aj,bj,cj,scalar:double;
|
|
asum,bsum,csum:double;
|
|
epsilon:double;
|
|
j,k:longint;
|
|
|
|
begin
|
|
{ reproduce initialization }
|
|
aj:=1;
|
|
bj:=2;
|
|
cj:=0;
|
|
{ a[] is modified during timing check }
|
|
aj:=2*aj;
|
|
{ now execute timing loop }
|
|
scalar:=3;
|
|
for k:=0 to NTIMES-1 do
|
|
begin
|
|
cj:=aj;
|
|
bj:=scalar*cj;
|
|
cj:=aj+bj;
|
|
aj:=bj+scalar*cj;
|
|
end;
|
|
aj:=aj*N;
|
|
bj:=bj*N;
|
|
cj:=cj*N;
|
|
|
|
asum:=0;
|
|
bsum:=0;
|
|
csum:=0;
|
|
for j:=0 to N-1 do
|
|
begin
|
|
asum:=asum+a[j];
|
|
bsum:=bsum+b[j];
|
|
csum:=csum+c[j];
|
|
end;
|
|
{$ifdef VERBOSE}
|
|
writeln('Results Comparison: ');
|
|
writeln(' Expected : ',aj,' ',bj,' ',cj);
|
|
writeln(' Observed : ',asum,' ',bsum,' ',csum);
|
|
{$endif}
|
|
|
|
epsilon:=1e-8;
|
|
|
|
if abs(aj-asum)/asum>epsilon then
|
|
begin
|
|
writeln('Failed Validation on array a');
|
|
writeln(' Expected : ',aj);
|
|
writeln(' Observed : ',asum);
|
|
end
|
|
else if abs(bj-bsum)/bsum>epsilon then
|
|
begin
|
|
writeln('Failed Validation on array b');
|
|
writeln(' Expected : ',bj);
|
|
writeln(' Observed : ',bsum);
|
|
end
|
|
else if abs(cj-csum)/csum>epsilon then
|
|
begin
|
|
writeln('Failed Validation on array c');
|
|
writeln(' Expected : ',cj);
|
|
writeln(' Observed : ',csum);
|
|
end
|
|
else
|
|
writeln('Solution Validates');
|
|
end;
|
|
|
|
var quantum:longint;
|
|
BytesPerWord:longint;
|
|
j,k:longint;
|
|
scalar,t:double;
|
|
times:array[0..3,0..NTIMES-1] of double;
|
|
|
|
begin
|
|
{ --- SETUP --- determine precision and check timing --- }
|
|
writeln(HLINE);
|
|
writeln('STREAM version Revision: 5.6');
|
|
writeln(HLINE);
|
|
BytesPerWord:=sizeof(double);
|
|
writeln('This system uses ',BytesPerWord,' bytes per DOUBLE PRECISION word.');
|
|
|
|
writeln(HLINE);
|
|
writeln('Array size = ',N,', Offset = ',OFFSET);
|
|
writeln('Total memory required = ',3*BytesPerWord*(N/1048576),' MB.');
|
|
writeln('Each test is run ',NTIMES,' times, but only');
|
|
writeln('the *best* time for each is used.');
|
|
|
|
writeln(HLINE);
|
|
writeln('writelning one line per active thread....');
|
|
|
|
{ Get initial value for system clock. }
|
|
for j:=0 to N-1 do
|
|
begin
|
|
a[j]:=1;
|
|
b[j]:=2;
|
|
c[j]:=0;
|
|
end;
|
|
|
|
writeln(HLINE);
|
|
|
|
quantum:=checktick;
|
|
if quantum>=1 then
|
|
writeln('Your clock granularity/precision appears to be ',quantum,
|
|
' microseconds.')
|
|
else
|
|
writeln('Your clock granularity appears to be '+
|
|
'less than one microsecond.');
|
|
|
|
t:=mysecond;
|
|
for j:=0 to N-1 do
|
|
a[j]:=2*a[j];
|
|
t:=1E6*(mysecond-t);
|
|
|
|
writeln('Each test below will take on the order of ',t,
|
|
' microseconds.');
|
|
writeln(' (= ',t/quantum,' clock ticks)');
|
|
writeln('Increase the size of the arrays if this shows that');
|
|
writeln('you are not getting at least 20 clock ticks per test.');
|
|
|
|
writeln(HLINE);
|
|
|
|
writeln('WARNING -- The above is only a rough guideline.');
|
|
writeln('For best results, please be sure you know the');
|
|
writeln('precision of your system timer.');
|
|
writeln(HLINE);
|
|
|
|
{ --- MAIN LOOP --- repeat test cases NTIMES times --- }
|
|
|
|
scalar:=3;
|
|
for k:=0 to NTIMES-1 do
|
|
begin
|
|
times[0,k]:=mysecond();
|
|
{$ifdef TUNED}
|
|
tuned_STREAM_Copy();
|
|
{$else}
|
|
for j:=0 to N-1 do
|
|
c[j]:=a[j];
|
|
{$endif}
|
|
times[0,k]:=mysecond-times[0,k];
|
|
|
|
times[1,k]:=mysecond;
|
|
{$ifdef TUNED}
|
|
tuned_STREAM_Scale(scalar);
|
|
{$else}
|
|
for j:=0 to N-1 do
|
|
b[j]:=scalar*c[j];
|
|
{$endif}
|
|
times[1,k]:=mysecond-times[1,k];
|
|
times[2,k]:=mysecond;
|
|
{$ifdef TUNED}
|
|
tuned_STREAM_Add();
|
|
{$else}
|
|
for j:=0 to N-1 do
|
|
c[j]:=a[j]+b[j];
|
|
{$endif}
|
|
times[2,k]:=mysecond-times[2,k];
|
|
times[3,k]:=mysecond;
|
|
{$ifdef TUNED}
|
|
tuned_STREAM_Triad(scalar);
|
|
{$else}
|
|
for j:=0 to N-1 do
|
|
a[j]:=b[j]+scalar*c[j];
|
|
{$endif}
|
|
times[3,k]:=mysecond-times[3,k];
|
|
end;
|
|
|
|
{ --- SUMMARY --- }
|
|
for k:=1 to NTIMES-1 do { note -- skip first iteration }
|
|
for j:=0 to 3 do
|
|
begin
|
|
avgtime[j]:=avgtime[j] + times[j,k];
|
|
mintime[j]:=MIN(mintime[j], times[j,k]);
|
|
maxtime[j]:=MAX(maxtime[j], times[j,k]);
|
|
end;
|
|
|
|
writeln('Function Rate (MB/s) Avg time Min time Max time');
|
|
for j:=0 to 3 do
|
|
begin
|
|
avgtime[j]:=avgtime[j]/(NTIMES-1);
|
|
writeln(labels[j]:11,
|
|
1E-6*bytes[j]/mintime[j]:11:4,
|
|
avgtime[j]:11:4,
|
|
mintime[j]:11:4,
|
|
maxtime[j]:11:4);
|
|
end;
|
|
writeln(HLINE);
|
|
|
|
{ --- Check Results --- }
|
|
checkSTREAMresults;
|
|
writeln(HLINE);
|
|
end.
|