LazStats: More general usability of the calculations in DescriptiveUnit
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7739 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
parent
a38794b4f8
commit
6f5659920f
@ -1563,6 +1563,11 @@
|
||||
<DebugInfoType Value="dsDwarf2"/>
|
||||
</Debugging>
|
||||
<LinkSmart Value="True"/>
|
||||
<Options>
|
||||
<Win32>
|
||||
<GraphicApplication Value="True"/>
|
||||
</Win32>
|
||||
</Options>
|
||||
</Linking>
|
||||
</CompilerOptions>
|
||||
<Debugging>
|
||||
|
@ -15,48 +15,52 @@ type
|
||||
|
||||
{ TDescriptiveStats }
|
||||
|
||||
TDescriptiveOption = (doAlternativeQuartiles, doPercentileRanks, doCasewiseDeletion);
|
||||
TDescriptiveOptions = set of TDescriptiveOption;
|
||||
TQuartileMethod = 1..8;
|
||||
TQuartile = 1..3;
|
||||
TQuartiles = array[TQuartileMethod, TQuartile] of Double;
|
||||
|
||||
TDescriptiveStats = class
|
||||
private
|
||||
FDataGrid: TStringGrid;
|
||||
FColIndex: Integer;
|
||||
FConfLevel: Double; // usually 0.95
|
||||
FColsSelected: IntDyneVec;
|
||||
FMean, FStdErrorMean, FDeltaMean: Double;
|
||||
FValues: DblDyneVec;
|
||||
FMean, FStdErrorMean: Double;
|
||||
FMin, FMax: Double;
|
||||
FSum: Double;
|
||||
FVariance, FStdDev: Double;
|
||||
FSkew, FStdErrorSkew: Double;
|
||||
FKurtosis, FStdErrorKurtosis: Double;
|
||||
FFirstQuartile, FMedian, FThirdQuartile: Double;
|
||||
FCategoryValues, FPercentiles: DblDyneVec;
|
||||
FFreqValues: IntDyneVec;
|
||||
FOptions: TDescriptiveOptions;
|
||||
FNumCases: Integer;
|
||||
FQuartiles: array[TQuartileMethod, TQuartile] of Double;
|
||||
procedure Calc_AlternativeQuartiles(const AValues: DblDyneVec);
|
||||
function Calc_DeltaMean(AStdErrorOfMean: Double): Double;
|
||||
procedure Calc_Moments(const AValues: DblDyneVec; AMean: Double;
|
||||
out M2, M3, M4: Double);
|
||||
procedure Calc_Quartiles(const AValues: DblDyneVec; out Q1, Median, Q3: Double);
|
||||
procedure Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; ANumCases: Integer;
|
||||
out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
|
||||
procedure Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
|
||||
out ASum, ASumOfSquares, AMin, AMax: Double);
|
||||
procedure CollectValues(out AValues: DblDyneVec);
|
||||
procedure PercentileRank(const AValues: DblDyneVec;
|
||||
out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
|
||||
procedure Calc_Quartiles;
|
||||
procedure Calc_Skew_Kurtosis;
|
||||
// function GetMeanLimits(AIndex: Integer; AConfLevel: Double): Double;
|
||||
function GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
|
||||
procedure Reset;
|
||||
public
|
||||
constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec; AConfLevel: Double);
|
||||
procedure Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
|
||||
procedure WriteToReport(AVarName: String; ADecPlaces: Integer; AReport: TStrings);
|
||||
constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil);
|
||||
procedure Analyze(AColIndex: Integer);
|
||||
procedure CalcQuartiles(out AQuartiles: TQuartiles);
|
||||
procedure PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
|
||||
|
||||
property FirstQuartile: Double read FFirstQuartile;
|
||||
property Kurtosis: Double read FKurtosis;
|
||||
property Max: Double read FMax;
|
||||
property Median: Double read FMedian;
|
||||
property Mean: Double read FMean;
|
||||
property MeanLowerLimit[AConfLevel: Double]: Double index 0 read GetMeanLimits;
|
||||
property MeanUpperLimit[AConfLevel: Double]: Double index 1 read GetMeanLimits;
|
||||
property Min: Double read FMin;
|
||||
property NumCases: Integer read FNumCases;
|
||||
property Skew: Double read FSkew;
|
||||
property StdErrorSkew: Double read FStdErrorSkew;
|
||||
property StdDev: Double read FStdDev;
|
||||
// more can be added...
|
||||
property StdErrorKurtosis: Double read FStdErrorKurtosis;
|
||||
property StdErrorMean: Double read FStdErrorMean;
|
||||
property StdErrorSkey: Double read FStdErrorSkew;
|
||||
property ThirdQuartile: Double read FThirdQuartile;
|
||||
property Variance: Double read FVariance;
|
||||
end;
|
||||
|
||||
|
||||
@ -90,6 +94,7 @@ type
|
||||
|
||||
private
|
||||
function GetReportFrame(APageIndex: Integer): TReportFrame;
|
||||
procedure WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings);
|
||||
procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec;
|
||||
AMean, AStdDev: Double);
|
||||
|
||||
@ -113,7 +118,7 @@ implementation
|
||||
|
||||
uses
|
||||
Math,
|
||||
Utils;
|
||||
Utils, MathUnit, GridProcs;
|
||||
|
||||
|
||||
{===============================================================================
|
||||
@ -125,218 +130,149 @@ uses
|
||||
*==============================================================================}
|
||||
|
||||
constructor TDescriptiveStats.Create(ADataGrid: TStringGrid;
|
||||
AColsSelected: IntDyneVec; AConfLevel: Double);
|
||||
AColsSelected: IntDyneVec = nil);
|
||||
begin
|
||||
inherited Create;
|
||||
FDataGrid := ADataGrid;
|
||||
FColsSelected := AColsSelected;
|
||||
FConfLevel := AConfLevel;
|
||||
Reset;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
|
||||
var
|
||||
SS: Double;
|
||||
values: DblDyneVec;
|
||||
M2, M3, M4: Double;
|
||||
procedure TDescriptiveStats.Analyze(AColIndex: Integer);
|
||||
begin
|
||||
FMean := NaN;
|
||||
FVariance := NaN;
|
||||
FStdDev := NaN;
|
||||
FStdErrorMean := NaN;
|
||||
FDeltaMean := NaN;
|
||||
FSkew := NaN;
|
||||
FStdErrorSkew := NaN;
|
||||
Reset;
|
||||
|
||||
FColIndex := AColIndex;
|
||||
FOptions := AOptions;
|
||||
|
||||
CollectValues(values);
|
||||
FNumCases := Length(values);
|
||||
if Length(FColsSelected) = 0 then
|
||||
FValues := CollectValues(FDataGrid, AColIndex, FColsSelected)
|
||||
else
|
||||
FValues := CollectValues(FDataGrid, AColIndex);
|
||||
FNumCases := Length(FValues);
|
||||
|
||||
SortOnX(values);
|
||||
SortOnX(FValues);
|
||||
|
||||
Calc_Sum_SumOfSquares_Min_Max(values, FSum, SS, FMin, FMax);
|
||||
if FNumCases > 0 then begin
|
||||
FMean := FSum / FNumCases;
|
||||
if FNumCases > 1 then
|
||||
MathUnit.Calc_MaxMin(FValues, FMax, FMin);
|
||||
MathUnit.Calc_MeanVarStdDev(FValues, FMean, FVariance, FStdDev);
|
||||
|
||||
if FNumCases > 1 then
|
||||
FStdErrorMean := sqrt(FVariance / FNumCases);
|
||||
|
||||
Calc_Skew_Kurtosis;
|
||||
Calc_Quartiles;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_Quartiles;
|
||||
begin
|
||||
FFirstQuartile := Quartiles(2, 0.25, FNumCases, FValues);
|
||||
FMedian := Quartiles(2, 0.5, FNumCases, FValues);
|
||||
FThirdQuartile := Quartiles(2, 0.75, FNumCases, FValues);
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.CalcQuartiles(out AQuartiles: TQuartiles);
|
||||
var
|
||||
q: TQuartile;
|
||||
m: TQuartileMethod;
|
||||
begin
|
||||
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
|
||||
for q := Low(TQuartile) to High(TQuartile) do
|
||||
AQuartiles[m, q] := Quartiles(m, 0.25*q, FNumCases, FValues);
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_Skew_Kurtosis;
|
||||
|
||||
procedure Moments(out M2, M3, M4: Double);
|
||||
var
|
||||
i: Integer;
|
||||
dev, devSqr: Double;
|
||||
begin
|
||||
M2 := 0;
|
||||
M3 := 0;
|
||||
M4 := 0;
|
||||
for i := 0 to High(FValues) do
|
||||
begin
|
||||
FVariance := (SS - sqr(FSum) / FNumCases) / (FNumCases - 1);
|
||||
FStdDev := sqrt(FVariance);
|
||||
FStdErrorMean := sqrt(FVariance / FNumCases);
|
||||
FDeltaMean := Calc_DeltaMean(FStdErrorMean);
|
||||
dev := FValues[i] - FMean;
|
||||
devSqr := Sqr(dev);
|
||||
M2 := M2 + devSqr;
|
||||
M3 := M3 + dev * devSqr;
|
||||
M4 := M4 + sqr(devSqr);
|
||||
end;
|
||||
end;
|
||||
|
||||
Calc_Moments(values, FMean, M2, M3, M4);
|
||||
Calc_Skew_Kurtosis(FStdDev, M2, M3, M4, FNumCases, FSkew, FStdErrorSkew, FKurtosis, FStdErrorKurtosis);
|
||||
var
|
||||
num, denom: Double;
|
||||
stdDev3, stdDev4: Double;
|
||||
M2, M3, M4: Double;
|
||||
begin
|
||||
FSkew := NaN;
|
||||
FStdErrorSkew := NaN;
|
||||
FKurtosis := NaN;
|
||||
FStdErrorKurtosis := NaN;
|
||||
|
||||
Calc_Quartiles(values, FFirstQuartile, FMedian, FThirdQuartile);
|
||||
if FNumCases < 2 then
|
||||
exit;
|
||||
|
||||
if (doAlternativeQuartiles in FOptions) then
|
||||
Calc_AlternativeQuartiles(values);
|
||||
if (doPercentileRanks in FOptions) then
|
||||
PercentileRank(values, FCategoryValues, FPercentiles, FFreqValues);
|
||||
stdDev3 := FStdDev * FStdDev * FStdDev;
|
||||
stdDev4 := StdDev3 * FStdDev;
|
||||
|
||||
Moments(M2, M3, M4);
|
||||
|
||||
if FNumCases > 2 then
|
||||
begin
|
||||
FSkew := FNumCases * M3 / ((FNumCases - 1) * (FNumCases - 3) * stdDev3);
|
||||
|
||||
num := 6.0 * FNumCases * (FNumCases - 1);
|
||||
denom := (FNumCases - 2) * (FNumCases + 1) * (FNumCases + 3);
|
||||
FStdErrorSkew := sqrt(num / denom);
|
||||
end;
|
||||
|
||||
if FNumCases > 3 then
|
||||
begin
|
||||
num := FNumCases * (FNumCases + 1) * M4 - 3 * M2 * M2 * (FNumCases - 1);
|
||||
denom := (FNumCases - 1) * (FNumCases - 2) * (FNumCases - 3) * stdDev4;
|
||||
FKurtosis := num / denom;
|
||||
|
||||
num := 4.0 * (sqr(FNumCases) - 1) * sqr(FStdErrorSkew);
|
||||
denom := (FNumCases - 3) * (FNumCases + 5);
|
||||
FStdErrorKurtosis := sqrt(num / denom);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_AlternativeQuartiles(const AValues: DblDyneVec);
|
||||
var
|
||||
nCases: Integer;
|
||||
q: TQuartile;
|
||||
m: TQuartileMethod;
|
||||
begin
|
||||
nCases := Length(AValues);
|
||||
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
|
||||
for q := Low(TQuartile) to High(TQuartile) do
|
||||
FQuartiles[m, q] := Quartiles(m, 0.25*q, nCases, AValues);
|
||||
end;
|
||||
|
||||
|
||||
// Tolerance around the mean
|
||||
function TDescriptiveStats.Calc_DeltaMean(AStdErrorOfMean: Double): Double;
|
||||
function TDescriptiveStats.GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
|
||||
var
|
||||
alpha: Double;
|
||||
confLev: Double;
|
||||
DOF: Integer;
|
||||
delta: Double;
|
||||
begin
|
||||
alpha := (1 - FConfLevel) / 2;
|
||||
alpha := (1 - AConfLevel) / 2;
|
||||
confLev := 1 - alpha;
|
||||
|
||||
if FNumCases < 120 then
|
||||
begin
|
||||
DOF := FNumCases - 1;
|
||||
Result := AStdErrorOfMean * InverseT(confLev, DOF);
|
||||
delta := FStdErrorMean * InverseT(confLev, DOF);
|
||||
end else
|
||||
Result := AStdErrorOfMean * InverseZ(confLev);
|
||||
end;
|
||||
delta := FStdErrorMean * InverseZ(confLev);
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_Moments(const AValues: DblDyneVec;
|
||||
AMean: Double; out M2, M3, M4: Double);
|
||||
var
|
||||
i: Integer;
|
||||
dev, devSqr: Double;
|
||||
begin
|
||||
M2 := 0;
|
||||
M3 := 0;
|
||||
M4 := 0;
|
||||
for i := 0 to High(AValues) do
|
||||
begin
|
||||
dev := AValues[i] - AMean;
|
||||
devSqr := Sqr(dev);
|
||||
M2 := M2 + devSqr;
|
||||
M3 := M3 + dev * devSqr;
|
||||
M4 := M4 + sqr(devSqr);
|
||||
case AIndex of
|
||||
0: Result := FMean - delta;
|
||||
1: Result := FMean + delta;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_Quartiles(const AValues: DblDyneVec;
|
||||
out Q1, Median, Q3: Double);
|
||||
var
|
||||
n: Integer;
|
||||
begin
|
||||
n := Length(AValues);
|
||||
Q1 := Quartiles(2, 0.25, n, AValues);
|
||||
Median := Quartiles(2, 0.5, n, AValues);
|
||||
Q3 := Quartiles(2, 0.75, n, AValues);
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double;
|
||||
ANumCases: Integer; out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
|
||||
var
|
||||
num, denom: Double;
|
||||
stdDev3, stdDev4: Double;
|
||||
begin
|
||||
Skew := NaN;
|
||||
StdErrorSkew := NaN;
|
||||
Kurtosis := NaN;
|
||||
StdErrorKurtosis := NaN;
|
||||
|
||||
stdDev3 := StdDev * StdDev * StdDev;
|
||||
stdDev4 := StdDev3 * StdDev;
|
||||
|
||||
if ANumCases > 2 then
|
||||
begin
|
||||
Skew := ANumCases * M3 / ((ANumCases - 1) * (ANumCases - 3) * stdDev3);
|
||||
|
||||
num := 6.0 * ANumCases * (ANumCases - 1);
|
||||
denom := (ANumCases - 2) * (ANumCases + 1) * (ANumCases + 3);
|
||||
StdErrorSkew := sqrt(num / denom);
|
||||
end;
|
||||
|
||||
if ANumCases > 3 then
|
||||
begin
|
||||
num := ANumCases * (ANumCases + 1) * M4 - 3 * M2 * M2 * (ANumCases - 1);
|
||||
denom := (ANumCases - 1) * (ANumCases - 2) * (ANumCases - 3) * stdDev4;
|
||||
Kurtosis := num / denom;
|
||||
|
||||
num := 4.0 * (sqr(ANumCases) - 1) * sqr(StdErrorSkew);
|
||||
denom := (ANumCases - 3) * (ANumCases + 5);
|
||||
StdErrorKurtosis := sqrt(num / denom);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
|
||||
out ASum, ASumOfSquares, AMin, AMax: Double);
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
ASum := 0.0;
|
||||
ASumOfSquares := 0;
|
||||
AMin := Infinity;
|
||||
AMax := -Infinity;
|
||||
|
||||
for i := 0 to High(AValues) do
|
||||
begin
|
||||
ASum := ASum + AValues[i];
|
||||
ASumOfSquares := ASumOfSquares + sqr(AValues[i]);
|
||||
if AValues[i] < AMin then AMin := AValues[i];
|
||||
if AValues[i] > AMax then AMax := AValues[i];
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.CollectValues(out AValues: DblDyneVec);
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
AValues := nil; // silence the compiler
|
||||
SetLength(AValues, NoCases);
|
||||
n := 0;
|
||||
for i := 1 to NoCases do
|
||||
begin
|
||||
if (doCasewiseDeletion in FOptions) then
|
||||
begin
|
||||
// Do not consider a case when any variable is empty
|
||||
if not ValidValue(i, FColIndex) then
|
||||
continue;
|
||||
end else
|
||||
begin
|
||||
// Do not consider a case when the current variable is empty
|
||||
if not GoodRecord(i, Length(FColsSelected), FColsSelected) then
|
||||
continue;
|
||||
end;
|
||||
|
||||
if TryStrToFloat(FDataGrid.Cells[FColIndex, i], AValues[n]) then
|
||||
inc(n)
|
||||
else
|
||||
raise Exception.CreateFmt('Invalid number: variable "%s", case "%s"',
|
||||
[FDataGrid.cells[FColIndex, 0], FDataGrid.Cells[0, i]]);
|
||||
end;
|
||||
SetLength(AValues, n);
|
||||
end;
|
||||
|
||||
|
||||
// Computes the percentile ranks of values stored in the data grid at the
|
||||
// loaded columns. The values are assumed to be sorted.
|
||||
procedure TDescriptiveStats.PercentileRank(const AValues: DblDyneVec; out
|
||||
ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
|
||||
procedure TDescriptiveStats.PercentileRank(out ACategoryValues, APercentiles: DblDyneVec;
|
||||
out AFreq: IntDyneVec);
|
||||
var
|
||||
i, nCases, iCat, nCategories: Integer;
|
||||
i, iCat, nCategories: Integer;
|
||||
lastCategoryValue: Double;
|
||||
cumFreqCentered: Double;
|
||||
cumFreq: Integer;
|
||||
@ -346,24 +282,23 @@ begin
|
||||
AFreq := nil;
|
||||
APercentiles := nil;
|
||||
|
||||
nCases := Length(AValues);
|
||||
SetLength(ACategoryValues, nCases); // over-dimension; will be trimmed later
|
||||
SetLength(AFreq, nCases);
|
||||
SetLength(ACategoryValues, FNumCases); // over-dimension; will be trimmed later
|
||||
SetLength(AFreq, FNumCases);
|
||||
|
||||
// Get count of unique values and frequencies of each
|
||||
lastCategoryValue := AValues[0];
|
||||
lastCategoryValue := FValues[0];
|
||||
ACategoryValues[0] := lastCategoryValue;
|
||||
AFreq[0] := 1;
|
||||
iCat := 0;
|
||||
for i := 1 to nCases-1 do
|
||||
for i := 1 to FNumCases-1 do
|
||||
begin
|
||||
if (lastCategoryValue = AValues[i]) then
|
||||
if (lastCategoryValue = FValues[i]) then
|
||||
AFreq[iCat] := AFreq[iCat] + 1
|
||||
else
|
||||
begin // new value
|
||||
inc(iCat);
|
||||
AFreq[iCat] := 1;
|
||||
lastCategoryValue := AValues[i];
|
||||
lastCategoryValue := FValues[i];
|
||||
ACategoryValues[iCat] := lastCategoryValue;
|
||||
end;
|
||||
end;
|
||||
@ -375,94 +310,32 @@ begin
|
||||
|
||||
// Get cumulative frequencies and percentile ranks
|
||||
SetLength(APercentiles, nCategories);
|
||||
APercentiles[0] := AFreq[0] * 0.5 / nCases;
|
||||
APercentiles[0] := AFreq[0] * 0.5 / FNumCases;
|
||||
cumFreq := AFreq[0];
|
||||
for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1
|
||||
begin
|
||||
cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point
|
||||
APercentiles[i] := cumFreqCentered / nCases;
|
||||
APercentiles[i] := cumFreqCentered / FNumCases;
|
||||
cumFreq := cumFreq + AFreq[i];
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveStats.WriteToReport(AVarName: String; ADecPlaces: Integer;
|
||||
AReport: TStrings);
|
||||
var
|
||||
w: Integer;
|
||||
nCategories: Integer;
|
||||
i: Integer;
|
||||
cumFreq: Integer;
|
||||
m: TQuartileMethod;
|
||||
procedure TDescriptiveStats.Reset;
|
||||
begin
|
||||
w := 10 + ADecPlaces - 3;
|
||||
AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
|
||||
AReport.Add('');
|
||||
AReport.Add('Number of cases: %*d', [W, FNumCases]);
|
||||
AReport.Add('Sum: %*.*f', [W, ADecPlaces, FSum]);
|
||||
AReport.Add('Mean: %*.*f', [W, ADecPlaces, FMean]);
|
||||
AReport.Add('Variance: %*.*f', [W, ADecPlaces, FVariance]);
|
||||
AReport.Add('Std.Dev.: %*.*f', [W, ADecPlaces, FStdDev]);
|
||||
AReport.Add('Std.Error of Mean %*.*f', [W, ADecPlaces, FStdErrorMean]);
|
||||
AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
|
||||
FConfLevel*100.0, ADecPlaces, FMean - FDeltaMean, ADecPlaces, FMean + FDeltaMean]);
|
||||
AReport.Add('');
|
||||
AReport.Add('Minimum: %*.*f', [W, ADecPlaces, FMin]);
|
||||
AReport.Add('Maximum: %*.*f', [W, ADecPlaces, FMax]);
|
||||
AReport.Add('Range: %*.*f', [W, ADecPlaces, FMax - FMin]);
|
||||
AReport.Add('');
|
||||
AReport.Add('Skewness: %*.*f', [W, ADecPlaces, FSkew]);
|
||||
AReport.Add('Std.Error of Skew: %*.*f', [W, ADecPlaces, FStdErrorSkew]);
|
||||
AReport.Add('Kurtosis: %*.*f', [W, ADecPlaces, FKurtosis]);
|
||||
AReport.Add('Std. Error of Kurtosis: %*.*f', [W, ADecPlaces, FStdErrorKurtosis]);
|
||||
AReport.Add('');
|
||||
AReport.Add('First Quartile: %*.*f', [W, ADecPlaces, FFirstQuartile]);
|
||||
AReport.Add('Median: %*.*f', [W, ADecPlaces, FMedian]);
|
||||
AReport.Add('Third Quartile: %*.*f', [W, ADecPlaces, FThirdQuartile]);
|
||||
AReport.Add('Interquartile range: %*.*f', [W, ADecPlaces, FThirdQuartile - FFirstQuartile]);
|
||||
|
||||
if (doAlternativeQuartiles in FOptions) then
|
||||
begin
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
|
||||
AReport.Add('');
|
||||
AReport.Add('Method First Quartile Median Third Quartile');
|
||||
AReport.Add('------ -------------- ---------- --------------');
|
||||
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
|
||||
AReport.Add(' %d %12.3f %12.3f %12.3f', [m, FQuartiles[m, 1], FQuartiles[m, 2], FQuartiles[m, 3]]);
|
||||
AReport.Add('');
|
||||
AReport.Add('NOTES:');
|
||||
AReport.Add('Method 1 is the weighted average at X[np] where ');
|
||||
AReport.Add(' n is no. of cases, p is percentile / 100');
|
||||
AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
|
||||
AReport.Add('Method 3 is the empirical distribution function.');
|
||||
AReport.Add('Method 4 is called the empirical distribution function - averaging.');
|
||||
AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
|
||||
AReport.Add('Method 6 is the closest observation method.');
|
||||
AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
|
||||
AReport.Add('Method 8 was used in an older Microsoft Excel version.');
|
||||
AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
|
||||
end;
|
||||
|
||||
if (doPercentileRanks in FOptions) then
|
||||
begin
|
||||
nCategories := Length(FCategoryValues);
|
||||
cumFreq := 0;
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
AReport.Add('PERCENTILE RANKS');
|
||||
AReport.Add('');
|
||||
AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
|
||||
AReport.Add('----------- --------- --------- ---------------');
|
||||
for i := 0 to nCategories-1 do
|
||||
begin
|
||||
cumFreq := cumFreq + FFreqValues[i];
|
||||
AReport.Add(' %8.3f %8d %8d %12.2f%%', [
|
||||
FCategoryValues[i], FFreqValues[i], cumFreq, FPercentiles[i]*100.0
|
||||
]);
|
||||
end;
|
||||
end;
|
||||
FValues := nil;
|
||||
FMean := NaN;
|
||||
FStdErrorMean := NaN;
|
||||
FMin := NaN;
|
||||
FMax := NaN;
|
||||
FVariance := NaN;
|
||||
FStdDev := NaN;
|
||||
FStdErrorMean := NaN;
|
||||
FSkew := NaN;
|
||||
FStdErrorSkew := NaN;
|
||||
FKurtosis := NaN;
|
||||
FStdErrorKurtosis := NaN;
|
||||
FNumCases := 0;
|
||||
end;
|
||||
|
||||
|
||||
@ -514,8 +387,7 @@ var
|
||||
page: TTabSheet;
|
||||
reportFrame: TReportFrame;
|
||||
lReport: TStrings;
|
||||
lDescrStats: TDescriptiveStats;
|
||||
options: TDescriptiveOptions;
|
||||
lStats: TDescriptiveStats;
|
||||
begin
|
||||
noSelected := SelList.Items.Count;
|
||||
if noSelected = 0 then
|
||||
@ -555,29 +427,23 @@ begin
|
||||
for i := 0 to NoSelected-1 do
|
||||
PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0];
|
||||
|
||||
// Prepare options
|
||||
options := [];
|
||||
if PercentileChk.Checked then Include(options, doPercentileRanks);
|
||||
if AllQuartilesChk.Checked then Include(options, doAlternativeQuartiles);
|
||||
if CaseChk.Checked then Include(options, doCasewiseDeletion);
|
||||
|
||||
//
|
||||
lReport := TStringList.Create;
|
||||
lDescrStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected, StrToFloat(CIEdit.Text)/100);
|
||||
if not CaseChk.Checked then
|
||||
lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid)
|
||||
else
|
||||
lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected);
|
||||
try
|
||||
for i := 0 to noSelected-1 do
|
||||
begin
|
||||
// Analyze the data and get descriptive stats
|
||||
lDescrStats.Analyze(selected[i], options);
|
||||
lStats.Analyze(selected[i]);
|
||||
|
||||
// Store z values, (value - mean) / stdDev, to grid, if needed
|
||||
zScoresToGrid(selected[i], selected, lDescrStats.Mean, lDescrStats.StdDev);
|
||||
zScoresToGrid(selected[i], selected, lStats.Mean, lStats.StdDev);
|
||||
|
||||
// Write descriptive stats to report
|
||||
lReport.Clear;
|
||||
lReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
|
||||
lReport.Add('');
|
||||
lDescrStats.WriteToReport(trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]),
|
||||
DecPlacesEdit.Value, lReport);
|
||||
WriteToReport(lStats, trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), lReport);
|
||||
|
||||
// Display report in the page of the variable
|
||||
reportFrame := GetReportFrame(i);
|
||||
@ -585,7 +451,7 @@ begin
|
||||
end;
|
||||
finally
|
||||
// Clean up
|
||||
lDescrStats.Free;
|
||||
lStats.Free;
|
||||
lReport.Free;
|
||||
end;
|
||||
end;
|
||||
@ -651,7 +517,6 @@ begin
|
||||
|
||||
for i := PageControl.PageCount-1 downto 1 do
|
||||
PageControl.Pages[i].Free;
|
||||
|
||||
PageControl.Pages[0].Caption := 'Report';
|
||||
|
||||
CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT);
|
||||
@ -698,12 +563,16 @@ var
|
||||
F: TReportFrame;
|
||||
begin
|
||||
inherited;
|
||||
|
||||
for i := 0 to PageControl.PageCount-1 do
|
||||
(*
|
||||
for i := PageControl.PageCount-1 downto 0 do
|
||||
begin
|
||||
F := GetReportFrame(i);
|
||||
if Assigned(F) then F.Clear;
|
||||
end;
|
||||
if i > 0 then
|
||||
PageControl.Pages[i].Free
|
||||
else begin
|
||||
F := GetReportFrame(i);
|
||||
if Assigned(F) then F.Clear;
|
||||
end;
|
||||
end; *)
|
||||
|
||||
lSelected := false;
|
||||
for i := 0 to VarList.Items.Count-1 do
|
||||
@ -733,6 +602,103 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveFrm.WriteToReport(Stats: TDescriptiveStats;
|
||||
AVarName: String; AReport: TStrings);
|
||||
var
|
||||
w: Integer;
|
||||
i: Integer;
|
||||
confLevel: Double;
|
||||
decPlaces: Integer;
|
||||
m: TQuartileMethod;
|
||||
Q: TQuartiles;
|
||||
cumFreq: Integer;
|
||||
nCategories: Integer;
|
||||
categories: DblDyneVec = nil;
|
||||
freq: IntDyneVec = nil;
|
||||
percentiles: DblDyneVec = nil;
|
||||
begin
|
||||
confLevel := StrToFloat(CIEdit.Text) / 100;
|
||||
decPlaces := DecPlacesEdit.Value;
|
||||
w := 10 + decPlaces - 3;
|
||||
|
||||
AReport.Clear;
|
||||
|
||||
AReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
|
||||
AReport.Add('');
|
||||
|
||||
AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
|
||||
AReport.Add('');
|
||||
AReport.Add('Number of cases: %*d', [W, Stats.NumCases]);
|
||||
// AReport.Add('Sum: %*.*f', [W, decPlaces, Stats.Sum]);
|
||||
AReport.Add('Mean: %*.*f', [W, decPlaces, Stats.Mean]);
|
||||
AReport.Add('Variance: %*.*f', [W, decPlaces, Stats.Variance]);
|
||||
AReport.Add('Std.Dev.: %*.*f', [W, decPlaces, Stats.StdDev]);
|
||||
AReport.Add('Std.Error of Mean %*.*f', [W, decPlaces, Stats.StdErrorMean]);
|
||||
AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
|
||||
confLevel*100.0, decPlaces, Stats.MeanLowerLimit[confLevel], decPlaces, Stats.MeanUpperLimit[confLevel]
|
||||
]);
|
||||
AReport.Add('');
|
||||
AReport.Add('Minimum: %*.*f', [W, decPlaces, Stats.Min]);
|
||||
AReport.Add('Maximum: %*.*f', [W, decPlaces, Stats.Max]);
|
||||
AReport.Add('Range: %*.*f', [W, decPlaces, Stats.Max - Stats.Min]);
|
||||
AReport.Add('');
|
||||
AReport.Add('Skewness: %*.*f', [W, decPlaces, Stats.Skew]);
|
||||
AReport.Add('Std.Error of Skew: %*.*f', [W, decPlaces, Stats.StdErrorSkew]);
|
||||
AReport.Add('Kurtosis: %*.*f', [W, decPlaces, Stats.Kurtosis]);
|
||||
AReport.Add('Std. Error of Kurtosis: %*.*f', [W, decPlaces, Stats.StdErrorKurtosis]);
|
||||
AReport.Add('');
|
||||
AReport.Add('First Quartile: %*.*f', [W, decPlaces, Stats.FirstQuartile]);
|
||||
AReport.Add('Median: %*.*f', [W, decPlaces, Stats.Median]);
|
||||
AReport.Add('Third Quartile: %*.*f', [W, decPlaces, Stats.ThirdQuartile]);
|
||||
AReport.Add('Interquartile range: %*.*f', [W, decPlaces, Stats.ThirdQuartile - Stats.FirstQuartile]);
|
||||
|
||||
if AllQuartilesChk.Checked then
|
||||
begin
|
||||
Stats.CalcQuartiles(Q);
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
|
||||
AReport.Add('');
|
||||
AReport.Add('Method First Quartile Median Third Quartile');
|
||||
AReport.Add('------ -------------- ---------- --------------');
|
||||
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
|
||||
AReport.Add(' %d %12.3f %12.3f %12.3f', [m, Q[m, 1], Q[m, 2], Q[m, 3]]);
|
||||
AReport.Add('');
|
||||
AReport.Add('NOTES:');
|
||||
AReport.Add('Method 1 is the weighted average at X[np] where ');
|
||||
AReport.Add(' n is no. of cases, p is percentile / 100');
|
||||
AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
|
||||
AReport.Add('Method 3 is the empirical distribution function.');
|
||||
AReport.Add('Method 4 is called the empirical distribution function - averaging.');
|
||||
AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
|
||||
AReport.Add('Method 6 is the closest observation method.');
|
||||
AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
|
||||
AReport.Add('Method 8 was used in an older Microsoft Excel version.');
|
||||
AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
|
||||
end;
|
||||
|
||||
if PercentileChk.Checked then
|
||||
begin
|
||||
Stats.PercentileRank(categories, percentiles, freq);
|
||||
nCategories := Length(categories);
|
||||
cumFreq := 0;
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
AReport.Add('PERCENTILE RANKS');
|
||||
AReport.Add('');
|
||||
AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
|
||||
AReport.Add('----------- --------- --------- ---------------');
|
||||
for i := 0 to nCategories-1 do
|
||||
begin
|
||||
cumFreq := cumFreq + freq[i];
|
||||
AReport.Add(' %8.3f %8d %8d %12.2f%%', [
|
||||
categories[i], freq[i], cumFreq, percentiles[i]*100.0
|
||||
]);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer;
|
||||
const AColsSelected: IntDyneVec; AMean, AStdDev: Double);
|
||||
var
|
||||
@ -759,10 +725,10 @@ begin
|
||||
begin
|
||||
if CaseChk.Checked then
|
||||
begin
|
||||
if not ValidValue(i, AColsSelected[AColIndex]) then continue;
|
||||
if not DataProcs.ValidValue(i, AColsSelected[AColIndex]) then continue;
|
||||
end
|
||||
else
|
||||
if not GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
|
||||
if not DataProcs.GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
|
||||
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]);
|
||||
zValue := (value - AMean) / AStdDev;
|
||||
|
@ -41,6 +41,8 @@ type
|
||||
LHelpPath: String;
|
||||
end;
|
||||
|
||||
ELazStats = class(Exception);
|
||||
|
||||
var
|
||||
NoCases : integer;
|
||||
NoVariables : integer;
|
||||
|
@ -9,7 +9,7 @@ uses
|
||||
Globals, DictionaryUnit;
|
||||
|
||||
function CollectValues(AGrid: TStringGrid; AColIndex: Integer;
|
||||
AColCheck: IntDyneVec): DblDyneVec;
|
||||
AColCheck: IntDyneVec = nil): DblDyneVec;
|
||||
|
||||
procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer;
|
||||
const AColCheck: IntDyneVec; out AMin, AMax: Double);
|
||||
@ -42,6 +42,7 @@ uses
|
||||
function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
|
||||
var
|
||||
row, n: Integer;
|
||||
val: Double;
|
||||
begin
|
||||
SetLength(Result, AGrid.RowCount);
|
||||
n := 0;
|
||||
@ -54,7 +55,11 @@ begin
|
||||
begin
|
||||
if not GoodRecord(AGrid, row, AColCheck) then continue;
|
||||
end;
|
||||
Result[n] := StrToFloat(trim(AGrid.Cells[AColIndex, row]));
|
||||
if TryStrToFloat(trim(AGrid.Cells[AColIndex, row]), val) then
|
||||
Result[n] := val
|
||||
else
|
||||
raise ELazStats.CreateFmt('Non-numeric string "%s" in column %d, row %d',
|
||||
[AGrid.Cells[AColIndex, row], AColIndex, row]);
|
||||
inc(n);
|
||||
end;
|
||||
SetLength(Result, n);
|
||||
|
@ -7,7 +7,8 @@ unit MathUnit;
|
||||
interface
|
||||
|
||||
uses
|
||||
Classes, SysUtils;
|
||||
Classes, SysUtils,
|
||||
Globals;
|
||||
|
||||
const
|
||||
TWO_PI = 2.0 * PI;
|
||||
@ -43,6 +44,13 @@ function FactorialLn(n: Integer): Double;
|
||||
function PoissonPDF(n: integer; a: double): Double;
|
||||
function PoissonCDF(n: Integer; a: double): Double;
|
||||
|
||||
|
||||
procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
|
||||
procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
|
||||
procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
|
||||
procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
|
||||
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
@ -474,6 +482,70 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
|
||||
{===============================================================================
|
||||
* Vector-based calculations
|
||||
===============================================================================}
|
||||
procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
AMin := Infinity;
|
||||
AMax := -Infinity;
|
||||
for i := Low(AData) to High(AData) do
|
||||
begin
|
||||
if AData[i] < AMin then AMin := AData[i];
|
||||
if AData[i] > AMax then AMax := AData[i];
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
|
||||
var
|
||||
variance: Double;
|
||||
begin
|
||||
Calc_MeanVarStdDev(AData, AMean, variance, AStdDev);
|
||||
end;
|
||||
|
||||
|
||||
procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
|
||||
var
|
||||
sum, ss: Double;
|
||||
n: Integer;
|
||||
begin
|
||||
AMean := NaN;
|
||||
AVariance := NaN;
|
||||
AStdDev := NaN;
|
||||
|
||||
n := Length(AData);
|
||||
if n = 0 then
|
||||
exit;
|
||||
|
||||
Calc_SumSS(AData, sum, ss);
|
||||
|
||||
AMean := sum / n;
|
||||
if n = 1 then
|
||||
exit;
|
||||
|
||||
AVariance := ((ss - sqr(AMean)) / n) / (n - 1);
|
||||
AStdDev := sqrt(AVariance);
|
||||
end;
|
||||
|
||||
|
||||
procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
Sum := 0;
|
||||
SS := 0;
|
||||
for i := Low(AData) to High(AData) do
|
||||
begin
|
||||
Sum := Sum + AData[i];
|
||||
SS := SS + sqr(AData[i]);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
initialization
|
||||
InitFactLn();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user