/*	descriptivestatistics.js		12-31-2008		JavaScript
	Copyright (C)2008 Steven Whitney.
	Initially published by http://25yearsofprogramming.com.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License (GPL)
	Version 3 as published by the Free Software Foundation.
	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.
	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

Given an input set of values, it calculates and returns statistics describing the set.

It returns an associative array whose members can be accessed as either array elements or properties.
Examples: Stats["Count"] or Stats.Count
To iterate its members, use for(i in array). The statistics are stored as named
properties rather than indexed elements, so array.length is 0 and
for(i = 0 ; i < array.length ; i++)
never visits any of them.
	
*/
//----------------------------------------------------------------------------------------------
// Calculates descriptive statistics for a list of values.
//
// Input:   an array with the data items in its successive elements. Each item is converted
//          with parseFloat(), so numeric strings are accepted (note that parseFloat also accepts
//          a leading numeric prefix, e.g. "12abc" parses as 12). Items that parse to NaN are
//          skipped and set IsOk to false. A null/undefined Input is treated as an empty list
//          and also sets IsOk to false.
//
// Returns: an associative array with the stats in its elements (Stats["Count"] or Stats.Count):
//          Count, SumX, SumX2, Minimum, Maximum, Range, StdDevPop, StdDevEst, ArithMean,
//          HarmonicMean, GeometricMean, Median, Modes (array), ModeFrequency, IsOk.
//          Statistics that cannot be computed remain NaN (Modes stays empty, ModeFrequency 0).
function DescriptiveStatistics(Input)
{
	var Raw = [];							// only the legitimate numeric values from Input
	var Stats = [];							// kept as an Array with named properties so the return type is unchanged
	var AllowGHMean = true;					// any input <= 0 makes the Geometric and Harmonic means invalid
	Stats["Count"] = 0;						// N, the number of values in the input list
	Stats["SumX"] = 0;						// Sum of all the X input values
	Stats["SumX2"] = 0;						// Each X is squared, then all the squares are summed
	Stats["Minimum"] = Number.NaN;			// Lowest value encountered
	Stats["Maximum"] = Number.NaN;			// Highest value encountered
	Stats["Range"] = Number.NaN;			// Highest - Lowest
	Stats["StdDevPop"] = Number.NaN;		// Population Standard Deviation (N weighting)
	Stats["StdDevEst"] = Number.NaN;		// Estimated Standard Deviation from sampled data (N-1 weighting)
	Stats["ArithMean"] = Number.NaN;		// Arithmetic mean (average)
	Stats["HarmonicMean"] = Number.NaN;		// Harmonic mean
	Stats["GeometricMean"] = Number.NaN;	// Geometric mean
	Stats["Median"] = Number.NaN;			// Median, C50, midpoint. Half the values fall above/below this value.
	Stats["Modes"] = [];					// Modes, most frequent input value(s). An array because there can be > 1 mode.
	Stats["ModeFrequency"] = 0;				// Number of occurrences of the modal value.
	Stats["IsOk"] = true;					// True only if all input values were successfully parsed as numbers.

	// A missing input list is reported through IsOk instead of throwing on Input.length.
	var Values = Input;
	if(Input === null || Input === undefined)
	{
		Values = [];
		Stats["IsOk"] = false;
	}

	// Pass 1: collect the parseable values and the running sums / extremes.
	var x, i;
	for(i = 0 ; i < Values.length ; i++)
	{
		x = parseFloat(Values[i]);
		if(isNaN(x))
		{
			// Failed values are ignored, so the stats may still be usable; this flag is a warning.
			Stats["IsOk"] = false;
			continue;
		}
		Raw.push(x);
		if(x <= 0)
			AllowGHMean = false;
		if(Stats["Count"] === 0)
		{
			// Seed Min and Max from the first valid value so they remain NaN when there are none.
			Stats["Minimum"] = x;
			Stats["Maximum"] = x;
		}
		else
		{
			Stats["Minimum"] = Math.min(Stats["Minimum"], x);
			Stats["Maximum"] = Math.max(Stats["Maximum"], x);
		}
		Stats["Count"]++;
		Stats["SumX"] += x;
		Stats["SumX2"] += x * x;
	}

	var N = Stats["Count"];
	if(N === 0)
		return Stats;						// nothing to describe; everything keeps its default

	Raw.sort(function(l,r){return l - r;});	// sort numerically for mode and median calculations

	Stats["Range"] = Stats["Maximum"] - Stats["Minimum"];
	Stats["ArithMean"] = Stats["SumX"] / N;

	// Standard deviations. Pass 2 sums the squared deviations from the mean instead of using
	// N*SumX2 - SumX^2: that one-pass form subtracts two nearly equal large numbers, which loses
	// precision and can even go slightly negative (sqrt -> NaN) through rounding.
	var deviation;
	var sumSqDev = 0;
	for(i = 0 ; i < N ; i++)
	{
		deviation = Raw[i] - Stats["ArithMean"];
		sumSqDev += deviation * deviation;
	}
	Stats["StdDevPop"] = Math.sqrt(sumSqDev / N);
	// The N-1 weighting would divide by zero for a single value; in that case Est equals Pop (0).
	Stats["StdDevEst"] = (N > 1) ? Math.sqrt(sumSqDev / (N - 1)) : Stats["StdDevPop"];

	if(AllowGHMean)
	{
		// Harmonic mean: N divided by the sum of reciprocals.
		x = 0;
		for(i = 0 ; i < N ; i++)
			x += (1 / Raw[i]);
		Stats["HarmonicMean"] = N / x;

		// Geometric mean: taking the Nth root of each factor first avoids math overflow.
		x = 1;
		for(i = 0 ; i < N ; i++)
			x *= Math.pow(Raw[i], 1 / N);
		Stats["GeometricMean"] = x;
	}

	// Mode calculation. Allows for multimodal data sets. Raw is sorted, so equal values form
	// consecutive runs; the loop goes one step past the end so the final run is evaluated too.
	var runValue = Raw[0];
	var runLength = 1;
	for(i = 1 ; i <= N ; i++)
	{
		if(i < N && Raw[i] === runValue)
		{
			runLength++;						// another occurrence of the current value
			continue;
		}
		if(runLength > Stats["ModeFrequency"])	// new highest frequency: replace all candidates
		{
			Stats["Modes"].length = 0;
			Stats["Modes"].push(runValue);
			Stats["ModeFrequency"] = runLength;
		}
		else if(runLength === Stats["ModeFrequency"])	// tie: add to the modes list
		{
			Stats["Modes"].push(runValue);
		}
		if(i < N)
		{
			runValue = Raw[i];					// start tallying the next distinct value
			runLength = 1;
		}
	}
	// A single input value may be its own mode, but with multiple input values
	// the minimum frequency for a mode is 2.
	if(N > 1 && Stats["ModeFrequency"] < 2)
	{
		Stats["Modes"].length = 0;			// No legitimate mode found.
		Stats["ModeFrequency"] = 0;			// No occurrences.
	}

	// Median (midpoint of the sorted data points).
	var mid = Math.floor(N / 2);			// in JavaScript, must explicitly truncate to integer
	if((N % 2) === 1)						// odd count: the center point is known
		Stats["Median"] = Raw[mid];
	else									// even count: average the two central points
		Stats["Median"] = (Raw[mid - 1] + Raw[mid]) / 2;

	return Stats;
}
//----------------------------------------------------------------------------------------------

