require'more_math'moduleMoreMath# This class is used to contain elements and compute various statistical# values for them.classSequencedefinitialize(elements)@elements=elements@elements.freezeend# Returns the array of elements.attr_reader:elements# Calls the +block+ for every element of this Sequence.defeach(&block)@elements.each(&block)endincludeEnumerable# Returns true if this sequence is empty, otherwise false.defempty?@elements.empty?end# Returns the number of elements, on which the analysis is based.defsize@elements.sizeend# Returns the variance of the elements.defvariance@variance||=sum_of_squares/sizeend# Returns the sample_variance of the elements.defsample_variance@sample_variance||=size>1?sum_of_squares/(size-1.0):0.0end# Returns the sum of squares (the sum of the squared deviations) of the# elements.defsum_of_squares@sum_of_squares||=@elements.inject(0.0){|s,t|s+(t-arithmetic_mean)**2}end# Returns the standard deviation of the elements.defstandard_deviation@sample_deviation||=Math.sqrt(variance)end# Returns the standard deviation of the elements in percentage of the# arithmetic mean.defstandard_deviation_percentage@standard_deviation_percentage||=100.0*standard_deviation/arithmetic_meanend# Returns the sample standard deviation of the elements.defsample_standard_deviation@sample_standard_deviation||=Math.sqrt(sample_variance)end# Returns the sample standard deviation of the elements in percentage# of the arithmetic mean.defsample_standard_deviation_percentage@sample_standard_deviation_percentage||=100.0*sample_standard_deviation/arithmetic_meanend# Returns the sum of all elements.defsum@sum||=@elements.inject(0.0){|s,t|s+t}end# Returns the arithmetic mean of the elements.defarithmetic_mean@arithmetic_mean||=sum/sizeendaliasmeanarithmetic_mean# Returns the harmonic mean of the elements. If any of the elements# is less than or equal to 0.0, this method returns NaN.defharmonic_mean@harmonic_mean||=(sum=@elements.inject(0.0){|s,t|ift>0s+1.0/telsebreaknilend}sum?size/sum:0/0.0)end# Returns the geometric mean of the elements. If any of the# elements is less than 0.0, this method returns NaN.defgeometric_mean@geometric_mean||=(sum=@elements.inject(0.0){|s,t|casewhent>0s+Math.log(t)whent==0break:nullelsebreaknilend}casesumwhen:null0.0whenFloatMath.exp(sum/size)else0/0.0end)end# Returns the minimum of the elements.defmin@min||=@elements.minend# Returns the maximum of the elements.defmax@max||=@elements.maxend# Returns the +p+-percentile of the elements.# There are many methods to compute the percentile, this method uses the# the weighted average at x_(n + 1)p, which allows p to be in 0...100# (excluding the 100).defpercentile(p=50)(0...100).include?(p)orraiseArgumentError,"p = #{p}, but has to be in (0...100)"p/=100.0@sorted||=@elements.sortr=p*(@sorted.size+1)r_i=r.to_ir_f=r-r_iifr_i>=1result=@sorted[r_i-1]ifr_i<@sorted.sizeresult+=r_f*(@sorted[r_i]-@sorted[r_i-1])endelseresult=@sorted[0]endresultendaliasmedianpercentile# Use an approximation of the Welch-Satterthwaite equation to compute the# degrees of freedom for Welch's t-test.defcompute_welch_df(other)(sample_variance/size+other.sample_variance/other.size)**2/((sample_variance**2/(size**2*(size-1)))+(other.sample_variance**2/(other.size**2*(other.size-1))))end# Returns the t value of the Welch's t-test between this Sequence# instance and the +other+.deft_welch(other)signal=arithmetic_mean-other.arithmetic_meannoise=Math.sqrt(sample_variance/size+other.sample_variance/other.size)signal/noiserescueErrno::EDOM0.0end# Returns an estimation of the common standard deviation of the# elements of this and +other+.defcommon_standard_deviation(other)Math.sqrt(common_variance(other))end# Returns an estimation of the common variance of the elements of this# and +other+.defcommon_variance(other)(size-1)*sample_variance+(other.size-1)*other.sample_variance/(size+other.size-2)end# Compute the # degrees of freedom for Student's t-test.defcompute_student_df(other)size+other.size-2end# Returns the t value of the Student's t-test between this Sequence# instance and the +other+.deft_student(other)signal=arithmetic_mean-other.arithmetic_meannoise=common_standard_deviation(other)*Math.sqrt(size**-1+size**-1)rescueErrno::EDOM0.0end# Compute a sample size, that will more likely yield a mean difference# between this instance's elements and those of +other+. Use +alpha+# and +beta+ as levels for the first- and second-order errors.defsuggested_sample_size(other,alpha=0.05,beta=0.05)alpha,beta=alpha.abs,beta.abssignal=arithmetic_mean-other.arithmetic_meandf=size+other.size-2pooled_variance_estimate=(sum_of_squares+other.sum_of_squares)/dftd=TDistribution.newdf(((td.inverse_probability(alpha)+td.inverse_probability(beta))*Math.sqrt(pooled_variance_estimate))/signal)**2end# Return true, if the Sequence instance covers the +other+, that is their# arithmetic mean value is most likely to be equal for the +alpha+ error# level.defcover?(other,alpha=0.05)t=t_welch(other)td=TDistribution.new(compute_welch_df(other))t.abs<td.inverse_probability(1-alpha.abs/2.0)end# Return the confidence interval for the arithmetic mean with alpha level +alpha+ of# the elements of this Sequence instance as a Range object.defconfidence_interval(alpha=0.05)td=TDistribution.new(size-1)t=td.inverse_probability(alpha/2).absdelta=t*sample_standard_deviation/Math.sqrt(size)(arithmetic_mean-delta)..(arithmetic_mean+delta)end# Returns the array of autovariances (of length size - 1).defautovarianceArray.new(size-1)do|k|s=0.00.upto(size-k-1)do|i|s+=(@elements[i]-arithmetic_mean)*(@elements[i+k]-arithmetic_mean)ends/sizeendend# Returns the array of autocorrelation values c_k / c_0 (of length size -# 1).defautocorrelationc=autovarianceArray.new(c.size){|k|c[k]/c[0]}end# Returns the d-value for the Durbin-Watson statistic. The value is d << 2# for positive, d >> 2 for negative and d around 2 for no autocorrelation.defdurbin_watson_statistice=linear_regression.residuese.size<=1andreturn2.0(1...e.size).inject(0.0){|s,i|s+(e[i]-e[i-1])**2}/e.inject(0.0){|s,x|s+x**2}end# Returns the q value of the Ljung-Box statistic for the number of lags# +lags+. A higher value might indicate autocorrelation in the elements of# this Sequence instance. This method returns nil if there weren't enough# (at least lags) lags available.defljung_box_statistic(lags=20)r=autocorrelationlags>=r.sizeandreturnn=sizen*(n+2)*(1..lags).inject(0.0){|s,i|s+r[i]**2/(n-i)}end# This method tries to detect autocorrelation with the Ljung-Box# statistic. If enough lags can be considered it returns a hash with# results, otherwise nil is returned. The keys are# :lags:: the number of lags,# :alpha_level:: the alpha level for the test,# :q:: the value of the ljung_box_statistic,# :p:: the p-value computed, if p is higher than alpha no correlation was detected,# :detected:: true if a correlation was found.defdetect_autocorrelation(lags=20,alpha_level=0.05)ifq=ljung_box_statistic(lags)p=ChiSquareDistribution.new(lags).probability(q)return{:lags=>lags,:alpha_level=>alpha_level,:q=>q,:p=>p,:detected=>p>=1-alpha_level,}endend# Return a result hash with the number of :very_low, :low, :high, and# :very_high outliers, determined by the box plotting algorithm run with# :median and :iqr parameters. If no outliers were found or the iqr is# less than epsilon, nil is returned.defdetect_outliers(factor=3.0,epsilon=1E-5)half_factor=factor/2.0quartile1=percentile(25)quartile3=percentile(75)iqr=quartile3-quartile1iqr<epsilonandreturnresult=@elements.inject(Hash.new(0))do|h,t|extreme=casetwhen-Infinity..(quartile1-factor*iqr):very_lowwhen(quartile1-factor*iqr)..(quartile1-half_factor*iqr):lowwhen(quartile1+half_factor*iqr)..(quartile3+factor*iqr):highwhen(quartile3+factor*iqr)..Infinity:very_highendandh[extreme]+=1hendunlessresult.empty?result[:median]=medianresult[:iqr]=iqrresult[:factor]=factorresultendend# Returns the LinearRegression object for the equation a * x + b which# represents the line computed by the linear regression algorithm.deflinear_regression@linear_regression||=LinearRegression.new@elementsend# Returns a Histogram instance with +bins+ as the number of bins for this# analysis' elements.defhistogram(bins)Histogram.new(self,bins)endendend