1# pdf_stat.tcl --
2#
3#    Collection of procedures for evaluating probability and
4#    cumulative density functions
5#    Part of "math::statistics"
6#
7#    january 2008: added procedures by Eric Kemp Benedict for
8#                  Gamma, Poisson and t-distributed variables.
9#                  Replacing some older versions.
10#
11
12# ::math::statistics --
13#   Namespace holding the procedures and variables
14#
15namespace eval ::math::statistics {
16
17    namespace export pdf-normal pdf-uniform \
18	    pdf-exponential \
19	    cdf-normal cdf-uniform \
20	    cdf-exponential \
21	    cdf-students-t \
22	    random-normal random-uniform \
23	    random-exponential \
24	    histogram-uniform \
25	    pdf-gamma pdf-poisson pdf-chisquare pdf-students-t pdf-beta \
26	    cdf-gamma cdf-poisson cdf-chisquare cdf-beta \
27	    random-gamma random-poisson random-chisquare random-students-t random-beta \
28	    incompleteGamma incompleteBeta
29
30    variable cdf_normal_prob     {}
31    variable cdf_normal_x        {}
32    variable cdf_toms322_cached  {}
33    variable initialised_cdf     0
34    variable twopi               [expr {2.0*acos(-1.0)}]
35    variable pi                  [expr {acos(-1.0)}]
36}
37
38
39# pdf-normal --
40#    Return the probabilities belonging to a normal distribution
41#
42# Arguments:
43#    mean     Mean of the distribution
44#    stdev    Standard deviation
45#    x        Value for which the probability must be determined
46#
47# Result:
48#    Probability of value x under the given distribution
49#
50proc ::math::statistics::pdf-normal { mean stdev x } {
51    variable NEGSTDEV
52    variable factorNormalPdf
53
54    if { $stdev <= 0.0 } {
55	return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
56    }
57
58    set xn   [expr {($x-$mean)/$stdev}]
59    set prob [expr {exp(-$xn*$xn/2.0)/$stdev/$factorNormalPdf}]
60
61    return $prob
62}
63
64
65# pdf-uniform --
66#    Return the probabilities belonging to a uniform distribution
67#    (parameters as minimum/maximum)
68#
69# Arguments:
70#    pmin      Minimum of the distribution
71#    pmax      Maximum of the distribution
72#    x         Value for which the probability must be determined
73#
74# Result:
75#    Probability of value x under the given distribution
76#
77proc ::math::statistics::pdf-uniform { pmin pmax x } {
78
79    if { $pmin >= $pmax } {
80	return -code error -errorcode ARG \
81		-errorinfo "Wrong order or zero range" \
82		"Wrong order or zero range"
83    }
84
85    set prob [expr {1.0/($pmax-$min)}]
86
87    if { $x < $pmin || $x > $pmax } { return 0.0 }
88
89    return $prob
90}
91
92
93# pdf-exponential --
94#    Return the probabilities belonging to an exponential
95#    distribution
96#
97# Arguments:
98#    mean     Mean of the distribution
99#    x        Value for which the probability must be determined
100#
101# Result:
102#    Probability of value x under the given distribution
103#
104proc ::math::statistics::pdf-exponential { mean x } {
105    variable NEGSTDEV
106    variable OUTOFRANGE
107
108    if { $stdev <= 0.0 } {
109	return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
110    }
111    if { $mean <= 0.0 } {
112	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
113		"$OUTOFRANGE: mean must be positive"
114    }
115
116    if { $x < 0.0 } { return 0.0 }
117    if { $x > 700.0*$mean } { return 0.0 }
118
119    set prob [expr {exp(-$x/$mean)/$mean}]
120
121    return $prob
122}
123
124
125# cdf-normal --
126#    Return the cumulative probability belonging to a normal distribution
127#
128# Arguments:
129#    mean     Mean of the distribution
130#    stdev    Standard deviation
131#    x        Value for which the probability must be determined
132#
133# Result:
134#    Cumulative probability of value x under the given distribution
135#
136proc ::math::statistics::cdf-normal { mean stdev x } {
137    variable NEGSTDEV
138
139    if { $stdev <= 0.0 } {
140	return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
141    }
142
143    set xn    [expr {($x-$mean)/$stdev}]
144    set prob1 [Cdf-toms322 1 5000 [expr {$xn*$xn}]]
145    if { $xn > 0.0 } {
146	set prob [expr {0.5+0.5*$prob1}]
147    } else {
148	set prob [expr {0.5-0.5*$prob1}]
149    }
150
151    return $prob
152}
153
154
155# cdf-students-t --
156#    Return the cumulative probability belonging to the
157#    Student's t distribution
158#
159# Arguments:
160#    degrees  Number of degrees of freedom
161#    x        Value for which the probability must be determined
162#
163# Result:
164#    Cumulative probability of value x under the given distribution
165#
166proc ::math::statistics::cdf-students-t { degrees x } {
167
168    if { $degrees <= 0 } {
169	return -code error -errorcode ARG -errorinfo \
170		"Number of degrees of freedom must be positive" \
171		"Number of degrees of freedom must be positive"
172    }
173
174    set prob1 [Cdf-toms322 1 $degrees [expr {$x*$x}]]
175    set prob  [expr {0.5+0.5*$prob1}]
176
177    return $prob
178}
179
180
181# cdf-uniform --
182#    Return the cumulative probabilities belonging to a uniform
183#    distribution (parameters as minimum/maximum)
184#
185# Arguments:
186#    pmin      Minimum of the distribution
187#    pmax      Maximum of the distribution
188#    x         Value for which the probability must be determined
189#
190# Result:
191#    Cumulative probability of value x under the given distribution
192#
193proc ::math::statistics::cdf-uniform { pmin pmax x } {
194
195    if { $pmin >= $pmax } {
196	return -code error -errorcode ARG \
197		-errorinfo "Wrong order or zero range" \
198	    }
199
200    set prob [expr {($x-$pmin)/($pmax-$min)}]
201
202    if { $x < $pmin } { return 0.0 }
203    if { $x > $pmax } { return 1.0 }
204
205    return $prob
206}
207
208
209# cdf-exponential --
210#    Return the cumulative probabilities belonging to an exponential
211#    distribution
212#
213# Arguments:
214#    mean     Mean of the distribution
215#    x        Value for which the probability must be determined
216#
217# Result:
218#    Cumulative probability of value x under the given distribution
219#
220proc ::math::statistics::cdf-exponential { mean x } {
221    variable NEGSTDEV
222    variable OUTOFRANGE
223
224    if { $mean <= 0.0 } {
225	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
226		"$OUTOFRANGE: mean must be positive"
227    }
228
229    if { $x <  0.0 } { return 0.0 }
230    if { $x > 30.0*$mean } { return 1.0 }
231
232    set prob [expr {1.0-exp(-$x/$mean)}]
233
234    return $prob
235}
236
237
238# Inverse-cdf-uniform --
239#    Return the argument belonging to the cumulative probability
240#    for a uniform distribution (parameters as minimum/maximum)
241#
242# Arguments:
243#    pmin      Minimum of the distribution
244#    pmax      Maximum of the distribution
245#    prob      Cumulative probability for which the "x" value must be
246#              determined
247#
248# Result:
249#    X value that gives the cumulative probability under the
250#    given distribution
251#
252proc ::math::statistics::Inverse-cdf-uniform { pmin pmax prob } {
253
254    if {0} {
255	if { $pmin >= $pmax } {
256	    return -code error -errorcode ARG \
257		    -errorinfo "Wrong order or zero range" \
258		    "Wrong order or zero range"
259	}
260    }
261
262    set x [expr {$pmin+$prob*($pmax-$pmin)}]
263
264    if { $x < $pmin } { return $pmin }
265    if { $x > $pmax } { return $pmax }
266
267    return $x
268}
269
270
271# Inverse-cdf-exponential --
272#    Return the argument belonging to the cumulative probability
273#    for an exponential distribution
274#
275# Arguments:
276#    mean      Mean of the distribution
277#    prob      Cumulative probability for which the "x" value must be
278#              determined
279#
280# Result:
281#    X value that gives the cumulative probability under the
282#    given distribution
283#
284proc ::math::statistics::Inverse-cdf-exponential { mean prob } {
285
286    if {0} {
287	if { $mean <= 0.0 } {
288	    return -code error -errorcode ARG \
289		    -errorinfo "Mean must be positive" \
290		    "Mean must be positive"
291	}
292    }
293
294    set x [expr {-$mean*log(1.0-$prob)}]
295
296    return $x
297}
298
299
300# Inverse-cdf-normal --
301#    Return the argument belonging to the cumulative probability
302#    for a normal distribution
303#
304# Arguments:
305#    mean      Mean of the distribution
306#    stdev     Standard deviation of the distribution
307#    prob      Cumulative probability for which the "x" value must be
308#              determined
309#
310# Result:
311#    X value that gives the cumulative probability under the
312#    given distribution
313#
314proc ::math::statistics::Inverse-cdf-normal { mean stdev prob } {
315    variable cdf_normal_prob
316    variable cdf_normal_x
317
318    variable initialised_cdf
319    if { $initialised_cdf == 0 } {
320       Initialise-cdf-normal
321    }
322
323    # Look for the proper probability level first,
324    # then interpolate
325    #
326    # Note: the numerical data are connected to the length of
327    #       the lists - see Initialise-cdf-normal
328    #
329    set size 32
330    set idx  64
331    for { set i 0 } { $i <= 7 } { incr i } {
332	set upper [lindex $cdf_normal_prob $idx]
333	if { $prob > $upper } {
334	    set idx  [expr {$idx+$size}]
335	} else {
336	    set idx  [expr {$idx-$size}]
337	}
338	set size [expr {$size/2}]
339    }
340    #
341    # We have found a value that is close to the one we need,
342    # now find the enclosing interval
343    #
344    if { $upper < $prob } {
345	incr idx
346    }
347    set p1 [lindex $cdf_normal_prob [expr {$idx-1}]]
348    set p2 [lindex $cdf_normal_prob $idx]
349    set x1 [lindex $cdf_normal_x    [expr {$idx-1}]]
350    set x2 [lindex $cdf_normal_x    $idx           ]
351
352    set x  [expr {$x1+($x2-$x1)*($prob-$p1)/($p2-$p1)}]
353
354    return [expr {$mean+$stdev*$x}]
355}
356
357
358# Initialise-cdf-normal --
359#    Initialise the private data for the normal cdf
360#
361# Arguments:
362#    None
363# Result:
364#    None
365# Side effect:
366#    Variable cdf_normal_prob and cdf_normal_x are filled
367#    so that we can use these as a look-up table
368#
369proc ::math::statistics::Initialise-cdf-normal { } {
370    variable cdf_normal_prob
371    variable cdf_normal_x
372
373    variable initialised_cdf
374    set initialised_cdf 1
375
376    set dx [expr {10.0/128.0}]
377
378    set cdf_normal_prob 0.5
379    set cdf_normal_x    0.0
380    for { set i 1 } { $i <= 64 } { incr i } {
381	set x    [expr {$i*$dx}]
382	if { $x != 0.0 } {
383	    set prob [Cdf-toms322 1 5000 [expr {$x*$x}]]
384	} else {
385	    set prob 0.0
386	}
387
388	set cdf_normal_x    [concat [expr {-$x}] $cdf_normal_x $x]
389	set cdf_normal_prob \
390		[concat [expr {0.5-0.5*$prob}] $cdf_normal_prob \
391		[expr {0.5+0.5*$prob}]]
392    }
393}
394
395
396# random-uniform --
397#    Return a list of random numbers satisfying a uniform
398#    distribution (parameters as minimum/maximum)
399#
400# Arguments:
401#    pmin      Minimum of the distribution
402#    pmax      Maximum of the distribution
403#    number    Number of values to generate
404#
405# Result:
406#    List of random numbers
407#
408proc ::math::statistics::random-uniform { pmin pmax number } {
409
410    if { $pmin >= $pmax } {
411	return -code error -errorcode ARG \
412		-errorinfo "Wrong order or zero range" \
413		"Wrong order or zero range"
414    }
415
416    set result {}
417    for { set i 0 }  {$i < $number } { incr i } {
418	lappend result [Inverse-cdf-uniform $pmin $pmax [expr {rand()}]]
419    }
420
421    return $result
422}
423
424
425# random-exponential --
426#    Return a list of random numbers satisfying an exponential
427#    distribution
428#
429# Arguments:
430#    mean      Mean of the distribution
431#    number    Number of values to generate
432#
433# Result:
434#    List of random numbers
435#
436proc ::math::statistics::random-exponential { mean number } {
437
438    if { $mean <= 0.0 } {
439	return -code error -errorcode ARG \
440		-errorinfo "Mean must be positive" \
441		"Mean must be positive"
442    }
443
444    set result {}
445    for { set i 0 }  {$i < $number } { incr i } {
446	lappend result [Inverse-cdf-exponential $mean [expr {rand()}]]
447    }
448
449    return $result
450}
451
452
453# random-normal --
454#    Return a list of random numbers satisfying a normal
455#    distribution
456#
457# Arguments:
458#    mean      Mean of the distribution
459#    stdev     Standard deviation of the distribution
460#    number    Number of values to generate
461#
462# Result:
463#    List of random numbers
464#
465# Note:
466#    This version uses the Box-Muller transformation,
467#    a quick and robust method for generating normally-
468#    distributed numbers.
469#
470proc ::math::statistics::random-normal { mean stdev number } {
471    variable twopi
472
473    if { $stdev <= 0.0 } {
474	return -code error -errorcode ARG \
475		-errorinfo "Standard deviation must be positive" \
476		"Standard deviation must be positive"
477    }
478
479#    set result {}
480#    for { set i 0 }  {$i < $number } { incr i } {
481#        lappend result [Inverse-cdf-normal $mean $stdev [expr {rand()}]]
482#    }
483
484    set result {}
485
486    for { set i 0 }  {$i < $number } { incr i 2 } {
487        set angle [expr {$twopi * rand()}]
488        set rad   [expr {sqrt(-2.0*log(rand()))}]
489        set xrand [expr {$rad * cos($angle)}]
490        set yrand [expr {$rad * sin($angle)}]
491        lappend result [expr {$mean + $stdev * $xrand}]
492        if { $i < $number-1 } {
493            lappend result [expr {$mean + $stdev * $yrand}]
494        }
495    }
496
497    return $result
498}
499
500
501# Cdf-toms322 --
502#    Calculate the cumulative density function for several distributions
503#    according to TOMS322
504#
505# Arguments:
506#    m         First number of degrees of freedom
507#    n         Second number of degrees of freedom
508#    x         Value for which the cdf must be calculated
509#
510# Result:
511#    Cumulatve density at x - details depend on distribution
512#
513# Notes:
514#    F-ratios:
515#        m - degrees of freedom for numerator
516#        n - degrees of freedom for denominator
517#        x - F-ratio
518#    Student's t (two-tailed):
519#        m - 1
520#        n - degrees of freedom
521#        x - square of t
522#    Normal deviate (two-tailed):
523#        m - 1
524#        n - 5000
525#        x - square of deviate
526#    Chi-square:
527#        m - degrees of freedom
528#        n - 5000
529#        x - chi-square/m
530#    The original code can be found at <http://www.netlib.org>
531#
532proc ::math::statistics::Cdf-toms322 { m n x } {
533    if { $x == 0.0 } {
534        return 0.0
535    }
536    set m [expr {$m < 300?  int($m) : 300}]
537    set n [expr {$n < 5000? int($n) : 5000}]
538    if { $m < 1 || $n < 1 } {
539	return -code error -errorcode ARG \
540		-errorinfo "Arguments m anf n must be greater/equal 1"
541    }
542
543    set a [expr {2*($m/2)-$m+2}]
544    set b [expr {2*($n/2)-$n+2}]
545    set w [expr {$x*double($m)/double($n)}]
546    set z [expr {1.0/(1.0+$w)}]
547
548    if { $a == 1 } {
549	if { $b == 1 } {
550	    set p [expr {sqrt($w)}]
551	    set y 0.3183098862
552	    set d [expr {$y*$z/$p}]
553	    set p [expr {2.0*$y*atan($p)}]
554	} else {
555	    set p [expr {sqrt($w*$z)}]
556	    set d [expr {$p*$z/(2.0*$w)}]
557	}
558    } else {
559	if { $b == 1 } {
560	    set p [expr {sqrt($z)}]
561	    set d [expr {$z*$p/2.0}]
562	    set p [expr {1.0-$p}]
563	} else {
564	    set d [expr {$z*$z}]
565	    set p [expr {$z*$w}]
566	}
567    }
568
569    set y [expr {2.0*$w/$z}]
570
571    if { $a == 1 } {
572	for { set j [expr {$b+2}] } { $j <= $n } { incr j 2 } {
573	    set d [expr {(1.0+double($a)/double($j-2)) * $d*$z}]
574	    set p [expr {$p+$d*$y/double($j-1)}]
575	}
576    } else {
577	set power [expr {($n-1)/2}]
578	set zk    [expr {pow($z,$power)}]
579	set d     [expr {($d*$zk*$n)/$b}]
580	set p     [expr {$p*$zk + $w*$z * ($zk-1.0)/($z-1.0)}]
581    }
582
583    set y [expr {$w*$z}]
584    set z [expr {2.0/$z}]
585    set b [expr {$n-2}]
586
587    for { set i [expr {$a+2}] } { $i <= $m } { incr i 2 } {
588	set j [expr {$i+$b}]
589	set d [expr {$y*$d*double($j)/double($i-2)}]
590	set p [expr {$p-$z*$d/double($j)}]
591    }
592    set prob $p
593    if  { $prob < 0.0 } { set prob 0.0 }
594    if  { $prob > 1.0 } { set prob 1.0 }
595
596    return $prob
597}
598
599
600# Inverse-cdf-toms322 --
601#    Return the argument belonging to the cumulative probability
602#    for an F, chi-square or t distribution
603#
604# Arguments:
605#    m         First number of degrees of freedom
606#    n         Second number of degrees of freedom
607#    prob      Cumulative probability for which the "x" value must be
608#              determined
609#
610# Result:
611#    X value that gives the cumulative probability under the
612#    given distribution
613#
614# Note:
615#    See the procedure Cdf-toms322 for more details
616#
617proc ::math::statistics::Inverse-cdf-toms322 { m n prob } {
618    variable cdf_toms322_cached
619    variable OUTOFRANGE
620
621    if { $prob <= 0 || $prob >= 1 } {
622	return -code error -errorcode $OUTOFRANGE $OUTOFRANGE
623    }
624
625    # Is the combination in cache? Then we can simply rely
626    # on that
627    #
628    foreach {m1 n1 prob1 x1} $cdf_toms322_cached {
629	if { $m1 == $m && $n1 == $n && $prob1 == $prob } {
630	    return $x1
631	}
632    }
633
634    #
635    # Otherwise first find a value of x for which Cdf(x) exceeds prob
636    #
637    set x1  1.0
638    set dx1 1.0
639    while { [Cdf-toms322 $m $n $x1] < $prob } {
640	set x1  [expr {$x1+$dx1}]
641	set dx1 [expr {2.0*$dx1}]
642    }
643
644    #
645    # Now, look closer
646    #
647    while { $dx1 > 0.0001 } {
648	set p1 [Cdf-toms322 $m $n $x1]
649	if { $p1 > $prob } {
650	    set x1  [expr {$x1-$dx1}]
651	} else {
652	    set x1  [expr {$x1+$dx1}]
653	}
654	set dx1 [expr {$dx1/2.0}]
655    }
656
657    #
658    # Cache the result
659    #
660    set last end
661    if { [llength $cdf_toms322_cached] > 27 } {
662	set last 26
663    }
664    set cdf_toms322_cached \
665	    [concat [list $m $n $prob $x1] [lrange $cdf_toms322_cached 0 $last]]
666
667    return $x1
668}
669
670
671# HistogramMake --
672#    Distribute the "observations" according to the cdf
673#
674# Arguments:
675#    cdf-values   Values for the cdf (relative number of observations)
676#    number       Total number of "observations" in the histogram
677#
678# Result:
679#    List of numbers, distributed over the buckets
680#
681proc ::math::statistics::HistogramMake { cdf-values number } {
682
683    set assigned  0
684    set result    {}
685    set residue   0.0
686    foreach cdfv $cdf-values {
687	set sum      [expr {$number*($cdfv + $residue)}]
688	set bucket   [expr {int($sum)}]
689	set residue  [expr {$sum-$bucket}]
690	set assigned [expr {$assigned-$bucket}]
691	lappend result $bucket
692    }
693    set remaining [expr {$number-$assigned}]
694    if { $remaining > 0 } {
695	lappend result $remaining
696    } else {
697	lappend result 0
698    }
699
700    return $result
701}
702
703
704# histogram-uniform --
705#    Return the expected histogram for a uniform distribution
706#
707# Arguments:
708#    min       Minimum the distribution
709#    max       Maximum the distribution
710#    limits    upper limits for the histogram buckets
711#    number    Total number of "observations" in the histogram
712#
713# Result:
714#    List of expected number of observations
715#
716proc ::math::statistics::histogram-uniform { min max limits number } {
717    if { $min >= $max } {
718	return -code error -errorcode ARG \
719		-errorinfo "Wrong order or zero range" \
720		"Wrong order or zero range"
721    }
722
723    set cdf_result {}
724    foreach limit $limits {
725	lappend cdf_result [cdf-uniform $min $max $limit]
726    }
727
728    return [HistogramMake $cdf_result $number]
729}
730
731
732# incompleteGamma --
733#     Evaluate the incomplete Gamma function Gamma(p,x)
734#
735# Arguments:
736#     x         X-value
737#     p         Parameter
738#
739# Result:
740#     Value of Gamma(p,x)
741#
742# Note:
743#     Implementation by Eric K. Benedict (2007)
744#     Adapted from Fortran code in the Royal Statistical Society's StatLib
745#     library (http://lib.stat.cmu.edu/apstat/), algorithm AS 32 (with
746#     some modifications from AS 239)
747#
748#     Calculate normalized incomplete gamma function
749#
750#                     1       / x               p-1
751#       P(p,x) =  --------   |   dt exp(-t) * t
752#                 Gamma(p)  / 0
753#
754#     Tested some values against R's pgamma function
755#
756proc ::math::statistics::incompleteGamma {x p {tol 1.0e-9}} {
757    set overflow 1.0e37
758
759    if {$x < 0} {
760        return -code error -errorcode ARG -errorinfo "x must be positive"
761    }
762    if {$p <= 0} {
763        return -code error -errorcode ARG -errorinfo "p must be greater than or equal to zero"
764    }
765
766    # If x is zero, incGamma is zero
767    if {$x == 0.0} {
768        return 0.0
769    }
770
771    # Use normal approx is p > 1000
772    if {$p > 1000} {
773        set pn1 [expr {3.0 * sqrt($p) * (pow(1.0 * $x/$p, 1.0/3.0) + 1.0/(9.0 * $p) - 1.0)}]
774        # pnorm is not robust enough for this calculation (overflows); cdf-normal could also be used
775        return [::math::statistics::pnorm_quicker $pn1]
776    }
777
778    # If x is extremely large compared to a (and now know p < 1000), then return 1.0
779    if {$x > 1.e8} {
780        return 1.0
781    }
782
783    set factor [expr {exp($p * log($x) -$x - [::math::ln_Gamma $p])}]
784
785    # Use series expansion (first option) or continued fraction
786    if {$x <= 1.0 || $x < $p} {
787        set gin 1.0
788        set term 1.0
789        set rn $p
790        while {1} {
791            set rn [expr {$rn + 1.0}]
792            set term [expr {1.0 * $term * $x/$rn}]
793            set gin [expr {$gin + $term}]
794            if {$term < $tol} {
795                set gin [expr {1.0 * $gin * $factor/$p}]
796                break
797            }
798        }
799    } else {
800        set a [expr {1.0 - $p}]
801        set b [expr {$a + $x + 1.0}]
802        set term 0.0
803        set pn1 1.0
804        set pn2 $x
805        set pn3 [expr {$x + 1.0}]
806        set pn4 [expr {$x * $b}]
807        set gin [expr {1.0 * $pn3/$pn4}]
808        while {1} {
809            set a [expr {$a + 1.0}]
810            set b [expr {$b + 2.0}]
811            set term [expr {$term + 1.0}]
812            set an [expr {$a * $term}]
813            set pn5 [expr {$b * $pn3 - $an * $pn1}]
814            set pn6 [expr {$b * $pn4 - $an * $pn2}]
815            if {$pn6 != 0.0} {
816                set rn [expr {1.0 * $pn5/$pn6}]
817                set dif [expr {abs($gin - $rn)}]
818                if {$dif <= $tol && $dif <= $tol * $rn} {
819                    break
820                }
821                set gin $rn
822            }
823            set pn1 $pn3
824            set pn2 $pn4
825            set pn3 $pn5
826            set pn4 $pn6
827            # Too big? Rescale
828            if {abs($pn5) >= $overflow} {
829                set pn1 [expr {$pn1 / $overflow}]
830                set pn2 [expr {$pn2 / $overflow}]
831                set pn3 [expr {$pn3 / $overflow}]
832                set pn4 [expr {$pn4 / $overflow}]
833            }
834        }
835        set gin [expr {1.0 - $factor * $gin}]
836    }
837
838    return $gin
839
840}
841
842
843# pdf-gamma --
844#    Return the probabilities belonging to a gamma distribution
845#
846# Arguments:
847#    alpha     Shape parameter
848#    beta      Rate parameter
849#    x         Value of variate
850#
851# Result:
852#    Probability density of the given value of x to occur
853#
854# Note:
855#    Implemented by Eric Kemp-Benedict, 2007
856#
857#    This uses the following parameterization for the gamma:
858#        GammaDist(x) = beta * (beta * x)^(alpha-1) e^(-beta * x) / GammaFunc(alpha)
859#    Here, alpha is the shape parameter, and beta is the rate parameter
860#    Alternatively, a "scale parameter" theta = 1/beta is sometimes used
861#
862proc ::math::statistics::pdf-gamma { alpha beta x } {
863
864    if {$beta < 0} {
865        return -code error -errorcode ARG -errorinfo "Rate parameter 'beta' must be positive"
866    }
867    if {$x < 0.0} {
868        return 0.0
869    }
870
871    set prod [expr {1.0 * $x * $beta}]
872    set Galpha [expr {exp([::math::ln_Gamma $alpha])}]
873
874    expr {(1.0 * $beta/$Galpha) * pow($prod, ($alpha - 1.0)) * exp(-$prod)}
875}
876
877
878# pdf-poisson --
879#    Return the probabilities belonging to a Poisson
880#    distribution
881#
882# Arguments:
883#    mu       Mean of the distribution
884#    k        Number of occurrences
885#
886# Result:
887#    Probability of k occurrences under the given distribution
888#
889# Note:
890#    Implemented by Eric Kemp-Benedict, 2007
891#
892proc ::math::statistics::pdf-poisson { mu k } {
893    set intk [expr {int($k)}]
894    expr {exp(-$mu + floor($k) * log($mu) - [::math::ln_Gamma [incr intk]])}
895}
896
897
898# pdf-chisquare --
899#    Return the probabilities belonging to a chi square distribution
900#
901# Arguments:
902#    df        Degree of freedom
903#    x         Value of variate
904#
905# Result:
906#    Probability density of the given value of x to occur
907#
908# Note:
909#    Implemented by Eric Kemp-Benedict, 2007
910#
911proc ::math::statistics::pdf-chisquare { df x } {
912
913    if {$df <= 0} {
914        return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive"
915    }
916
917    return [pdf-gamma [expr {0.5*$df}] 0.5 $x]
918}
919
920
921# pdf-students-t --
922#    Return the probabilities belonging to a Student's t distribution
923#
924# Arguments:
925#    degrees   Degree of freedom
926#    x         Value of variate
927#
928# Result:
929#    Probability density of the given value of x to occur
930#
931# Note:
932#    Implemented by Eric Kemp-Benedict, 2007
933#
934proc ::math::statistics::pdf-students-t { degrees x } {
935    variable pi
936
937    if {$degrees <= 0} {
938        return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive"
939    }
940
941    set nplus1over2 [expr {0.5 * ($degrees + 1)}]
942    set f1 [expr {exp([::math::ln_Gamma $nplus1over2] - \
943        [::math::ln_Gamma [expr {$nplus1over2 - 0.5}]])}]
944    set f2 [expr {1.0/sqrt($degrees * $pi)}]
945
946    expr {$f1 * $f2 * pow(1.0 + $x * $x/double($degrees), -$nplus1over2)}
947
948}
949
950
951# pdf-beta --
952#    Return the probabilities belonging to a Beta distribution
953#
954# Arguments:
955#    a         First parameter of the Beta distribution
956#    b         Second parameter of the Beta distribution
957#    x         Value of variate
958#
959# Result:
960#    Probability density of the given value of x to occur
961#
962# Note:
963#    Implemented by Eric Kemp-Benedict, 2008
964#
965proc ::math::statistics::pdf-beta { a b x } {
966    if {$x < 0.0 || $x > 1.0} {
967        return -code error "Value out of range in Beta density: x = $x, not in \[0, 1\]"
968    }
969    if {$a <= 0.0} {
970        return -code error "Value out of range in Beta density: a = $a, must be > 0"
971    }
972    if {$b <= 0.0} {
973        return -code error "Value out of range in Beta density: b = $b, must be > 0"
974    }
975    #
976    # Corner cases ... need to check these!
977    #
978    if {$x == 0.0} {
979        return 0.0
980    }
981    if {$x == 1.0} {
982        return 0.0
983    }
984    set aplusb [expr {$a + $b}]
985    set term1 [expr {[::math::ln_Gamma $aplusb]- [::math::ln_Gamma $a] - [::math::ln_Gamma $b]}]
986    set term2 [expr {($a - 1.0) * log($x) + ($b - 1.0) * log(1.0 - $x)}]
987
988    set term [expr {$term1 + $term2}]
989    if { $term > -200.0 } {
990        return [expr {exp($term)}]
991    } else {
992        return 0.0
993    }
994}
995
996
997# incompleteBeta --
998#    Evaluate the incomplete Beta integral
999#
1000# Arguments:
1001#    a         First parameter of the Beta integral
1002#    b         Second parameter of the Beta integral
1003#    x         Integration limit
1004#    tol       (Optional) error tolerance (defaults to 1.0e-9)
1005#
1006# Result:
1007#    Probability density of the given value of x to occur
1008#
1009# Note:
1010#    Implemented by Eric Kemp-Benedict, 2008
1011#
1012proc ::math::statistics::incompleteBeta {a b x {tol 1.0e-9}} {
1013    if {$x < 0.0 || $x > 1.0} {
1014        return -code error "Value out of range in incomplete Beta function: x = $x, not in \[0, 1\]"
1015    }
1016    if {$a <= 0.0} {
1017        return -code error "Value out of range in incomplete Beta function: a = $a, must be > 0"
1018    }
1019    if {$b <= 0.0} {
1020        return -code error "Value out of range in incomplete Beta function: b = $b, must be > 0"
1021    }
1022
1023    if {$x < $tol} {
1024        return 0.0
1025    }
1026    if {$x > 1.0 - $tol} {
1027        return 1.0
1028    }
1029
1030    # Rearrange if necessary to get continued fraction to behave
1031    if {$x < 0.5} {
1032        return [beta_cont_frac $a $b $x $tol]
1033    } else {
1034        set z [beta_cont_frac $b $a [expr {1.0 - $x}] $tol]
1035        return [expr {1.0 - $z}]
1036    }
1037}
1038
1039
1040# beta_cont_frac --
1041#    Evaluate the incomplete Beta integral via a continued fraction
1042#
1043# Arguments:
1044#    a         First parameter of the Beta integral
1045#    b         Second parameter of the Beta integral
1046#    x         Integration limit
1047#    tol       (Optional) error tolerance (defaults to 1.0e-9)
1048#
1049# Result:
1050#    Probability density of the given value of x to occur
1051#
1052# Note:
1053#    Implemented by Eric Kemp-Benedict, 2008
1054#
1055#    Continued fraction for Ix(a,b)
1056#    Abramowitz & Stegun 26.5.9
1057#
1058proc ::math::statistics::beta_cont_frac {a b x {tol 1.0e-9}} {
1059    set max_iter 512
1060
1061    set aplusb [expr {$a + $b}]
1062    set amin1 [expr {$a - 1}]
1063    set lnGapb [::math::ln_Gamma $aplusb]
1064    set term1 [expr {$lnGapb- [::math::ln_Gamma $a] - [::math::ln_Gamma $b]}]
1065    set term2 [expr {$a * log($x) + ($b - 1.0) * log(1.0 - $x)}]
1066    set pref [expr {exp($term1 + $term2)/$a}]
1067
1068    set z [expr {$x / (1.0 - $x)}]
1069
1070    set v 1.0
1071    set h_1 1.0
1072    set h_2 0.0
1073    set k_1 1.0
1074    set k_2 1.0
1075
1076    for {set m 1} {$m < $max_iter} {incr m} {
1077        set f1 [expr {$amin1 + 2 * $m}]
1078        set e2m [expr {-$z * double(($amin1 + $m) * ($b - $m))/ \
1079            double(($f1 - 1) * $f1)}]
1080        set e2mp1 [expr {$z * double($m * ($aplusb - 1 + $m)) / \
1081            double($f1 * ($f1 + 1))}]
1082        set h_2m [expr {$h_1 + $e2m * $h_2}]
1083        set k_2m [expr {$k_1 + $e2m * $k_2}]
1084
1085        set h_2 $h_2m
1086        set k_2 $k_2m
1087
1088        set h_1 [expr {$h_2m + $e2mp1 * $h_1}]
1089        set k_1 [expr {$k_2m + $e2mp1 * $k_1}]
1090
1091        set vprime [expr {$h_1/$k_1}]
1092
1093        if {abs($v - $vprime) < $tol} {
1094            break
1095        }
1096
1097        set v $vprime
1098
1099    }
1100
1101    if {$m == $max_iter} {
1102        return -code error "beta_cont_frac: Exceeded maximum number of iterations"
1103    }
1104
1105    set retval [expr {$pref * $v}]
1106
1107    # Because of imprecision in underlying Tcl calculations, may fall out of bounds
1108    if {$retval < 0.0} {
1109        set retval 0.0
1110    } elseif {$retval > 1.0} {
1111        set retval 1.0
1112    }
1113
1114    return $retval
1115}
1116
1117
1118# cdf-gamma --
1119#    Return the cumulative probabilities belonging to a gamma distribution
1120#
1121# Arguments:
1122#    alpha     Shape parameter
1123#    beta      Rate parameter
1124#    x         Value of variate
1125#
1126# Result:
1127#    Cumulative probability of the given value of x to occur
1128#
1129# Note:
1130#    Implemented by Eric Kemp-Benedict, 2007
1131#
1132proc ::math::statistics::cdf-gamma { alpha beta x } {
1133    if { $x <= 0 } {
1134        return 0.0
1135    }
1136    incompleteGamma [expr {$beta * $x}] $alpha
1137}
1138
1139
1140# cdf-poisson --
1141#    Return the cumulative probabilities belonging to a Poisson
1142#    distribution
1143#
1144# Arguments:
1145#    mu       Mean of the distribution
1146#    x        Number of occurrences
1147#
1148# Result:
1149#    Probability of k occurrences under the given distribution
1150#
1151# Note:
1152#    Implemented by Eric Kemp-Benedict, 2007
1153#
1154proc ::math::statistics::cdf-poisson { mu x } {
1155    return [expr {1.0 - [incompleteGamma $mu [expr {floor($x) + 1}]]}]
1156}
1157
1158
1159# cdf-chisquare --
1160#    Return the cumulative probabilities belonging to a chi square distribution
1161#
1162# Arguments:
1163#    df        Degree of freedom
1164#    x         Value of variate
1165#
1166# Result:
1167#    Cumulative probability of the given value of x to occur
1168#
1169# Note:
1170#    Implemented by Eric Kemp-Benedict, 2007
1171#
1172proc ::math::statistics::cdf-chisquare { df x } {
1173
1174    if {$df <= 0} {
1175        return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive"
1176    }
1177
1178    return [cdf-gamma [expr {0.5*$df}] 0.5 $x]
1179}
1180
1181
1182# cdf-beta --
1183#    Return the cumulative probabilities belonging to a Beta distribution
1184#
1185# Arguments:
1186#    a         First parameter of the Beta distribution
1187#    b         Second parameter of the Beta distribution
1188#    x         Value of variate
1189#
1190# Result:
1191#    Cumulative probability of the given value of x to occur
1192#
1193# Note:
1194#    Implemented by Eric Kemp-Benedict, 2008
1195#
1196proc ::math::statistics::cdf-beta { a b x } {
1197        incompleteBeta $a $b $x
1198}
1199
1200
1201# random-gamma --
1202#    Generate a list of gamma-distributed deviates
1203#
1204# Arguments:
1205#    alpha     Shape parameter
1206#    beta      Rate parameter
1207#    x         Value of variate
1208#
1209# Result:
1210#    List of random values
1211#
1212# Note:
1213#    Implemented by Eric Kemp-Benedict, 2007
1214#    Generate a list of gamma-distributed random deviates
1215#    Use Cheng's envelope rejection method, as documented in:
1216#        Dagpunar, J.S. 2007
1217#           "Simulation and Monte Carlo: With Applications in Finance and MCMC"
1218#
1219proc ::math::statistics::random-gamma {alpha beta number} {
1220    if {$alpha <= 1} {
1221        set lambda $alpha
1222    } else {
1223        set lambda [expr {sqrt(2.0 * $alpha - 1.0)}]
1224    }
1225    set retval {}
1226    for {set i 0} {$i < $number} {incr i} {
1227        while {1} {
1228            # Two rands: one for deviate, one for acceptance/rejection
1229            set r1 [expr {rand()}]
1230            set r2 [expr {rand()}]
1231            # Calculate deviate from enveloping proposal distribution (a Lorenz distribution)
1232            set lnxovera [expr {(1.0/$lambda) * (log(1.0 - $r1) - log($r1))}]
1233            if {![catch {expr {$alpha * exp($lnxovera)}} x]} {
1234                # Apply acceptance criterion
1235                if {log(4.0*$r1*$r1*$r2) < ($alpha - $lambda) * $lnxovera + $alpha - $x} {
1236                    break
1237                }
1238            }
1239        }
1240        lappend retval [expr {1.0 * $x/$beta}]
1241    }
1242
1243    return $retval
1244}
1245
1246
1247# random-poisson --
1248#    Generate a list of Poisson-distributed deviates
1249#
1250# Arguments:
1251#    mu        Mean value
1252#    number    Number of deviates to return
1253#
1254# Result:
1255#    List of random values
1256#
1257# Note:
1258#    Implemented by Eric Kemp-Benedict, 2007
1259#
1260proc ::math::statistics::random-poisson {mu number} {
1261    if {$mu < 20} {
1262        return [Randp_invert $mu $number]
1263    } else {
1264        return [Randp_PTRS $mu $number]
1265    }
1266}
1267
1268
1269# random-chisquare --
1270#    Return a list of random numbers according to a chi square distribution
1271#
1272# Arguments:
1273#    df        Degree of freedom
1274#    number    Number of values to return
1275#
1276# Result:
1277#    List of random numbers
1278#
1279# Note:
1280#    Implemented by Eric Kemp-Benedict, 2007
1281#
1282proc ::math::statistics::random-chisquare { df number } {
1283
1284    if {$df <= 0} {
1285        return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive"
1286    }
1287
1288    return [random-gamma [expr {0.5*$df}] 0.5 $number]
1289}
1290
1291
1292# random-students-t --
1293#    Return a list of random numbers according to a chi square distribution
1294#
1295# Arguments:
1296#    degrees   Degree of freedom
1297#    number    Number of values to return
1298#
1299# Result:
1300#    List of random numbers
1301#
1302# Note:
1303#    Implemented by Eric Kemp-Benedict, 2007
1304#
1305#    Use method from Appendix 4.3 in Dagpunar, J.S.,
1306#    "Simulation and Monte Carlo: With Applications in Finance and MCMC"
1307#
1308proc ::math::statistics::random-students-t { degrees number } {
1309    variable pi
1310
1311    if {$degrees < 1} {
1312        return -code error -errorcode ARG -errorinfo "Degrees of freedom must be at least 1"
1313    }
1314
1315    set dd [expr {double($degrees)}]
1316    set k [expr {2.0/($dd - 1.0)}]
1317
1318    for {set i 0} {$i < $number} {incr i} {
1319        set r1 [expr {rand()}]
1320        if {$degrees > 1} {
1321            set r2 [expr {rand()}]
1322            set c [expr {cos(2.0 * $pi * $r2)}]
1323            lappend retval [expr {sqrt($dd/ \
1324                (1.0/(1.0 - pow($r1, $k)) \
1325                - $c * $c)) * $c}]
1326        } else {
1327            lappend retval [expr {tan(0.5 * $pi * ($r1 + $r1 - 1))}]
1328        }
1329    }
1330    set retval
1331}
1332
1333
1334# random-beta --
1335#    Return a list of random numbers according to a Beta distribution
1336#
1337# Arguments:
1338#    a         First parameter of the Beta distribution
1339#    b         Second parameter of the Beta distribution
1340#    number    Number of values to return
1341#
1342# Result:
1343#    Cumulative probability of the given value of x to occur
1344#
1345# Note:
1346#    Implemented by Eric Kemp-Benedict, 2008
1347#
1348#    Use trick from J.S. Dagpunar, "Simulation and
1349#        Monte Carlo: With Applications in Finance
1350#        and MCMC", Section 4.5
1351#
1352proc ::math::statistics::random-beta { a b number } {
1353    set retval {}
1354    foreach w [random-gamma $a 1.0 $number] y [random-gamma $b 1.0 $number] {
1355        lappend retval [expr {$w / ($w + $y)}]
1356    }
1357    return $retval
1358}
1359
1360
1361# Random_invert --
1362#    Generate a list of Poisson-distributed deviates - method 1
1363#
1364# Arguments:
1365#    mu        Mean value
1366#    number    Number of deviates to return
1367#
1368# Result:
1369#    List of random values
1370#
1371# Note:
1372#    Implemented by Eric Kemp-Benedict, 2007
1373#
1374#    Generate a poisson-distributed random deviate
1375#    Use algorithm in section 4.9 of Dagpunar, J.S,
1376#       "Simulation and Monte Carlo: With Applications
1377#       in Finance and MCMC", pub. 2007 by Wiley
1378#    This inverts the cdf using a "chop-down" search
1379#    to avoid storing an extra intermediate value.
1380#    It is only good for small mu.
1381#
1382proc ::math::statistics::Randp_invert {mu number} {
1383    set W0 [expr {exp(-$mu)}]
1384
1385    set retval {}
1386
1387    for {set i 0} {$i < $number} {incr i} {
1388        set W $W0
1389        set R [expr {rand()}]
1390        set X 0
1391
1392        while {$R > $W} {
1393            set R [expr {$R - $W}]
1394            incr X
1395            set W [expr {$W * $mu/double($X)}]
1396        }
1397
1398        lappend retval $X
1399    }
1400
1401    return $retval
1402}
1403
1404
1405# Random_PTRS --
1406#    Generate a list of Poisson-distributed deviates - method 2
1407#
1408# Arguments:
1409#    mu        Mean value
1410#    number    Number of deviates to return
1411#
1412# Result:
1413#    List of random values
1414#
1415# Note:
1416#    Implemented by Eric Kemp-Benedict, 2007
1417#    Generate a poisson-distributed random deviate
1418#    Use the transformed rejection method with
1419#    squeeze of Hoermann:
1420#        Wolfgang Hoermann, "The Transformed Rejection Method
1421#        for Generating Poisson Random Variables,"
1422#        Preprint #2, Dept of Applied Statistics and
1423#        Data Processing, Wirtshcaftsuniversitaet Wien,
1424#        http://statistik.wu-wien.ac.at/
1425#    This method works for mu >= 10.
1426#
1427proc ::math::statistics::Randp_PTRS {mu number} {
1428    set smu [expr {sqrt($mu)}]
1429    set b [expr {0.931 + 2.53 * $smu}]
1430    set a [expr {-0.059 + 0.02483 * $b}]
1431    set vr [expr {0.9277 - 3.6224/($b - 2.0)}]
1432    set invalpha [expr {1.1239 + 1.1328/($b - 3.4)}]
1433    set lnmu [expr {log($mu)}]
1434
1435    set retval {}
1436    for {set i 0} {$i < $number} {incr i} {
1437        while 1 {
1438            set U [expr {rand() - 0.5}]
1439            set V [expr {rand()}]
1440
1441            set us [expr {0.5 - abs($U)}]
1442            set k [expr {int(floor((2.0 * $a/$us + $b) * $U + $mu + 0.43))}]
1443
1444            if {$us >= 0.07 && $V <= $vr} {
1445                break
1446            }
1447
1448            if {$k < 0} {
1449                continue
1450            }
1451
1452            if {$us < 0.013 && $V > $us} {
1453                continue
1454            }
1455
1456            set kp1 [expr {$k+1}]
1457            if {log($V * $invalpha / ($a/($us * $us) + $b)) <= -$mu + $k * $lnmu - [::math::ln_Gamma $kp1]} {
1458                break
1459            }
1460        }
1461
1462        lappend retval $k
1463    }
1464    return $retval
1465}
1466
1467
1468#
1469# Simple numerical tests
1470#
1471if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } {
1472
1473    #
1474    # Apparent accuracy: at least one digit more than the ones in the
1475    # given numbers
1476    #
1477    puts "Normal distribution - two-tailed"
1478    foreach z    {4.417 3.891 3.291 2.576 2.241 1.960 1.645 1.150 0.674
1479    0.319 0.126 0.063 0.0125} \
1480	    pexp {1.e-5 1.e-4 1.e-3 1.e-2 0.025 0.050 0.100 0.250 0.500
1481    0.750 0.900 0.950 0.990 } {
1482	set prob [::math::statistics::Cdf-toms322 1 5000 [expr {$z*$z}]]
1483	puts "$z - $pexp - [expr {1.0-$prob}]"
1484    }
1485
1486    puts "Normal distribution (inverted; one-tailed)"
1487    foreach p {0.001 0.01 0.1 0.25 0.5 0.75 0.9 0.99 0.999} {
1488	puts "$p - [::math::statistics::Inverse-cdf-normal 0.0 1.0 $p]"
1489    }
1490    puts "Normal random variables"
1491    set rndvars [::math::statistics::random-normal 1.0 2.0 20]
1492    puts $rndvars
1493    puts "Normal uniform variables"
1494    set rndvars [::math::statistics::random-uniform 1.0 2.0 20]
1495    puts $rndvars
1496    puts "Normal exponential variables"
1497    set rndvars [::math::statistics::random-exponential 2.0 20]
1498    puts $rndvars
1499}
1500