1# pdf_stat.tcl -- 2# 3# Collection of procedures for evaluating probability and 4# cumulative density functions 5# Part of "math::statistics" 6# 7# january 2008: added procedures by Eric Kemp Benedict for 8# Gamma, Poisson and t-distributed variables. 9# Replacing some older versions. 10# 11 12# ::math::statistics -- 13# Namespace holding the procedures and variables 14# 15namespace eval ::math::statistics { 16 17 namespace export pdf-normal pdf-uniform \ 18 pdf-exponential \ 19 cdf-normal cdf-uniform \ 20 cdf-exponential \ 21 cdf-students-t \ 22 random-normal random-uniform \ 23 random-exponential \ 24 histogram-uniform \ 25 pdf-gamma pdf-poisson pdf-chisquare pdf-students-t pdf-beta \ 26 cdf-gamma cdf-poisson cdf-chisquare cdf-beta \ 27 random-gamma random-poisson random-chisquare random-students-t random-beta \ 28 incompleteGamma incompleteBeta 29 30 variable cdf_normal_prob {} 31 variable cdf_normal_x {} 32 variable cdf_toms322_cached {} 33 variable initialised_cdf 0 34 variable twopi [expr {2.0*acos(-1.0)}] 35 variable pi [expr {acos(-1.0)}] 36} 37 38 39# pdf-normal -- 40# Return the probabilities belonging to a normal distribution 41# 42# Arguments: 43# mean Mean of the distribution 44# stdev Standard deviation 45# x Value for which the probability must be determined 46# 47# Result: 48# Probability of value x under the given distribution 49# 50proc ::math::statistics::pdf-normal { mean stdev x } { 51 variable NEGSTDEV 52 variable factorNormalPdf 53 54 if { $stdev <= 0.0 } { 55 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV 56 } 57 58 set xn [expr {($x-$mean)/$stdev}] 59 set prob [expr {exp(-$xn*$xn/2.0)/$stdev/$factorNormalPdf}] 60 61 return $prob 62} 63 64 65# pdf-uniform -- 66# Return the probabilities belonging to a uniform distribution 67# (parameters as minimum/maximum) 68# 69# Arguments: 70# pmin Minimum of the distribution 71# pmax Maximum of the distribution 72# x Value for which the probability must be determined 73# 74# Result: 75# Probability of value x under the given distribution 76# 77proc ::math::statistics::pdf-uniform { pmin pmax x } { 78 79 if { $pmin >= $pmax } { 80 return -code error -errorcode ARG \ 81 -errorinfo "Wrong order or zero range" \ 82 "Wrong order or zero range" 83 } 84 85 set prob [expr {1.0/($pmax-$min)}] 86 87 if { $x < $pmin || $x > $pmax } { return 0.0 } 88 89 return $prob 90} 91 92 93# pdf-exponential -- 94# Return the probabilities belonging to an exponential 95# distribution 96# 97# Arguments: 98# mean Mean of the distribution 99# x Value for which the probability must be determined 100# 101# Result: 102# Probability of value x under the given distribution 103# 104proc ::math::statistics::pdf-exponential { mean x } { 105 variable NEGSTDEV 106 variable OUTOFRANGE 107 108 if { $stdev <= 0.0 } { 109 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV 110 } 111 if { $mean <= 0.0 } { 112 return -code error -errorcode ARG -errorinfo $OUTOFRANGE \ 113 "$OUTOFRANGE: mean must be positive" 114 } 115 116 if { $x < 0.0 } { return 0.0 } 117 if { $x > 700.0*$mean } { return 0.0 } 118 119 set prob [expr {exp(-$x/$mean)/$mean}] 120 121 return $prob 122} 123 124 125# cdf-normal -- 126# Return the cumulative probability belonging to a normal distribution 127# 128# Arguments: 129# mean Mean of the distribution 130# stdev Standard deviation 131# x Value for which the probability must be determined 132# 133# Result: 134# Cumulative probability of value x under the given distribution 135# 136proc ::math::statistics::cdf-normal { mean stdev x } { 137 variable NEGSTDEV 138 139 if { $stdev <= 0.0 } { 140 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV 141 } 142 143 set xn [expr {($x-$mean)/$stdev}] 144 set prob1 [Cdf-toms322 1 5000 [expr {$xn*$xn}]] 145 if { $xn > 0.0 } { 146 set prob [expr {0.5+0.5*$prob1}] 147 } else { 148 set prob [expr {0.5-0.5*$prob1}] 149 } 150 151 return $prob 152} 153 154 155# cdf-students-t -- 156# Return the cumulative probability belonging to the 157# Student's t distribution 158# 159# Arguments: 160# degrees Number of degrees of freedom 161# x Value for which the probability must be determined 162# 163# Result: 164# Cumulative probability of value x under the given distribution 165# 166proc ::math::statistics::cdf-students-t { degrees x } { 167 168 if { $degrees <= 0 } { 169 return -code error -errorcode ARG -errorinfo \ 170 "Number of degrees of freedom must be positive" \ 171 "Number of degrees of freedom must be positive" 172 } 173 174 set prob1 [Cdf-toms322 1 $degrees [expr {$x*$x}]] 175 set prob [expr {0.5+0.5*$prob1}] 176 177 return $prob 178} 179 180 181# cdf-uniform -- 182# Return the cumulative probabilities belonging to a uniform 183# distribution (parameters as minimum/maximum) 184# 185# Arguments: 186# pmin Minimum of the distribution 187# pmax Maximum of the distribution 188# x Value for which the probability must be determined 189# 190# Result: 191# Cumulative probability of value x under the given distribution 192# 193proc ::math::statistics::cdf-uniform { pmin pmax x } { 194 195 if { $pmin >= $pmax } { 196 return -code error -errorcode ARG \ 197 -errorinfo "Wrong order or zero range" \ 198 } 199 200 set prob [expr {($x-$pmin)/($pmax-$min)}] 201 202 if { $x < $pmin } { return 0.0 } 203 if { $x > $pmax } { return 1.0 } 204 205 return $prob 206} 207 208 209# cdf-exponential -- 210# Return the cumulative probabilities belonging to an exponential 211# distribution 212# 213# Arguments: 214# mean Mean of the distribution 215# x Value for which the probability must be determined 216# 217# Result: 218# Cumulative probability of value x under the given distribution 219# 220proc ::math::statistics::cdf-exponential { mean x } { 221 variable NEGSTDEV 222 variable OUTOFRANGE 223 224 if { $mean <= 0.0 } { 225 return -code error -errorcode ARG -errorinfo $OUTOFRANGE \ 226 "$OUTOFRANGE: mean must be positive" 227 } 228 229 if { $x < 0.0 } { return 0.0 } 230 if { $x > 30.0*$mean } { return 1.0 } 231 232 set prob [expr {1.0-exp(-$x/$mean)}] 233 234 return $prob 235} 236 237 238# Inverse-cdf-uniform -- 239# Return the argument belonging to the cumulative probability 240# for a uniform distribution (parameters as minimum/maximum) 241# 242# Arguments: 243# pmin Minimum of the distribution 244# pmax Maximum of the distribution 245# prob Cumulative probability for which the "x" value must be 246# determined 247# 248# Result: 249# X value that gives the cumulative probability under the 250# given distribution 251# 252proc ::math::statistics::Inverse-cdf-uniform { pmin pmax prob } { 253 254 if {0} { 255 if { $pmin >= $pmax } { 256 return -code error -errorcode ARG \ 257 -errorinfo "Wrong order or zero range" \ 258 "Wrong order or zero range" 259 } 260 } 261 262 set x [expr {$pmin+$prob*($pmax-$pmin)}] 263 264 if { $x < $pmin } { return $pmin } 265 if { $x > $pmax } { return $pmax } 266 267 return $x 268} 269 270 271# Inverse-cdf-exponential -- 272# Return the argument belonging to the cumulative probability 273# for an exponential distribution 274# 275# Arguments: 276# mean Mean of the distribution 277# prob Cumulative probability for which the "x" value must be 278# determined 279# 280# Result: 281# X value that gives the cumulative probability under the 282# given distribution 283# 284proc ::math::statistics::Inverse-cdf-exponential { mean prob } { 285 286 if {0} { 287 if { $mean <= 0.0 } { 288 return -code error -errorcode ARG \ 289 -errorinfo "Mean must be positive" \ 290 "Mean must be positive" 291 } 292 } 293 294 set x [expr {-$mean*log(1.0-$prob)}] 295 296 return $x 297} 298 299 300# Inverse-cdf-normal -- 301# Return the argument belonging to the cumulative probability 302# for a normal distribution 303# 304# Arguments: 305# mean Mean of the distribution 306# stdev Standard deviation of the distribution 307# prob Cumulative probability for which the "x" value must be 308# determined 309# 310# Result: 311# X value that gives the cumulative probability under the 312# given distribution 313# 314proc ::math::statistics::Inverse-cdf-normal { mean stdev prob } { 315 variable cdf_normal_prob 316 variable cdf_normal_x 317 318 variable initialised_cdf 319 if { $initialised_cdf == 0 } { 320 Initialise-cdf-normal 321 } 322 323 # Look for the proper probability level first, 324 # then interpolate 325 # 326 # Note: the numerical data are connected to the length of 327 # the lists - see Initialise-cdf-normal 328 # 329 set size 32 330 set idx 64 331 for { set i 0 } { $i <= 7 } { incr i } { 332 set upper [lindex $cdf_normal_prob $idx] 333 if { $prob > $upper } { 334 set idx [expr {$idx+$size}] 335 } else { 336 set idx [expr {$idx-$size}] 337 } 338 set size [expr {$size/2}] 339 } 340 # 341 # We have found a value that is close to the one we need, 342 # now find the enclosing interval 343 # 344 if { $upper < $prob } { 345 incr idx 346 } 347 set p1 [lindex $cdf_normal_prob [expr {$idx-1}]] 348 set p2 [lindex $cdf_normal_prob $idx] 349 set x1 [lindex $cdf_normal_x [expr {$idx-1}]] 350 set x2 [lindex $cdf_normal_x $idx ] 351 352 set x [expr {$x1+($x2-$x1)*($prob-$p1)/($p2-$p1)}] 353 354 return [expr {$mean+$stdev*$x}] 355} 356 357 358# Initialise-cdf-normal -- 359# Initialise the private data for the normal cdf 360# 361# Arguments: 362# None 363# Result: 364# None 365# Side effect: 366# Variable cdf_normal_prob and cdf_normal_x are filled 367# so that we can use these as a look-up table 368# 369proc ::math::statistics::Initialise-cdf-normal { } { 370 variable cdf_normal_prob 371 variable cdf_normal_x 372 373 variable initialised_cdf 374 set initialised_cdf 1 375 376 set dx [expr {10.0/128.0}] 377 378 set cdf_normal_prob 0.5 379 set cdf_normal_x 0.0 380 for { set i 1 } { $i <= 64 } { incr i } { 381 set x [expr {$i*$dx}] 382 if { $x != 0.0 } { 383 set prob [Cdf-toms322 1 5000 [expr {$x*$x}]] 384 } else { 385 set prob 0.0 386 } 387 388 set cdf_normal_x [concat [expr {-$x}] $cdf_normal_x $x] 389 set cdf_normal_prob \ 390 [concat [expr {0.5-0.5*$prob}] $cdf_normal_prob \ 391 [expr {0.5+0.5*$prob}]] 392 } 393} 394 395 396# random-uniform -- 397# Return a list of random numbers satisfying a uniform 398# distribution (parameters as minimum/maximum) 399# 400# Arguments: 401# pmin Minimum of the distribution 402# pmax Maximum of the distribution 403# number Number of values to generate 404# 405# Result: 406# List of random numbers 407# 408proc ::math::statistics::random-uniform { pmin pmax number } { 409 410 if { $pmin >= $pmax } { 411 return -code error -errorcode ARG \ 412 -errorinfo "Wrong order or zero range" \ 413 "Wrong order or zero range" 414 } 415 416 set result {} 417 for { set i 0 } {$i < $number } { incr i } { 418 lappend result [Inverse-cdf-uniform $pmin $pmax [expr {rand()}]] 419 } 420 421 return $result 422} 423 424 425# random-exponential -- 426# Return a list of random numbers satisfying an exponential 427# distribution 428# 429# Arguments: 430# mean Mean of the distribution 431# number Number of values to generate 432# 433# Result: 434# List of random numbers 435# 436proc ::math::statistics::random-exponential { mean number } { 437 438 if { $mean <= 0.0 } { 439 return -code error -errorcode ARG \ 440 -errorinfo "Mean must be positive" \ 441 "Mean must be positive" 442 } 443 444 set result {} 445 for { set i 0 } {$i < $number } { incr i } { 446 lappend result [Inverse-cdf-exponential $mean [expr {rand()}]] 447 } 448 449 return $result 450} 451 452 453# random-normal -- 454# Return a list of random numbers satisfying a normal 455# distribution 456# 457# Arguments: 458# mean Mean of the distribution 459# stdev Standard deviation of the distribution 460# number Number of values to generate 461# 462# Result: 463# List of random numbers 464# 465# Note: 466# This version uses the Box-Muller transformation, 467# a quick and robust method for generating normally- 468# distributed numbers. 469# 470proc ::math::statistics::random-normal { mean stdev number } { 471 variable twopi 472 473 if { $stdev <= 0.0 } { 474 return -code error -errorcode ARG \ 475 -errorinfo "Standard deviation must be positive" \ 476 "Standard deviation must be positive" 477 } 478 479# set result {} 480# for { set i 0 } {$i < $number } { incr i } { 481# lappend result [Inverse-cdf-normal $mean $stdev [expr {rand()}]] 482# } 483 484 set result {} 485 486 for { set i 0 } {$i < $number } { incr i 2 } { 487 set angle [expr {$twopi * rand()}] 488 set rad [expr {sqrt(-2.0*log(rand()))}] 489 set xrand [expr {$rad * cos($angle)}] 490 set yrand [expr {$rad * sin($angle)}] 491 lappend result [expr {$mean + $stdev * $xrand}] 492 if { $i < $number-1 } { 493 lappend result [expr {$mean + $stdev * $yrand}] 494 } 495 } 496 497 return $result 498} 499 500 501# Cdf-toms322 -- 502# Calculate the cumulative density function for several distributions 503# according to TOMS322 504# 505# Arguments: 506# m First number of degrees of freedom 507# n Second number of degrees of freedom 508# x Value for which the cdf must be calculated 509# 510# Result: 511# Cumulatve density at x - details depend on distribution 512# 513# Notes: 514# F-ratios: 515# m - degrees of freedom for numerator 516# n - degrees of freedom for denominator 517# x - F-ratio 518# Student's t (two-tailed): 519# m - 1 520# n - degrees of freedom 521# x - square of t 522# Normal deviate (two-tailed): 523# m - 1 524# n - 5000 525# x - square of deviate 526# Chi-square: 527# m - degrees of freedom 528# n - 5000 529# x - chi-square/m 530# The original code can be found at <http://www.netlib.org> 531# 532proc ::math::statistics::Cdf-toms322 { m n x } { 533 if { $x == 0.0 } { 534 return 0.0 535 } 536 set m [expr {$m < 300? int($m) : 300}] 537 set n [expr {$n < 5000? int($n) : 5000}] 538 if { $m < 1 || $n < 1 } { 539 return -code error -errorcode ARG \ 540 -errorinfo "Arguments m anf n must be greater/equal 1" 541 } 542 543 set a [expr {2*($m/2)-$m+2}] 544 set b [expr {2*($n/2)-$n+2}] 545 set w [expr {$x*double($m)/double($n)}] 546 set z [expr {1.0/(1.0+$w)}] 547 548 if { $a == 1 } { 549 if { $b == 1 } { 550 set p [expr {sqrt($w)}] 551 set y 0.3183098862 552 set d [expr {$y*$z/$p}] 553 set p [expr {2.0*$y*atan($p)}] 554 } else { 555 set p [expr {sqrt($w*$z)}] 556 set d [expr {$p*$z/(2.0*$w)}] 557 } 558 } else { 559 if { $b == 1 } { 560 set p [expr {sqrt($z)}] 561 set d [expr {$z*$p/2.0}] 562 set p [expr {1.0-$p}] 563 } else { 564 set d [expr {$z*$z}] 565 set p [expr {$z*$w}] 566 } 567 } 568 569 set y [expr {2.0*$w/$z}] 570 571 if { $a == 1 } { 572 for { set j [expr {$b+2}] } { $j <= $n } { incr j 2 } { 573 set d [expr {(1.0+double($a)/double($j-2)) * $d*$z}] 574 set p [expr {$p+$d*$y/double($j-1)}] 575 } 576 } else { 577 set power [expr {($n-1)/2}] 578 set zk [expr {pow($z,$power)}] 579 set d [expr {($d*$zk*$n)/$b}] 580 set p [expr {$p*$zk + $w*$z * ($zk-1.0)/($z-1.0)}] 581 } 582 583 set y [expr {$w*$z}] 584 set z [expr {2.0/$z}] 585 set b [expr {$n-2}] 586 587 for { set i [expr {$a+2}] } { $i <= $m } { incr i 2 } { 588 set j [expr {$i+$b}] 589 set d [expr {$y*$d*double($j)/double($i-2)}] 590 set p [expr {$p-$z*$d/double($j)}] 591 } 592 set prob $p 593 if { $prob < 0.0 } { set prob 0.0 } 594 if { $prob > 1.0 } { set prob 1.0 } 595 596 return $prob 597} 598 599 600# Inverse-cdf-toms322 -- 601# Return the argument belonging to the cumulative probability 602# for an F, chi-square or t distribution 603# 604# Arguments: 605# m First number of degrees of freedom 606# n Second number of degrees of freedom 607# prob Cumulative probability for which the "x" value must be 608# determined 609# 610# Result: 611# X value that gives the cumulative probability under the 612# given distribution 613# 614# Note: 615# See the procedure Cdf-toms322 for more details 616# 617proc ::math::statistics::Inverse-cdf-toms322 { m n prob } { 618 variable cdf_toms322_cached 619 variable OUTOFRANGE 620 621 if { $prob <= 0 || $prob >= 1 } { 622 return -code error -errorcode $OUTOFRANGE $OUTOFRANGE 623 } 624 625 # Is the combination in cache? Then we can simply rely 626 # on that 627 # 628 foreach {m1 n1 prob1 x1} $cdf_toms322_cached { 629 if { $m1 == $m && $n1 == $n && $prob1 == $prob } { 630 return $x1 631 } 632 } 633 634 # 635 # Otherwise first find a value of x for which Cdf(x) exceeds prob 636 # 637 set x1 1.0 638 set dx1 1.0 639 while { [Cdf-toms322 $m $n $x1] < $prob } { 640 set x1 [expr {$x1+$dx1}] 641 set dx1 [expr {2.0*$dx1}] 642 } 643 644 # 645 # Now, look closer 646 # 647 while { $dx1 > 0.0001 } { 648 set p1 [Cdf-toms322 $m $n $x1] 649 if { $p1 > $prob } { 650 set x1 [expr {$x1-$dx1}] 651 } else { 652 set x1 [expr {$x1+$dx1}] 653 } 654 set dx1 [expr {$dx1/2.0}] 655 } 656 657 # 658 # Cache the result 659 # 660 set last end 661 if { [llength $cdf_toms322_cached] > 27 } { 662 set last 26 663 } 664 set cdf_toms322_cached \ 665 [concat [list $m $n $prob $x1] [lrange $cdf_toms322_cached 0 $last]] 666 667 return $x1 668} 669 670 671# HistogramMake -- 672# Distribute the "observations" according to the cdf 673# 674# Arguments: 675# cdf-values Values for the cdf (relative number of observations) 676# number Total number of "observations" in the histogram 677# 678# Result: 679# List of numbers, distributed over the buckets 680# 681proc ::math::statistics::HistogramMake { cdf-values number } { 682 683 set assigned 0 684 set result {} 685 set residue 0.0 686 foreach cdfv $cdf-values { 687 set sum [expr {$number*($cdfv + $residue)}] 688 set bucket [expr {int($sum)}] 689 set residue [expr {$sum-$bucket}] 690 set assigned [expr {$assigned-$bucket}] 691 lappend result $bucket 692 } 693 set remaining [expr {$number-$assigned}] 694 if { $remaining > 0 } { 695 lappend result $remaining 696 } else { 697 lappend result 0 698 } 699 700 return $result 701} 702 703 704# histogram-uniform -- 705# Return the expected histogram for a uniform distribution 706# 707# Arguments: 708# min Minimum the distribution 709# max Maximum the distribution 710# limits upper limits for the histogram buckets 711# number Total number of "observations" in the histogram 712# 713# Result: 714# List of expected number of observations 715# 716proc ::math::statistics::histogram-uniform { min max limits number } { 717 if { $min >= $max } { 718 return -code error -errorcode ARG \ 719 -errorinfo "Wrong order or zero range" \ 720 "Wrong order or zero range" 721 } 722 723 set cdf_result {} 724 foreach limit $limits { 725 lappend cdf_result [cdf-uniform $min $max $limit] 726 } 727 728 return [HistogramMake $cdf_result $number] 729} 730 731 732# incompleteGamma -- 733# Evaluate the incomplete Gamma function Gamma(p,x) 734# 735# Arguments: 736# x X-value 737# p Parameter 738# 739# Result: 740# Value of Gamma(p,x) 741# 742# Note: 743# Implementation by Eric K. Benedict (2007) 744# Adapted from Fortran code in the Royal Statistical Society's StatLib 745# library (http://lib.stat.cmu.edu/apstat/), algorithm AS 32 (with 746# some modifications from AS 239) 747# 748# Calculate normalized incomplete gamma function 749# 750# 1 / x p-1 751# P(p,x) = -------- | dt exp(-t) * t 752# Gamma(p) / 0 753# 754# Tested some values against R's pgamma function 755# 756proc ::math::statistics::incompleteGamma {x p {tol 1.0e-9}} { 757 set overflow 1.0e37 758 759 if {$x < 0} { 760 return -code error -errorcode ARG -errorinfo "x must be positive" 761 } 762 if {$p <= 0} { 763 return -code error -errorcode ARG -errorinfo "p must be greater than or equal to zero" 764 } 765 766 # If x is zero, incGamma is zero 767 if {$x == 0.0} { 768 return 0.0 769 } 770 771 # Use normal approx is p > 1000 772 if {$p > 1000} { 773 set pn1 [expr {3.0 * sqrt($p) * (pow(1.0 * $x/$p, 1.0/3.0) + 1.0/(9.0 * $p) - 1.0)}] 774 # pnorm is not robust enough for this calculation (overflows); cdf-normal could also be used 775 return [::math::statistics::pnorm_quicker $pn1] 776 } 777 778 # If x is extremely large compared to a (and now know p < 1000), then return 1.0 779 if {$x > 1.e8} { 780 return 1.0 781 } 782 783 set factor [expr {exp($p * log($x) -$x - [::math::ln_Gamma $p])}] 784 785 # Use series expansion (first option) or continued fraction 786 if {$x <= 1.0 || $x < $p} { 787 set gin 1.0 788 set term 1.0 789 set rn $p 790 while {1} { 791 set rn [expr {$rn + 1.0}] 792 set term [expr {1.0 * $term * $x/$rn}] 793 set gin [expr {$gin + $term}] 794 if {$term < $tol} { 795 set gin [expr {1.0 * $gin * $factor/$p}] 796 break 797 } 798 } 799 } else { 800 set a [expr {1.0 - $p}] 801 set b [expr {$a + $x + 1.0}] 802 set term 0.0 803 set pn1 1.0 804 set pn2 $x 805 set pn3 [expr {$x + 1.0}] 806 set pn4 [expr {$x * $b}] 807 set gin [expr {1.0 * $pn3/$pn4}] 808 while {1} { 809 set a [expr {$a + 1.0}] 810 set b [expr {$b + 2.0}] 811 set term [expr {$term + 1.0}] 812 set an [expr {$a * $term}] 813 set pn5 [expr {$b * $pn3 - $an * $pn1}] 814 set pn6 [expr {$b * $pn4 - $an * $pn2}] 815 if {$pn6 != 0.0} { 816 set rn [expr {1.0 * $pn5/$pn6}] 817 set dif [expr {abs($gin - $rn)}] 818 if {$dif <= $tol && $dif <= $tol * $rn} { 819 break 820 } 821 set gin $rn 822 } 823 set pn1 $pn3 824 set pn2 $pn4 825 set pn3 $pn5 826 set pn4 $pn6 827 # Too big? Rescale 828 if {abs($pn5) >= $overflow} { 829 set pn1 [expr {$pn1 / $overflow}] 830 set pn2 [expr {$pn2 / $overflow}] 831 set pn3 [expr {$pn3 / $overflow}] 832 set pn4 [expr {$pn4 / $overflow}] 833 } 834 } 835 set gin [expr {1.0 - $factor * $gin}] 836 } 837 838 return $gin 839 840} 841 842 843# pdf-gamma -- 844# Return the probabilities belonging to a gamma distribution 845# 846# Arguments: 847# alpha Shape parameter 848# beta Rate parameter 849# x Value of variate 850# 851# Result: 852# Probability density of the given value of x to occur 853# 854# Note: 855# Implemented by Eric Kemp-Benedict, 2007 856# 857# This uses the following parameterization for the gamma: 858# GammaDist(x) = beta * (beta * x)^(alpha-1) e^(-beta * x) / GammaFunc(alpha) 859# Here, alpha is the shape parameter, and beta is the rate parameter 860# Alternatively, a "scale parameter" theta = 1/beta is sometimes used 861# 862proc ::math::statistics::pdf-gamma { alpha beta x } { 863 864 if {$beta < 0} { 865 return -code error -errorcode ARG -errorinfo "Rate parameter 'beta' must be positive" 866 } 867 if {$x < 0.0} { 868 return 0.0 869 } 870 871 set prod [expr {1.0 * $x * $beta}] 872 set Galpha [expr {exp([::math::ln_Gamma $alpha])}] 873 874 expr {(1.0 * $beta/$Galpha) * pow($prod, ($alpha - 1.0)) * exp(-$prod)} 875} 876 877 878# pdf-poisson -- 879# Return the probabilities belonging to a Poisson 880# distribution 881# 882# Arguments: 883# mu Mean of the distribution 884# k Number of occurrences 885# 886# Result: 887# Probability of k occurrences under the given distribution 888# 889# Note: 890# Implemented by Eric Kemp-Benedict, 2007 891# 892proc ::math::statistics::pdf-poisson { mu k } { 893 set intk [expr {int($k)}] 894 expr {exp(-$mu + floor($k) * log($mu) - [::math::ln_Gamma [incr intk]])} 895} 896 897 898# pdf-chisquare -- 899# Return the probabilities belonging to a chi square distribution 900# 901# Arguments: 902# df Degree of freedom 903# x Value of variate 904# 905# Result: 906# Probability density of the given value of x to occur 907# 908# Note: 909# Implemented by Eric Kemp-Benedict, 2007 910# 911proc ::math::statistics::pdf-chisquare { df x } { 912 913 if {$df <= 0} { 914 return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive" 915 } 916 917 return [pdf-gamma [expr {0.5*$df}] 0.5 $x] 918} 919 920 921# pdf-students-t -- 922# Return the probabilities belonging to a Student's t distribution 923# 924# Arguments: 925# degrees Degree of freedom 926# x Value of variate 927# 928# Result: 929# Probability density of the given value of x to occur 930# 931# Note: 932# Implemented by Eric Kemp-Benedict, 2007 933# 934proc ::math::statistics::pdf-students-t { degrees x } { 935 variable pi 936 937 if {$degrees <= 0} { 938 return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive" 939 } 940 941 set nplus1over2 [expr {0.5 * ($degrees + 1)}] 942 set f1 [expr {exp([::math::ln_Gamma $nplus1over2] - \ 943 [::math::ln_Gamma [expr {$nplus1over2 - 0.5}]])}] 944 set f2 [expr {1.0/sqrt($degrees * $pi)}] 945 946 expr {$f1 * $f2 * pow(1.0 + $x * $x/double($degrees), -$nplus1over2)} 947 948} 949 950 951# pdf-beta -- 952# Return the probabilities belonging to a Beta distribution 953# 954# Arguments: 955# a First parameter of the Beta distribution 956# b Second parameter of the Beta distribution 957# x Value of variate 958# 959# Result: 960# Probability density of the given value of x to occur 961# 962# Note: 963# Implemented by Eric Kemp-Benedict, 2008 964# 965proc ::math::statistics::pdf-beta { a b x } { 966 if {$x < 0.0 || $x > 1.0} { 967 return -code error "Value out of range in Beta density: x = $x, not in \[0, 1\]" 968 } 969 if {$a <= 0.0} { 970 return -code error "Value out of range in Beta density: a = $a, must be > 0" 971 } 972 if {$b <= 0.0} { 973 return -code error "Value out of range in Beta density: b = $b, must be > 0" 974 } 975 # 976 # Corner cases ... need to check these! 977 # 978 if {$x == 0.0} { 979 return 0.0 980 } 981 if {$x == 1.0} { 982 return 0.0 983 } 984 set aplusb [expr {$a + $b}] 985 set term1 [expr {[::math::ln_Gamma $aplusb]- [::math::ln_Gamma $a] - [::math::ln_Gamma $b]}] 986 set term2 [expr {($a - 1.0) * log($x) + ($b - 1.0) * log(1.0 - $x)}] 987 988 set term [expr {$term1 + $term2}] 989 if { $term > -200.0 } { 990 return [expr {exp($term)}] 991 } else { 992 return 0.0 993 } 994} 995 996 997# incompleteBeta -- 998# Evaluate the incomplete Beta integral 999# 1000# Arguments: 1001# a First parameter of the Beta integral 1002# b Second parameter of the Beta integral 1003# x Integration limit 1004# tol (Optional) error tolerance (defaults to 1.0e-9) 1005# 1006# Result: 1007# Probability density of the given value of x to occur 1008# 1009# Note: 1010# Implemented by Eric Kemp-Benedict, 2008 1011# 1012proc ::math::statistics::incompleteBeta {a b x {tol 1.0e-9}} { 1013 if {$x < 0.0 || $x > 1.0} { 1014 return -code error "Value out of range in incomplete Beta function: x = $x, not in \[0, 1\]" 1015 } 1016 if {$a <= 0.0} { 1017 return -code error "Value out of range in incomplete Beta function: a = $a, must be > 0" 1018 } 1019 if {$b <= 0.0} { 1020 return -code error "Value out of range in incomplete Beta function: b = $b, must be > 0" 1021 } 1022 1023 if {$x < $tol} { 1024 return 0.0 1025 } 1026 if {$x > 1.0 - $tol} { 1027 return 1.0 1028 } 1029 1030 # Rearrange if necessary to get continued fraction to behave 1031 if {$x < 0.5} { 1032 return [beta_cont_frac $a $b $x $tol] 1033 } else { 1034 set z [beta_cont_frac $b $a [expr {1.0 - $x}] $tol] 1035 return [expr {1.0 - $z}] 1036 } 1037} 1038 1039 1040# beta_cont_frac -- 1041# Evaluate the incomplete Beta integral via a continued fraction 1042# 1043# Arguments: 1044# a First parameter of the Beta integral 1045# b Second parameter of the Beta integral 1046# x Integration limit 1047# tol (Optional) error tolerance (defaults to 1.0e-9) 1048# 1049# Result: 1050# Probability density of the given value of x to occur 1051# 1052# Note: 1053# Implemented by Eric Kemp-Benedict, 2008 1054# 1055# Continued fraction for Ix(a,b) 1056# Abramowitz & Stegun 26.5.9 1057# 1058proc ::math::statistics::beta_cont_frac {a b x {tol 1.0e-9}} { 1059 set max_iter 512 1060 1061 set aplusb [expr {$a + $b}] 1062 set amin1 [expr {$a - 1}] 1063 set lnGapb [::math::ln_Gamma $aplusb] 1064 set term1 [expr {$lnGapb- [::math::ln_Gamma $a] - [::math::ln_Gamma $b]}] 1065 set term2 [expr {$a * log($x) + ($b - 1.0) * log(1.0 - $x)}] 1066 set pref [expr {exp($term1 + $term2)/$a}] 1067 1068 set z [expr {$x / (1.0 - $x)}] 1069 1070 set v 1.0 1071 set h_1 1.0 1072 set h_2 0.0 1073 set k_1 1.0 1074 set k_2 1.0 1075 1076 for {set m 1} {$m < $max_iter} {incr m} { 1077 set f1 [expr {$amin1 + 2 * $m}] 1078 set e2m [expr {-$z * double(($amin1 + $m) * ($b - $m))/ \ 1079 double(($f1 - 1) * $f1)}] 1080 set e2mp1 [expr {$z * double($m * ($aplusb - 1 + $m)) / \ 1081 double($f1 * ($f1 + 1))}] 1082 set h_2m [expr {$h_1 + $e2m * $h_2}] 1083 set k_2m [expr {$k_1 + $e2m * $k_2}] 1084 1085 set h_2 $h_2m 1086 set k_2 $k_2m 1087 1088 set h_1 [expr {$h_2m + $e2mp1 * $h_1}] 1089 set k_1 [expr {$k_2m + $e2mp1 * $k_1}] 1090 1091 set vprime [expr {$h_1/$k_1}] 1092 1093 if {abs($v - $vprime) < $tol} { 1094 break 1095 } 1096 1097 set v $vprime 1098 1099 } 1100 1101 if {$m == $max_iter} { 1102 return -code error "beta_cont_frac: Exceeded maximum number of iterations" 1103 } 1104 1105 set retval [expr {$pref * $v}] 1106 1107 # Because of imprecision in underlying Tcl calculations, may fall out of bounds 1108 if {$retval < 0.0} { 1109 set retval 0.0 1110 } elseif {$retval > 1.0} { 1111 set retval 1.0 1112 } 1113 1114 return $retval 1115} 1116 1117 1118# cdf-gamma -- 1119# Return the cumulative probabilities belonging to a gamma distribution 1120# 1121# Arguments: 1122# alpha Shape parameter 1123# beta Rate parameter 1124# x Value of variate 1125# 1126# Result: 1127# Cumulative probability of the given value of x to occur 1128# 1129# Note: 1130# Implemented by Eric Kemp-Benedict, 2007 1131# 1132proc ::math::statistics::cdf-gamma { alpha beta x } { 1133 if { $x <= 0 } { 1134 return 0.0 1135 } 1136 incompleteGamma [expr {$beta * $x}] $alpha 1137} 1138 1139 1140# cdf-poisson -- 1141# Return the cumulative probabilities belonging to a Poisson 1142# distribution 1143# 1144# Arguments: 1145# mu Mean of the distribution 1146# x Number of occurrences 1147# 1148# Result: 1149# Probability of k occurrences under the given distribution 1150# 1151# Note: 1152# Implemented by Eric Kemp-Benedict, 2007 1153# 1154proc ::math::statistics::cdf-poisson { mu x } { 1155 return [expr {1.0 - [incompleteGamma $mu [expr {floor($x) + 1}]]}] 1156} 1157 1158 1159# cdf-chisquare -- 1160# Return the cumulative probabilities belonging to a chi square distribution 1161# 1162# Arguments: 1163# df Degree of freedom 1164# x Value of variate 1165# 1166# Result: 1167# Cumulative probability of the given value of x to occur 1168# 1169# Note: 1170# Implemented by Eric Kemp-Benedict, 2007 1171# 1172proc ::math::statistics::cdf-chisquare { df x } { 1173 1174 if {$df <= 0} { 1175 return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive" 1176 } 1177 1178 return [cdf-gamma [expr {0.5*$df}] 0.5 $x] 1179} 1180 1181 1182# cdf-beta -- 1183# Return the cumulative probabilities belonging to a Beta distribution 1184# 1185# Arguments: 1186# a First parameter of the Beta distribution 1187# b Second parameter of the Beta distribution 1188# x Value of variate 1189# 1190# Result: 1191# Cumulative probability of the given value of x to occur 1192# 1193# Note: 1194# Implemented by Eric Kemp-Benedict, 2008 1195# 1196proc ::math::statistics::cdf-beta { a b x } { 1197 incompleteBeta $a $b $x 1198} 1199 1200 1201# random-gamma -- 1202# Generate a list of gamma-distributed deviates 1203# 1204# Arguments: 1205# alpha Shape parameter 1206# beta Rate parameter 1207# x Value of variate 1208# 1209# Result: 1210# List of random values 1211# 1212# Note: 1213# Implemented by Eric Kemp-Benedict, 2007 1214# Generate a list of gamma-distributed random deviates 1215# Use Cheng's envelope rejection method, as documented in: 1216# Dagpunar, J.S. 2007 1217# "Simulation and Monte Carlo: With Applications in Finance and MCMC" 1218# 1219proc ::math::statistics::random-gamma {alpha beta number} { 1220 if {$alpha <= 1} { 1221 set lambda $alpha 1222 } else { 1223 set lambda [expr {sqrt(2.0 * $alpha - 1.0)}] 1224 } 1225 set retval {} 1226 for {set i 0} {$i < $number} {incr i} { 1227 while {1} { 1228 # Two rands: one for deviate, one for acceptance/rejection 1229 set r1 [expr {rand()}] 1230 set r2 [expr {rand()}] 1231 # Calculate deviate from enveloping proposal distribution (a Lorenz distribution) 1232 set lnxovera [expr {(1.0/$lambda) * (log(1.0 - $r1) - log($r1))}] 1233 if {![catch {expr {$alpha * exp($lnxovera)}} x]} { 1234 # Apply acceptance criterion 1235 if {log(4.0*$r1*$r1*$r2) < ($alpha - $lambda) * $lnxovera + $alpha - $x} { 1236 break 1237 } 1238 } 1239 } 1240 lappend retval [expr {1.0 * $x/$beta}] 1241 } 1242 1243 return $retval 1244} 1245 1246 1247# random-poisson -- 1248# Generate a list of Poisson-distributed deviates 1249# 1250# Arguments: 1251# mu Mean value 1252# number Number of deviates to return 1253# 1254# Result: 1255# List of random values 1256# 1257# Note: 1258# Implemented by Eric Kemp-Benedict, 2007 1259# 1260proc ::math::statistics::random-poisson {mu number} { 1261 if {$mu < 20} { 1262 return [Randp_invert $mu $number] 1263 } else { 1264 return [Randp_PTRS $mu $number] 1265 } 1266} 1267 1268 1269# random-chisquare -- 1270# Return a list of random numbers according to a chi square distribution 1271# 1272# Arguments: 1273# df Degree of freedom 1274# number Number of values to return 1275# 1276# Result: 1277# List of random numbers 1278# 1279# Note: 1280# Implemented by Eric Kemp-Benedict, 2007 1281# 1282proc ::math::statistics::random-chisquare { df number } { 1283 1284 if {$df <= 0} { 1285 return -code error -errorcode ARG -errorinfo "Degrees of freedom must be positive" 1286 } 1287 1288 return [random-gamma [expr {0.5*$df}] 0.5 $number] 1289} 1290 1291 1292# random-students-t -- 1293# Return a list of random numbers according to a chi square distribution 1294# 1295# Arguments: 1296# degrees Degree of freedom 1297# number Number of values to return 1298# 1299# Result: 1300# List of random numbers 1301# 1302# Note: 1303# Implemented by Eric Kemp-Benedict, 2007 1304# 1305# Use method from Appendix 4.3 in Dagpunar, J.S., 1306# "Simulation and Monte Carlo: With Applications in Finance and MCMC" 1307# 1308proc ::math::statistics::random-students-t { degrees number } { 1309 variable pi 1310 1311 if {$degrees < 1} { 1312 return -code error -errorcode ARG -errorinfo "Degrees of freedom must be at least 1" 1313 } 1314 1315 set dd [expr {double($degrees)}] 1316 set k [expr {2.0/($dd - 1.0)}] 1317 1318 for {set i 0} {$i < $number} {incr i} { 1319 set r1 [expr {rand()}] 1320 if {$degrees > 1} { 1321 set r2 [expr {rand()}] 1322 set c [expr {cos(2.0 * $pi * $r2)}] 1323 lappend retval [expr {sqrt($dd/ \ 1324 (1.0/(1.0 - pow($r1, $k)) \ 1325 - $c * $c)) * $c}] 1326 } else { 1327 lappend retval [expr {tan(0.5 * $pi * ($r1 + $r1 - 1))}] 1328 } 1329 } 1330 set retval 1331} 1332 1333 1334# random-beta -- 1335# Return a list of random numbers according to a Beta distribution 1336# 1337# Arguments: 1338# a First parameter of the Beta distribution 1339# b Second parameter of the Beta distribution 1340# number Number of values to return 1341# 1342# Result: 1343# Cumulative probability of the given value of x to occur 1344# 1345# Note: 1346# Implemented by Eric Kemp-Benedict, 2008 1347# 1348# Use trick from J.S. Dagpunar, "Simulation and 1349# Monte Carlo: With Applications in Finance 1350# and MCMC", Section 4.5 1351# 1352proc ::math::statistics::random-beta { a b number } { 1353 set retval {} 1354 foreach w [random-gamma $a 1.0 $number] y [random-gamma $b 1.0 $number] { 1355 lappend retval [expr {$w / ($w + $y)}] 1356 } 1357 return $retval 1358} 1359 1360 1361# Random_invert -- 1362# Generate a list of Poisson-distributed deviates - method 1 1363# 1364# Arguments: 1365# mu Mean value 1366# number Number of deviates to return 1367# 1368# Result: 1369# List of random values 1370# 1371# Note: 1372# Implemented by Eric Kemp-Benedict, 2007 1373# 1374# Generate a poisson-distributed random deviate 1375# Use algorithm in section 4.9 of Dagpunar, J.S, 1376# "Simulation and Monte Carlo: With Applications 1377# in Finance and MCMC", pub. 2007 by Wiley 1378# This inverts the cdf using a "chop-down" search 1379# to avoid storing an extra intermediate value. 1380# It is only good for small mu. 1381# 1382proc ::math::statistics::Randp_invert {mu number} { 1383 set W0 [expr {exp(-$mu)}] 1384 1385 set retval {} 1386 1387 for {set i 0} {$i < $number} {incr i} { 1388 set W $W0 1389 set R [expr {rand()}] 1390 set X 0 1391 1392 while {$R > $W} { 1393 set R [expr {$R - $W}] 1394 incr X 1395 set W [expr {$W * $mu/double($X)}] 1396 } 1397 1398 lappend retval $X 1399 } 1400 1401 return $retval 1402} 1403 1404 1405# Random_PTRS -- 1406# Generate a list of Poisson-distributed deviates - method 2 1407# 1408# Arguments: 1409# mu Mean value 1410# number Number of deviates to return 1411# 1412# Result: 1413# List of random values 1414# 1415# Note: 1416# Implemented by Eric Kemp-Benedict, 2007 1417# Generate a poisson-distributed random deviate 1418# Use the transformed rejection method with 1419# squeeze of Hoermann: 1420# Wolfgang Hoermann, "The Transformed Rejection Method 1421# for Generating Poisson Random Variables," 1422# Preprint #2, Dept of Applied Statistics and 1423# Data Processing, Wirtshcaftsuniversitaet Wien, 1424# http://statistik.wu-wien.ac.at/ 1425# This method works for mu >= 10. 1426# 1427proc ::math::statistics::Randp_PTRS {mu number} { 1428 set smu [expr {sqrt($mu)}] 1429 set b [expr {0.931 + 2.53 * $smu}] 1430 set a [expr {-0.059 + 0.02483 * $b}] 1431 set vr [expr {0.9277 - 3.6224/($b - 2.0)}] 1432 set invalpha [expr {1.1239 + 1.1328/($b - 3.4)}] 1433 set lnmu [expr {log($mu)}] 1434 1435 set retval {} 1436 for {set i 0} {$i < $number} {incr i} { 1437 while 1 { 1438 set U [expr {rand() - 0.5}] 1439 set V [expr {rand()}] 1440 1441 set us [expr {0.5 - abs($U)}] 1442 set k [expr {int(floor((2.0 * $a/$us + $b) * $U + $mu + 0.43))}] 1443 1444 if {$us >= 0.07 && $V <= $vr} { 1445 break 1446 } 1447 1448 if {$k < 0} { 1449 continue 1450 } 1451 1452 if {$us < 0.013 && $V > $us} { 1453 continue 1454 } 1455 1456 set kp1 [expr {$k+1}] 1457 if {log($V * $invalpha / ($a/($us * $us) + $b)) <= -$mu + $k * $lnmu - [::math::ln_Gamma $kp1]} { 1458 break 1459 } 1460 } 1461 1462 lappend retval $k 1463 } 1464 return $retval 1465} 1466 1467 1468# 1469# Simple numerical tests 1470# 1471if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } { 1472 1473 # 1474 # Apparent accuracy: at least one digit more than the ones in the 1475 # given numbers 1476 # 1477 puts "Normal distribution - two-tailed" 1478 foreach z {4.417 3.891 3.291 2.576 2.241 1.960 1.645 1.150 0.674 1479 0.319 0.126 0.063 0.0125} \ 1480 pexp {1.e-5 1.e-4 1.e-3 1.e-2 0.025 0.050 0.100 0.250 0.500 1481 0.750 0.900 0.950 0.990 } { 1482 set prob [::math::statistics::Cdf-toms322 1 5000 [expr {$z*$z}]] 1483 puts "$z - $pexp - [expr {1.0-$prob}]" 1484 } 1485 1486 puts "Normal distribution (inverted; one-tailed)" 1487 foreach p {0.001 0.01 0.1 0.25 0.5 0.75 0.9 0.99 0.999} { 1488 puts "$p - [::math::statistics::Inverse-cdf-normal 0.0 1.0 $p]" 1489 } 1490 puts "Normal random variables" 1491 set rndvars [::math::statistics::random-normal 1.0 2.0 20] 1492 puts $rndvars 1493 puts "Normal uniform variables" 1494 set rndvars [::math::statistics::random-uniform 1.0 2.0 20] 1495 puts $rndvars 1496 puts "Normal exponential variables" 1497 set rndvars [::math::statistics::random-exponential 2.0 20] 1498 puts $rndvars 1499} 1500