# R code to explore the geometric, negative binomial, # gamma, beta and bivariate normal parametric families of distributions # discrete case: ##### geometric distribution: # for 0 < p < 1, X ~ Geometric( p ) has PMF # f_X ( x ) = p ( 1 - p )^x for x = 0, 1, ... and 0 otherwise # recall that this distribution arises when you're observing # an IID Bernoulli (success/failure) process with success probability p # and X counts the number of failures until the first success p <- 0.9 cbind( 0:10, dgeom( 0:10, p ) ) par( mfrow = c( 2, 1 ) ) plot( 0, 0, xlab = 'x', ylab = 'Probability', type = 'n', xlim = c( -0.5, 3.5 ), ylim = c( 0, 1 ), main = 'Geometric PMF with p = 0.9 (spike plot)' ) for ( i in 0:5 ) { segments( i, 0, i, dgeom( i, p ), lwd = 2, col = 'red' ) } seed <- 1 set.seed( seed ) x.star.0.9 <- rgeom( 10000, 0.9 ) table( x.star.0.9 ) # x.star.0.9 # 0 1 2 3 # 8979 926 88 7 hist( x.star.0.9, prob = T, breaks = ( 0:4 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 1 ), main = 'Geometric PMF with p = 0.9 (histogram)' ) par( mfrow = c( 3, 1 ) ) hist( x.star.0.9, prob = T, breaks = ( 0:30 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 1 ), main = 'Geometric PMF with p = 0.9 (histogram)' ) x.star.0.5 <- rgeom( 10000, 0.5 ) table( x.star.0.5 ) # x.star.0.5 # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 # 4999 2461 1222 650 339 174 75 33 20 16 3 4 2 2 hist( x.star.0.5, prob = T, breaks = ( 0:30 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 1 ), main = 'Geometric PMF with p = 0.5 (histogram)' ) x.star.0.1 <- rgeom( 10000, 0.1 ) table( x.star.0.1 ) hist( x.star.0.1[ x.star.0.1 <= 29 ], prob = T, breaks = ( 0:30 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 1 ), main = 'Geometric PMF with p = 0.1 (histogram)' ) par( mfrow = c( 1, 1 ) ) ##### negative binomial distribution: # for 0 < p < 1, X ~ NegativeBinomial( n, p ) has PMF # f_X ( x ) = choose( n + x - 1, x ) p^n ( 1 - p )^x for x = 0, 1, ... and 0 otherwise # recall that this distribution arises when you're observing # an IID Bernoulli (success/failure) process with success probability p # and X counts the number of failures until the nth success # so the Geometric( p ) distribution is a special case # of the Negative Binomial distribution with n = 1 x.star.10.0.9 <- rnbinom( 10000, 10, 0.9 ) table( x.star.10.0.9 ) # x.star.10.0.9 # 0 1 2 3 4 5 6 7 8 # 3547 3467 1961 707 235 60 16 4 3 hist( x.star.10.0.9, prob = T, breaks = ( 0:9 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.9 )' ) x.star.10.0.5 <- rnbinom( 10000, 10, 0.5 ) table( x.star.10.0.5 ) hist( x.star.10.0.5, prob = T, breaks = ( 0:38 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.5 )' ) x.star.10.0.1 <- rnbinom( 10000, 10, 0.1 ) table( x.star.10.0.1 ) hist( x.star.10.0.1, prob = T, breaks = ( 0:253 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.1 )' ) par( mfrow = c( 3, 1 ) ) hist( x.star.10.0.9, prob = T, breaks = ( 0:245 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.9 )' ) hist( x.star.10.0.5, prob = T, breaks = ( 0:245 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.5 )' ) hist( x.star.10.0.1, prob = T, breaks = ( 0:253 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.1 )' ) par( mfrow = c( 1, 1 ) ) x.star.25.0.5 <- rnbinom( 10000, 25, 0.5 ) table( x.star.25.0.5 ) par( mfrow = c( 2, 1 ) ) hist( x.star.10.0.5, prob = T, breaks = ( 0:60 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 10, 0.5 )' ) hist( x.star.25.0.5, prob = T, breaks = ( 0:60 ) - 0.5, xlab = 'x', ylab = 'Probability', ylim = c( 0, 0.4 ), main = 'Negative Binomial PMF with ( n, p ) = ( 25, 0.5 )' ) par( mfrow = c( 1, 1 ) ) library( moments ) c( skewness( x.star.10.0.5 ), skewness( x.star.25.0.5 ) ) ##### bivariate normal density for galton's father-son height data install.packages( 'mvtnorm' ) library( mvtnorm ) n.grid <- 100 # fathers had ( mean, sd ) = ( 67, 2.5 ) x.grid <- seq( 67 - 3 * 2.5, 67 + 3 * 2.5, length = n.grid ) # sons had ( mean, sd ) = ( 68, 2.5 ) y.grid <- seq( 68 - 3 * 2.5, 68 + 3 * 2.5, length = n.grid ) # correlation was +0.7 Sigma <- matrix( c( 2.5^2, 0.7 * 2.5 * 2.5, 0.7 * 2.5 * 2.5, 2.5^2 ), 2, 2 ) f.grid <- matrix( NA, n.grid, n.grid ) for ( i in 1:n.grid ) { for ( j in 1:n.grid ) { f.grid[ i, j ] <- dmvnorm( c( x.grid[ i ], y.grid[ j ] ), c( 67, 68 ), Sigma ) } } par( mfrow = c( 1, 1 ) ) contour( x.grid, y.grid, f.grid, nlevels = 25, col = 'red', xlab = 'height of father', ylab = 'height of son' ) library( plotly ) f.grid.t <- t( f.grid ) interactive.bivariate.density.perspective.plot <- plot_ly( x = x.grid, y = y.grid, z = f.grid.t ) %>% add_surface( contours = list( z = list( show = T, usecolormap = T, highlightcolor = '#ff0000', project = list( z = T ) ) ) ) %>% layout( title = 'Perspective plot of the bivariate density', scene = list( xaxis = list( title = 'x' ), yaxis = list( title = 'y' ), zaxis = list( title = 'f' ), camera = list( eye = list( x = 1.5, y = 0.5, z = -0.5 ) ) ) ) %>% colorbar( title = 'Bivariate Density' ) interactive.bivariate.density.perspective.plot ##### gamma family of distributions # alpha > 0, beta > 0 x.grid.1 <- seq( 0, 10, length = 500 ) # alpha = 1, beta = 1 alpha <- 1 beta <- 1 plot( x.grid.1, dgamma( x.grid.1, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density' ) # exponential distribution hidden inside gamma family with alpha = 1 alpha <- 1 beta <- 10 plot( x.grid.1, dgamma( x.grid.1, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density' ) alpha <- 1 beta <- 0.1 x.grid.2 <- seq( 0, 50, length = 500 ) plot( x.grid.2, dgamma( x.grid.2, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density' ) alpha <- 2 beta <- 1 plot( x.grid.1, dgamma( x.grid.1, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Gamma family, ( alpha, beta ) = ( 2, 1 )' ) alpha <- 2 beta <- 10 plot( x.grid.1, dgamma( x.grid.1, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Gamma family, ( alpha, beta ) = ( 2, 10 )' ) alpha <- 2 beta <- 0.1 plot( x.grid.2, dgamma( x.grid.2, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Gamma family, ( alpha, beta ) = ( 2, 0.1 )' ) ##### beta family of distributions x.grid.3 <- seq( 0, 1, length = 500 ) # alpha > 0, beta > 0 alpha <- 1 beta <- 1 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 1, 1 )' ) # uniform distribution hidden inside beta family with alpha = beta = 1 alpha <- 3 beta <- 1 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 3, 1 )' ) alpha <- 1 beta <- 3 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 1, 3 )' ) alpha <- 2 beta <- 2 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 2, 2 )' ) alpha <- 3 beta <- 3 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 3, 3 )' ) alpha <- 10 beta <- 10 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 10, 10 )' ) alpha <- 3 beta <- 6 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 3, 6 )' ) alpha <- 0.5 beta <- 0.5 plot( x.grid.3, dbeta( x.grid.3, alpha, beta ), type = 'l', lwd = 2, col = 'red', xlab = 'x', ylab = 'Density', main = 'Beta family, ( alpha, beta ) = ( 0.5, 0.5 )' )