# Load in the graphics library library(lattice) library(grid) # SECTION 1.2: visualizing random samples and random variables # THE EMPIRICAL DISTRIBUTION FUNCTION OF A SAMPLE mysample <- c(3,8,7,2,2,5) # Prepare the data to be plotted x <- sort(mysample) y <- (6:1)/6 xyplot(y~x, type=c('S','p')) # plot both steps and points # THE LAW OF LARGE NUMEBRS # Define a random number generator. # This generates a random sample of size n, with tunable parameter r. # -- runif(n) generates n copies of a uniform [0,1] random variable. rexp <- function(n,r) -1/r*log(runif(n)) # Generate a random sample of length 10 mysample <- rexp(20,r=3) x <- sort(mysample) y <- (length(x):1)/length(x) # Generate the points to be plotted for the true (theoretical) dist. func. rngx <- seq(0,3,by=.001) rngy <- exp(-3*rngx) # Assemble both into a single dataframe df <- data.frame(x=c(x,rngx),y=c(y,rngy), type=rep(c('sample','dist'),times=c(length(x),length(rngx)))) # Plot both the sample and the theory curve as separate lines xyplot(y~x, groups=type, data=df, type='S', auto.key=TRUE) # More elaborate code for doing basically the same thing df <- rbind(data.frame(x=sort(rexp(20,r=3)),y=(20:1)/20,type='sample',n='n=20'), data.frame(x=sort(rexp(200,r=3)),y=(200:1)/200,type='sample',n='n=200'), data.frame(x=rngx,y=rngy,type='dist',n='n=20'), data.frame(x=rngx,y=rngy,type='dist',n='n=200')) xyplot(y~x|n, groups=type, data=df, type='S', xlab='',ylab='',scales=list(alternating=FALSE), auto.key=list(lines=TRUE,points=FALSE)) # CONTINUOUS RANDOM VARIABLES: HISTOGRAMS & DENSITY mysample <- rnorm(200,mean=10,sd=3) x <- sort(mysample) y <- (200:1)/200 g1 <- xyplot(y~x, type=c('S','p'), xlim=c(2,18), ylim=c(0,1), xlab='',ylab='',sub='Emp. dist. func') rngx <- seq(2,18,by=.01) rngy <- pnorm(rngx, mean=10,sd=3, lower.tail=FALSE) g2 <- xyplot(rngy~rngx, type='l', xlim=c(2,18), ylim=c(0,1), xlab='',ylab='',sub='Dist. func.') g3 <- histogram(~mysample, xlim=c(2,18), xlab='',ylab='',sub='Histogram') rngx <- seq(2,18,by=.01) rngdy <- dnorm(rngx, mean=10,sd=3) g4 <- xyplot(rngdy~rngx, type='l', xlim=c(2,18), xlab='',ylab='',sub='Density') grid.newpage() print(g1,position=c(0,.5,.5,1), more=TRUE) print(g2,position=c(0,0,.5,.5), more=TRUE) print(g3,position=c(.5,.5,1,1), more=TRUE) print(g4,position=c(.5,0,1,.5)) # DISCRETE RANDOM VARIABLES: HISTOGRAMS & DENSITY ronedice <- function(n) sample(1:6,n,replace=TRUE) mysample <- ronedice(100)+ronedice(100) x <- sort(mysample) y <- (100:1)/100 g1 <- xyplot(y~x, type=c('S','p'), xlim=c(0,13), ylim=c(-.01,1.01), xlab='',ylab='',sub='Emp. dist. func') rngx <- 1:13 rngdy <- ifelse(rngx<8, floor(rngx)-1, 13-floor(rngx))/36 g2 <- xyplot(1-cumsum(rngdy)~rngx, type='s', xlim=c(0,13), ylim=c(-.01,1.01), lwd=5, xlab='',ylab='',sub='Dist. func.') g3 <- histogram(~mysample, xlim=c(0,13), xlab='',ylab='',sub='Histogram') g4 <- barchart(rngdy~rngx, type='l', xlim=c(0,13), ylim=c(-.01,.2), horizontal=FALSE, origin=0, xlab='',ylab='',sub='Density') grid.newpage() print(g1,position=c(0,.5,.5,1), more=TRUE) print(g2,position=c(0,0,.5,.5), more=TRUE) print(g3,position=c(.5,.5,1,1), more=TRUE) print(g4,position=c(.5,0,1,.5)) # SECTION 1.3: FITTING DISTRIBUTIONS mysample <- c(3,8,7,2,2,5) # Generate the points to be plotted for the empirical distribution function x <- sort(mysample) y <- (length(x):1)/length(x) # Generate the points to be plotted for the RNG distribution function lambda <- .15 rngx <- seq(0,10,by=.01) rngy <- exp(-lambda*rngx) # Assemble both into a single dataframe df <- data.frame(x=c(x,rngx),y=c(y,rngy), type=rep(c('sample','dist'),times=c(length(x),length(rngx)))) # Plot both the sample and the theory curve as separate lines xyplot(y~x, groups=type, data=df, type='S', auto.key=list(lines=TRUE,points=FALSE), main=paste('How does Exp(',lambda,') fit?',sep=''))