##### Script for SlideDeck b4
###
### Measures of Spread
###
# Walks through four worked examples (a geography quiz and three variables
# from a crime data set). Each example prints the usual sample statistics,
# checks the Empirical Rule, and draws a histogram. The script ends with an
# illustrative graphic for the Empirical Rule itself.
#
# NOTE(review): modal(), cv(), hildebrand.rule(), isBetween() and histogram()
# are not base R; presumably they are defined by the stat200.R file sourced
# below -- confirm.

### Preamble

# Load additional functions
source("https://rfs.kvasaheim.com/stat200.R")


### ### ###
# Load and attach the geography data
dt = read.csv("http://rfs.kvasaheim.com/data/geography.csv")
attach(dt)


### Ex1: Geography Quiz

# Sample statistics
mean(Score)
median(Score)
modal(Score)
var(Score)
sd(Score)
IQR(Score)
cv(Score)
hildebrand.rule(Score)

# Empirical rule: proportion within 1 stdev of the mean
vv = Score
wi1 = sum( isBetween( vv, min=mean(vv)-sd(vv), max=mean(vv)+sd(vv) ) )
wi1/length(vv)

# Proportion within 2 stdev of the mean
wi2 = sum( isBetween( vv, min=mean(vv)-2*sd(vv), max=mean(vv)+2*sd(vv) ) )
wi2/length(vv)

### The Histogram/Barplot
par(cex.axis=0.9, mar=c(4,1,1,1) )
par(cex.lab=1.2, font.lab=2)

# Breaks sit at -0.5, 0.5, ..., 6.5 so each integer score 0..6 gets its own
# bar centred on the score.
hist(Score, breaks=seq(0,7,1)-0.5, col=rgb(1-0:8/8,0,0), border="white",
     xaxt="n", yaxt="n", xlab="", ylab="", main="")
title(xlab="Score on the Geography Quiz", line=2.25)
mtext(side=1, at=0:6, text=0:6)   # score labels in place of a drawn axis
segments(-0.49,0, 6.5,0)          # baseline under the bars


### ### ###
# Detach the geography dataset
detach(dt)

# Load and attach the crime dataset
dt = read.csv("http://rfs.kvasaheim.com/data/crime.csv")
attach(dt)


### Ex2: School Enrollment in 1990

# Sample statistics
mean(enroll90)
median(enroll90)
modal(enroll90)
var(enroll90)
sd(enroll90)
IQR(enroll90)
cv(enroll90)
hildebrand.rule(enroll90)

# Empirical rule: proportion within 1 stdev of the mean
vv = enroll90
wi1 = sum( isBetween( vv, min=mean(vv)-sd(vv), max=mean(vv)+sd(vv) ) )
wi1/length(vv)

# Proportion within 2 stdev of the mean
wi2 = sum( isBetween( vv, min=mean(vv)-2*sd(vv), max=mean(vv)+2*sd(vv) ) )
wi2/length(vv)

### Histogram
par(cex.axis=0.9, mar=c(4,1,1,1) )
par(cex.lab=1.2, font.lab=2)

histogram(enroll90, breaks=seq(80,115,2), col="green4", border="white",
          xaxt="n", xlab="")
title(xlab="School Enrollment Rate (1990)", line=2.25)
axis(1, at=seq(0,120,10))


### Ex3: School Enrollment in 2000

# Sample statistics
mean(enroll00)
median(enroll00)
modal(enroll00)
var(enroll00)
sd(enroll00)
IQR(enroll00)
cv(enroll00)
hildebrand.rule(enroll00)

# Empirical rule: proportion within 1 stdev of the mean
vv = enroll00
wi1 = sum( isBetween( vv, min=mean(vv)-sd(vv), max=mean(vv)+sd(vv) ) )
wi1/length(vv)

# Proportion within 2 stdev of the mean
wi2 = sum( isBetween( vv, min=mean(vv)-2*sd(vv), max=mean(vv)+2*sd(vv) ) )
wi2/length(vv)

### Histogram
par(cex.axis=0.9, mar=c(4,1,1,1) )
par(cex.lab=1.2, font.lab=2)

histogram(enroll00, breaks=seq(80,115,2), col="blue4", border="white",
          xaxt="n", xlab="")
title(xlab="School Enrollment Rate (2000)", line=2.25)
axis(1, at=seq(0,120,10))


### Ex4: Violent Crime rate in 2000

# Sample statistics
mean(vcrime00)
median(vcrime00)
modal(vcrime00)
var(vcrime00)
sd(vcrime00)
IQR(vcrime00)
cv(vcrime00)
hildebrand.rule(vcrime00)

# Empirical rule: proportion within 1 stdev of the mean
vv = vcrime00
wi1 = sum( isBetween( vv, min=mean(vv)-sd(vv), max=mean(vv)+sd(vv) ) )
wi1/length(vv)

# Proportion within 2 stdev of the mean
wi2 = sum( isBetween( vv, min=mean(vv)-2*sd(vv), max=mean(vv)+2*sd(vv) ) )
wi2/length(vv)

### Histogram
par(cex.axis=0.9, mar=c(4,1,1,1) )
par(cex.lab=1.2, font.lab=2)

histogram(vcrime00, breaks=seq(0,1800,100), col="red4", border="white",
          xaxt="n", xlab="")
title(xlab="Violent Crime Rate (2000)", line=2.50)
axis(1, at=seq(0,2000,200))


### ### ###
# Detach the crime dataset, mirroring the geography section, so that
# re-running the script does not stack another copy of dt on the search path.
detach(dt)


#####
### Graphic for the Empirical Rule

# One million draws from a Normal distribution with mu = 0 and sigma = 20
rv = rnorm(1e6, mean=0, sd=20)
summary(rv)

# The breaks only reach +/-100 (five sigma). With 1e6 draws a value beyond
# that turns up in a sizeable share of runs, and hist() stops with an error
# whenever the breaks do not span the data. Restrict the draws to the break
# range first; the handful of dropped values do not visibly change the
# densities.
hh = hist(rv[abs(rv) <= 100], breaks=seq(-100,100,10), plot=FALSE )
names(hh)

# par(bg="transparent")
par(cex.axis=0.9, mar=c(4,1,1,1) )
par(cex.lab=1.2, font.lab=2)

plot.new()
plot.window( xlim=c(-80,80), ylim=c(0,0.02))

# Bars for the twelve 10-unit bins between -60 and +60 (bins 5..16 of hh),
# coloured by distance from the mean: within 1 sigma lightblue, between 1 and
# 2 sigma purple, between 2 and 3 sigma red4.
binLeft  = seq(-60, 50, 10)
binColor = rep( c("red4","purple","lightblue","purple","red4"),
                times=c(2,2,4,2,2) )
rect(binLeft,0, binLeft+10,hh$density[5:16], col=binColor)

title(xlab="Random Variable", line=2.50)
axis(1, at=0,   labels=expression(mu) )
axis(1, at=-20, labels=expression(mu - 1*sigma) )
axis(1, at=+20, labels=expression(mu + 1*sigma) )
axis(1, at=-40, labels=expression(mu - 2*sigma) )
axis(1, at=+40, labels=expression(mu + 2*sigma) )
axis(1, at=-60, labels=expression(mu - 3*sigma) )
axis(1, at=+60, labels=expression(mu + 3*sigma) )