# STAT 29000
# Project 1 Solutions
# by Mark Daniel Ward
#1
# diff(LakeHuron) gives the year-to-year rises and falls of the lake level.
# Comparing it against its own maximum gives a vector of FALSE's with a TRUE
# at the position of the biggest rise, and which() turns that into an index
# (the 85th position of the differences).
# Position i of diff(LakeHuron) is the change from observation i to i + 1,
# so we look both years up in time(LakeHuron) rather than hard-coding the
# year in which the series starts.
# The biggest rise of the LakeHuron data set begins in the year 1959:
time(LakeHuron)[which(diff(LakeHuron) == max(diff(LakeHuron)))]
# and ends in the year 1960:
time(LakeHuron)[1 + which(diff(LakeHuron) == max(diff(LakeHuron)))]
#2a
# we first load the MASS library
library(MASS)
# average eruption duration in the geyser data (from MASS):
# it comes out as 3.460814 minutes
mean(geyser[["duration"]])
#2b
# sorting the durations from largest to smallest puts the longest eruptions
# at the front, so the ten longest are just the head of the sorted vector:
head(sort(geyser$duration, decreasing = TRUE), 10)
#2c
# geyser$duration >= 3 is TRUE for every eruption lasting 3 minutes or longer;
# summing counts the TRUE's, and there are 194 of them:
sum(geyser$duration >= 3)
#3a
# the comparison below is TRUE only for the car with the highest gas mileage
# (it is the 20th row of mtcars), so using it to index the vector of row
# names gives the name of that car in a single step:
rownames(mtcars)[mtcars$mpg == max(mtcars$mpg)]
#3b
# the comparison below is TRUE only for the car with the highest horsepower
# (it is the 31st row of mtcars), so using it to index the vector of row
# names gives the name of that car in a single step:
rownames(mtcars)[mtcars$hp == max(mtcars$hp)]
#3c
# here we want the smallest value rather than the largest: the comparison
# below is TRUE only for the car with the smallest qsec (quarter-mile time),
# which is the 29th row of mtcars, so indexing the row names with it
# gives the name of that car in a single step:
rownames(mtcars)[mtcars$qsec == min(mtcars$qsec)]
#3d
# mtcars$am == 1 is TRUE for the cars that have manual transmission;
# keeping only those rows of the data frame and counting the rows that
# remain gives the number of such cars:
nrow(mtcars[mtcars$am == 1, ])
#3e
# same idea as counting the manual-transmission cars, except that a row is
# kept only if the car also has 6 cylinders:
nrow(mtcars[mtcars$am == 1 & mtcars$cyl == 6, ])
#4a
# the populations of Indiana and Pennsylvania are 5313 and 11860, respectively.
# We pull out the named population vector once and keep the states whose
# population lies strictly between those two values; local() keeps the
# helper variables out of the global workspace:
local({
  population <- state.x77[, "Population"]
  above_indiana <- population > population["Indiana"]
  below_pennsylvania <- population < population["Pennsylvania"]
  population[above_indiana & below_pennsylvania]
})
#4b
# the areas of Indiana and Pennsylvania are 36097 and 44966, respectively.
# We pull out the named area vector once and keep the states whose area lies
# strictly between those two values; local() keeps the helper variables
# out of the global workspace:
local({
  area <- state.x77[, "Area"]
  above_indiana <- area > area["Indiana"]
  below_pennsylvania <- area < area["Pennsylvania"]
  area[above_indiana & below_pennsylvania]
})
#5a
# we estimate the mean of |Z| by simulation: draw one million standard
# normal values and average their absolute values (the answer changes a
# little from run to run; the exact value is sqrt(2/pi), about 0.7979):
local({
  z <- rnorm(1e6)
  mean(abs(z))
})
#5b
# we estimate the variance of |Z| by simulation: draw one million standard
# normal values and take the sample variance of their absolute values (the
# answer changes a little from run to run; the exact value is 1 - 2/pi,
# about 0.3634):
local({
  z <- rnorm(1e6)
  var(abs(z))
})
#6
# countas(v) returns how many elements of v contain the letter "a"
# (an element that contains several a's is still counted only once):
countas <- function(v) {
  contains_a <- grepl("a", v)
  sum(contains_a)
}
#7a
# firstthree(v) returns the position of the first element of v that
# equals 3: we collect the positions of all the 3's and keep the first one
# (the result is NA when v does not contain a 3)
firstthree <- function(v) {
  positions_of_three <- which(v == 3)
  positions_of_three[1]
}
# for example, the first 3 in this vector is found in position 2:
firstthree(c(-2.5, 3, 3, 0.001, 22, 5, 7, 19, 3, 17))
#7b
# thirdthree(v) returns the position of the third element of v that
# equals 3: we collect the positions of all the 3's and keep the third one
# (the result is NA when v contains fewer than three 3's)
thirdthree <- function(v) {
  positions_of_three <- which(v == 3)
  positions_of_three[3]
}
# for example, the third 3 in this vector is found in position 9:
thirdthree(c(-2.5, 3, 3, 0.001, 22, 5, 7, 19, 3, 17))
#8
# topfive(v) tabulates the values of v, sorts the table into decreasing
# order (i.e., most frequent values first) and returns the first five
# entries, i.e. the five most frequent values together with their counts.
# Indexing the sorted table with 1:5 would pad the result with NA entries
# whenever v has fewer than five distinct values, so we only ask for as many
# entries as the table actually has (possibly none, if v is empty):
topfive <- function(v) {
  counts <- sort(table(v), decreasing = TRUE)
  counts[seq_len(min(5, length(counts)))]
}
#9a
# Euler's number is the sum of 1/k! over k = 0, 1, 2, ...;
# adding up the terms for k = 0, ..., 100 gives the approximation:
sum(1 / factorial(seq(0, 100)))
# and indeed, subtracting 2.718281828459 leaves only a very small number:
sum(1 / factorial(seq(0, 100))) - 2.718281828459
#9b
# the number Pi^2 / 6 is approximately equal to the sum of 1/k^2 over
# k = 1, ..., 10000000 (ten million terms),
# so multiplying by 6 and taking the square root approximates Pi:
sqrt(6 * sum(1 / seq_len(1e7)^2))
#10a
# the k-th triangular number is k(k+1)/2, which is the binomial
# coefficient choose(k + 1, 2), so the first 1000 triangular numbers are:
n <- 1000
choose(seq_len(n) + 1, 2)
#10b
# the k-th tetrahedral number is k(k+1)(k+2)/6, which is the binomial
# coefficient choose(k + 2, 3), so the first 1000 tetrahedral numbers are:
n <- 1000
choose(seq_len(n) + 2, 3)