# Load the 2016 election donations file. It is pipe-delimited and has no
# header row, so the columns receive the default names V1, V2, ...
# Columns as they are used by the analyses in this script:
#   V1  committee/campaign ID    V8  donor name     V9  city
#   V10 state                    V11 zip code       V12 employer
#   V13 profession               V14 donation date  V15 donation amount
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
myDF <- read.delim(
  "/data/public/election2016/itcont.txt",
  sep = "|",
  header = FALSE
)
#1. Mean donation amount (V15) for each date (V14), sorted in increasing
# order, so the date with the largest average is printed last:
# September 16, 2014.
sort(with(myDF, tapply(V15, V14, mean)))
#2a. Number of donations per date; the 10 busiest dates are shown,
# in increasing order of count. The date with the most donations
# is December 31, 2015.
tail(sort(table(myDF[["V14"]])), n = 10)
#2b. Total dollars donated per date; the 10 dates with the largest
# totals are shown, in increasing order of total. The date with the
# largest total is June 30, 2015.
tail(sort(with(myDF, tapply(V15, V14, sum))), n = 10)
#3a. Count the donations whose employer field (V12) contains "PURDUE".
# There were 1277 such donations.
sum(grepl("PURDUE", myDF[["V12"]]))
#3b. Among the Purdue donations, count those made by residents of
# West Lafayette, allowing for the spellings "WEST LAFAYETTE",
# "W LAFAYETTE" and "W. LAFAYETTE" in the city field (V9).
# The count originally reported was 599 (obtained by adding the three
# separate match counts); re-run to confirm after the changes below.
v <- myDF$V9[grep("PURDUE", myDF$V12)]
# fixed = TRUE matches the text literally; otherwise the "." in
# "W. LAFAYETTE" is a regular-expression wildcard matching any character.
v1 <- grep("WEST LAFAYETTE", v, fixed = TRUE)
v2 <- grep("W LAFAYETTE", v, fixed = TRUE)
v3 <- grep("W. LAFAYETTE", v, fixed = TRUE)
# Count distinct positions so that a city matching more than one spelling
# is only counted once.
length(unique(c(v1, v2, v3)))
#3c. Tabulate the committee IDs (V1) of the Purdue donations and show
# the most frequent ones. The campaign C00401224 (ACTBLUE) received
# the largest number of these donations.
tail(sort(table(myDF[["V1"]][grepl("PURDUE", myDF[["V12"]])])))
#4. We select the donations from Indiana whose city name contains
# "LAFAYETTE"; this pattern also picks up cities with West Lafayette
# in the name. Then we average the amounts of all such donations.
# The average size of such a donation is 121.8081 dollars.
# The state (V10) is compared exactly, rather than by a substring regex
# match, so only the code "IN" itself qualifies; %in% also treats a
# missing state as "not Indiana" instead of producing NA.
mean(myDF$V15[grepl("LAFAYETTE", myDF$V9) & myDF$V10 %in% "IN"])
#5a. Number of donations per profession (V13); the professions with
# the most donations are listed last.
# NOTE(review): 11 entries are shown although 10 professions are wanted;
# presumably one entry is not a real profession (e.g. blank) -- confirm.
tail(sort(with(myDF, tapply(V15, V13, length))), n = 11)
#5b. Total dollars donated per profession (V13); the professions with
# the largest totals are listed last.
# NOTE(review): 11 entries are shown although 10 professions are wanted;
# presumably one entry is not a real profession (e.g. blank) -- confirm.
tail(sort(with(myDF, tapply(V15, V13, sum))), n = 11)
#6. Total dollars donated in each of the local zip codes. Zip codes (V11)
# are cut down to their first 5 characters before the amounts are summed.
local({
  local_zips <- c(
    "47901", "47902", "47903", "47904", "47905",
    "47906", "47907", "47909", "47996"
  )
  totals_by_zip <- tapply(myDF$V15, strtrim(myDF$V11, 5), sum)
  totals_by_zip[local_zips]
})
#7a. Total dollars donated per city (V9), restricted to donations whose
# state (V10) is "IN"; the top 15 Indiana cities are shown.
local({
  from_indiana <- myDF$V10 == "IN"
  tail(sort(tapply(myDF$V15[from_indiana], myDF$V9[from_indiana], sum)), n = 15)
})
#7b. Total dollars donated per city (V9) over the whole country;
# the top 15 cities are shown.
tail(sort(with(myDF, tapply(V15, V9, sum))), n = 15)
#8a. Total dollars per date for committee C00575795 (Hillary Clinton).
# She received the largest amount of money on July 29, 2016.
local({
  to_clinton <- myDF$V1 == "C00575795"
  tail(sort(tapply(myDF$V15[to_clinton], myDF$V14[to_clinton], sum)))
})
#8b. Total dollars per date for committee C00580100 (Donald Trump).
# He received the largest amount of money on June 22, 2016.
local({
  to_trump <- myDF$V1 == "C00580100"
  tail(sort(tapply(myDF$V15[to_trump], myDF$V14[to_trump], sum)))
})
#9a. Build one identifying string per donation by pasting together the
# donor's name (V8), city (V9), state (V10) and zip code (V11),
# separated by single spaces.
donorvec <- with(myDF, paste(V8, V9, V10, V11))
#9b. For every donor string, count the donations that went to the
# Clinton campaign (C00575795). The most frequent donor
# was MITCHELL, MARCIA LOS ANGELES CA 900363146
tail(sort(tapply(myDF[["V1"]] == "C00575795", INDEX = donorvec, FUN = sum)))
#9c. For every donor string, count the donations that went to the
# Trump campaign (C00580100). The most frequent donor
# was Trump himself: TRUMP, DONALD J. NEW YORK NY 10022
tail(sort(tapply(myDF[["V1"]] == "C00580100", INDEX = donorvec, FUN = sum)))
#9d. Per-donor donation counts for each of the two campaigns
# (Clinton: C00575795, Trump: C00580100), one entry per donor string:
clintoncounts <- tapply(myDF[["V1"]] == "C00575795", INDEX = donorvec, FUN = sum)
trumpcounts <- tapply(myDF[["V1"]] == "C00580100", INDEX = donorvec, FUN = sum)
# Sanity check that both vectors list the donors in the same order:
# first, the two lengths should agree,
length(clintoncounts)
length(trumpcounts)
# and second, the number of positions where the names differ should be 0:
sum(names(clintoncounts) != names(trumpcounts))
# Keep the donor names in this shared order, and list the donors who
# have a positive donation count for both campaigns:
donornames <- names(clintoncounts)
donornames[clintoncounts > 0 & trumpcounts > 0]
# There are only 3 such people.