# Toy example: cross-tabulation and bar plot ----

# Two small parallel vectors: a Y/N flag and a numeric level (1-3).
smokes <- c("Y", "N", "N", "Y", "N", "Y", "Y", "Y", "N", "Y")
amount <- c(1, 2, 2, 3, 3, 1, 2, 1, 3, 2)

# Contingency table of smokes x amount, then the same table as a bar plot.
table(smokes, amount)
barplot(table(smokes, amount))

# Load the data set ----

# Reads the CSV over HTTP; requires network access.
# The code below uses the columns Age, Impressions, Clicks and Gender.
data1 <- read.csv(
  url("http://stat.columbia.edu/~rachel/datasets/nyt1.csv")
)
head(data1)

# Bin Age into right-closed intervals:
# (-Inf,0], (0,18], (18,24], (24,34], (34,44], (44,54], (54,64], (64,Inf].
data1$agecat <- cut(data1$Age, c(-Inf, 0, 18, 24, 34, 44, 54, 64, Inf))
summary(data1)

# Per-group summaries with doBy ----

# Install only when the package is missing, instead of on every run.
if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library("doBy")

# Summary statistics of a vector, in this order: count, min, mean, max.
# Left unnamed on purpose so summaryBy() keeps its default column names.
siterange <- function(x) {
  c(length(x), min(x), mean(x), max(x))
}

# Count / min / mean / max of Age within each age category.
summaryBy(Age ~ agecat, data = data1, FUN = siterange)

# Plots with ggplot2 ----

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Distribution of Impressions per age category.
ggplot(data1, aes(x = agecat, y = Impressions, fill = agecat)) +
  geom_boxplot()

# Density of Clicks / Impressions per age category,
# restricted to rows with at least one click.
ggplot(
  subset(data1, Clicks > 0),
  aes(x = Clicks / Impressions, colour = agecat)
) +
  geom_density()

# Histogram of Impressions (bin width 1), stacked by age category.
ggplot(data1, aes(x = Impressions, fill = agecat)) +
  geom_histogram(binwidth = 1)

# Classify each row by activity ----

# Create the column explicitly rather than relying on assignment into a
# not-yet-existing column; rows matching no rule below stay NA.
data1$scode <- NA_character_

# Order matters: "Clicks" is assigned last so it overrides "Imps" for rows
# that have both impressions and clicks.
data1$scode[data1$Impressions == 0] <- "NoImps"
data1$scode[data1$Impressions > 0] <- "Imps"
data1$scode[data1$Clicks > 0] <- "Clicks"
data1$scode <- factor(data1$scode)
head(data1)

# Rows that received no impressions.
data3 <- subset(data1, scode == "NoImps")
head(data3)

# Number of elements in x; used as a per-group row counter.
clen <- function(x) {
  length(x)
}

# Row counts for every scode x Gender x agecat combination present.
etable <- summaryBy(
  Impressions ~ scode + Gender + agecat,
  data = data1,
  FUN = clen
)