# racd07.R March 2012 for R version 2.15.0 # Create log file sink("racd07.Rout") rm(list=ls()) # ********** OVERVIEW OF racd04.R ********** # R Program # copyright C 2012 by A. Colin Cameron and Pravin K. Trivedi # used for "Regression Analyis of Count Data" SECOND EDITION # by A. Colin Cameron and Pravin K. Trivedi (2012) # Cambridge University Press # To run you need files # racd07data1strikes.dta # racd07data2stocktrades.dta # and R packages # foreign, sandwich # This R program simply reads in the data on strikes and on stock trades # for chapter 7 and runs Poisson regression. # (The Stata program racd07.do does more) # 4.8.8 EXAMPLE: PATENTS # Poisson, NB2 and finite mixture of Poisson with 2 and 3 components # ********* DATA DESCRIPTION # The original data in racd07data1strikes.dta are from # J. Kennan, "The Duration of Contract strikes in U.S. Manufacturing", # Journal of Econometrics, 1985, Vol. 28, pp.5-28. # The data are also used in # A.C. Cameron and P.K. Trivedi (1990), # "Regression based tests for overdispersion", # Journal of Econometrics, Vol. 46, pp. 347-364. # For more details see these articles and racd07makedata1strikes.do # The original data in racd07data2stocktrades.dta are from # R.C. Jung, R. Liesenfeld and J.-F. Richard (2011) # "Dynamic Factor Models for Multivariate Count Data: An Application to # Stock-Market Trading Activity," JBES, 29, 73-85. # Data are the number of trades on the NYSE in 5 minute intervals # for Geltfelter Company (GLT) over 39 trading days Jan 3 - Feb 18 2005 # There are 75 5-minute intervals times 39 days # For more details racd07makedata2stocktrades.do # ********* 7.3.4 STRIKES DATA install.packages("foreign") library(foreign) data.ch7 <- read.dta(file = "racd07data1strikes.dta") # Allows variables in database to be accessed simply by giving names attach(data.ch7) # Lists first six observations head(data.ch7) # Tabulate counts of strikes table(STRIKES) table(STRIKES) / nrow(data.ch7) # Variable list names(data.ch7) # Summary statistics summary(data.ch7) sapply(data.ch7,mean) sapply(data.ch7,sd) # Formula for the model estimated in this chapter - shortens the commands below formula.ch7model <- as.formula(STRIKES ~ OUTPUT) # Poisson with default standard errors (variance is a multiple of the mean) model.poiss <- glm(formula.ch7model, family=poisson()) summary(model.poiss) # Poisson with robust standard errors (no assumption on variance) # install.packages("sandwich") library(sandwich) cov.robust <- vcovHC (model.poiss, type="HC0") se.robust <- sqrt(diag(cov.robust)) coeffs <- coef(model.poiss) t.robust <- coeffs / se.robust summary.poissrobust <- cbind(coeffs, se.robust, t.robust, pvalue=round(2*(1-pnorm(abs(coeffs/se.robust))),5), lower=coeffs-1.96*se.robust, upper=coeffs+1.96*se.robust) print(summary.poissrobust,digits=3) # ********* 7.11.2 STOCK TRADE DATA data.ch7b <- read.dta(file = "racd07data2stocktrades.dta") # Allows variables in database to be accessed simply by giving names attach(data.ch7b) # Lists first six observations head(data.ch7b) # Variable list names(data.ch7b) # Summary statistics summary(data.ch7b) sapply(data.ch7b,mean) sapply(data.ch7b,sd) # Formula for the model estimated in this chapter - shortens the commands below formula.ch7bmodel <- as.formula(glt ~ x1+x2+x3+x4) # Poisson with default standard errors (variance is a multiple of the mean) model.poiss <- glm(formula.ch7bmodel, family=poisson()) summary(model.poiss) # Poisson with robust standard errors (no assumption on variance) # install.packages("sandwich") library(sandwich) cov.robust <- vcovHC (model.poiss, type="HC0") se.robust <- sqrt(diag(cov.robust)) coeffs <- coef(model.poiss) t.robust <- coeffs / se.robust summary.poissrobust <- cbind(coeffs, se.robust, t.robust, pvalue=round(2*(1-pnorm(abs(coeffs/se.robust))),5), lower=coeffs-1.96*se.robust, upper=coeffs+1.96*se.robust) print(summary.poissrobust,digits=3) # close log file sink()