# racd04.R  March 2012 for R version 2.15.0

# Divert console output to the log file racd04.Rout
sink(file = "racd04.Rout")

# Remove all (non-hidden) objects from the current workspace
rm(list = ls())

# ********** OVERVIEW OF racd04.R **********

# R Program 
# copyright C 2012 by A. Colin Cameron and Pravin K. Trivedi 
# used for "Regression Analysis of Count Data" SECOND EDITION
# by A. Colin Cameron and Pravin K. Trivedi (2012)
# Cambridge University Press

# To run you need file
#   racd09data.dta
# and R packages
#   foreign, MASS and flexmix 

# This R program does part of analysis of patents data for chapter 4
# (The Stata program racd04.do does more)

#  4.8.8 EXAMPLE: PATENTS
#  Poisson, NB2 and finite mixture of Poisson with 2 and 3 components
 
# ********* DATA DESCRIPTION

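# NOTE(review): the citation below describes takeover-bid data, not the patents
# data (PAT, LOGR, ...) analyzed in this script; it appears to be carried over
# from another chapter's program -- confirm the correct data source.
# The original data are from Sanjiv Jaggia and Satish Thosar, 1993,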
# "Multiple Bids as a Consequence of Target Management Resistance"
# Review of Quantitative Finance and Accounting, 447-457.
# The data are also used in 
# A.C. Cameron and Per Johansson (1997), 
# "Count Data Regression Models using Series Expansions: with Applications", 
# Journal of Applied Econometrics, May, Vol. 12, pp.203-223.

# For more details see these datasets and racd05makedata.dta

# ********* 4.8.8 READ DATA AND SUMMARIZE 

# Load the Stata reader. The package is installed only when it is missing, so
# re-running the script does not re-download it or require network access.
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}
library(foreign)

data.ch4 <- read.dta(file = "racd09data.dta")
data.ch4 <- subset(data.ch4, YEAR == 1)    # Selects 1979 only

# Variables are always referenced through data.ch4 (with() / data=) instead of
# attach(): an attached copy is a snapshot that does not see columns added
# later (such as LOGRandD below) and can silently mask other objects.

# Lists first six observations
head(data.ch4)

# Tabulate the dependent count variable PAT: frequencies, then relative
# frequencies (as a share of all rows in the data frame)
with(data.ch4, table(PAT))
with(data.ch4, table(PAT)) / nrow(data.ch4)

# Create extra variable and store it directly in the data frame:
# log of the sum of exp(LOGR), exp(LOGR1), ..., exp(LOGR5)
# NOTE(review): presumably log of total R&D over the current and five lagged
# periods -- confirm against the data documentation.
data.ch4$LOGRandD <- with(
  data.ch4,
  log(exp(LOGR) + exp(LOGR1) + exp(LOGR2) + exp(LOGR3) + exp(LOGR4) + exp(LOGR5))
)

# Variable list
names(data.ch4)

# Summary statistics
summary(data.ch4)
sapply(data.ch4, mean)
sapply(data.ch4, sd)

# Formula for the model estimated in this chapter - shortens the commands below
formula.ch4model <- PAT ~ LOGRandD + LOGK + SCISECT

# Poisson with default standard errors. With family = poisson() the dispersion
# is fixed at 1, i.e. these standard errors assume variance equal to the mean.
model.poiss <- glm(formula.ch4model, family = poisson(), data = data.ch4)
summary(model.poiss)

# Negative binomial 2 MLE (glm.nb from MASS; installed only when missing)
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)
model.nb <- glm.nb(formula.ch4model, data = data.ch4)
summary(model.nb)

# Finite mixtures of Poisson, fitted with flexmix
# (flexmix does not do negative binomial)
# install.packages("flexmix")
library(flexmix)

# Two-component model: Poisson regression in each of k = 2 components
model.FMP2 <- flexmix(
  formula = formula.ch4model,
  data = data.ch4,
  k = 2,
  model = FLXMRglm(family = "poisson")
)

# summary() of the initial fit gives only limited information,
# so the model is refitted and the refitted object is summarized as well
summary(model.FMP2)
model.FMP2.refit <- refit(model.FMP2)
summary(model.FMP2.refit)

# Three-component model: Poisson regression in each of k = 3 components.
# (k must be 3 here; with k = 2 this would just repeat the two-component fit.)
# NOTE(review): flexmix may drop components whose prior weight becomes very
# small during EM, so the fitted model can end up with fewer than 3
# components -- check the summary output.
model.FMP3 <- flexmix(formula.ch4model, data = data.ch4, k = 3,
                      model = FLXMRglm(family = "poisson"))
summary(model.FMP3)   # Gives only limited information - need to refit
model.FMP3.refit <- refit(model.FMP3)
summary(model.FMP3.refit)

# Stop diverting output: ends the most recent sink() diversion (the log file)
sink(file = NULL)
