# racd06p2.R  March 2012 for R version 2.15.0

rm(list=ls())

# Create log file
sink("racd06p2.Rout")

# ********** OVERVIEW OF racd06p2.R **********

# R Program 
# copyright C 2012 by A. Colin Cameron and Pravin K. Trivedi 
# used for "Regression Analyis of Count Data" SECOND EDITION
# by A. Colin Cameron and Pravin K. Trivedi (2012)
# Cambridge University Press

# To run you need files
#   racd06data1healthcare.dta
# and R packages
#   foreign, MASS, gamlss, pscl, flexmix

# This R program gives some ofthe analysis for chapter 6 Recreational Trips example
# The Stata program racd06p2.do does more.

#  6.4 RECREATIONAL TRIPS
 
# ********* DATA DESCRIPTION

# A detailed discussion of the variables can be found in 
# C. Sellar, J.R. Stoll and J.P. Chavas (1985), 
# "Validation of Empirical Measures of Welfare Change: A Comparison of nonmarket 
# Techniques," Land Economics, 61, 156-175.  
# Data used with permission of Sellar et al. (1985)
# And also T. Ozuna and I. Gomaz (1995) 
# "Specification and Testing of Count Data Recreation Demand Functions," 
# Empirical Economics, 20, 543-550.

# See these articles for more detailed discussion 
# Also see racd06makedata2rectrips.dta.do for further details 

# ********* 6.3 NMES DOCTOR VISITS: READ DATA AND SUMMARIZE 

# Read in and select data
# install.packages("foreign")
library(foreign)
data.ch06p2 <- read.dta(file = "racd06data2rectrips.dta")

# Allows variables in database to be accessed simply by giving names
attach(data.ch06p2)
# Lists first six observations
head(data.ch06p2)

### TABLE 6.9 FREQUENCIES COUNT COUNT

# Tabulate counts of trips
table(TRIPS)
table(TRIPS) / nrow(data.ch06p2)

# Variable list
names(data.ch06p2)

### TABLE 6.10 SUMMARY STATISTICS

# Summary statistics
summary(data.ch06p2)
sapply(data.ch06p2,mean)
sapply(data.ch06p2,sd)

### TABLE 6.11 POISSON AND NEGATIVE BINOMIAL ESTIMATES

# Formula for the model estimated in this chapter - shortens the commands below
formula.ch06p2model <- as.formula(TRIPS ~ SO+SKI+I+FC3+C1+C3+C4)

# Poisson with default standard errors (variance equals the mean)
model.poiss <- glm(formula.ch06p2model, family=poisson(), data=data.ch06p2) 
summary(model.poiss)

# Poisson with robust standard errors (variance is a multiple of the mean)
model.poiss <- glm(formula.ch06p2model, family=poisson(), data=data.ch06p2) 
summary(model.poiss)

# Negative binomial 2 MLE using MASS
# install.packages("MASS")
library(MASS)
model.nb <- glm.nb(formula.ch06p2model, data=data.ch06p2) 
summary(model.nb)
yhat.nb <- fitted(model.nb)

# The next is not reported but includes NB1
# NB1 and NB2 using gamlss
# This also allows variance parameter to depend on regressors
# install.packages("gamlss")
library(gamlss)
# Note: What we call NB2 this package calls NBI
model.nb2.gamlss <- gamlss(formula.ch06p2model,family=NBI, data=data.ch06p2, method=mixed(1,20))
summary(model.nb2.gamlss)
# Note: What we call NB1 this package calls NBII
model.nb1.gamlss <- gamlss(formula.ch06p2model,family=NBII, data=data.ch06p2, method=mixed(1,20))
summary(model.nb1.gamlss)

### TABLE 6.12 FINITE MIXTURES POISSON 

# Finite mixtures of Poisson using flexmix (does not do negative binomial)
# install.packages("flexmix")
library(flexmix)

# Two-component model
model.FMP2 <- flexmix(formula.ch06p2model, data=data.ch06p2, k=2, model=FLXMRglm(family="poisson"))
summary(model.FMP2)   # Gives only limited information - need to refit
model.FMP2.refit <- refit(model.FMP2) 
# The fitted model differs from the book wich used Stata add-on fmm
# Here lnL = -947 is less than lnL = -917 
# The order is revered - here component 1 is low users and component 2 is high users
# While different from book the coefficients are qualitatively similar to book
summary(model.FMP2.refit)

# Three-component model does a lot better
model.FMP3 <- flexmix(formula.ch06p2model, data=data.ch06p2, k=3, model=FLXMRglm(family="poisson"))
summary(model.FMP3)   # Gives only limited information - need to refit
model.FMP3.refit <- refit(model.FMP3) 
summary(model.FMP3.refit)

### TABLE 6.13 NB2 HURDLE and ZERO-INFLATED NB2

# Hurdle Poisson (not reported)
# install.packages("pscl")
library(pscl)
# Inflation with same regressors as model and logit hurdle
model.hurdle.poiss <- zeroinfl(formula.ch06p2model, data=data.ch06p2, dist="poisson")
summary(model.hurdle.poiss)
logl.hurdle.poiss <- logLik(model.zip.poiss)

# Zero-inflated Poisson (not reported)
# install.packages("pscl")
library(pscl)
# Inflation with same regressors as model
model.zip.poiss <- zeroinfl(formula.ch06p2model, data=data.ch06p2, dist="poisson")
summary(model.zip.poiss)
logl.zip.poiss <- logLik(model.zip.poiss)

# Hurdle - NB2 with logit for hurdle
# Hurdle with (almost) same regressors as model

# Drop FC3 as regressor in hurdle part of model (explained in text)
# Gives same truncated NB2 results but different hurdle part to book
# And diffreent log-likelihood
model.hurdle.nb2logit <- hurdle(TRIPS ~ SO+SKI+FC3+I+C1+C3+C4 | 
   SO+SKI+I+C1+C3+C4, data=data.ch06p2, dist="negbin")
summary(model.hurdle.nb2logit)
logl.hurdle.nb2logit <- logLik(model.hurdle.nb2logit)

# Hurdle - NB2 with NB2 for hurdle
# Did not work
# model.hurdle.nb2logit <- hurdle(TRIPS ~ SO+SKI+FC3+I+C1+C3+C4 | 
#    SO+SKI+I+C1+C3+C4, data=data.ch06p2, dist="negbin", zero.dist="negbin")
# summary(model.hurdle.nb2nb2)
# logl.hurdle.nb2nb2 <- logLik(model.hurdle.nb2nb2)

# Zero-inflated Negative binomial 2
# Inflation with (almost) same regressors as model

# The original model did not run
# model.zip.nb2 <- zeroinfl(formula.ch06p2model, data=data.ch06p2, dist="negbin")
# yields
#  Error in solve.default(as.matrix(fit$hessian)) : 
#    Lapack routine dgesv: system is exactly singular

# Instead drop FC3 as a regressor in the zero-inflation part
# Gives chapter 12 results for positives and overall logL
formula.ch06p2model.noFC3 <- as.formula(TRIPS ~ SO+SKI+I+C1+C3+C4)
model.zip.nb2.noFC3 <- zeroinfl(TRIPS ~ SO+SKI+I+FC3+C1+C3+C4 | 
   SO+SKI+I+C1+C3+C4, data=data.ch06p2, dist="negbin")
summary(model.zip.nb2.noFC3)
logl.zip.nb2 <- logLik(model.zip.nb2.noFC3)

# Compare log-likelihoods
logls <- cbind(logl.hurdle.poiss,logl.zip.poiss,logl.hurdle.nb2logit,logl.zip.nb2)
logls

### TABLE 6.14  NOT DONE

# close log file
sink()

