# racd06p1.R  March 2012 for R version 2.15.0

# Start from an empty workspace: remove every object visible from here
rm(list = ls())

# Send all subsequent printed output to the log file racd06p1.Rout
sink(file = "racd06p1.Rout")

# ********** OVERVIEW OF racd06p1.R **********

# R Program 
# copyright C 2012 by A. Colin Cameron and Pravin K. Trivedi 
# used for "Regression Analysis of Count Data" SECOND EDITION
# by A. Colin Cameron and Pravin K. Trivedi (2012)
# Cambridge University Press

# To run you need files
#   racd06data1healthcare.dta
# and R packages
#   foreign, MASS, gamlss, pscl, flexmix

# This R program gives some of the analysis for the chapter 6 Doctor Visits example
# The Stata program racd06p2.do does more.

#  6.3 NMES DOCTOR VISITS
 
# ********* DATA DESCRIPTION

# The data are extracted from the 1987-88 National Medical Expenditure Survey (NMES).
# The extract and analysis are in P. Deb and P.K. Trivedi (1997),
# "Demand for Medical Care by the Elderly: A Finite Mixture Approach",
# Journal of Applied Econometrics, 12, 313-336.

# See this article for more detailed discussion 
# Also see racd06makedata1healthcare.do for further details

# ********* 6.3 NMES DOCTOR VISITS: READ DATA AND SUMMARIZE 

# Load the Stata-format data file (read.dta is provided by package foreign)
# install.packages("foreign")
library(foreign)
data.ch06p1 <- read.dta(file = "racd06data1healthcare.dta")

# Put the data frame on the search path so that its columns (e.g. OFP)
# can be referred to by bare name
attach(data.ch06p1)

# First six observations
head(data.ch06p1)

# Distribution of the number of doctor visits (OFP):
# absolute counts, then counts divided by the number of rows
table(OFP)
table(OFP) / nrow(data.ch06p1)

# Names of all variables in the data set
names(data.ch06p1)

# Descriptive statistics: summary() per column, then column means
# and column standard deviations
summary(data.ch06p1)
sapply(data.ch06p1, FUN = mean)
sapply(data.ch06p1, FUN = sd)

# Regression specification shared by every model below:
# doctor visits (OFP) on health status, region, demographics,
# socio-economic status and insurance indicators
formula.ch06p1model <- OFP ~ EXCLHLTH + POORHLTH + NUMCHRON + ADLDIFF +
  NOREAST + MIDWEST + WEST + AGE + BLACK + MALE + MARRIED + SCHOOL +
  FAMINC + EMPLOYED + PRIVINS + MEDICAID

# Poisson regression with the default (model-based) standard errors.
# Note: for family = poisson() summary() takes the dispersion to be 1,
# i.e. these standard errors assume variance equal to the mean.
model.poiss <- glm(
  formula.ch06p1model,
  family = poisson(),
  data = data.ch06p1
)
summary(model.poiss)

# Negative binomial (NB2) maximum likelihood via glm.nb() from MASS
# install.packages("MASS")
library(MASS)
model.nb <- glm.nb(
  formula.ch06p1model,
  data = data.ch06p1
)
summary(model.nb)

# Fitted values from the NB2 model
yhat.nb <- fitted(model.nb)

# NB2 and NB1 fitted with gamlss
# (gamlss also allows the variance parameter to depend on regressors)
# install.packages("gamlss")
library(gamlss)

# Naming differs between the book and the package:
#   book NB2  <->  gamlss family NBI
#   book NB1  <->  gamlss family NBII
# Both fits use the same mixed(1, 20) fitting method.

# NB2 (gamlss: NBI)
model.nb2.gamlss <- gamlss(
  formula.ch06p1model,
  family = NBI,
  data = data.ch06p1,
  method = mixed(1, 20)
)
summary(model.nb2.gamlss)

# NB1 (gamlss: NBII)
model.nb1.gamlss <- gamlss(
  formula.ch06p1model,
  family = NBII,
  data = data.ch06p1,
  method = mixed(1, 20)
)
summary(model.nb1.gamlss)

# Zero-inflated count models using zeroinfl() from pscl
# install.packages("pscl")
library(pscl)

# Zero-inflated Poisson.
# The formula has no separate inflation part, so the inflation equation
# uses the same regressors as the count equation.
model.zip.poiss <- zeroinfl(
  formula.ch06p1model,
  data = data.ch06p1,
  dist = "poisson"
)
summary(model.zip.poiss)
logl.zip.poiss <- logLik(model.zip.poiss)

# Zero-inflated negative binomial (NB2), again with the same regressors
# in the inflation equation
model.zip.nb2 <- zeroinfl(
  formula.ch06p1model,
  data = data.ch06p1,
  dist = "negbin"
)
summary(model.zip.nb2)
logl.zip.nb2 <- logLik(model.zip.nb2)

# Hurdle models using hurdle() from pscl

# NB2 for the positive counts; zero.dist is left at its default
# (logit for the hurdle)
model.hurdle.nb2logit <- hurdle(
  formula.ch06p1model,
  data = data.ch06p1,
  dist = "negbin"
)
summary(model.hurdle.nb2logit)
logl.hurdle.nb2logit <- logLik(model.hurdle.nb2logit)

# NB2 for the positive counts and NB2 for the hurdle as well
model.hurdle.nb2nb2 <- hurdle(
  formula.ch06p1model,
  data = data.ch06p1,
  dist = "negbin",
  zero.dist = "negbin"
)
summary(model.hurdle.nb2nb2)
logl.hurdle.nb2nb2 <- logLik(model.hurdle.nb2nb2)

# Put the four log-likelihoods (two zero-inflated, two hurdle models)
# side by side, one column per model, and print them
logls <- cbind(
  logl.zip.poiss,
  logl.zip.nb2,
  logl.hurdle.nb2logit,
  logl.hurdle.nb2nb2
)
logls

# Finite mixtures of Poisson regressions with flexmix
# (flexmix does not do negative binomial)
# install.packages("flexmix")
library(flexmix)
# NOTE(review): no random seed is set before the flexmix() calls, so
# results may vary from run to run - confirm whether a seed is wanted.

# Two components.
# summary() of the flexmix fit gives only limited information, so the
# model is passed through refit() and that result is summarised too.
model.FMP2 <- flexmix(
  formula.ch06p1model,
  data = data.ch06p1,
  k = 2,
  model = FLXMRglm(family = "poisson")
)
summary(model.FMP2)
model.FMP2.refit <- refit(model.FMP2)
summary(model.FMP2.refit)

# Three components, same procedure
model.FMP3 <- flexmix(
  formula.ch06p1model,
  data = data.ch06p1,
  k = 3,
  model = FLXMRglm(family = "poisson")
)
summary(model.FMP3)
model.FMP3.refit <- refit(model.FMP3)
summary(model.FMP3.refit)

# Average predicted probabilities (Poisson and NB2) versus the observed
# relative frequencies of doctor visits, using predprob() from pscl
# library(pscl)

# predprob() returns a matrix of fitted probabilities; colMeans() averages
# each column over the observations.
p.poiss <- predprob(model.poiss)
p.poissave <- colMeans(p.poiss)
p.nb <- predprob(model.nb)
p.nbave <- colMeans(p.nb)

# Observed relative frequencies of the dependent variable OFP.
# (NUMBIDS is not a variable in this data set; the response modelled
# throughout this script is OFP.)
# The table is built over every count 0, 1, ..., max(OFP), so counts that
# never occur in the sample appear as explicit zeros. This is intended to
# line relfreqs up with the predprob() columns, which per the pscl
# documentation default to 0:max(y).
relfreqs <- table(
  factor(data.ch06p1$OFP, levels = 0:max(data.ch06p1$OFP))
) / nrow(data.ch06p1)
predictedprobs <- cbind(p.poissave, p.nbave)
relfreqs
sum(relfreqs)
predictedprobs
colSums(predictedprobs)

# Side-by-side comparison, shown only when the two objects really do have
# the same number of count values
if (length(relfreqs) == nrow(predictedprobs)) {
  cbind(relfreqs = as.vector(relfreqs), predictedprobs)
}

# Dimensions of the predicted probability matrix.
# Tabulating only the observed values of the response gives fewer entries
# than ncol(p.poiss) whenever some counts below the maximum never occur,
# which is why relfreqs is built over the full range above.
nrow(p.poiss)
ncol(p.poiss)

# Stop diverting output to the log file
sink(file = NULL)

