* racd06p1.do  June 2013 for Stata version 12

capture log close
log using racd06p1.txt, text replace

********** OVERVIEW OF racd06p1.do **********

* STATA Program 
* copyright C 2013 by A. Colin Cameron and Pravin K. Trivedi 
* used for "Regression Analyis of Count Data" SECOND EDITION
* by A. Colin Cameron and Pravin K. Trivedi (2013)
* Cambridge University Press

* Chapter 6.3 only
*   6.3 NMES DOCTOR VISITS 

* To run you need file
*   racd06data1healthcare.dta
* and user-written Stata addon
*   fmm
* in your directory

********** SETUP **********

set more off
version 11.2
clear all
set mem 10m
* set linesize 82
set scheme s1mono  /* Graphics scheme */

********** DATA DESCRIPTION

* The data are extracted from the 1987-88 National Medical Expenditure Survey (NMES).
* The extract and analysis are in P. Deb and P.K. Trivedi (1997),
* Demand for Medical Care by the Elderly: A Finite Mixture Approach" 
* Journal of Applied Econometrics, 12, 313-326.

* See this article for more detailed discussion 
* Also see racd06makedata1healthcare.do for further details 

********** RESULTS HERE DIFFER IN SOME PLACES FROM THE BOOK

* This Stata program reanalyzes the data given in the published paper by
* Deb and Trivedi (1997). Their results used quite different code written 
* in a program other than Stata, and there is some difference in results.

* Tables 6.1 and 6.2 generated here are the same as Deb and Trivedi (1997).
* For Tables 6.3 - 6.5 generatedhere there are differences for some of the
* many models estimated. The book reports the tables in the published article.
* For Tables 6.6 - 6.8 the diferences are small and the book reports
* the Stata output given below.

* Using the results obtained below Table 6.4 in the book becomes

*   TABLE 6.4 RE-COMPUTED USING CODE WRITTEN IN STATA

*   NB1 or NB2     Model     k     lnL     AIC    BIC     T_GOF
*   -------------------------------------------------------------
*   NB1            NB        18  -12156   24348   24463    Not
*   NB1            NBH       36  -12126   24325   24554  computed
*   NB1            CFMNB-2   21  -12098   24238   24372    Not 
*   NB1            FMNB-2    37  -12092   24259   24495  computed
*   NB1            CFMNB-3   24  -12096   24239   24392    Not
*   NB1            FMNB-3    56  -12050   24210   24562  computed
*   -------------------------------------------------------------
*   NB2            NB        18  -12202   24440   24555    Not
*   NB2            NBH       36  -12108   24289   24519  computed
*   NB2            CFMNB-2   21  -12149   24340   24474    Not 
*   NB2            FMNB-2    37  -12139   24352   24589  computed
*   NB2            CFMNB-3   24  -12144   24336   24490    Not
*   NB2            FMNB-3    56  -12080   24272   24630  computed
*   -------------------------------------------------------------

********** 6.3.1 NMES DOCTOR VISITS DATA: READ DATA AND SUMMARIZE 

use racd06data1healthcare.dta, clear

*** TABLE 6.1: OFP Frequency distribution
generate OFPfreqs = OFP
replace OFPfreqs = 13 if OFPfreqs >= 13
tabulate OFPfreqs

*** TABLE 6.2: Variable descriptions and summary statistics
describe
summarize

tabulate OFP
* OFP histogram
* histogram OFP if OFP < 45, discrete fraction scale(1.2)

* Global for the regressors
global XLIST EXCLHLTH POORHLTH NUMCHRON ADLDIFF NOREAST MIDWEST WEST AGE ///
  BLACK MALE MARRIED SCHOOL FAMINC EMPLOYED PRIVINS MEDICAID

*********** 6.3.4 MODEL SELECTION AND COMPARISON (NB, Hurdle, FM) for Tables 6.3-6.5

* Poisson
poisson OFP $XLIST, vce(robust) 
estimates store POISSON

* NB1
nbreg OFP $XLIST, vce(robust) dispersion(constant) 
estimates store NB1
scalar llNB1 = e(ll)
scalar kNB1 = e(k)

* NB2
nbreg OFP $XLIST, vce(robust)
estimates store NB2
scalar llNB2 = e(ll)
scalar kNB2 = e(k)

****** HURDLE MODELS - FIRST COMPONENT logit or NB1 or NB2

generate DOFP = OFP > 0

* Hurdle first component: logit
logit DOFP $XLIST, vce(robust)
estimates store H1logit
scalar llH1logit = e(ll)
scalar kH1logit = e(k)
scalar nH = e(N)

* Hurdle first component: NB1
program lfNB1binary
  version 10.1
  args lnf theta1 a               // theta1=x'b, a=alpha, lnf=lnf(y)
  tempvar mu p0
  local y $ML_y1                  // Define y so program more readable
  generate double `mu'  = exp(`theta1')
  generate double `p0' = (1/(1+`a'))^(`mu'/`a')
  quietly replace `lnf' = ln(`p0') if $ML_y1 == 0
  quietly replace `lnf' = ln(1-`p0') if $ML_y1 == 1
end
ml model lf lfNB1binary (DOFP = $XLIST) (), vce(robust)
ml maximize, nolog
estimates store H1NB1
scalar llH1NB1 = e(ll)
scalar kH1NB1 = e(k)

* Hurdle first component: NB2
program lfNB2binary
  version 10.1
  args lnf theta1 a               // theta1=x'b, a=alpha, lnf=lnf(y)
  tempvar mu p0
  local y $ML_y1                  // Define y so program more readable
  generate double `mu'  = exp(`theta1')
  generate double `p0' = (1/(1+`a'*`mu'))^(1/`a')
  quietly replace `lnf' = ln(`p0') if $ML_y1 == 0
  quietly replace `lnf' = ln(1-`p0') if $ML_y1 == 1
end
ml model lf lfNB2binary (DOFP = $XLIST) (), vce(robust)
ml maximize, nolog
estimates store H1NB2
scalar llH1NB2 = e(ll)
scalar kH1NB2 = e(k)

****** HURDLE MODELS - SECOND COMPONENT NB1 or NB2

* Hurdle second component: NB1
ztnb OFP $XLIST if OFP>0, dispersion(constant)  vce(robust)
estimates store H2NB1
scalar llH2NB1 = e(ll)
scalar kH2NB1 = e(k)

* Hurdle second component: NB2
ztnb OFP $XLIST if OFP>0, dispersion(mean) vce(robust)
estimates store H2NB2
scalar llH2NB2 = e(ll)
scalar kH2NB2 = e(k)

* Combine two parts: NB1 and NB1
scalar llHNB1 = llH1NB1 + llH2NB1
scalar kHNB1 = kH1NB1 + kH2NB1
scalar AICHNB1 = -2*llHNB1 + 2*kHNB1
scalar BICHNB1 = -2*llHNB1 + ln(nH)*kHNB1

* Combine two parts: NB2 and NB2
scalar llHNB2 = llH1NB2 + llH2NB2
scalar kHNB2 = kH1NB2 + kH2NB2
scalar AICHNB2 = -2*llHNB2 + 2*kHNB2
scalar BICHNB2 = -2*llHNB2 + ln(nH)*kHNB2

* Combine two parts: logit and NB1
scalar llHNB1log = llH1logit + llH2NB1
scalar kHNB1log = kH1logit + kH2NB1
scalar AICHNB1log = -2*llHNB1log + 2*kHNB1log
scalar BICHNB1log = -2*llHNB1log + ln(nH)*kHNB1log

* Combine two parts: logit and NB2
scalar llHNB2log = llH1logit + llH2NB2
scalar kHNB2log = kH1logit + kH2NB2
scalar AICHNB2log = -2*llHNB2log + 2*kHNB2log
scalar BICHNB2log = -2*llHNB2log + ln(nH)*kHNB2log

****** FINITE MIXTURE MODELS - constrained and unconstrained

**** NB1 Finite Mixtures Models

* Finite mixtures NB1 - 2 components unconstrained
fmm OFP $XLIST, components(2) mixtureof(negbin1) vce(robust)
estimates store FM2NB1
estat ic
scalar llFM2NB1 = e(ll)
scalar kFM2NB1 = e(rank)

* Finite mixtures NB1 - 3 components unconstrained
* This has convergence problems - stop at exactly 20 iterations
fmm OFP $XLIST, components(3) mixtureof(negbin1) vce(robust) iter(20)
estimates store FM3NB1 
estat ic
scalar llFM3NB1 = e(ll)
scalar kFM3NB1 = e(rank)

* Constrained Finite mixtures NB1 - 2 components only intercept varies
local i 1
foreach var of varlist $XLIST {
  constraint `i' [component1]`var' = [component2]`var'
  local i = `i' + 1
  }
fmm OFP $XLIST, components(2) mixtureof(negbin1) constraints(1/`i')
estimates store CFM2NB1
estat ic
scalar llCFM2NB1 = e(ll)
* e(rank) gives number of parameters (e(k)) minus number of constraints
scalar kCFM2NB1 = e(rank)

* Constrained Finite mixtures NB1 - 3 components only intercept varies
* This needs to directly follow the constrained 2 components case
* Note the constraints 1=2 are remembered so only need to add constraints that 2=3
foreach var of varlist $XLIST {
  constraint `i' [component2]`var' = [component3]`var'
  local i = `i' + 1
  }
* This ultimately converges
fmm OFP $XLIST, components(3) mixtureof(negbin1) constraints(1/`i') 
estimates store CFM3NB1
estat ic
scalar llCFM3NB1 = e(ll)
* e(rank) gives number of parameters (e(k)) minus number of constraints
scalar kCFM3NB1 = e(rank)

**** NB2 Finite Mixtures Models

* Finite mixtures NB2 - 2 components unconstrained
fmm OFP $XLIST, components(2) mixtureof(negbin2) vce(robust)
estimates store FM2NB2
estat ic
scalar llFM2NB2 = e(ll)
scalar kFM2NB2 = e(k)

* Finite mixtures NB2 - 3 components unconstrained
* This convergence after about 35 iterations
fmm OFP $XLIST, components(3) mixtureof(negbin2) vce(robust) iter(30)
estimates store FM3NB2 
estat ic
scalar llFM3NB2 = e(ll)
scalar kFM3NB2 = e(k)

* Constrained Finite mixtures NB2 - 2 components only intercept varies
local i 1
foreach var of varlist $XLIST {
  constraint `i' [component1]`var' = [component2]`var'
  local i = `i' + 1
  }
fmm OFP $XLIST, components(2) mixtureof(negbin2) constraints(1/`i')
estimates store CFM2NB2
estat ic
scalar llCFM2NB2 = e(ll)
* e(rank) gives number of parameters (e(k)) minus number of constraints
scalar kCFM2NB2 = e(rank)

* Constrained Finite mixtures NB2 - 3 components only intercept varies
* This needs to directly follow the constrained 2 components case
* Note the constraints 1=2 are remembered so only need to add constraints that 2=3
foreach var of varlist $XLIST {
  constraint `i' [component2]`var' = [component3]`var'
  local i = `i' + 1
  }
fmm OFP $XLIST, components(3) mixtureof(negbin2) constraints(1/`i')
estimates store CFM3NB2
estat ic
scalar llCFM3NB2 = e(ll)
* e(rank) gives number of parameters (e(k)) minus number of constraints
scalar kCFM3NB2 = e(rank)

*** TABLE 6.3: OFP VISITS LIKELIHOOD RATIO TESTS 

*** This differs from book as Trivedi and Deb (1977) used different program

* NB1 likelihood ratio tests
dis "NB1 vs NB1H          : " 2*(llHNB1-llNB1) "  DofF = " kHNB1log-kNB1
dis "NB1 vs CFM2 NB1      : " 2*(llCFM2NB1-llNB1) "  DofF = " kCFM2NB1-kNB1
dis "NB1 vs FM2 NB1       : " 2*(llFM2NB1-llNB1) "  DofF = " kFM2NB1-kNB1
dis "CFM2 NB1 vs FM2 NB1  : " 2*(llFM2NB1-llCFM2NB1) "  DofF = " kFM2NB1-kCFM2NB1
dis "CFM2 NB1 vs CFM3 NB1 : " 2*(llCFM3NB1-llCFM2NB1) "  DofF = " kCFM3NB1-kCFM2NB1
dis "FM3 NB1 vs FM2 NB1   : " 2*(llFM3NB1-llFM2NB1) "  DofF = " kFM3NB1-kFM2NB1

* NB2 likelihood ratio tests
dis "NB2 vs NB2H          : " 2*(llHNB2-llNB2) "  DofF = " kHNB2log-kNB2
dis "NB2 vs CFM2 NB2      : " 2*(llCFM2NB2-llNB2) "  DofF = " kCFM2NB2-kNB2
dis "NB2 vs FM2 NB2       : " 2*(llFM2NB2-llNB2) "  DofF = " kFM2NB2-kNB2
dis "CFM2 NB2 vs FM2 NB2  : " 2*(llFM2NB2-llCFM2NB2) "  DofF = " kFM2NB2-kCFM2NB2
dis "CFM2 NB2 vs CFM3 NB2 : " 2*(llCFM3NB2-llCFM2NB2) "  DofF = " kCFM3NB2-kCFM2NB2
dis "FM3 NB2 vs FM2 NB2   : " 2*(llFM3NB2-llFM2NB2) "  DofF = " kFM3NB2-kFM2NB2

*** TABLE 6.4: OFP VISITS LL and AIC for various models

*** This differs from book as Trivedi and Deb (1977) used different program
*** Also we do not calculate GoF tests here

** TABLE 6.4: NB1 and NB2 Hurdle models - second row of table

display "Hurdle: NB1 / NB1    : k = " kHNB1 " and ll = " llHNB1 " and AIC = " AICHNB1 " and BIC = " BICHNB1 
display "Hurdle: NB2 / NB2    : k = " kHNB2 " and ll = " llHNB2 " and AIC = " AICHNB2 " and BIC = " BICHNB2

* ASIDE: Instead use logit at first stage
display "Hurdle: Logit / NB1  : k = " kHNB1log " and ll = " llHNB1log " and AIC = " AICHNB1log " and BIC = " BICHNB1log
display "Hurdle: Logit / NB2  : k = " kHNB2log " and ll = " llHNB2log " and AIC = " AICHNB2log " and BIC = " BICHNB2log

** TABLE 6.4: Remaining NB1 models

estimates table POISSON NB1 CFM2NB1 FM2NB1 CFM3NB1 FM3NB1, keep(EXCLHLTH) ///
   b(%10.3f) t(%10.2f) stats(l ll aic bic N) equations(1) 

** TABLE 6.4: Remaining NB2 models

estimates table POISSON NB2 CFM2NB2 FM2NB2 CFM3NB2 FM3NB2, keep(EXCLHLTH) ///
   b(%10.3f) t(%10.2f) stats(l ll aic bic N) equations(1) 

** Focus on probabilities and overdispersion parameters in FM
estimates table CFM3NB1 FM3NB1 CFM3NB2 FM3NB2, keep (imlogitpi1:_cons imlogitpi2:_cons lnalpha1:_cons lnalpha2:_cons lnalpha1:_cons)

*********** 6.3.8 HURDLE MODEL (Table 6.8)

*** TABLE 6.8: NB2 HURDLE ESTIMATES WITH LOGIT FIRST STAGE

* Could use earlier results
* Simpler to use user-written addon command hnblogit
hnblogit OFP $XLIST, vce(robust)
scalar llhnblogit = e(ll)
estat ic

*********** 6.3.5-6.3.7  DETAILED ANALYSIS OF UNCONSTRAINED FNM NB1 model with 2 components

preserve 

* TABLES 6.5, 6.6 and 6.7 recode the largest value of OFP from 89 to 70

replace OFP = 70 if OFP > 70

* See how NB1 loglikelihood changes as mentioned in text
quietly nbreg OFP $XLIST, vce(robust) dispersion(constant)
display "Fitted log-likelihood for NB1 = " e(ll) 

* Estimate FMM model model and save results
fmm OFP $XLIST, components(2) mixtureof(negbin1) vce(robust)
estimates store FMMalt70
estat ic

* Create fitted means and variances in the two components
matrix theta = e(b)
scalar delta1 = exp(theta[1,e(k)-1])
scalar delta2 = exp(theta[1,e(k)])
predict mu1, mean equation(component1) 
predict mu2, mean equation(component2)
generate mu1alt = mu1
generate mu2alt = mu2
generate var1 = mu1*(1+delta1)
generate var2 = mu2*(1+delta2)
predict p1, prior equation(component1) 
predict posterior1, posterior equation(component1) 
generate mufm = p1*mu1 + (1-p1)*mu2
* The following differs from Table 6.5 - need to check
generate varfm = p1*(mu1^2 + var1) + (1-p1)*(mu2^2 + var2) - mufm^2

* Create predicted probabilities for preferred 2-component NB1 finite mixture model
* For 2-component NB2 estimate 2 component NB2 model and then 
* change  generate ainvmu = mu1/delta1  to  ainvmu = 1/delta1
* change  replace ainvmu = mu2/delta2   to  ainvmu = 1/delta2

global MAXCOUNT 20
* First component
generate mu = mu1
generate ainvmu = mu1/delta1    // For NB1: (1/a)*mu   For NB2: use (1/a)
generate pfit1sum = 0
forvalues i = 0/$MAXCOUNT {
   generate pfit1`i' = lngamma(`i'+ainvmu) - lngamma(ainvmu) - lnfactorial(`i') + ainvmu*ln(ainvmu/(ainvmu+mu)) + `i'*ln(mu/(ainvmu+mu))
   quietly replace pfit1`i' = exp(pfit1`i')
   quietly replace pfit1sum = pfit1sum + pfit1`i'
   }
* Second component
replace mu = mu2
replace ainvmu = mu2/delta2
generate pfit2sum = 0
forvalues i = 0/$MAXCOUNT {
   generate pfit2`i' = lngamma(`i'+ainvmu) - lngamma(ainvmu) - lnfactorial(`i') + ainvmu*ln(ainvmu/(ainvmu+mu)) + `i'*ln(mu/(ainvmu+mu))
   quietly replace pfit2`i' = exp(pfit2`i')
   quietly replace pfit2sum = pfit2sum + pfit2`i'
   }
* Combined
generate pfitsum = 0
forvalues i = 0/$MAXCOUNT {
   generate pfitfm`i' = p1*pfit1`i' + (1-p1)*pfit2`i'
   }
generate pfitfmsum = p1*pfit1sum + (1-p1)*pfit2sum
generate pfitfmge10 = 1
forvalues i = 0/9 {
    replace pfitfmge10 = pfitfmge10 - pfitfm`i'
   }

*** TABLE 6.5: FITTED PROBABILITIES FROM THE TWO COMPONENT MODEL

display "Fitted probabilities from 2 component NB1 finite mixture model" 
summarize pfitfm*

*** TABLE 6.6: FMM MODEL MEANS and VARIANCE in the TWO COMPONENTS

* Summary statistics
summarize mu1 mu2 var1 var2 mufm varfm p1 posterior1

*** FIGURE 6.3: PREDICTED MEANS IN THE TWO COMPONENTS 

* Histogram of means
quietly histogram mu1, width(2) name(_comp_1, replace) scale(1.5)
quietly histogram mu2, width(2) name(_comp_2, replace) scale(1.5)
graph combine _comp_1 _comp_2, iscale(0.7) ysize(3) xsize(6) xcommon
quietly graph export racd06fig3.wmf, replace
quietly graph export racd06fig3.eps, replace

*** TABLE 6.7: FMM MODEL ESTIMATES

* Tabulate results for earlier estimated model and save results
estimates table FMMalt70, b(%10.3f) t(%10.2f) stats(l ll aic bic N) equations(1)

****** DIRECTIONAL GRADIENTS

restore 

* NB1 model
nbreg OFP $XLIST, dispersion(constant)
predict exb
scalar delta = exp(_b[/lndelta])
generate psi = exb / delta
scalar phi = ln(1+delta)

forvalues y=0/12 {
generate f`y'_nb1 = exp(lngamma(`y'+psi) - lngamma(`y'+1) ///
	           - lngamma(psi) + _b[/lndelta]*`y' - (`y'+psi)*phi)
}
drop exb psi

* FM model
fmm OFP $XLIST, mix(negbin1) components(2)
predict exb1, eq(component1)
predict exb2, eq(component2)
sum exb1 exb2
scalar delta1 = exp(_b[/lndelta1])
scalar delta2 = exp(_b[/lndelta2])
generate psi1 = exb1 / delta1
generate psi2 = exb2 / delta2
scalar phi1 = ln(1+delta1)
scalar phi2 = ln(1+delta2)
forvalues y=0/12 {
generate f`y'_fmnb1 = e(pi1_est) * (exp(lngamma(`y'+psi1) - lngamma(`y'+1) ///
       - lngamma(psi1) + _b[/lndelta1]*`y' - (`y'+psi1)*phi1)) ///
       + e(pi2_est) * (exp(lngamma(`y'+psi2) - lngamma(`y'+1)  ///
       - lngamma(psi2) + _b[/lndelta2]*`y' - (`y'+psi2)*phi2))
}
drop exb1 exb2 psi1 psi2

* Compute directional gradients
forvalues y = 0/12 {
	gen d`y' = f`y'_nb1 / f`y'_fmnb1 - 1
}
preserve
collapse (mean) d0-d12
gen i = _n
reshape long d, i(i) j(y)

display "Directional gradients from 2 component NB1 finite mixture model"
list d y

*** FIGURE 6.2: OFP VISITS: DIRECTIONAL GRADIENTS

twoway line d y, xlabel(0(1)12) yline(0)

restore

******* SENSITIVITY TO OUTLIERS (end 6.3.6)

* Same model with original sample
fmm OFP $XLIST, components(2) mixtureof(negbin1) vce(robust)
estimates store FMMorig 
predict mu1, eq(component1)
predict mu2, eq(component2)

* Altered and then original
* sum mu1alt mu2alt mu1 mu2 
estimates table FMMalt70 FMMorig, b(%10.3f) t(%10.2f) stats(ll aic bic N k)

*** Also compare regular NB1 across the two samples (mentioned in text) and FMN-NB2

* Original sample - FMM-NB2 and regular NB1 
quietly fmm OFP $XLIST, components(2) mixtureof(negbin2) vce(robust)
estat ic
quietly nbreg OFP $XLIST, dispersion(constant) vce(robust) 
estat ic

* Altered sample - FMM-NB2 and regular NB1 dispersion(constant)
preserve 
replace OFP = 70 if OFP > 70
quietly fmm OFP $XLIST, components(2) mixtureof(negbin2) vce(robust)
estat ic
quietly nbreg OFP $XLIST, dispersion(constant) vce(robust) 
estat ic
restore

********** CLOSE OUTPUT

* log close
* clear
* exit