* racd06p4.do  June 2013 for Stata version 12

capture log close
log using racd06p4.txt, text replace

********** OVERVIEW OF racd06p4.do **********

* STATA Program 
* copyright C 2013 by A. Colin Cameron and Pravin K. Trivedi 
* used for "Regression Analyis of Count Data" SECOND EDITION
* by A. Colin Cameron and Pravin K. Trivedi (2013)
* Cambridge University Press

* Chapter 6.6 only
*   6.6 MODEL SELECTION CRITERIA: A DIGRESSION

* To run you need no dataset as simulated data
* and user-written Stata addon
*   fmm
* in your directory

********** SETUP **********

set more off
version 12
clear all
set mem 10m
* set linesize 82
set scheme s1mono  /* Graphics scheme */

********** DATA DESCRIPTION

* Generated data

********** RESULTS HERE DIFFER FROM THE BOOK

* The book presents results summarized in section 4 of Deb and Trivedi (1997). 
* The more detailed results given in Table 6.18 (2nd ed.) and Table 6.15 (1st ed.)
* were not given in the Deb and Trivedi article.
* Furthermore we do not have the code that produced these tables.

* Instead, here we simply generate data from similar dgp's. 
* And we do a small amount of the analysis in Table 6.18 (part of column 1)

********** 6.6 MODEL SELECTION CRITERIA: A DIGRESSION

* Three data generating processes
*   (1)  Poisson         mu = exp(-1.445 + 3.0*x)
*   (2)  Poisson Hurdle  Zeros      mu = exp(-1.6 + 3.0*x)
*                        Positives  mu = exp(-1.35 + 3.0*x)
*   (3)  FM Poisson      Component1 mu = exp(-1.225 + 3.0*x)
*                        Component1 mu = exp(-1.5 + 0.75x*x)


**** DATA GENERATING PROCESSES

clear
set obs 100000
set seed 10101
generate x = runiform()

* Poisson
* generate yP = rpoisson(exp(-1.6 + 3.0*x))
generate yP = rpoisson(exp(-1.445 + 3.0*x))

* Hurdle Poisson - not so easy to draw from
generate yH = 0
generate muH2 = exp(-1.35 + 3.0*x)
scalar miny = 0
* The following draws from truncated at zero Poisson
while miny == 0 {
 generate yph = rpoisson(muH2) 
 quietly replace yH = yph if yH==0 
 drop yph
 quietly summarize yH
 scalar miny = r(min)
}
* Now bring in the hurdle
replace yH = 0 if runiform() < (1 - (1-exp(-exp(-1.6 + 3.0*x))))

* Finite mixture
generate yFM = rpoisson(exp(-1.225 + 3.0*x))
replace yFM = rpoisson(exp(-1.5 + .75*x)) if runiform() > .75
generate yP0 = yP==0
generate yH0 = yH==0
generate yFM0 = yFM==0

* Calculate the fraction of positives
generate dyP = yP > 0
generate dyH = yH > 0
generate dyFM = yFM > 0

summarize

*** Check the dgps

* Poisson: dgp was rpoisson(exp(-1.445 + 3.0*x))
poisson yP x

* Finite mixture: dgp was pi = .75 mixture of 
*  rpoisson(exp(-1.225 + 3.0*x)) & rpoisson(exp(-1.5 + .75*x)) 
fmm yFM x, components(2) mixtureof(poisson)
scalar AICFM = -2*e(ll) + 2*e(k)
scalar BICFM = -2*e(ll) + ln(e(N))*e(k)
display "BIC Finite Mixture = " BICFM

* Hurdle: 
* Hurdle first component: Poisson for the zeroes
program lfpoissonbinary
  version 10.1
  args lnf theta1                 
  tempvar mu p0
  local y $ML_y1                  
  generate double `mu'  = exp(`theta1')
  generate double `p0' = exp(-`mu')
  quietly replace `lnf' = ln(`p0') if $ML_y1 == 0
  quietly replace `lnf' = ln(1-`p0') if $ML_y1 == 1
end
ml model lf lfpoissonbinary (dyH = x)
ml maximize, nolog
estimates store H1p
scalar llH1p = e(ll)
scalar kH1p = e(k)
scalar nH = e(N)
* Hurdle second component: Poisson for the positives 
ztp yH x if yH>0
estimates store H2p
scalar llH2p = e(ll)
scalar kH2p = e(k)
* Combine two parts 
scalar llHp = llH1p + llH2p
scalar kHp = kH1p + kH2p
scalar AICHp = -2*llHp + 2*kHp
scalar BICHp = -2*llHp + ln(nH)*kHp

* Display hurdle results
estimates table H1p H2p, eq(1) b se
display "BIC Hurdle Poisson = " BICHp


**** SIMULATIONS

* This does part of the first column of Table 6.18
* The data generating process is Poisson
* Just Poisson and Poisson FM models are estimated
* The models are compared using LR test, AIC and BIC

* To speed up program there are just 50 simulations
* numsims shoudl be increased

clear
set seed 10101
global numsims 50
global numobs 500
program simbypost
    version 10.1 
    tempname simfile
    postfile `simfile' bpcons bpx bfmp1cons bfmp1x bfmp2cons bfmp2x pi lrtest lrreject AICplow BICplow using simresults, replace
    quietly {
       forvalues i = 1/$numsims { 
         display "simulation: " `i'
         drop _all
         set obs $numobs
         generate x = runiform()                 
         generate y = rpoisson(exp(-1.445 + 3.0*x))
         poisson y x
         scalar bpx =_b[x]
         scalar bpcons = _b[_cons]
         scalar llp = e(ll)
         scalar AICp = -2*e(ll) + 2*e(k)
         scalar BICp = -2*e(ll) + ln(e(n))*e(k)
         fmm y x, components(2) mixtureof(poisson) iter(20)
         scalar llfmm = e(ll)
         scalar AICFM = -2*e(ll) + 2*e(k)
         scalar BICFM = -2*e(ll) + ln(e(n))*e(k)
         scalar converge = e(converge)         
         matrix bfmp = e(b)
         scalar bfmp1x = exp(bfmp[1,1])
         scalar bfmp1cons = exp(bfmp[1,2])
         scalar bfmp2x = exp(bfmp[1,3])
         scalar bfmp2cons = exp(bfmp[1,4])
         scalar pi = exp(bfmp[1,5])/(1+exp(bfmp[1,5]))
         scalar lrtest = 2*(llfmm - llp)
         scalar lrreject = lrtest > invchi2(1,.95)
         scalar AICplow = AICp < AICFM
         scalar BICplow = BICp < BICFM
         post `simfile' (bpcons) (bpx) (bfmp1cons) (bfmp1x) (bfmp2cons) (bfmp2x) (pi) (lrtest) (lrreject) (AICplow) (AICplow)
      }
    }
    postclose `simfile'
end
simbypost
use simresults, clear
summarize
mean bpcons bpx bfmp1cons bfmp1x bfmp2cons bfmp2x pi lrtest lrreject AICplow BICplow, noheader

summarize lrtest, detail
centile lrtest, centile(10 20 30 40 50 60 70 80 90 95)
display "chisquare 1 degree at 80% " invchi2(1,.80) " and 90% " invchi2(1,.90) " and 95% " invchi2(1,.95)
display "chisquare 1 degree at 80% " invchi2(2,.80) " and 90% " invchi2(2,.90) " and 95% " invchi2(2,.95)

kdensity lrtest
quietly sum lrtest
generate evalpoint = r(max)*_n/_N
generate chi1dens = (1/sqrt(2))*exp(-lngamma(1/2))*(evalpoint^(-1/2))*exp(-evalpoint/2) 
generate chi2dens = (1/2)*1*1*exp(-evalpoint)
graph twoway kdensity lrtest || line chi1dens evalpoint ||  line chi2dens evalpoint

********** CLOSE OUTPUT

* log close
* clear
* exit

