* racd03.do  January 2013 for Stata version 12

* Close any log that is still open (ignored if none is), then start a fresh text log
capture log close
log using racd03.txt, replace text

********** OVERVIEW OF racd03.do **********

* STATA Program 
* copyright C 2013 by A. Colin Cameron and Pravin K. Trivedi 
* used for "Regression Analysis of Count Data" SECOND EDITION
* by A. Colin Cameron and Pravin K. Trivedi (2013)
* Cambridge University Press

* Chapter 3
*   3.2 POISSON REGRESSION WITH VARIOUS STANDARD ERRORS
*   3.3 NEGATIVE BINOMIAL WITH VARIOUS STANDARD ERRORS
*   3.4 OVERDISPERSION TESTS
*   3.5 MARGINAL EFFECTS AFTER POISSON
*   3.7 OTHER MODELS
* The bootstraps are commented out to speed up execution time.

* To run you need file
*   racd03data.dta
* The included output also includes output from the user-written Stata add-on countfit

********** SETUP **********

version 12                // interpret the commands below under Stata 12 rules
set more off              // do not pause output with --more--
clear all
// set linesize 82
set scheme s1mono         // Graphics scheme
 
********** DATA DESCRIPTION

* The data set racd03data.dta is the same data as originally used in
* (1) A.C. Cameron and P.K. Trivedi (1986), "Econometric Models Based on
* Count Data: Comparisons and Applications of  Some Estimators and Tests",
* Journal of Applied Econometrics, Vol. 1, pp. 29-54.
* and in other papers.

* This data is not a representative sample of Australians as it oversamples
* young and old. In particular, use of health services may be overstated.
* This is because while the original sample of 40,650 individuals
* from the 1977-78 Australian Health Survey is representative,
* the sample used here is restricted to single people over 18 years of age.
 
* See the R.E.Stud. (1988, pp.85-106) section 3 for more detailed
* discussion of the data than that given in the RACD book.
* Also see racd03makedata.do for further details 

********** 3.2 READ DATA AND SUMMARIZE 

use racd03data.dta, clear

*** TABLE 3.1: FREQUENCIES

* Frequency distribution of the number of doctor visits
tabulate DVISITS

*** TABLE 3.2: VARIABLE DEFINITIONS AND SUMMARY STATISTICS

* Variable descriptions, then summary statistics for every variable
describe
summarize

* Regressor list shared by all the regressions below (referenced as $XLIST)
global XLIST SEX AGE AGESQ INCOME               ///
   LEVYPLUS FREEPOOR FREEREPA                   ///
   ILLNESS ACTDAYS HSCORE CHCOND1 CHCOND2

********** 3.2 POISSON REGRESSION WITH VARIOUS STANDARD ERRORS

*** POISSON MLE and QMLE

* (1) Poisson with robust standard errors
poisson DVISITS $XLIST, vce(robust)
estimates store PRobust

*     The same fit obtained through glm
glm DVISITS $XLIST, link(log) family(poisson) vce(robust)

* (2) Poisson with bootstrap standard errors (asymptotically equivalent)
*     Left commented out to save time
* poisson DVISITS $XLIST, vce(boot, reps(400) seed(10101)) 
* estimates store PBoot

* (3) Poisson with default ml standard errors (same as vce(oim))
poisson DVISITS $XLIST
estimates store PMLHess

*     The same fit obtained through glm
glm DVISITS $XLIST, link(log) family(poisson)

* (4) Poisson with OPG standard errors
poisson DVISITS $XLIST, vce(opg)
estimates store PMLOPG

*     The same fit obtained through glm
glm DVISITS $XLIST, link(log) family(poisson) vce(opg)

* (5) Poisson with NB1 standard errors (glm with scale(x2))
glm DVISITS $XLIST, link(log) family(poisson) scale(x2)
estimates store PNB1

* Poisson NB2 standard errors
* Builds a sandwich variance InvHessian*Vmiddle*InvHessian by hand, where the
* middle term weights the regressor cross-products by mu + alpha*mu^2.
* Statement order matters: each step reads e() / r() results left by the
* command immediately before it.
quietly poisson DVISITS $XLIST 
* Save the default variance matrix and coefficient vector of the Poisson fit
matrix InvHessian = e(V)
matrix b = e(b)
* Sample size and e(k), used below as the degrees-of-freedom correction
* NOTE(review): e(k) is presumably the number of estimated parameters - confirm
scalar Nobs = e(N)
scalar k = e(k)
* Fitted mean (predicted number of events)
predict mu, n
* alpha estimate: sum over observations of ((y-mu)^2 - mu)/mu^2, divided by Nobs-k
generate terminsum = ((DVISITS - mu)^2 - mu) / (mu^2)
quietly summarize terminsum
scalar alphanb2 = r(sum) / (Nobs-k)
* alphanb2 is reused further down in the NB2 QGPMLE section
display "alpha for NB2 : " alphanb2
* Per-observation weight mu + alpha*mu^2
generate NB2weight = mu + alphanb2*mu^2
* Weighted cross-product matrix of the regressors (matrix accum adds the constant)
matrix accum Vmiddle = $XLIST [pweight = NB2weight]
matrix VNB2 = InvHessian*Vmiddle*InvHessian
* Post the Poisson coefficients with the NB2 variance matrix as current estimates
ereturn post b VNB2

** TABLE 3.3: POISSON PMLE WITH NB2 STANDARD ERRORS

* Coefficient table for the estimates posted just above
ereturn display

*** TABLE 3.3: POISSON PMLE WITH DIFFERENT STANDARD ERRORS 

* Side-by-side comparison of the stored Poisson fits. The NB2 column is the
* ereturn display output just above; PBoot needs the commented-out bootstrap.
estimates table PRobust PMLHess PMLOPG PNB1, ///
   b(%9.3f) se(%9.3f) t(%9.2f)
* estimates table PRobust PMLHess PMLOPG PNB1 PBoot, b(%9.3f) se(%9.3f) t(%9.2f)

/* Jackknife takes a long time so commented out. Results are as follows.
poisson DVISITS $XLIST, vce(jackknife)
Poisson regression                              Number of obs      =      5190
                                                Replications       =      5190
                                                F(  12,   5189)    =     77.11
                                                Prob > F           =    0.0000
Log likelihood = -3355.5413                     Pseudo R2          =    0.1576
------------------------------------------------------------------------------
             |              Jackknife
     DVISITS |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
         SEX |    .156882    .080145     1.96   0.050     -.000236    .3139999
         AGE |   1.056299   1.380652     0.77   0.444    -1.650361     3.76296
       AGESQ |  -.8487041   1.477986    -0.57   0.566    -3.746179    2.048771
      INCOME |  -.2053206   .1309306    -1.57   0.117    -.4619997    .0513585
    LEVYPLUS |   .1231854   .0961254     1.28   0.200    -.0652609    .3116318
    FREEPOOR |  -.4400609   .3139898    -1.40   0.161    -1.055613    .1754914
    FREEREPA |   .0797984   .1273904     0.63   0.531    -.1699404    .3295373
     ILLNESS |   .1869484   .0242433     7.71   0.000     .1394213    .2344755
     ACTDAYS |   .1268465   .0078955    16.07   0.000     .1113679     .142325
      HSCORE |    .030081   .0144887     2.08   0.038     .0016771     .058485
     CHCOND1 |   .1140853   .0917735     1.24   0.214    -.0658294        .294
     CHCOND2 |   .1411583   .1241703     1.14   0.256    -.1022679    .3845844
       _cons |  -2.223848   .2569408    -8.66   0.000     -2.72756   -1.720136
------------------------------------------------------------------------------
*/

* Poisson estimated with Stata's ml command (method lf).
* lfpois fills the per-observation log density
*   ln f(y) = -exp(x'b) + y*x'b - ln(y!)
program lfpois
  version 11
  args lnf xb                      // xb = x'b (linear index), lnf = ln f(y)
  local depvar "$ML_y1"            // dependent variable name supplied by ml
  tempvar lambda logyfact
  generate double `logyfact' = lnfactorial(`depvar')
  generate double `lambda'   = exp(`xb')
  quietly replace `lnf' = `depvar'*`xb' - `lambda' - `logyfact'
end
ml model lf lfpois (DVISITS = $XLIST), vce(robust)
ml maximize

/* Following not run to save time but cited in discussion of Table 3.3
   Output is given for the first bootstrap
* Two checks: 
* (1) correct standard errors if DVISITS_se observed Coef. 
*     is close to DVISITS Bootstrap Std. Error
* (2) variability of the s.e. is DVISITS_se Bootstrap Std. Error
* Poisson Robust sandwich se's
bootstrap _b _se, reps(400) seed(10101): poisson DVISITS $XLIST, vce(robust)
Bootstrap results                               Number of obs      =      5190
                                                Replications       =       400
------------------------------------------------------------------------------
             |   Observed   Bootstrap                         Normal-based
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
DVISITS      |
         SEX |    .156882    .076327     2.06   0.040     .0072839      .30648
         AGE |   1.056299   1.390803     0.76   0.448    -1.669625    3.782224
       AGESQ |  -.8487041   1.476844    -0.57   0.566    -3.743264    2.045856
      INCOME |  -.2053206    .128924    -1.59   0.111    -.4580069    .0473657
    LEVYPLUS |   .1231854   .0998184     1.23   0.217    -.0724551    .3188259
    FREEPOOR |  -.4400609   .2932651    -1.50   0.133     -1.01485    .1347281
    FREEREPA |   .0797984    .131406     0.61   0.544    -.1777525    .3373494
     ILLNESS |   .1869484   .0243491     7.68   0.000     .1392252    .2346717
     ACTDAYS |   .1268465   .0079706    15.91   0.000     .1112243    .1424686
      HSCORE |    .030081   .0138043     2.18   0.029     .0030251    .0571369
     CHCOND1 |   .1140853   .0869783     1.31   0.190    -.0563889    .2845595
     CHCOND2 |   .1411583   .1198889     1.18   0.239    -.0938196    .3761362
       _cons |  -2.223848   .2705066    -8.22   0.000    -2.754031   -1.693665
-------------+----------------------------------------------------------------
DVISITS_se   |
         SEX |   .0792209   .0031799    24.91   0.000     .0729885    .0854534
         AGE |   1.364474   .0778062    17.54   0.000     1.211977    1.516972
       AGESQ |   1.459683   .0716192    20.38   0.000     1.319312    1.600054
      INCOME |   .1292572   .0097382    13.27   0.000     .1101707    .1483436
    LEVYPLUS |   .0951652   .0047033    20.23   0.000     .0859468    .1043835
    FREEPOOR |   .2900225   .0384038     7.55   0.000     .2147525    .3652924
    FREEREPA |   .1257953   .0067341    18.68   0.000     .1125967    .1389939
     ILLNESS |   .0239387   .0014318    16.72   0.000     .0211323     .026745
     ACTDAYS |   .0077698   .0004421    17.58   0.000     .0069034    .0086362
      HSCORE |   .0142359   .0008156    17.45   0.000     .0126373    .0158345
     CHCOND1 |   .0908541   .0040289    22.55   0.000     .0829577    .0987505
     CHCOND2 |   .1227226   .0066592    18.43   0.000     .1096708    .1357744
       _cons |   .2544567    .011415    22.29   0.000     .2320838    .2768297
------------------------------------------------------------------------------
* Poisson NB1 se's (assumes variance multiple of the mean)
bootstrap _b _se, reps(400) seed(10101): glm DVISITS $XLIST, family(poisson) link(log)
*/

********** 3.3 NEGATIVE BINOMIAL WITH VARIOUS STANDARD ERRORS

*** NB2 MLE

* Negbin2 MLE: default standard errors
nbreg DVISITS $XLIST, dispersion(mean)
estimates store NB2MLHess

* Negbin2 MLE: robust standard errors
nbreg DVISITS $XLIST, vce(robust) dispersion(mean)
estimates store NB2Robust

* Negbin2 MLE: OPG standard errors
nbreg DVISITS $XLIST, vce(opg) dispersion(mean)
estimates store NB2MLOPG

* Negbin2 MLE: OIM standard errors
nbreg DVISITS $XLIST, vce(oim) dispersion(mean)
estimates store NB2MLOIM

* Negbin2 MLE: bootstrap standard errors (commented out to save time)
* nbreg DVISITS $XLIST, dispersion(mean) vce(boot, reps(400) seed(10101) nodots) 
* estimates store NB2Boot

* Compare the four sets of Negbin2 standard errors
estimates table NB2Robust NB2MLHess NB2MLOPG NB2MLOIM, se b(%9.3f)

* ASIDE: Negbin2 ML estimated with Stata's ml command (method lf, two equations).
* The first equation is the index x'b; the second, constant-only, is alpha.
program lfnb2
  version 11
  args lnf xb alpha               // xb = x'b, alpha = dispersion, lnf = ln f(y)
  local depvar $ML_y1             // dependent variable name supplied by ml
  tempvar lambda inva
  generate double `lambda' = exp(`xb')
  generate double `inva'   = 1/`alpha'
  quietly replace `lnf' = lngamma(`depvar'+`inva') - lngamma(`inva')      ///
               - lnfactorial(`depvar')                                    ///
               - (`depvar'+`inva')*ln(1+`alpha'*`lambda')                 ///
               + `depvar'*ln(`alpha') + `depvar'*ln(`lambda')
end
ml model lf lfnb2 (DVISITS = $XLIST) ()
ml maximize

*** NB1 MLE
* Every NB1 fit must use dispersion(constant); dispersion(mean) fits NB2 instead.

* Negbin1 MLE with default standard errors
nbreg DVISITS $XLIST, dispersion(constant) 
estimates store NB1MLHess

* Negbin1 MLE with robust standard errors
nbreg DVISITS $XLIST, dispersion(constant) vce(robust)
estimates store NB1Robust

* Negbin1 MLE with OPG standard errors
* (was dispersion(mean), which stored an NB2 fit under the NB1 name)
nbreg DVISITS $XLIST, dispersion(constant) vce(opg)
estimates store NB1MLOPG

* Negbin1 MLE with OIM standard errors
nbreg DVISITS $XLIST, dispersion(constant) vce(oim)
estimates store NB1MLOIM

* Negbin1 MLE with bootstrap standard errors (commented out to save time)
* nbreg DVISITS $XLIST, dispersion(constant) vce(boot, reps(400) seed(10101) nodots) 
* estimates store NB1Boot

* Negbin1 MLE with four different ways to estimate standard errors 
estimates table NB1Robust NB1MLHess NB1MLOPG NB1MLOIM, b(%9.3f) se

*** NB2 QGPMLE

* Negbin2 QGPMLE through glm (log link is the default), holding alpha fixed
* at the value computed earlier in the Poisson NB2-standard-error step
display "alpha for NB2 : " alphanb2
global aglm = alphanb2

* Hessian-based standard errors
glm DVISITS $XLIST, family(nbinomial $aglm)
estimates store NB2QGPH

* Robust standard errors
glm DVISITS $XLIST, vce(robust) family(nbinomial $aglm)
estimates store NB2QGPMLE

* The canonical-link version below does not converge, so it is left commented out
* glm DVISITS $XLIST, family(nbinomial 1) link(nbinomial) difficult

* ASIDE: Negbin2 QGPMLE by method ML (should reproduce the glm results).
* alpha is held fixed through the globals aglm and invaglm (= 1/aglm).
global invaglm = 1/$aglm
program glmnb2
  version 11
  args lnf xb                     // xb = x'b (linear index), lnf = ln f(y)
  local depvar $ML_y1             // dependent variable name supplied by ml
  tempvar lambda
  generate double `lambda' = exp(`xb')
  quietly replace `lnf' = `depvar'*ln(`lambda')                      ///
               - (`depvar'+$invaglm)*ln(1+$aglm*`lambda')
end
ml model lf glmnb2 (DVISITS = $XLIST), vce(robust)
ml maximize

estimates table NB2Robust NB2QGPMLE NB2QGPH PRobust, se b(%9.3f)

* Aside for the canonical link use instead in program glmnb2 
*    generate double `mu'  = $invaglm*exp(`theta1')/(1 - exp(`theta1'))
* but this does not converge 

* Following implements QGPMLE using GMM based on the first-order conditions
*   sum_i x_i * (y_i - mu_i) / (1 + alpha*mu_i) = 0,   mu_i = exp(x_i'b)
* with alpha fixed at the scalar alphanb2 computed earlier.
* Each regressor needs its own parameter: {b1}-{b12} for the 12 regressors
* plus {b0} for the constant. That gives 13 parameters for the 13 moment
* conditions (12 instruments + constant), so the model is just identified.
* Previously {b2} was used for both AGE and AGESQ, forcing equal coefficients.
gmm ( (DVISITS - exp({b1}*SEX+{b2}*AGE+{b3}*AGESQ+{b4}*INCOME+{b5}*LEVYPLUS ///
      +{b6}*FREEPOOR+{b7}*FREEREPA+{b8}*ILLNESS+{b9}*ACTDAYS+{b10}*HSCORE   ///
      +{b11}*CHCOND1+{b12}*CHCOND2+{b0}))                                   ///
      / (1 + alphanb2*exp({b1}*SEX+{b2}*AGE+{b3}*AGESQ+{b4}*INCOME+{b5}*LEVYPLUS ///
      +{b6}*FREEPOOR+{b7}*FREEREPA+{b8}*ILLNESS+{b9}*ACTDAYS+{b10}*HSCORE   ///
      +{b11}*CHCOND1+{b12}*CHCOND2+{b0})) ), instruments($XLIST) onestep

* Stored NB2 / QGPMLE / Poisson fits, for comparison with the gmm output above
estimates table NB2Robust NB2QGPMLE NB2QGPH PRobust, b(%9.3f) se

*** TABLE 3.4: NB2 and NB1 MODEL ESTIMATORS AND STANDARD ERRORS

estimates table NB2Robust NB2QGPMLE NB1Robust PRobust, b(%9.3f) se

/* Following not run to save time but cited in discussion of Table 3.4
* Two checks: 
* (1) correct standard errors if DVISITS_se observed Coef. 
*     is close to DVISITS Bootstrap Std. Error
* (2) variability of the s.e. is DVISITS_se Bootstrap Std. Error
* NB2 Robust sandwich se's
bootstrap _b _se, reps(400) seed(10101): nbreg DVISITS $XLIST, dispersion(mean) vce(robust)
* NB2 default se's
bootstrap _b _se, reps(400) seed(10101): nbreg DVISITS $XLIST, dispersion(mean)
*/

*** FIGURE 3.1 

* The following creates Figure 3.1 manually from typed-in frequencies.
* The predicted probabilities are the average predicted probabilities
* for y = 0, 1, ... reported by the user-written countfit command:
* countfit DVISITS, maxcount(10) prm nograph noestimates nofit
* countfit DVISITS, maxcount(10) nbreg nograph noestimates nofit 
* countfit DVISITS $XLIST, maxcount(10) prm nograph noestimates nofit
* countfit DVISITS $XLIST, maxcount(10) nbreg nograph noestimates nofit
clear 
input count sample poissintonly nb2intonly poissreg nb2reg 
 0 .7979 .7395347 .8011167 .7733644 .803997
 1 .1507 .2231428 .1343514 .1788264 .1398705
 2 .0335 .0336649 .0411049 .0323732 .032438
 3 .0058 .0033859 .0144702 .0087688 .0104049  
 4 .0046 .0002554 .0054274 .0036505 .004566  
 5 .0017 .0000154 .0021107 .0017036 .0025236 
 6 .0023 .0000008 .0008403 .0007746 .0016031 
 7 .0023 .0000000 .0003401 .0003316 .0011016 
 8 .0010 .0000000 .0001393 .0001325 .0007925
 9 .0002 .0000000 .0000576 .0000494 .0005869 
end
* The first label belongs to count; it was previously applied to sample and
* immediately overwritten, leaving count unlabeled
label variable count "Number of doctor visits"
label variable sample "Sample frequency"
label variable poissintonly "Poisson no regressors"
label variable nb2intonly "NB2 no regressors"
label variable poissreg "Poisson with regressors"
label variable nb2reg "NB2 with regressors"
* Only counts 0-4 are plotted
drop if count > 4
set scheme s1mono
graph bar (mean) sample poissintonly poissreg, over(count)                  ///
  saving(racd03graph1, replace) ytitle("Sample and predicted frequencies") ///
  legend( ring(0) rows(3) pos(3) label(1 "Sample frequency")               ///
    label(2 "Poisson no regressors") label(3 "Poisson with regressors") )
graph bar (mean) sample nb2intonly nb2reg, over(count)                      ///
  saving(racd03graph2, replace)  ytitle("Sample and predicted frequencies") ///
  legend( ring(0) rows(3) pos(3) label(1 "Sample frequency")                ///
    label(2 "NB2 no regressors") label(3 "NB2 with regressors") )
graph combine racd03graph1.gph racd03graph2.gph, iscale(0.9) ysize(3) xsize(6) 
graph export racd03fig1.eps, replace
* The wmf format is not available on every platform; report a failure but
* keep running so the rest of the do-file still executes
capture noisily graph export racd03fig1.wmf, replace
* Restore the analysis data set (the option for -use- is clear, not replace)
use racd03data.dta, clear

********** 3.3.6 SIMULATION

* From Cameron and Trivedi (1986) NB with mean mu and variance mu + a*mu^j
* is generated from Poisson(xgamma) where 
* xgamma is gamma with mean mu and variance alpha*mu^j
* Since rgamma(a,b) yields gamma with mean ab and variance ab^2
* we need rgamma(mu^(2-j)/a, a*mu^(j-1))

* Test that code works with mu=3 and a=2 
* Should yield NB2 with mean 3 and variance 3 + 2*3^2 = 21
*          and NB1 with mean 3 and variance 3 + 2*3 = 9
* Sanity check of the gamma-Poisson mixture generator with constant mu = 3, a = 2.
* The order of the random-number calls fixes which draws each variable gets,
* so the statements must stay in this order to reproduce the output.
clear 
set seed 10101
set obs 100000
generate mu = 3
scalar a = 2
* NB2 has variance mu + a*mu^2 so set j = 2: rgamma(1/a, a*mu)
generate gammaNB2 = rgamma(1/a, a*mu)
generate xNB2 = rpoisson(gammaNB2)
* NB1 has variance mu + a*mu so set j = 1: rgamma(mu/a, a)
generate gammaNB1 = rgamma(mu/a, a)
generate xNB1 = rpoisson(gammaNB1)
* Compare the sample mean and variance with the values stated above
tabstat xNB2 xNB1, stat(mean var min max) col(stat)

*** Generate Poisson, NB1 and NB2 with n = 10,000
* One uniform regressor x and conditional mean mu = exp(0 + 2*x).
* The order of the random-number calls fixes which draws each variable gets,
* so the statements must stay in this order to reproduce the output.
clear
set obs 10000
set seed 10101 
generate x = runiform()
scalar a = 2
generate mu = exp(0 + 2*x)
* Poisson outcome
generate yP = rpoisson(mu)
* NB2 outcome: Poisson with mean drawn from rgamma(1/a, a*mu)
generate gammaNB2 = rgamma(1/a, a*mu)
generate yNB2 = rpoisson(gammaNB2)
* NB1 outcome: Poisson with mean drawn from rgamma(mu/a, a)
generate gammaNB1 = rgamma(mu/a, a)
generate yNB1 = rpoisson(gammaNB1)
summarize x mu gammaNB2 gammaNB1 yP yNB2 yNB1

* Stored names follow the pattern <dgp>_<estimator><se>,
* e.g. NB1_NB2rob = NB1 dgp, NB2 MLE, robust se's.

* POISSON regressions with Poisson, NB1 and NB2 as the dgp
* All should be consistent
foreach dgp in P NB1 NB2 {
    poisson y`dgp' x, vce(robust)
    estimates store `dgp'_Prob
}

* NB2 regressions with NB1 and NB2 as the dgp
* Not estimated for the Poisson dgp: about half the time the generated Poisson
* data will be underdispersed, and NB2 cannot be used then
* All should be consistent but check standard errors
foreach dgp in NB1 NB2 {
    nbreg y`dgp' x
    estimates store `dgp'_NB2def
    nbreg y`dgp' x, vce(robust)
    estimates store `dgp'_NB2rob
}

* NB1 regressions with NB1 and NB2 as the dgp
* Again skipped for the Poisson dgp, for the same underdispersion reason
* All should be consistent but check standard errors
foreach dgp in NB1 NB2 {
    nbreg y`dgp' x, dispersion(constant)
    estimates store `dgp'_NB1def
    nbreg y`dgp' x, dispersion(constant) vce(robust)
    estimates store `dgp'_NB1rob
}

* Poisson estimator under each of the three dgps
estimates table P_Prob NB1_Prob NB2_Prob, b(%7.4f) se(%7.4f) stats(N ll) stfmt(%9.1f) ///
   modelwidth(9) equations(1) title("Poisson with dgp Poisson, NB1, NB2")
* NB2 MLE under the NB1 and NB2 dgps
estimates table NB1_NB2def NB1_NB2rob NB2_NB2def NB2_NB2rob, b(%7.4f) se(%7.4f) ///
   stats(N ll) stfmt(%9.1f) modelwidth(9) equations(1)                          ///
   title("NB2 MLE with dgp NB1, NB2; default, robust se's")
* NB1 MLE under the NB1 and NB2 dgps (title previously said NB2 MLE)
estimates table NB1_NB1def NB1_NB1rob NB2_NB1def NB2_NB1rob, b(%7.4f) se(%7.4f) ///
   stats(N ll) stfmt(%9.1f) modelwidth(9) equations(1)                         ///
   title("NB1 MLE with dgp NB1, NB2; default, robust se's")   

*** TABLE 3.5: SIMULATION RESULTS (NB1_NB2rob means NB1 dgp and NB2 MLE with robust se's)

* For alpha go to the earlier command output, as here ln(alpha) is given
* Columns mix Poisson, NB1 and NB2 estimators, so the title says so
estimates table NB2_Prob NB2_NB1rob NB2_NB2rob NB1_Prob NB1_NB1rob NB1_NB2rob NB1_NB2def, ///
   b(%7.4f) se(%7.4f) stats(N ll) stfmt(%9.1f) modelwidth(9) equations(1)  ///
   title("Table 3.5: Poisson, NB1, NB2 estimators with dgp NB2 and NB1") 

********** 3.4 OVERDISPERSION TESTS

use racd03data.dta, clear

* Raw overdispersion: unconditional variance relative to the mean
quietly summarize DVISITS
display "Overdispersion: variance/ mean ratio is " r(var) " / " r(mean) " = " r(var)/r(mean)

* LR tests of Poisson against NB2 and against NB1
* Stata command lrtest does not work as the LR test is for 2 different model commands,
* so the three log-likelihoods are collected by hand first
quietly poisson DVISITS $XLIST
scalar llpoisson = e(ll)
quietly nbreg DVISITS $XLIST, dispersion(mean)
scalar llnb2 = e(ll)
quietly nbreg DVISITS $XLIST, dispersion(constant)
scalar llnb1 = e(ll)

display "LR test against NB2 = 2* (" llnb2 " - " llpoisson ") = " 2*(llnb2 - llpoisson)
display "LR test against NB1 = 2* (" llnb1 " - " llpoisson ") = " 2*(llnb1 - llpoisson)

* Wald test against NB2: use output from nbreg DVISITS $XLIST, dispersion(mean)
* Wald test against NB1: use output from nbreg DVISITS $XLIST, dispersion(constant)

* LM test against NB2: auxiliary regression of ystar on the fitted mean, no constant
capture drop mu
quietly poisson DVISITS $XLIST
predict mu, n
generate ystar = ((DVISITS - mu)^2 - DVISITS) / mu
regress ystar mu, noconstant
regress ystar mu, vce(robust) noconstant

* LM test against NB1: auxiliary regression of ystar on a constant only
regress ystar
regress ystar, vce(robust)

********** 3.5 POISSON MARGINAL EFFECTS AND PREDICTION

*** Table 3.6 OLS column
* For OLS the coefficients are themselves the marginal effects
regress DVISITS $XLIST, vce(robust)   // Table 3.6 OLS column

*** Table 3.6 QMLE, AME, MEM and Elast columns
* Poisson marginal effects the easy way (calculus method),
* ignoring the quadratic in age
poisson DVISITS $XLIST, vce(robust)
margins, dydx(*)           // Table 3.6 AME column
margins, atmeans dydx(*)   // Table 3.6 MEM column
margins, eyex(*)           // Table 3.6 Elast column

* Discussed in the text: factor-variable version of the model, which
* gives the treatment effect for binary regressors and
* the correct AME and MEM for the quadratic in AGE
poisson DVISITS i.SEX c.AGE##c.AGE c.INCOME          ///
  i.LEVYPLUS i.FREEPOOR i.FREEREPA                   ///
  c.ILLNESS c.ACTDAYS c.HSCORE i.CHCOND1 i.CHCOND2, vce(robust)
margins, dydx(*)
margins, atmeans dydx(*)

* Standardized coefficients: coefficient times regressor standard deviation
* Table 3.6 SSC column
capture drop one
generate one = 1
* Deviation cross-products of the regressors, scaled to a covariance matrix
matrix accum Cov = $XLIST, noconstant deviations
quietly summarize one
matrix Cov = Cov / (r(N)-1)
* Column vector of standard deviations (square roots of the diagonal of Cov)
matrix Stdev = (vecdiag(cholesky(diag(vecdiag(Cov)))))'
// Append a 0 for the constant so that Stdev is conformable with b
matrix Stdev = Stdev \ 0
matrix list Stdev
quietly poisson DVISITS $XLIST, vce(robust)
matrix b = e(b)'
matrix bstandardized = hadamard(b,Stdev)
matrix list bstandardized     // Table 3.6 SSC column

********** 3.7 OTHER MODELS

use racd03data.dta, clear

* Binary Poisson: model only whether there was at least one doctor visit
generate BINARYVISIT = DVISITS > 0
tabulate BINARYVISIT

* Binary-Poisson ML program lfbinarypois, called by command ml method lf.
* With mu = exp(x'b), Pr(y = 1) = p = 1 - exp(-mu), and the log density is
*   y*ln(p) + (1 - y)*ln(1 - p)
* The ln(y!) term used by the count Poisson evaluator does not enter this
* likelihood, so the unused tempvar that computed it has been removed.
program lfbinarypois
  version 10.1
  args lnf theta1                  // theta1=x'b, lnf=lnf(y)
  tempvar mu p
  local y "$ML_y1"                 // Define y so program more readable
  generate double `mu'      = exp(`theta1')
  generate double `p'       = 1 - exp(-`mu')
  quietly replace `lnf'     = `y'*ln(`p') + ln(1-`p') - `y'*ln(1-`p')
end
ml model lf lfbinarypois (BINARYVISIT = $XLIST), vce(robust) 
ml check
ml search
ml maximize 

* The complementary log-log model is the same model as the binary Poisson above
cloglog DVISITS $XLIST, vce(robust)     // Table 3.7 BP column
estimates store CLOGLOG

* Binary probit and logit for comparison
probit DVISITS $XLIST, vce(robust)
logit DVISITS $XLIST, vce(robust)

* Ordered probit
* Top-code the outcome at 8 or more (as only one observation 9)
generate DVISITS8ormore = DVISITS
replace DVISITS8ormore = 8 if DVISITS > 8
oprobit DVISITS8ormore $XLIST, vce(robust)
estimates store OPROBIT

* Rescale the ordered probit coefficients by the root MSE of an OLS
* regression of DVISITS on the same regressors
matrix b = e(b)
quietly regress DVISITS $XLIST
matrix boprobitrescaled = e(rmse)*b'
display "Rescale coefficients by multiplying by: " e(rmse)
matrix list boprobitrescaled            // Table 3.7 OrdProb column

* For each model: fit, store, form the residual, then report its skewness
* and kurtosis. The residual is named explicitly in each summarize call.
* The original bare "sum, detail" summarized every variable and gave the
* right r() results only because the residual was the last variable.

* OLS with dependent variable y
regress DVISITS $XLIST, vce(robust)
estimates store OLSY                 // Table 3.7 y column
predict pOLSY, xb
generate resOLSY = DVISITS - pOLSY
quietly summarize resOLSY, detail
display "Skewness: " r(skewness) "  Kurtosis: " r(kurtosis)

* OLS with dependent variable ln(y+0.1)
generate LNDVISITS = ln(DVISITS + 0.1)
regress LNDVISITS $XLIST, vce(robust)  // Table 3.7 lny column
estimates store OLSLNY
predict pOLSLNY, xb
generate resOLSLNY = LNDVISITS - pOLSLNY
quietly summarize resOLSLNY, detail
display "Skewness: " r(skewness) "  Kurtosis: " r(kurtosis)

* OLS with dependent variable sqrt(y)
generate SQRTDVISITS = sqrt(DVISITS)
regress SQRTDVISITS $XLIST, vce(robust)  // Table 3.7 sqrty column 
estimates store OLSSQRTY
predict pOLSSQY, xb
generate resOLSSQY = SQRTDVISITS - pOLSSQY
quietly summarize resOLSSQY, detail
display "Skewness: " r(skewness) "  Kurtosis: " r(kurtosis)

* Poisson QMLE
poisson DVISITS $XLIST, vce(robust)
estimates store POISSON
predict pPOISS, n
generate resPOISS = DVISITS - pPOISS
quietly summarize resPOISS, detail
display "Skewness: " r(skewness) "  Kurtosis: " r(kurtosis)

* Nonlinear least squares with the same conditional mean as Poisson
* The variable one supplies the intercept inside the linear index xb
generate one = 1
nl (DVISITS = exp({xb: $XLIST one})), vce(robust)   // Table 3.7 NLS
estimates store NL

*** TABLE 3.7: BINARY PROBIT, ORDERED PROBIT, OLS (y, lny, sqrty) POISS NLS (most of table)

estimates table CLOGLOG OLSY OLSLNY OLSSQRTY POISSON, ///
   b(%9.3f) t stats(ll) eq(1)

* TABLE 3.7 OrdProb column: the rescaled coefficients computed earlier
matrix list boprobitrescaled

* TABLE 3.7 NL column
estimates table NL, t b(%9.3f) stats(ll)
 
********** CLOSE OUTPUT

* Close the log opened at the top of this do-file so racd03.txt is complete
* and later interactive commands are not appended to it
log close
* clear
* exit
