* racd08.do  January 2013 for Stata version 12

capture log close
log using racd08.txt, text replace

********** OVERVIEW OF racd08.do **********

* STATA Program 
* copyright C 2013 by A. Colin Cameron and Pravin K. Trivedi 
* used for "Regression Analyis of Count Data" SECOND EDITION
* by A. Colin Cameron and Pravin K. Trivedi (2013)
* Cambridge University Press

* This STATA program does examples for chapter 8.8 and 8.9
*   8.5 COPULAS: CLAYTON AND GUMBEL 
*   8.9 EXAMPLE: BIVARIATE COUNT ANALYSIS
*   (1) INDEPENDENCE TESTS BASED ON ORTHOGONAL POLYNOMIALS
*   (2) NLSUR: NUNLINEAR SEEMINGLY UNRELATED REGRESSION ESTIMATOR
*   (3) MULTIVARIATE NEGATIVE BINOMIAL ESTIMATION ESTIMATED BY ML

* To run you need file
*   racd06data1healthcare.dta
* in your directory

********** SETUP **********

set more off
version 12
clear all
* set linesize 82
set scheme s1mono  /* Graphics scheme */
 
********** DATA DESCRIPTION for CHAPTER 8.9

* The data are extracted from the 1987-88 National Medical Expenditure Survey (NMES).
* The extract and analysis are in P. Deb and P.K. Trivedi (1997),
* Demand for Medical Care by the Elderly: A Finite Mixture Approach" 
* Journal of Applied Econometrics, 12, 313-326.
* See this article for more detailed discussion 
* Also see racd06makedata1healthcare.do for further details 

* This STATA program does the analysis for chapter 9 
*   8.5 COPULA
*   8.9 EMPIRICAL EXAMPLE (EMR and HOSP)

* To run you need file
*   racd06data1healthcare.dta
* in your directory

********** DATA DESCRIPTION

* The data are extracted from the 1987-88 National Medical Expenditure Survey (NMES).
* The extract and analysis are in P. Deb and P.K. Trivedi (1997),
* Demand for Medical Care by the Elderly: A Finite Mixture Approach" 
* Journal of Applied Econometrics, 12, 313-326.
* See this article for more detailed discussion 

* The simulation to show copula generated data 
* is based on code from P.K. Trivedi and D.M. Zimmer (2005)
* Copula Modeling: An Introduction for Practitioners
* Foundations and Trends in Econometrics Vol. 1, No 1 1-111.
 
********** 8.5 COPULAS: CLAYTON AND GUMBEL 

* The data are generated data
*   y1 ~ Poisson(10)
*   y2 ~ Poisson(10)
* Copula is 
*   Clayton theta = 2 so Kendall's tau = 2/(2+2) = 0.5
*   Gumbel  theta = 2 so Kendall's tau = (2-1)/2 = 0.5

*** CLAYTON COPULA

* Code from David Zimmer
clear
set obs 1000
set seed 10101
gen y1=0
gen y2=0
mata:
  obs = 1000
  mean1 = 10
  mean2 = 10
  theta = 2
  y1 = J(obs,1,-999)
  y2 = J(obs,1,-999)
  v1 = uniform(obs,1)
  v2 = uniform(obs,1)
  u1 = v1
  u2 = (   (v1:^(-theta))   :*    (v2:^(-theta/(theta+1)) :- 1)     :+ 1     ) :^(-1/theta)
  for(j=1; j<=obs; j++) { 
    p10 = exp(-mean1)
    p20 = exp(-mean2)
    s1 = p10
    s2 = p20
	for(i=0; i<50; i++) {
		if (u1[j,] < s1)	 	y1[j,]=i
		if (u1[j,] < s1) 		i=50
		if (u1[j,] > s1) 		p10 = mean1*p10/(i+1)
		if (u1[j,] > s1) 		s1 = s1 + p10
		if (u1[j,] > s1) 		y1[j,]=i+1
	}
	for(i=0; i<50; i++) {
		if (u2[j,] < s2) 		y2[j,]=i
		if (u2[j,] < s2) 		i=50
		if (u2[j,] > s2) 		p20 = mean2*p20/(i+1)
		if (u2[j,] > s2) 		s2 = s2 + p20
		if (u2[j,] > s2) 		y2[j,]=i+1
	}
    }
  st_store(., "y1", y1)
  st_store(., "y2", y2)
end
summarize
ktau y1 y2
graph twoway (scatter y1 y2) (lfit y1 y2, lwidth(medthick)), legend(off)  ///
   title("Sample from Clayton Copula")  ytitle("y1") scale(1.2) saving(clayton, replace)

*** GUMBEL COPULA

* Code from David Zimmer
clear
set obs 1000
set seed 10101
gen y1=0
gen y2=0
mata:
  obs = 1000
  mean1 = 10
  mean2 = 10
  theta = 4
  y1 = J(obs,1,-999)
  y2 = J(obs,1,-999)
  thet = uniform(2000,1) :* 3.1415
  w = uniform(2000,1)
  ww = -1:*ln(w)
  alph = 1/theta
  z1 = sin((1-alph):*thet) :* (sin(alph:*thet)):^(alph/(1-alph))
  z2 = (sin(thet)):^(1/(1-alph))
  z = z1:/z2
  xx = (z:/ww):^((1-alph)/alph)
  v1 = uniform(2000,1)
  v2 = uniform(2000,1)
  u1 = exp( -1:*((-1:*ln(v1):/xx):^(1/theta)) )
  u2 = exp( -1:*((-1:*ln(v2):/xx):^(1/theta)) )
  for(j=1; j<=obs; j++) { 
    p10 = exp(-mean1)
    p20 = exp(-mean2)
    s1 = p10
    s2 = p20
	for(i=0; i<50; i++) {
		if (u1[j,] < s1)	 	y1[j,]=i
		if (u1[j,] < s1) 		i=50
		if (u1[j,] > s1) 		p10 = mean1*p10/(i+1)
		if (u1[j,] > s1) 		s1 = s1 + p10
		if (u1[j,] > s1) 		y1[j,]=i+1
	}
	for(i=0; i<50; i++) {
		if (u2[j,] < s2) 		y2[j,]=i
		if (u2[j,] < s2) 		i=50
		if (u2[j,] > s2) 		p20 = mean2*p20/(i+1)
		if (u2[j,] > s2) 		s2 = s2 + p20
		if (u2[j,] > s2) 		y2[j,]=i+1
	}
    }
  st_store(., "y1", y1)
  st_store(., "y2", y2)
end
sum
ktau y1 y2
graph twoway (scatter y1 y2) (lfit y1 y2, lwidth(medthick)), legend(off) ///
 title("Sample from Gumbel Copula") ytitle("y1") scale(1.2) saving(gumbel, replace)

*** FIGURE 8.1: CLAYTON AND GUMBEL COPULAS EXAMPLE

graph combine clayton.gph gumbel.gph, ycommon xcommon ysize(3) xsize(6)
graph export racd08fig1.wmf, replace
graph export racd08fig1.eps, replace

********** 8.9 EMPIRICAL EXAMPLE

****** READ IN DATAA  AND SUMMARIZE

use racd06data1healthcare.dta, clear

* Variable descriptions and summary statistics
describe
summarize

summarize EMR HOSP, detail
summarize EMR HOSP
correlate EMR HOSP
correlate EMR HOSP if EMR > 0 & HOSP > 0
tabulate EMR
tabulate HOSP

****** INDEPENDENCE TESTS BASED ON ORTHOGONAL POLYNOMIALS

global XLIST EXCLHLTH POORHLTH NUMCHRON ADLDIFF NOREAST MIDWEST WEST AGE ///
  BLACK MALE MARRIED SCHOOL FAMINC EMPLOYED PRIVINS MEDICAID
global y1 EMR
global y2 HOSP

* Following is for NB2 model
* Generate first two orthogonal polynomials for y1
nbreg $y1 $XLIST
predict mu1, n
scalar alpha1 = exp([lnalpha]_cons)
generate Q1y1 = $y1 - mu1 
generate Q2y1 = ($y1-mu1)^2 - (1+2*alpha1*mu1)*($y1-mu1) - (1+alpha1*mu1)*mu1
* Generate first two orthogonal polynomials for y2
nbreg $y2 $XLIST
predict mu2, n
scalar alpha2 = exp([lnalpha]_cons)
generate Q1y2 = $y2 - mu2
generate Q2y2 = ($y2-mu2)^2 - (1+2*alpha2*mu2)*($y2-mu2) - (1+alpha2*mu2)*mu2

/*
* Following is for NB1 model - not reported in book
* Note that here we use Var = alpha*mu whereas Stata sets Var = mu + delta*mu 
* Generate first two orthogonal polynomials for y1
nbreg $y1 $XLIST, dispersion(constant)
predict mu1, n
scalar delta1 = exp([lndelta]_cons)
scalar phi1 = phi1 + 1
generate Q1y1 = $y1 - mu1 
generate Q2y1 = ($y1-mu1)^2 - (2*phi1)*($y1-mu1) - phi1*mu1
* Generate first two orthogonal polynomials for y2
nbreg $y2 $XLIST, dispersion(constant)
predict mu2, n
scalar delta2 = exp([lndelta]_cons) 
scalar phi2 = delta2 + 1
generate Q1y2 = $y2 - mu2
generate Q2y2 = ($y2-mu2)^2 - (2*phi2)*($y2-mu1) - phi2*mu1
*/

/*
* Following is for Poisson model - not reported in book
* Generate first two orthogonal polynomials for y1
poisson $y1 $XLIST
predict mu1, n
generate Q1y1 = $y1 - mu1 
generate Q2y1 = ($y1-mu1)^2 - $y1
* Generate first two orthogonal polynomials for y2
poisson $y2 $XLIST
predict mu2, n
generate Q1y2 = $y2 - mu2
generate Q2y2 = ($y2-mu2)^2 - $y2
*/

* Now perform tests based on crossproducts of Q1 and Q2
generate one = 1
generate Q1y1Q1y2 = Q1y1*Q1y2
quietly regress one Q1y1Q1y2, noconstant
scalar test11 = e(N)*e(r2)
generate Q2y1Q2y2 = Q2y1*Q2y2
quietly regress one Q2y1Q2y2, noconstant
scalar test22 = e(N)*e(r2)
generate Q1y1Q2y2 = Q1y1*Q2y2
quietly regress one Q1y1Q2y2, noconstant
scalar test12 = e(N)*e(r2)
generate Q2y1Q1y2 = Q2y1*Q1y2
quietly regress one Q2y1Q1y2, noconstant
scalar test21 = e(N)*e(r2)

*** RESULTS GIVEN IN TEXT: Display the four test statisics

display "Test based on Q1y1 x Q1y2 = " test11 " and p = " chi2tail(1,test11) 
display "Test based on Q1y1 x Q2y2 = " test12 " and p = " chi2tail(1,test12) 
display "Test based on Q2y1 x Q1y2 = " test21 " and p = " chi2tail(1,test21) 
display "Test based on Q2y1 x Q2y2 = " test22 " and p = " chi2tail(1,test22) 

sum Q*
correlate Q*

/* To apply to original Cameron and Trivedi (1993) example use 
use racd03data.dta, clear
global XLIST SEX AGE AGESQ INCOME LEVYPLUS FREEPOOR FREEREPA ILLNESS ///
  ACTDAYS HSCORE CHCOND1 CHCOND2
global y1 HOSPADMI
global y2 HOSPDAYS
*/

****** NLSUR: NUNLINEAR SEEMINGLY UNRELATED REGRESSION ESTIMATOR

* Global for the regressors
global XLIST EXCLHLTH POORHLTH NUMCHRON ADLDIFF NOREAST MIDWEST WEST AGE ///
  BLACK MALE MARRIED SCHOOL FAMINC EMPLOYED PRIVINS MEDICAID

generate CONSTANT = 1

*** TABLE 8.3: NLSUR RESULTS (second half of table)

nlsur (EMR = exp({xb1: $XLIST CONSTANT}))     ///
   (HOSP = exp({xb2: $XLIST CONSTANT})), vce(robust) nolog
estimates store NLSUR

* Save the parameter estimates to use as starting values later for Bivariate Ne
matrix bnlsur = e(b)

* Calculate the error correlation in two ways
matrix Sigma = e(Sigma)
matrix list Sigma
scalar rho = Sigma[1,2] / sqrt(Sigma[1,1]*Sigma[2,2])
display rho
predict u1hat, equation(#1) residuals
predict u2hat, equation(#2) residuals
correlate u1hat u2hat

****** MULTIVARIATE NEGATIVE BINOMIAL ESTIMATION ESTIMATED BY ML

* Bivariate Negbin ML program lfnbbivariate to be called by command ml method lf
program lfnbbivariate
  version 10.1
  args lnf theta1 theta2 a        // theta1=x1'b1, theta2=x2'b2 a=alpha, lnf=lnf(y)
  tempvar mu1 mu2
  local y1 $ML_y1                 // Define y1 so program more readable
  local y2 $ML_y2                 // Define y2 so program more readable
  generate double `mu1'  = exp(`theta1')
  generate double `mu2'  = exp(`theta2')
  quietly replace `lnf' = lngamma(`y1'+`y2'+(1/`a')) - lngamma((1/`a'))  ///
                          -  lnfactorial(`y1') - lnfactorial(`y2')       ///
                          + `y1'*ln(`mu1') + `y2'*ln(`mu2')              ///
                          - (`y1'+`y2'+(1/`a'))*ln(1+`mu1'+`mu2')  
end

* Initial values - for betas use initial values from preceding NLSUR
* For alpha use alpha = 2
ml model lf lfnbbivariate (EMR = $XLIST) (HOSP = $XLIST) (), vce(robust)
ml init bnlsur 2.0, copy

*** TABLE 8.3: ML RESULTS

ml maximize
estimates store MLBVNB

* Aside: Confidence interval for 1/alpha
nlcom 1/[eq3]_cons

*** ALTERNATIVE SINGLE EQUATION ESTIMATORS (Not given in book)

nl (EMR = exp({xb3: $XLIST CONSTANT})), vce(robust) nolog
nl (HOSP = exp({xb4: $XLIST CONSTANT})), vce(robust) nolog
quietly poisson EMR $XLIST, vce(robust) nolog
estimates store Poi_EMR
quietly poisson HOSP $XLIST, vce(robust) nolog
estimates store Poi_HOSP
quietly nbreg EMR $XLIST, vce(robust) nolog
estimates store NB_EMR
quietly nbreg HOSP $XLIST, vce(robust) nolog
estimates store NB_HOSP
estimates table Poi_EMR Poi_HOSP NB_EMR NB_HOSP, b(%9.3f) se(%9.2f) eq(1)

******* TABLE in the BOOK

*** TABLE 8.3: ML and NLSUR bivariate estimates
* Note: Following gives default se's for NB1FE and not jackknife se's (given above)
estimates table MLBVNB NLSUR, b(%7.4f) se(%7.3f) stats(N ll) stfmt(%9.1f) modelwidth(9) 

********** CLOSE OUTPUT

* log close
* clear
* exit
