* racd07.do  January 2013 for Stata version 12

* Close any log that is still open; capture keeps the do-file running when none is open
capture log close
* Send all subsequent output to the plain-text log racd07.txt, overwriting any existing copy
log using racd07.txt, text replace

********** OVERVIEW OF racd07.do **********

* STATA Program 
* copyright C 2013 by A. Colin Cameron and Pravin K. Trivedi 
* used for "Regression Analysis of Count Data" SECOND EDITION
* by A. Colin Cameron and Pravin K. Trivedi (2013)
* Cambridge University Press

* This program does the analysis for chapter 7
*  7.3    STATIC REGRESSION AND AUTOCORRELATIONS FOR STRIKES DATA
*  7.11.1 DYNAMIC REGRESSION FOR STRIKES DATA
*  7.11.2 DYNAMIC REGRESSION FOR STOCK TRADES DATA

* To run you need files
*   racd07data1strikes.dta
*   racd07data2stocktrades.dta
* in your directory

* And you need Stata user-written command
*   countfit

********** SETUP **********

version 12              // Run under Stata 12 command interpretation
clear all               // Start from an empty session
set more off            // Never pause for --more--
set scheme s1mono       // Graphics scheme
set linesize 82         // Width of output lines in the log

********** DATA DESCRIPTION

* The original data in racd07data1strikes.dta are from 
*   J. Kennan, "The Duration of Contract strikes in U.S. Manufacturing",
*   Journal of Econometrics, 1985, Vol. 28, pp.5-28.
* The data are also used in 
* A.C. Cameron and P.K. Trivedi (1990), 
* "Regression based tests for overdispersion", 
* Journal of Econometrics, Vol. 46, pp. 347-364.
* For more details see these articles and racd07makedata1strikes.do

* The original data in racd07data2stocktrades.dta are from 
* R.C. Jung, R. Liesenfeld and J.-F. Richard (2011)
* "Dynamic Factor Models for Multivariate Count Data: An Application to 
*  Stock-Market Trading Activity," JBES, 29, 73-85.
* Data are the number of trades on the NYSE in 5 minute intervals
* for Glatfelter Company (GLT) over 39 trading days Jan 3 - Feb 18 2005
* There are 75 5-minute intervals times 39 days   
* For more details see racd07makedata2stocktrades.do

********** 7.3 STATIC REGRESSION AND AUTOCORRELATIONS: STRIKES DATA 

* Load the strikes data, discarding whatever data are currently in memory
use racd07data1strikes.dta, clear

*** TABLE 7.1: VARIABLE DEFINITIONS AND SUMMARY STATISTICS

* Summary statistics and variable descriptions for every variable in the file
summarize
describe 
* Frequency distribution of the count outcome
tabulate STRIKES

*** FIGURE 7.1: STRIKES AND OUTPUT OVER TIME

* STRIKES is drawn solid against the first y axis,
* OUTPUT is drawn dashed against a second y axis
twoway (line STRIKES MONTH, lwidth(medthick))                            ///
       (line OUTPUT MONTH, lpattern(dash) lwidth(medthick) yaxis(2)),    ///
  scale(1.2)                                                             ///
  yscale(range(0 20) axis(1))                                            ///
  yscale(range(-0.1 0.15) axis(2))                                       ///
  legend(ring(0) rows(2) pos(12) label(1 "Strikes") label(2 "Output"))   ///
  ytitle("Strikes", axis(1))                                             ///
  ytitle("Output", axis(2))
* Save the figure in both EPS and WMF formats
graph export racd07fig1.eps, replace
graph export racd07fig1.wmf, replace

* Poisson QMLE with various standard errors
* Stock and Watson text suggests #lags = 0.75*T^(1/3) (= 3.57 here)
* Optimal for linear AR(1) y and error with rho=0.5
* based on Andrews (1991, eq.5.3)

* Baseline: robust standard errors without any autocorrelation correction
glm STRIKES OUTPUT, family(poisson) vce(robust)
estimates store HAC0

* Same model with Newey-West HAC standard errors at 4, 8, 12 and 16 lags
foreach nlags of numlist 4 8 12 16 {
  glm STRIKES OUTPUT, family(poisson) vce(hac nwest `nlags')
  estimates store HAC`nlags'
}

*** TABLE 7.2: POISSON ESTIMATES AND VARIOUS HAC STANDARD ERRORS

* Coefficients with standard errors, one column per stored fit
estimates table HAC0 HAC4 HAC8 HAC12 HAC16, b(%9.3f) se

*** FIGURE 7.2: ACTUAL AND PREDICTED STRIKES OVER TIME

* Fitted counts from the static Poisson regression; PREDICTED is reused
* further down for the squared correlation and the Pearson residuals
quietly poisson STRIKES OUTPUT, vce(robust)
predict PREDICTED, n
correlate STRIKES PREDICTED

* Actual strikes (solid) against predicted strikes (dashed)
twoway (line STRIKES MONTH, lwidth(medthick))                            ///
       (line PREDICTED MONTH, lpattern(dash) lwidth(medthick)),          ///
  scale(1.2)                                                             ///
  legend(ring(0) rows(2) pos(12)                                         ///
         label(1 "Actual strikes") label(2 "Predicted strikes"))         ///
  ytitle("Strikes: actual and predicted")
graph export racd07fig2.eps, replace
graph export racd07fig2.wmf, replace

*** R-SQUAREDS MENTIONED IN TEXT 

*** Deviance, Pearson and R-squared measures presented in text
* Fitted model: the static Poisson regression of STRIKES on OUTPUT.
* The regressor must be OUTPUT (not the time index MONTH) so that these
* R-squareds describe the same model as the Poisson fits above and as
* the squared correlation computed from PREDICTED below.
quietly glm STRIKES OUTPUT, family(poisson) vce(robust)
scalar Devfitted = e(deviance)
scalar Pearsfitted = e(deviance_p)
* Intercept-only model
quietly glm STRIKES, family(poisson) vce(robust)
scalar Devintercept = e(deviance)
scalar Pearsintercept = e(deviance_p)
* Calculate R-squared Deviance and Pearson: 1 - (fitted / intercept-only)
scalar R2_Dev = 1 - Devfitted/Devintercept
scalar R2_Pears = 1 - Pearsfitted/Pearsintercept
display "Deviance R-squared = " R2_Dev "   Fitted = " Devfitted "   Intercept = " Devintercept 
display "Pearson R-squared  = " R2_Pears "   Fitted = " Pearsfitted "   Intercept = " Pearsintercept
* Squared correlation coefficient between actual and predicted strikes
quietly correlate STRIKES PREDICTED
display "Squared correlation coefficient = " r(rho)^2

*** TABLE 7.3: RESIDUAL AUTOCORRELATIONS

* Raw residual (the Poisson score) and Pearson residual, which scales
* the raw residual by the square root of the fitted mean
quietly poisson STRIKES OUTPUT, vce(robust)
predict RESIDUAL, score
generate PEARSON = RESIDUAL/sqrt(PREDICTED)

* Autocorrelation functions and Ljung-Box statistics
* Only use Ljung-Box or Box-Pierce for PEARSON as this is standardized
* Table 7.3 Columns 1, 2 and 3 respectively
foreach series of varlist STRIKES RESIDUAL PEARSON {
  corrgram `series', lags(12)
}

* Get the z statistics by multiplying autocorrelations by sqrt(T)
scalar sqrtT = sqrt(_N)
quietly corrgram PEARSON, lags(12)
matrix Zstatistics = sqrtT*r(AC)
matrix list Zstatistics       // Table 7.3 Column 4

* Compute the BP statistic (corrgram gives the LB statistics)
* BP is the sum of the squared z statistics
matrix ZstatisticsSQ = hadamard(Zstatistics,Zstatistics)
matrix BP = trace(diag(ZstatisticsSQ))
matrix list BP

* Nonstandardized case
* Applied just for PEARSON where not necessary as standardized
* But can also apply to nonstandardized such as RESIDUAL
* For each lag k: T*_k = sum(z_t*z_{t-k}) / sqrt(sum(z_t^2*z_{t-k}^2)),
* and TBPstar accumulates the squares of T*_1 to T*_12
scalar TBPstar = 0
forvalues lag = 1/12 {
  * Lagged residual, cross product, and squared cross product
  quietly generate PL`lag' = L`lag'.PEARSON
  quietly generate PL0PL`lag' = PEARSON*PL`lag'
  quietly generate PL0SQPL`lag'SQ = PEARSON*PEARSON*PL`lag'*PL`lag'
  * Numerator: sum of cross products
  quietly summarize PL0PL`lag'
  scalar NUMERATOR = r(sum)
  * Denominator: square root of the sum of squared cross products
  quietly summarize PL0SQPL`lag'SQ
  scalar DENOMINATOR = sqrt(r(sum))
  scalar T`lag'star = NUMERATOR / DENOMINATOR
  scalar T`lag'starSQ = T`lag'star^2
  scalar TBPstar = TBPstar + T`lag'starSQ
}
* List the individual statistics T* at lags 1 to 12   (Table 7.3 Column 5)
scalar list T1star T2star T3star T4star T5star T6star ///
   T7star T8star T9star T10star T11star T12star
* List the overall test
scalar list TBPstar

* Yet another test
* Regress the Pearson residual on its first 12 lags. The overall F statistic
* tests exactly those 12 slopes, so 12*F is the chi-square(12) form.
regress PEARSON L(1/12).PEARSON, vce(robust)
display "chisquare(12) test = " 12*e(F)

* Yet another test number two
* Same regression with OUTPUT added as a control. Here e(F) would test all
* 13 slopes (OUTPUT plus the 12 lags), which does not match the
* chi-square(12) label, so test the 12 lag coefficients jointly instead.
regress PEARSON OUTPUT L(1/12).PEARSON, vce(robust)
quietly testparm L(1/12).PEARSON
display "chisquare(12) test = " 12*r(F)

*** TABLE 7.4: PREDICTED PROBABILITIES

* Poisson: predicted probability of each count 0,...,10 for every observation
quietly poisson STRIKES OUTPUT
forvalues count = 0/10 {
   predict poissfit`count', pr(`count')
}

* Negative binomial: the same predicted probabilities
quietly nbreg STRIKES OUTPUT
forvalues count = 0/10 {
   predict nb2fit`count', pr(`count')
}

* Table 7.4: sample averages of the predicted probabilities
summarize poissfit*
summarize nb2fit*

* Aside: If instead use user-written addon countfit
* The PRM* and NBRM* variables are dropped after each countfit call
countfit STRIKES OUTPUT, maxcount(10) prm nograph
drop PRM*
countfit STRIKES OUTPUT, maxcount(10) nbreg nograph
drop NBRM*

**********  7.11.1 DYNAMIC REGRESSION FOR STRIKES DATA

* Reload the strikes data so none of the variables created above remain
use racd07data1strikes.dta, clear

* ystar: STRIKES with zero counts replaced by 0.5, so the log is defined
generate ystar = cond(STRIKES == 0, 0.5, STRIKES)
generate lnystar = ln(ystar)
* ytwostar: STRIKES with zero counts replaced by 1 (log is then 0),
* paired with dy0, an indicator for a zero count
generate ytwostar = cond(STRIKES == 0, 1, STRIKES)
generate dy0 = (STRIKES == 0)
generate lnytwostar = ln(ytwostar)
summarize

* Zeger-Qaqish model with y* = max(y,0.5) and up to three lags
* For every fit: store the estimates, then build fitted mean (yhat*),
* raw residual (res*) and Pearson residual (pears*)

* Static model (no lags)
poisson STRIKES OUTPUT, vce(robust)
estimates store ZQ0
predict yhatZQ0, n
predict resZQ0, score
generate pearsZQ0 = resZQ0 / sqrt(yhatZQ0)

* Models with lags 1 through nlag of ln(y*), for nlag = 1, 2, 3
forvalues nlag = 1/3 {
  poisson STRIKES OUTPUT L(1/`nlag').lnystar, vce(robust)
  estimates store ZQ`nlag'
  predict yhatZQ`nlag', n
  predict resZQ`nlag', score
  if `nlag' == 1 {
    * Linear index is kept for the one-lag model only
    predict STRIKESZQ1, xb
  }
  generate pearsZQ`nlag' = resZQ`nlag' / sqrt(yhatZQ`nlag')
}

* Use following to get the Pearson statistic for NB1 overdispersion
glm STRIKES OUTPUT L.lnystar L2.lnystar L3.lnystar, family(poisson) vce(robust)

* Variant: lagged ln(y**) with y** = max(y,1), plus a lagged zero-count dummy
poisson STRIKES OUTPUT L.lnytwostar L.dy0, vce(robust)
estimates store ZQ1c
predict yhatZQ1c, n
predict resZQ1c, score
generate pearsZQ1c = resZQ1c / sqrt(yhatZQ1c)

estimates table ZQ0 ZQ1 ZQ2 ZQ3 ZQ1c, b(%9.3f) se

* Brannas Conditional NL of INAR(1) model
* Lagged counts are stored as plain variables for use inside the nl expressions
generate yL1 = L.STRIKES
generate yL2 = L2.STRIKES
generate yL3 = L3.STRIKES

* For every fit: store the estimates, then build fitted values (yhat*),
* residuals (res*) and residuals scaled by sqrt(fitted) (pears*)

* Compare NLS of static model with Poisson of static model earlier
nl (STRIKES = exp({beta1}+{beta2}*OUTPUT) ), vce(robust)
estimates store B0
predict yhatB0, yhat
predict resB0, residual
generate pearsB0 = resB0 / sqrt(yhatB0) 

* One lag of the count
nl (STRIKES = {rho1}*yL1 + exp({beta1}+{beta2}*OUTPUT) ) ///
  if yL1 != ., initial(rho1 0.5) vce(robust)
estimates store B1
predict yhatB1, yhat
predict resB1, residual
generate pearsB1 = resB1 / sqrt(yhatB1) 

* Two lags of the count
nl (STRIKES = {rho1}*yL1 + {rho2}*yL2 + exp({beta1}+{beta2}*OUTPUT) ) ///
  if (yL1 != . & yL2 != .), initial(rho1 0.4 rho2 0.1) vce(robust)
estimates store B2
predict yhatB2, yhat
predict resB2, residual
generate pearsB2 = resB2 / sqrt(yhatB2) 

* Three lags of the count
* The starting values name each of rho1, rho2 and rho3 exactly once
* (the third entry previously repeated rho1, leaving rho3 unset)
nl (STRIKES = {rho1}*yL1 + {rho2}*yL2 + {rho3}*yL3 + exp({beta1}+{beta2}*OUTPUT) ) ///
  if (yL1 != . & yL2 != . & yL3 != .), initial(rho1 0.3 rho2 0.1 rho3 0.1) vce(robust)
estimates store B3
predict yhatB3, yhat
predict resB3, residual
generate pearsB3 = resB3 / sqrt(yhatB3) 

*** TABLE 7.5: STRIKES ZEGER-QAQISH AUTOREGRESSIVE MODEL ESTIMATES

* Zeger-Qaqish columns of Table 7.5
estimates table ZQ0 ZQ1 ZQ2 ZQ3 ZQ1c, b(%9.3f) se

* Brannas columns of Table 7.5
estimates table B0 B1 B2 B3, b(%9.3f) se

* Names of all fitted models, in the order they are reported
local models ZQ0 ZQ1 ZQ2 ZQ3 ZQ1c B0 B1 B2 B3

* Build the matching list of fitted-value variables (yhatZQ0 ... yhatB3)
local fits
foreach m of local models {
  local fits `fits' yhat`m'
}

* Correlations - The squares of these are given in second last row of Table 7.5
correlate STRIKES `fits'
summarize STRIKES `fits'

* Autocorrelations of the raw series and of each model's residuals
corrgram STRIKES, lags(6)
foreach m of local models {
  corrgram res`m', lags(6)
}

** TABLE 7.5: For LB test use the (standardized) Pearson residuals
foreach m of local models {
  corrgram pears`m', lags(6)
}

*** FIGURE 7.3: STRIKES PREDICTED FROM A DYNAMIC MODEL

* Actual strikes (solid) against fitted values yhatB1 from the
* one-lag Brannas model (dashed)
twoway (line STRIKES MONTH, lwidth(medthick))                            ///
       (line yhatB1 MONTH, lpattern(dash) lwidth(medthick)),             ///
  scale(1.2)                                                             ///
  legend(ring(0) rows(2) pos(12)                                         ///
         label(1 "Actual strikes") label(2 "Predicted strikes"))         ///
  ytitle("Strikes: actual and predicted")
graph export racd07fig3.eps, replace
graph export racd07fig3.wmf, replace

* Try Brannas model with rho1 varying with regressors
* rho1 is a logistic function of OUTPUT, which keeps it between 0 and 1
nl (STRIKES = (1/(1 + exp(-{gamma1}-{gamma2}*OUTPUT)))*yL1 + ///
  exp({beta1}+{beta2}*OUTPUT) ) if yL1 != ., initial(gamma1 0.1 gamma2 0.1) vce(robust)
predict yhatB1z, yhat
correlate STRIKES yhatB1z
predict resB1z, residual
* Scale this model's own residual (resB1z, not resB3 from the three-lag
* model) by the square root of this model's fitted value
generate pearsB1z = resB1z / sqrt(yhatB1z)
corrgram resB1z, lags(6)
corrgram pearsB1z, lags(6)
* The range of values of rho1
generate rho1 = 1/(1 + exp(-_b[/gamma1]-_b[/gamma2]*OUTPUT)) 
summarize rho1

**********  7.11.2 DYNAMIC REGRESSION FOR STOCK TRADES DATA

* Replace the strikes data in memory with the stock trades data
use racd07data2stocktrades.dta, clear

describe
summarize
tabulate glt

*** FIGURE 7.4: HISTOGRAM AND TIME SERIES OF NUMBER OF TRADES

* Each panel is saved to its own .gph file and the two are then combined
* Left panel: histogram of glt
histogram glt, scale(1.9) saving(histogram, replace)
* Right panel: glt against t, restricted to t below 76
twoway line glt t if t < 76, scale(1.9) saving(timeseries, replace)
graph combine histogram.gph timeseries.gph, ysize(3) xsize(6) iscale(0.7)
graph export racd07fig4.wmf, replace
graph export racd07fig4.eps, replace

* There is considerable autocorrelation
corrgram glt, lags(100)

* Poisson intercept-only
poisson glt, vce(robust)
estimates store INTONLY

* Poisson with just trigonometric terms
* Its Pearson residual pearsP0 is the lagged regressor in the models below
poisson glt x1 x2 x3 x4, vce(robust)
estimates store STATIC
test x1 x2 x3 x4
predict yhatP0, n
predict resP0, score
generate pearsP0 = resP0 / sqrt(yhatP0)

* Plot the static fitted values for the first 150 observations
tsline yhatP0 in 1/150

* Poisson with one, two and three lags of the static-model Pearson residual
* For every fit: store the estimates, then build fitted mean, raw residual
* and Pearson residual
forvalues nlag = 1/3 {
  poisson glt x1 x2 x3 x4 L(1/`nlag').pearsP0, vce(robust)
  estimates store P`nlag'
  predict yhatP`nlag', n
  predict resP`nlag', score
  generate pearsP`nlag' = resP`nlag' / sqrt(yhatP`nlag')
}

* Poisson with Zeger-Qaqish one lag of y as regressor
* gltstar: glt with zero counts replaced by 0.5, so the log is defined
generate gltstar = cond(glt == 0, 0.5, glt)
generate lngltstar = ln(gltstar)
summarize gltstar glt

poisson glt x1 x2 x3 x4 L.lngltstar, vce(robust)
estimates store ZQ1
* Fitted mean, raw residual and Pearson residual
predict yhatZQ1, n
predict resZQ1, score
generate pearsZQ1 = resZQ1 / sqrt(yhatZQ1)

* Brannas Conditional NL of INAR(1) model
* Setup: a constant regressor for the {xb:} linear combination,
* the regressor list, and the lagged count as a plain variable
generate one = 1
generate gltL1 = L.glt
global XLIST x1 x2 x3 x4

* Compare NLS of static model with Poisson of static model earlier
nl (glt = exp({xb: $XLIST one})), vce(robust)
estimates store B0
predict yhatB0, yhat
predict resB0, residual
generate pearsB0 = resB0 / sqrt(yhatB0)

* Brannas Conditional NL of INAR(1) model
* Adds rho1 times the lagged count; observations without a lag are excluded
nl (glt = {rho1}*gltL1 + exp({xb: $XLIST one}) ) ///
  if gltL1 != ., initial(rho1 0.3) vce(robust)
estimates store B1
predict yhatB1, yhat
predict resB1, residual
generate pearsB1 = resB1 / sqrt(yhatB1)

*** TABLE 7.6 STOCK TRADES: STATIC AND DYNAMIC MODEL ESTIMATES

* Poisson-based fits
estimates table INTONLY STATIC ZQ1 P1 P3 , b(%9.3f) se

* Brannas INAR(1) fit
estimates table B1, b(%9.3f) se

* Models compared below, in reporting order
local models P0 ZQ1 B1 P1 P3

* Build the matching list of fitted-value variables
local fits
foreach m of local models {
  local fits `fits' yhat`m'
}

* Correlations
correlate glt `fits'
summarize glt `fits'

* For Ljung-Box test use the Pearson residuals
corrgram glt, lags(10)
foreach m of local models {
  corrgram pears`m', lags(10)
}

********** CLOSE OUTPUT

* log close
* clear
* exit
