* MMA26P1MELOGIT.DO January 2007 for Stata version 8.0
clear
capture log close
log using mma26p1melogit.txt, text replace
********** OVERVIEW OF MMA26P1MELOGIT.DO **********
* STATA Program by A. Colin Cameron and Pravin K. Trivedi (2005) for
* "Microeconometrics: Methods and Applications, Cambridge University Press
* Chapter 26.5 pp. 919-920 measurement error in a logit model
* (1) Table 26.1 Attenuation bias in a logit model with measurement error
* THIS PROGRAM DIFFERS FROM THE PROGRAM THAT CREATED THE TABLE GIVEN IN THE BOOK.
* THIS PROGRAM GIVES QUITE DIFFERENT RESULTS TO THOSE IN THE BOOK.
* THE ATTENUATION BIAS IS ACTUALLY VERY SIMILAR TO THAT FOR OLS.
* MOST LIKELY EXPLANATION IS THAT THE RESULTS IN THE BOOK ARE IN ERROR.
* See MMA26P2MENONLIN.DO for program that created Table 26.2
* NOTE: Because this is a simulation using many samples (here 1000)
* the generated data are not saved in a text file.
* Problem can arise if in one of the simulations all of sample is y=0 or y=1
* Then the logit model is not estimable. Should not happen here as N=1000
********** SETUP **********
set more off
version 8.0
set scheme s1mono /* Graphics scheme */
********** SIMULATION OVERVIEW **********
* The data generating process is
* Logit with Pr[y=1] = Phi(b1 + b2*x2star)
* where xstar ~ U[0,1]
* So generate as
* ystar = b1 + b2*xstar + e, where e ~ logistic
* y = 1 if ystar > 0
* = 0 if ystar <= 0
* where we set b1 = 1 and b2 = 1
* The measurement error problem is that xstar is not observed
* Instead we observe
* x = xstar + v
* v ~ N[0, s2_v]
* The noise-to-signal ratio is
* s2_v / s2_xstar
* = s2_v / (1/12)
* = 12*s2_v since V[U[0,1]] = 1/12
* and Table 26.1 uses s2_v values: 0 .04/12 .25/12 1/12 4/12 9
* with noise/signal ratio: 0 .04 .25 1 4 9/12
* The sample size N is set below in the global numobs
* The number of simulations S is set below in the global numsims
* The noise-signal ratio is given as an argument.
* The simulation is done using stata command simulate
* An alternative is to use postfile - see mma07p3montecarlo.do
********** INITIAL SIMULATION SET UP **********
* Change the following for different sample size N
global numobs "1000"
* Change the following for different number of simulations S
global numsims "1000"
scalar b1true = 1
scalar b2true = 2
********** GENERATE THE TRUE DATA ONCE AS ILLUSTRATION **********
* Generate logistic rv using the transformation method
* The logistic cdf of v is exp(e)/(1+exp(e) = 1/(1-exp(-e))
* So u = 1/(1-exp(-e)) ==> 1/u = 1-exp(-e) ==> exp(-e) = (1-u)/u
* ==> exp(e) = u/(1-u) ==> e = ln{u/(1-u)} where u ~ U[0,1]
set seed 10101
set obs $numobs
gen xstar = uniform()
gen u = uniform()
gen e = ln(u/(1-u))
gen ystar = 0
replace y = 1 if b1true + b2true*xstar + e > 0
* Following will give consistent estimates as uses true data
* But for small sample b1 and b2 are measured with error
* and will differ from the theoretical values
logit y x
****** ANALYSIS: SIMULATION STUDY MEASUREMENT ERROR IN LOGIT *******
* The program is rclass.
* This means the results returned by the program are put into r( )
* Here we return meany, vary, betahat, sebetahat, ztestforbetaeq1
* This version of the program instead redraws both x and y in each simulation
* The program has one argument
* - s2_v = the variance of the measurement error in x
program logitme, rclass
version 8.0
/* The argument s2_v is the meas. error variance for xstar */
args s2_v
/* Generate the data: here x and y */
drop _all
set obs $numobs
gen xstar = uniform()
gen v = sqrt(s2_v)*invnorm(uniform()) /* measurement error */
gen x = xstar + v
gen u = uniform()
gen e = ln(u/(1-u))
gen ystar = 0
replace y = 1 if b1true + b2true*xstar + e > 0
logit y x
return scalar b1=_b[_cons]
return scalar b2 = _b[x]
end
* Now call the program logitme where
* - include values for each argument within the quotes " "
* (here the argument is s2_v which takes different values
* - make sure that ask for each of the returned results
* TABLE 26.1 p.919 First column - no measurement error
scalar s2_v = 0
set seed 10101
simulate "logitme s2_v" b1=r(b1) b2=r(b2), reps($numsims)
sum b1
scalar avealphahat = r(mean)
sum b2
scalar avebetahat = r(mean)
di "COLUMN 1" _n "Noise signal ratio = " 12*s2_v " leads to " _n ///
"average intercept: " avealphahat _n "average slope: " avebetahat
* TABLE 26.1 p.919 Second column - noise/signal = 0.04
scalar s2_v = 0.04/12
set seed 10101
simulate "logitme s2_v" b1=r(b1) b2=r(b2), reps($numsims)
quietly sum b1
scalar avealphahat = r(mean)
quietly sum b2
scalar avebetahat = r(mean)
di "COLUMN 1" _n "Noise signal ratio = " 12*s2_v " leads to " _n ///
"average intercept: " avealphahat _n "average slope: " avebetahat
* TABLE 26.1 p.919 Third column - noise/signal = 0.25
scalar s2_v = 0.25/12
set seed 10101
simulate "logitme s2_v" b1=r(b1) b2=r(b2), reps($numsims)
quietly sum b1
scalar avealphahat = r(mean)
quietly sum b2
scalar avebetahat = r(mean)
di "COLUMN 3" _n "Noise signal ratio = " 12*s2_v " leads to " _n ///
"average intercept: " avealphahat _n "average slope: " avebetahat
* TABLE 26.1 p.919 Fourth column - noise/signal = 1
scalar s2_v = 1/12
set seed 10101
simulate "logitme s2_v" b1=r(b1) b2=r(b2), reps($numsims)
di "Noise signal ratio = " 12*s2_v " leads to average intercept and slope : "
summarize
quietly sum b1
scalar avealphahat = r(mean)
quietly sum b2
scalar avebetahat = r(mean)
di "COLUMN 4" _n "Noise signal ratio = " 12*s2_v " leads to " _n ///
"average intercept: " avealphahat _n "average slope: " avebetahat
* TABLE 26.1 p.919 Fifth column - noise/signal = 4
scalar s2_v = 4/12
set seed 10101
simulate "logitme s2_v" b1=r(b1) b2=r(b2), reps($numsims)
quietly sum b1
scalar avealphahat = r(mean)
quietly sum b2
scalar avebetahat = r(mean)
di "COLUMN 5" _n "Noise signal ratio = " 12*s2_v " leads to " _n ///
"average intercept: " avealphahat _n "average slope: " avebetahat
* TABLE 26.1 p.919 Sixth column - noise/signal = 4
scalar s2_v = 9/12
set seed 10101
simulate "logitme s2_v" b1=r(b1) b2=r(b2), reps($numsims)
quietly sum b1
scalar avealphahat = r(mean)
quietly sum b2
scalar avebetahat = r(mean)
di "COLUMN 6" _n "Noise signal ratio = " 12*s2_v " leads to " _n ///
"average intercept: " avealphahat _n "average slope: " avebetahat
********** CLOSE OUTPUT **********
log close
clear
exit