/*What if there are missing a/p/c - i.e. it doesn't occur in the data*/ /*Version control - some options only work with stata 11*/ program apc, eclass version 11.2 syntax varname [if] [in], Age(varname) Period(varname) Cohort(varname) [Lowerbound(real 123456789) Upperbound(real 123456789) /// ptol(real 1e-10) vtol(real 1e-10) maxiter(integer 2000000) maxpeniter(integer 100) /// technique(string) method(string) graph(string) ] marksample touse set more off /*Generating dummies for age, period and cohort*/ qui: tab `age' if `touse', gen(adum) qui: tab `period' if `touse', gen(pdum) qui: tab `cohort' if `touse', gen(cdum) /*Generate macors that holds all the dummies while counting the number of variables*/ local numvars = 0 local counter = 1 local tempagedummies local tempagedummiesexfirst foreach var of varlist adum* { local numvars = `numvars' + 1 tempvar a`counter' gen `a`counter''= `var' local tempagedummies `tempagedummies' `a`counter'' if `counter' ~=1 local tempagedummiesexfirst `tempagedummiesexfirst' `a`counter'' local counter = `counter' + 1 } local counter = 1 local tempperioddummies local tempperioddummiesexfirst foreach var of varlist pdum* { local numvars = `numvars' + 1 tempvar p`counter' gen `p`counter''= `var' local tempperioddummies `tempperioddummies' `p`counter'' if `counter' ~=1 local tempperioddummiesexfirst `tempperioddummiesexfirst' `p`counter'' local counter = `counter' + 1 } local counter = 1 local tempcohortdummies local tempcohortdummiesexfirst foreach var of varlist cdum* { local numvars = `numvars' + 1 tempvar c`counter' gen `c`counter''= `var' local tempcohortdummies `tempcohortdummies' `c`counter'' if `counter' ~=1 local tempcohortdummiesexfirst `tempcohortdummiesexfirst' `c`counter'' local counter = `counter' + 1 } /*Dropping the non-temporary dummy variables*/ drop *dum* /*Checking the rank of the matrix of dependentn variables*/ local tempalldummies `tempagedummies' `tempperioddummies' `tempcohortdummies' tempname rankofmat mata: checkrank("`tempalldummies'", "`touse'", "`rankofmat'") if `rankofmat' >= `numvars' -2 { di in green "Matrix of values of dependent variables has full rank" di in green "Coefficients are identified without any restrictions (i.e OLS 'works')" di in green "But we'll continue anyway estimating using either Deaton/Paxson or Maximum Entropy as specified" } if ("`method'"=="me") & (`numvars'>10) { di in green "You have specified maximum entropy estimation with `numvars' dependent variables" di in green "This requires the estimation of 2^`counter' probabilities (a large number: see http://scivestor.com/insight/?p=308)" di in green "This problem will require a lot of memory and estimation, and even if possible, will be very slow" di di in green "If estimation is not possible try using generalised maximum entropy by specifying method(gme)" } /*Parse the technique option */ if ("`technique'"=="") { local technique "nr" } if ("`technique'"~="nr") & ("`technique'"~="nm") & ("`technique'"~="dfgs") & ("`technique'"~="dfp") { di in red "technique, if specificed, must be one of nr, dfp, dfgs or nm" exit } if ("`technique'"=="nm") { local eval "d0" } else { local eval "d2" } /*Parse the method option */ if ("`method'"=="") { local method "gme" } if ("`method'"~="me") & ("`method'"~="gme") & ("`method'"~="dp") { di in red "method, if specified, must be one of: me, gme or dp" exit } if ("`method'"=="me") | ("`method'"=="gme") { if ("`method'"=="me") { local gme = 0 } else { local gme = 1 /*Check bounds have been inputted if me or gme options have been selected*/ if `lowerbound' == 123456789 & `upperbound' == 123456789 { di in red "If using maximum entropy or generalised maximum entropy lower and upper bounds must be supplied" exit } /*Need to check bounds when applying maximum entropy method*/ qui: sum `varlist' if r(min)<`lowerbound' | r(max)>`upperbound' { di in red "At least one value of the dependent variable is outside the bounds specified" exit } } /*Calling the maximum entropy procedure*/ tempname b V N mata: forapc("`varlist'", /// "`tempagedummies'", /// "`tempperioddummies'", /// "`tempcohortdummies'", /// "`touse'", /// `lowerbound', /// `upperbound', /// `gme', /// `ptol', /// `vtol', /// `maxiter', /// `maxpeniter', /// "`technique'", /// "`eval'", /// "`b'", /// "`V'", /// "`N'" ) mat def coeffs = `b' } else if ("`method'"=="dp") { /*Do the Deaton-Paxson method*/ local forconstraint 0 local plus "+" local equals "=" foreach var of varlist `tempperioddummiesexfirst' { local forconstraint `forconstraint' `plus' `var' } local forconstraint `forconstraint' `equals' 0 constraint 1 `forconstraint' qui: cnsreg `varlist' `tempagedummiesexfirst' `tempperioddummiesexfirst' `tempcohortdummiesexfirst', constraints(1) tempname b tempname V mat def `b' = e(b) mat def `V' = e(V) local N = r(N) } mat def coeffs = `b' mat def vcv = `V' /*PRINTING RESULTS*/ /*Let's generate names for each variable*/ /*Also create matrices that hold value of each of the three coefficient vectors separately*/ /*Also create matrices that hold the standard errors and confidence intervals*/ /*Also create matrices that hold the values of the parameters giving rise to the dummy*/ local overallcounter = 1 foreach apcvar in `age' `period' `cohort' { mat def `apcvar'mat = 0 mat def `apcvar'sdmat = . mat def `apcvar'ciuppmat = . mat def `apcvar'cilowmat = . /*Generating locals holding values of each of the apc values*/ qui: levelsof `apcvar' if `touse', local(`apcvar'values) local `apcvar'namelist /*Want to store all but the first value - the first being ommitted*/ local counter = 1 foreach i in ``apcvar'values' { if `counter' ~=1 local `apcvar'namelist ``apcvar'namelist' `apcvar'`i' /*Names*/ if `counter' ~=1 mat `apcvar'mat = `apcvar'mat \ coeffs[1, `overallcounter'] /*Coefficients*/ if `counter' ~=1 mat `apcvar'sdmat = `apcvar'sdmat \ (vcv[`overallcounter', `overallcounter'])^.5 /*standard deviations*/ if `counter' ~=1 mat `apcvar'ciuppmat = `apcvar'ciuppmat \ coeffs[1, `overallcounter'] + 1.96*(vcv[`overallcounter', `overallcounter'])^.5 /*Upper confidence interval*/ if `counter' ~=1 mat `apcvar'cilowmat = `apcvar'cilowmat \ coeffs[1, `overallcounter'] - 1.96*(vcv[`overallcounter', `overallcounter'])^.5 /*Lower confidence interval*/ if `counter' ==1 mat def `apcvar'valmat = `i' if `counter' ~=1 mat `apcvar'valmat = `apcvar'valmat \ `i' if `counter' ~=1 local overallcounter = `overallcounter' + 1 /*There is no entry in the coefficient matrix for the first value of each apcvar*/ local counter = `counter' + 1 } } matrix colnames `b' = ``age'namelist' ``period'namelist' ``cohort'namelist' _cons matrix colnames `V' = ``age'namelist' ``period'namelist' ``cohort'namelist' _cons matrix rownames `V' = ``age'namelist' ``period'namelist' ``cohort'namelist' _cons ereturn post `b' `V', esample(`touse') ereturn scalar N = `N' ereturn local cmd "apc" ereturn display /*Graphing*/ /*Creating variables holding the coefficients and values of each of age, period and cohort coefficients*/ foreach apcvar in `age' `period' `cohort' { svmat `apcvar'mat, names(`apcvar'coeffs) svmat `apcvar'valmat, names(`apcvar'values) svmat `apcvar'ciuppmat, names(`apcvar'ciupp) svmat `apcvar'cilowmat, names(`apcvar'cilow) tempvar `apcvar'coeffs `apcvar'values `apcvar'ciupp `apcvar'cilow qui: gen ``apcvar'coeffs' = `apcvar'coeffs1 qui: gen ``apcvar'values' = `apcvar'values1 qui: gen ``apcvar'ciupp' = `apcvar'ciupp1 qui: gen ``apcvar'cilow' = `apcvar'cilow1 drop `apcvar'coeffs1 `apcvar'values1 `apcvar'ciupp1 `apcvar'cilow1 } if ("`graph'"=="") { local graph "off" } if ("`graph'"== "on, replace") | ("`graph'"== "on,replace") | ("`graph'"== "on, replace"){ /*Graphs with confidence intervals*/ foreach apcvar in `age' `period' `cohort' { twoway (line ``apcvar'ciupp' ``apcvar'values', lpattern(-) color(black)) (line ``apcvar'coeffs' ``apcvar'values', color(black)) (line ``apcvar'cilow' ``apcvar'values',lpattern(-) color(black)), /// ytitle(Coefficients and CIs) xtitle(`apcvar') /// legend(off) saving(`apcvar', replace) } graph combine `age'.gph `period'.gph `cohort'.gph, title("Age, Period, Cohort Coefficients") saving(apc, replace) } else if ("`graph'"== "off") { exit } else if ("`graph'"== "on") { di in red "For graph option 'on, replace' must be specified rather than simple 'on' as command will overwrite graphs with names `age'.gph, `period.gph or `cohort'.gph already saved in working directory)" exit } else { di in red "Graph, if specified must be either 'on, replace' or off)" exit } end version 11.2 mata: mata set matastrict on void forapc(string scalar varname, /// string scalar agedummies, /// string scalar perioddummies, /// string scalar cohortdummies, /// string scalar touse, /// real scalar lowerbound, /// real scalar upperbound, /// real scalar gme, /// real scalar ptol, /// real scalar vtol, /// real scalar maxiter, /// real scalar maxpeniter, /// string scalar technique, /// string scalar eval, /// string scalar b_me, /// string scalar V_me, /// string scalar N ) { real colvector y st_view(y, ., varname, touse) real matrix Da st_view(Da, ., tokens(agedummies), touse) real matrix Dp st_view(Dp, ., tokens(perioddummies), touse) real matrix Dc st_view(Dc, ., tokens(cohortdummies), touse) real scalar Ka real scalar Kp real scalar Kc Ka = cols(Da) Kp = cols(Dp) Kc = cols(Dc) /*Reduced form regression*/ D = Da[.,3..Ka],Dp[.,2..(Kp)], Dc[.,2..(Kc)], J(rows(Da), 1, 1) /*Vector of coefficients and estimate of error variance*/ b = invsym(cross(D,0 , D,0))*cross(D,0 , y,0) errorv = ((y-D*b)'*(y-D*b))/(rows(y)-rows(b)) /*Maximum entropy estimation*/ A = pinv(D)*Da[.,2] , I(cols(D)) K = cols(A) /*Number of structural coefficients*/ if (gme == 1) { /*i.e. if we're doing generalised me*/ betweensupppoints = 2*(upperbound - lowerbound) supppoints = 2 stepsize = betweensupppoints/(supppoints - 1) Supp = J(1, supppoints, 0) Supp[1, 1] = -(upperbound - lowerbound) for (suppix=2; suppix<=supppoints; suppix++) { Supp[1, suppix]= Supp[1, (suppix-1)] + stepsize } } /*if gme==1*/ else if (gme == 0) { Supp = -(upperbound - lowerbound) , (upperbound - lowerbound) } R = cols(Supp) if (gme == 1) { /*i.e. if we're doing generalised me*/ B= (-(upperbound - lowerbound) , (upperbound - lowerbound) ) for (k=2; k<=K; k++) { B = blockdiag(B,Supp) } /* Form the restrictions that the probabilities sum to one*/ u = J(1, R, 1); for (k=2; k<=K; k++) { u = blockdiag(u,J(1, R, 1)) } /*Want the constraint that sum of ps is 1*/ Pi = (b \ J(K, 1, 1)) AB = (A*B \ u) } /*if gme==1*/ else if (gme == 0) { /*i.e. if we're not doing generalised me*/ numstructparams = K numverts = 2^numstructparams elements = Supp /*This block of code creates the vertices of the hypercube*/ B = J(numstructparams,numverts, 0) switchevery = numverts for (rix=1; rix<=numstructparams; rix++) { elix = 0 sinceswitching = 0 switchevery = switchevery/2 for (cix=1; cix<=numverts; cix++) { if (sinceswitching == switchevery) { elix = 1-elix sinceswitching = 1 } else { sinceswitching = sinceswitching + 1 } currelem = elements[1, (1+elix)] B[rix, cix] = currelem } } /*Want the constraint that sum of ps is 1*/ u = J(1, cols(A*B), 1) Pi = (b \ J(1, 1, 1)) AB = (A*B \ u) } /*if gme==0*/ numparams = cols(AB) numconstraints = rows(AB) + cols(AB) /*the former is the number of constraints, including adding to one; the latter is the number of nonnegativity constraints*/ t= J(1, numparams, 1/numparams) /*Starting values*/ counterix = 1 penalty = 0 minusent = 0 conviol = 0 smallestp = -0.1 sigmapviol = 0 iterix = 1 stop = 0 while (smallestp <0 & counterix <=maxpeniter ) { P = 10^iterix T=optimize_init() optimize_init_evaluator(T, &MinusEntropy()) optimize_init_constraints(T,(AB,Pi)) optimize_init_which(T,"min") optimize_init_evaluatortype(T,eval) optimize_init_technique(T, technique) optimize_init_conv_maxiter(T , maxiter) optimize_init_params(T, t) optimize_init_nmsimplexdeltas(T, J(1, numparams, 1)) optimize_init_conv_ptol(T , ptol) optimize_init_conv_vtol(T , vtol) optimize_init_argument(T, 1, P) optimize_init_argument(T, 2, AB) optimize_init_argument(T, 3, Pi) /*optimize_init_tracelevel(T, "none")*/ p=optimize(T) t = optimize_result_params(T) oldminusent = minusent minusent = -cross(t', (ln(t))') /*Calculate penalty for violating constraint*/ penalty = 0 for (tix=1; tix<=cols(t); tix++) { if (t[1, tix] <= 0) { penalty = penalty + ((t[1, tix])^2) } /*if (p[1, pix] < 0)*/ if (t[1, tix] > 1) { penalty = penalty + ((t[1, tix] - 1)^2) } /*if (p[1, pix] < 0)*/ } /*go through all p*/ /*Some tracking of progress*/ oldsmallestp = smallestp smallestp = rowmin((rowmin(t), 0)) /*Set this to 0 if there are no negative ps*/ changinsmallestp = abs(smallestp - oldsmallestp) deltaent = abs(oldminusent - minusent) iterix = iterix + 0.25 printf("Penalty Function Iteration %f\n", counterix) printf("The smallest probability is %f\n", smallestp) printf("Negative entropy (i.e. the function to be maximised subject to constraints) is %f\n", minusent) printf("\n") counterix = counterix + 1 } /*This was just for checking output - can delete*/ t=optimize_result_params(T) var = pinv(A) * (errorv * invsym(D'*D) * (pinv(A))') /*Variance- delta method*/ if (counterix>=maxpeniter) { printf("PENALTY FUNCTION PROCEDURE DID NOT CONVERGE %f\n", 999) printf("COULD NOT FIND FEASIBLE VALUES %f\n", 999) } /*Passing results back to Stata*/ st_matrix(b_me, (B*t')' ) st_matrix(V_me, var) st_numscalar(N, rows(y)) } void MinusEntropy(real scalar todo,real rowvector p,P,AB, Pi,MinusEntropy,g,H) { MinusEntropy = cross(p', (ln(p))') for (pix=1; pix<=cols(p); pix++) { if (p[1, pix] <= 0) { MinusEntropy = MinusEntropy + ((p[1, pix])^2)*P } /*if (p[1, pix] < 0)*/ if (p[1, pix] > 1) { MinusEntropy = MinusEntropy + ((p[1, pix] - 1)^2)*P } /*if (p[1, pix] < 0)*/ } /*loop through all ps*/ /*Adding in the gradient*/ if (todo>=1) { g = J(rows(p), cols(p),0) for (pix=1; pix<=cols(p); pix++) { if (p[1, pix] <=0) { g[1, pix] = 2*p[1, pix]*P } /*if (p[1, pix] < 0)*/ else if (p[1, pix] >1) { g[1, pix] = 2*(p[1, pix] - 1)*P + 1 + ln(p[1, pix]) } /*if p = 0*/ else { g[1, pix] = 1 + ln(p[1, pix]) } } /*loop through all ps*/ if (todo==2) { H = J(cols(p), cols(p), 0) for (pix=1; pix<=cols(p); pix++) { if (p[1, pix] <= 0) { H[pix, pix] = 2*P } /*if (p[1, pix] <= 0)*/ else if (p[1, pix] >1) { H[pix, pix] = 2*P + 1/p[1, pix] } /*if p = 0*/ else { H[pix, pix] = 1/p[1, pix] } /*if (p[1, pix] > 0)*/ } /*loop through all ps*/ } /*If todo==2*/ } /*If todo>=1*/ } void checkrank(string scalar variables, string scalar touse, string scalar rankofmat) { st_view(mattocheck, .,variables, touse) rank = rank(mattocheck) st_numscalar(rankofmat, rank) } end