// This is a short bit of code to estimate a linear regression model that allows
// the residual variance (actually the natural log of the standard deviation) to
// be a function of a set of predictor variables. The model is estimated by
// maximum likelihood.
//
// This sort of model is discussed on pp. 28-34 of Scott R. Eliason (1993)
// Maximum Likelihood Estimation: Logic and Practice, Sage.
// Eliason calls it the heteroscedastic normal pdf model.
//
// Bruce Western and Deirdre Bloome discuss various ways of doing this sort of
// thing in their 2009 Sociological Methodology article 'Variance Function
// Regressions for Studying Inequality'.
//
// Western and Bloome give the Stata code for one of their methods at the end of
// their article. Note that there is a trivial typo in the printed version of
// their code that will generate an error message. I've corrected the typo and
// give their code below mine. I find my way of doing things much more direct,
// but I haven't investigated how it performs in very many cases and there may
// be advantages to their method which I haven't appreciated. In any case, if
// you use the Western/Bloome code you should acknowledge them and not me!

// Begin by generating some data: a binary variable x1, a continuous variable x2
// and a response variable y1. The important feature of the data is that the
// residual variation is much greater if x1 = 1 than if x1 = 0. This is what we
// want to recover from the model.
clear
set obs 10000
gen z = runiform()
gen x1 = 0 if z < 0.5
replace x1 = 1 if z >= 0.5
gen x2 = rnormal(0, 5)
gen y1 = 1.5*x1 + 0.5*x2 + rnormal(0, 1) if x1 == 0
replace y1 = 1.5*x1 + 0.5*x2 + rnormal(0, 5) if x1 == 1

// Now establish the target
tab x1, summ(y1)
// Note what the standard deviation is if x1 = 0 and if x1 = 1

// Now define the likelihood to be maximised and its arguments
capture program drop hetnormal
program define hetnormal
    version 14.1
    args lnf mu lnsigma
    quietly replace `lnf' = ///
        ln(normalden($ML_y1, `mu', exp(`lnsigma')))
end

// This bit defines the model and tells Stata to maximise the likelihood.
// We'll start with just one predictor variable - x1
ml model lf hetnormal ///
    (mu: y1 = x1) ///
    (lnsigma: x1), ///
    diparm(lnsigma, exp label("sigma"))
ml check
ml max

// The slopes for the means are reported in the panel labelled mu.
// The slopes for the log standard deviation are reported in the panel labelled
// lnsigma.
//
// sigma is exp(_cons), i.e. the standard deviation if x1 = 0.
// The standard deviation if x1 = 1 is exp(_cons + the coefficient on x1).

// This is the Western/Bloome code.
// NB: the scale for their model of the residual variation is the log of the
// variance, i.e. to compare the results with those produced by my code you
// need to exponentiate and take the square root.
reg y1 x1
predict R, r
gen R2 = R^2
glm R2 x1, family(gamma) link(log)
predict S2, mu
gen LOGLIK = (-.5*(ln(S2) + (R2/S2)))
egen LL0 = sum(LOGLIK)
di LL0

* Updating beta and lambda coefficients
gen DLL = 1
while DLL > .00000001 {
    drop R
    quietly: reg y1 x1 [aw=1/S2]
    drop S2
    predict R, r
    replace R2 = R^2
    est store BETA
    quietly: glm R2 x1, family(gamma) link(log)
    predict S2, mu
    est store LAMBDA
    replace LOGLIK = (-.5*(ln(S2) + (R2/S2)))
    egen LLN = sum(LOGLIK)
    di LLN
    replace DLL = LLN - LL0
    replace LL0 = LLN
    drop LLN
}
est table BETA LAMBDA, b se

// This adds another predictor, the continuous variable x2. It should have no
// effect on the residual variation.
ml model lf hetnormal ///
    (mu: y1 = x1 x2) ///
    (lnsigma: x1 x2), ///
    diparm(lnsigma, exp label("sigma"))
ml check
ml max
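
// One way to check that claim after the fit above is a Wald test of the x2
// coefficient in the lnsigma equation; it should be nowhere near significance.
test [lnsigma]x2 = 0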
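
// A minimal post-estimation sketch: nlcom puts the fitted log standard
// deviations back on the original scale with delta-method standard errors.
// With x2 held at 0, sigma0 and sigma1 (arbitrary labels) are the standard
// deviations for x1 = 0 and x1 = 1, which should be close to the 1 and 5
// used to generate the data.
nlcom (sigma0: exp([lnsigma]_b[_cons])) ///
      (sigma1: exp([lnsigma]_b[_cons] + [lnsigma]_b[x1]))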
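
// And a sketch of the same quantities from the stored Western/Bloome
// estimates, exponentiating the log-variance coefficients and taking square
// roots. Their LAMBDA model includes only x1, so this matches the first ml
// fit above. Note that est restore replaces the ml results currently in
// memory, which is why this comes last.
est restore LAMBDA
di "sd if x1 = 0: " sqrt(exp(_b[_cons]))
di "sd if x1 = 1: " sqrt(exp(_b[_cons] + _b[x1]))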