/// Here is a very simple simulation to illustrate regression to the mean ///
/// It has been claimed that something along these lines is responsible for ///
/// Feinstein's "crossover" result. ///
clear
/// Build the 2x2 correlation matrix linking SES and ability. The off-diagonal ///
/// entry (.7 here) is the SES-ability correlation; change it to see how the ///
/// results respond. ///
matrix c = (1, .7 \ .7, 1)
matrix list c
/// Fix the seed of the random-number generator. ///
set seed 12345
/// Use a sample of 1000 observations. ///
set obs 1000
/// Number the observations so that each one carries an identifier. ///
generate id = _n
/// Create two variables with the correlation given in c: s is SES and x ///
/// is true ability. ///
corr2data s x, corr(c)
/// True ability is held fixed across measurement occasions, while each observed ///
/// score adds measurement error (transitory influences) to it. The error is ///
/// generated separately from the true score and separately for each of the four ///
/// occasions. The 0.5 multiplier sets the amount of measurement error and can ///
/// be adjusted. ///
forvalues t = 1/4 {
    generate y_`t' = x + (0.5*rnormal())
}
/// Summary statistics of the observed scores at each occasion. ///
summarize y_1 y_2 y_3 y_4
/// Order the observations by SES. ///
sort s
/// Form 3 SES groups from the sorted position: 1 = bottom quarter, 3 = top ///
/// quarter, 2 = middle half. The cutoffs are fractions of _N so they follow the ///
/// sample size, and the top cutoff is strict so that the top and bottom groups ///
/// have the same size (250 each with 1000 observations). ///
gen ses_group=2
replace ses_group=1 if _n <= _N/4
replace ses_group=3 if _n > 3*_N/4
/// Order the observations by the observed score at occasion 1. ///
sort y_1
/// Store the occasion-1 rank, then form 3 ability groups from the sorted ///
/// position using the same quarter cutoffs as for SES. ///
gen ry_1=_n
gen y1_q=2
replace y1_q=1 if _n <= _N/4
replace y1_q=3 if _n > 3*_N/4
/// Rank the observations at occasions 2 to 4: sort on each observed score in ///
/// turn and store the sorted position. The occasion-1 rank (ry_1) was created ///
/// earlier. ///
forvalues t = 2/4 {
    sort y_`t'
    generate ry_`t' = _n
}
/// Summarize the ranks at every occasion within each of the four groups of ///
/// interest, in this order: high SES low ability, high SES high ability, ///
/// low SES low ability, low SES high ability. ///
foreach ses in 3 1 {
    foreach abil in 1 3 {
        summarize ry_1 ry_2 ry_3 ry_4 if ses_group==`ses' & y1_q==`abil'
    }
}
/// To draw one line per group across the four occasions, first reshape the ///
/// ranks from wide to long (one row per id and occasion t), then average the ///
/// rank within each combination of occasion, SES group and ability group. ///
reshape long ry_, i(id) j(t)
collapse (mean) ry_, by(t ses_group y1_q)
/// Plot the mean rank against occasion for the four groups of interest. The ///
/// variable name is written out in full as ry_ so that the command does not ///
/// depend on variable abbreviation being allowed. ///
graph twoway (line ry_ t if ses_group==3 & y1_q==1) ///
(line ry_ t if ses_group==3 & y1_q==3) ///
(line ry_ t if ses_group==1 & y1_q==1) ///
(line ry_ t if ses_group==1 & y1_q==3) ///
, legend(label(1 "High SES low ability") label(2 "High SES high ability") ///
label(3 "Low SES low ability") label(4 "Low SES high ability"))
/// You can see the Feinstein pattern over the first three time points. ///
/// The lines don't cross because the model assumes the "regression towards the mean" ///
/// effect is dissipated after occasion 2. Nesselroade et al. discuss this kind of thing. ///