eAppendix: Collider-stratification bias due to censoring in prospective cohort studies
/*********************************************************************
This macro calculates the observable parameter estimates from a cohort study
with censoring given the following structural equations.
The variables E, C, U and Y are binomial variables with probabilities:
prob(E) = prevE
prob(U) = prevU
prob(Y) = prevY + a*U + te*E
prob(C) = prevC + b*U + c*E + d*U*E
wherea,b,c,d,te are parameterized as causal risk differences.
The observed estimates in the selected (i.e. non-censored) are defined as
risk difference: prob(Y=1|E=1,C=0)-prob(Y=1|E=0,C=0)
risk ratio: prob(Y=1|E=1,C=0)/prob(Y=1|E=0,C=0)
These observed estimates can be compared to causal effects,
which since no confounding exists, can be calculated as
risk difference: prob(Y=1|E=1)-prob(Y=1|E=0)
risk ratio: prob(Y=1|E=1)/prob(Y=1|E=0)
**********************************************************************/
DM 'log; clear; out; clear;';
%macrocolliderstrat (out=,prevE=,prevU=,prevY=,prevC=,a=,b=,c=,d=,te=);
data &out;
doprevE = &prevE;* The prevalence of the exposure E;
doprevU = &prevU;* The prevalence of unmeasured factor U;
doprevY = &prevY;* The baseline risk of disease Y;
doprevC = &prevC;* The baseline probability of censoring;
do a = &a;* Strength of U->Y effect;
do b = &b;* Strength of U->C effect;
do c = &c;* Strength of E->C effect;
do d = &d;* Strength of U*E->C interaction effect;
dote = &te;* Strength of E->Y effect, i.e. the true effect;
* Probabilities in the censored;
pE1U1Y1C1 = prevE*prevU*(prevY+a+te)*(prevC+b+c+d);
pE1U1Y0C1 = prevE*prevU*(1-(prevY+a+te))*(prevC+b+c+d);
pE0U1Y1C1 = (1-prevE)*prevU*(prevY+a)*(prevC+b);
pE0U1Y0C1 = (1-prevE)*prevU*(1-(prevY+a))*(prevC+b);
pE1U0Y1C1 = prevE*(1-prevU)*(prevY+te)*(prevC+c);
pE1U0Y0C1 = prevE*(1-prevU)*(1-(prevY+te))*(prevC+c);
pE0U0Y1C1 = (1-prevE)*(1-prevU)*(prevY)*(prevC);
pE0U0Y0C1 = (1-prevE)*(1-prevU)*(1-prevY)*(prevC);
* Probabilities in the uncensored;
pE1U1Y1C0 = prevE*prevU*(prevY+a+te)*(1-(prevC+b+c+d));
pE1U1Y0C0 = prevE*prevU*(1-(prevY+a+te))*(1-(prevC+b+c+d));
pE0U1Y1C0 = (1-prevE)*prevU*(prevY+a)*(1-(prevC+b));
pE0U1Y0C0 = (1-prevE)*prevU*(1-(prevY+a))*(1-(prevC+b));
pE1U0Y1C0 = prevE*(1-prevU)*(prevY+te)*(1-(prevC+c));
pE1U0Y0C0 = prevE*(1-prevU)*(1-(prevY+te))*(1-(prevC+c));
pE0U0Y1C0 = (1-prevE)*(1-prevU)*(prevY)*(1-(prevC));
pE0U0Y0C0 = (1-prevE)*(1-prevU)*(1-prevY)*(1-prevC);
* Marginal probabilities across strata of U;
*in the censored;
pE1Y1C1 = pE1U1Y1C1 + pE1U0Y1C1;
pE1Y0C1 = pE1U1Y0C1 + pE1U0Y0C1;
pE0Y1C1 = pE0U1Y1C1 + pE0U0Y1C1;
pE0Y0C1 = pE0U1Y0C1 + pE0U0Y0C1;
* in the uncensored;
pE1Y1C0 = pE1U1Y1C0 + pE1U0Y1C0;
pE1Y0C0 = pE1U1Y0C0 + pE1U0Y0C0;
pE0Y1C0 = pE0U1Y1C0 + pE0U0Y1C0;
pE0Y0C0 = pE0U1Y0C0 + pE0U0Y0C0;
* marginal probabilities in the population;
pE1Y1 = pE1Y1C1 + pE1Y1C0;
pE1Y0 = pE1Y0C1 + pE1Y0C0;
pE0Y1 = pE0Y1C1 + pE0Y1C0;
pE0Y0 = pE0Y0C1 + pE0Y0C0;
*probability of censoring by exposure, status;
pCen = (pE1Y1C1+pE1Y0C1+pE0Y1C1+pE0Y0C1)/1;
labelpCen= "Proportion Censored Overall";
pCenExpos = (pE1Y1C1+pE1Y0C1)/(pE1Y1C1+pE1Y0C1 + pE1Y1C0+pE1Y0C0);
labelpCenExpos= "Proportion Censored among the Exposed";
pCenUnexp = (pE0Y1C1+pE0Y0C1)/(pE1Y1C1+pE1Y0C1 + pE1Y1C0+pE1Y0C0);
labelpCenUnexp= "Proportion Censored among the Unexposed";
*probability of censoring by E, U;
pCen_EU =(pE1U1Y1C1+pE1U1Y0C1)/ (pE1U1Y1C1+pE1U1Y0C1+pE1U1Y1C0+pE1U1Y0C0);
labelpCen_EU= "Proportion Censored, E=1, U=1";
pCen_noEU = pE0U1Y1C1+pE0U1Y0C1)/ (pE0U1Y1C1+pE0U1Y0C1+pE0U1Y1C0+pE0U1Y0C0);
labelpCen_noEU= "Proportion Censored, E=0, U=1";
pCen_EnoU =(pE1U0Y1C1+pE1U0Y0C1 / (pE1U0Y1C1+pE1U0Y0C1+pE1U0Y1C0+pE1U0Y0C0);
labelpCen_EnoU= "Proportion Censored, E=1, U=0";
pCen_noEnoU=(pE0U0Y1C1+pE0U0Y0C1)/ (pE1U0Y1C1+pE1U0Y0C1+pE1U0Y1C0+pE1U0Y0C0);
labelpCen_noEnoU= "Proportion Censored, E=0, U=0";
*probability of outcome;
pYobs = (pE1Y1C0+pE0Y1C0)/(pE1Y1C0+pE0Y1C0+pE1Y0C0+pE0Y0C0);
labelpYobs= "Probability of Outcome among Observed";
pYcen = (pE1Y1C1+pE0Y1C1)/(pE1Y1C1+pE0Y1C1+pE1Y0C1+pE0Y0C1);
labelpYcen= "Probability of Outcome among Censored";
*true risk;
pY1 = (pE1Y1C0+pE0Y1C0+pE1Y1C1+pE0Y1C1)/1; label pY1 = "true population risk";
*observable effects in the uncensored;
rYE1C0 = pE1Y1C0/(pE1Y1C0+pE1Y0C0); label rYE1C0 = "observed risk in the exposed";
rYE0C0 = pE0Y1C0/(pE0Y1C0+pE0Y0C0); label rYE0C0 = "observed risk in the unexposed";
RDC0 = rYE1C0-rYE0C0; label RDC0 = "Observed Risk Difference";
RRC0 = rYE1C0/rYE0C0; label RRC0 = "Observed Risk Ratio";
ORC0 = (rYE1C0/(1-rYE1C0))/(rYE0C0/(1-rYE0C0)); label ORC0 = "Observed Odds Ratio";
*true effects;
rYE1 = pE1Y1/(pE1Y1+pE1Y0); label rYE1 = "true risk in the exposed";
rYE0 = pE0Y1/(pE0Y1+pE0Y0); label rYE0 = "true risk in the unexposed";
RDt = rYE1-rYE0; label RDt = "True Risk Difference";
RRt = rYE1/rYE0; label RRt = "True Risk Ratio";
ORt = (rYE1/(1-rYE1))/(rYE0/(1-rYE0)); label ORt = "True Odds Ratio";
* Check to make sure all probabilities are between 0 and 1;
* if not, write a warning to the log;
if (min(of pE1U1Y1C1--pE0Y0C0) > 0) and (max(of pE1U1Y1C1--pE0Y0C0) <1) then output;
else putlog "*** Invalid parameters ***";
drop pE1U1Y1C1--pE0Y0;
formatpCen--ORt5.4;
end;end;end;end;end;end;end;end;end;
run;
quit;
%mend;
/*to call macro*/
%colliderstrat (
out=,/* name to call the dataset being created */
prevE=,/* exposure prevalence (between 0 and 1) */
prevU=,/* U prevalence (between 0 and 1) */
prevY=,/* baseline risk (between 0 and 1) */
prevC=,/* baseline censoring (between 0 and 1) */
a=,/* RD comparing U to no U (between 0 and 1) */
b=,/* U effect on censoring prob (between 0 and 1)*/
c=,/* exposure effect on censoring prob (between 0 and 1)*/
d=,/* interaction effect on censoring prob (between 0 and 1)*/
te=/* true RD with exposure (between 0 and 1) */
);
quit;