eAppendix: Collider-stratification bias due to censoring in prospective cohort studies

/*********************************************************************

This macro calculates the observable parameter estimates from a cohort study

with censoring given the following structural equations.

The variables E, C, U and Y are binomial variables with probabilities:

prob(E) = prevE

prob(U) = prevU

prob(Y) = prevY + a*U + te*E

prob(C) = prevC + b*U + c*E + d*U*E

wherea,b,c,d,te are parameterized as causal risk differences.

The observed estimates in the selected (i.e. non-censored) are defined as

risk difference: prob(Y=1|E=1,C=0)-prob(Y=1|E=0,C=0)

risk ratio: prob(Y=1|E=1,C=0)/prob(Y=1|E=0,C=0)

These observed estimates can be compared to causal effects,

which since no confounding exists, can be calculated as

risk difference: prob(Y=1|E=1)-prob(Y=1|E=0)

risk ratio: prob(Y=1|E=1)/prob(Y=1|E=0)

**********************************************************************/

DM 'log; clear; out; clear;';

%macrocolliderstrat (out=,prevE=,prevU=,prevY=,prevC=,a=,b=,c=,d=,te=);

data &out;

doprevE = &prevE;* The prevalence of the exposure E;

doprevU = &prevU;* The prevalence of unmeasured factor U;

doprevY = &prevY;* The baseline risk of disease Y;

doprevC = &prevC;* The baseline probability of censoring;

do a = &a;* Strength of U->Y effect;

do b = &b;* Strength of U->C effect;

do c = &c;* Strength of E->C effect;

do d = &d;* Strength of U*E->C interaction effect;

dote = &te;* Strength of E->Y effect, i.e. the true effect;

* Probabilities in the censored;

pE1U1Y1C1 = prevE*prevU*(prevY+a+te)*(prevC+b+c+d);

pE1U1Y0C1 = prevE*prevU*(1-(prevY+a+te))*(prevC+b+c+d);

pE0U1Y1C1 = (1-prevE)*prevU*(prevY+a)*(prevC+b);

pE0U1Y0C1 = (1-prevE)*prevU*(1-(prevY+a))*(prevC+b);

pE1U0Y1C1 = prevE*(1-prevU)*(prevY+te)*(prevC+c);

pE1U0Y0C1 = prevE*(1-prevU)*(1-(prevY+te))*(prevC+c);

pE0U0Y1C1 = (1-prevE)*(1-prevU)*(prevY)*(prevC);

pE0U0Y0C1 = (1-prevE)*(1-prevU)*(1-prevY)*(prevC);

* Probabilities in the uncensored;

pE1U1Y1C0 = prevE*prevU*(prevY+a+te)*(1-(prevC+b+c+d));

pE1U1Y0C0 = prevE*prevU*(1-(prevY+a+te))*(1-(prevC+b+c+d));

pE0U1Y1C0 = (1-prevE)*prevU*(prevY+a)*(1-(prevC+b));

pE0U1Y0C0 = (1-prevE)*prevU*(1-(prevY+a))*(1-(prevC+b));

pE1U0Y1C0 = prevE*(1-prevU)*(prevY+te)*(1-(prevC+c));

pE1U0Y0C0 = prevE*(1-prevU)*(1-(prevY+te))*(1-(prevC+c));

pE0U0Y1C0 = (1-prevE)*(1-prevU)*(prevY)*(1-(prevC));

pE0U0Y0C0 = (1-prevE)*(1-prevU)*(1-prevY)*(1-prevC);

* Marginal probabilities across strata of U;

*in the censored;

pE1Y1C1 = pE1U1Y1C1 + pE1U0Y1C1;

pE1Y0C1 = pE1U1Y0C1 + pE1U0Y0C1;

pE0Y1C1 = pE0U1Y1C1 + pE0U0Y1C1;

pE0Y0C1 = pE0U1Y0C1 + pE0U0Y0C1;

* in the uncensored;

pE1Y1C0 = pE1U1Y1C0 + pE1U0Y1C0;

pE1Y0C0 = pE1U1Y0C0 + pE1U0Y0C0;

pE0Y1C0 = pE0U1Y1C0 + pE0U0Y1C0;

pE0Y0C0 = pE0U1Y0C0 + pE0U0Y0C0;

* marginal probabilities in the population;

pE1Y1 = pE1Y1C1 + pE1Y1C0;

pE1Y0 = pE1Y0C1 + pE1Y0C0;

pE0Y1 = pE0Y1C1 + pE0Y1C0;

pE0Y0 = pE0Y0C1 + pE0Y0C0;

*probability of censoring by exposure, status;

pCen = (pE1Y1C1+pE1Y0C1+pE0Y1C1+pE0Y0C1)/1;

labelpCen= "Proportion Censored Overall";

pCenExpos = (pE1Y1C1+pE1Y0C1)/(pE1Y1C1+pE1Y0C1 + pE1Y1C0+pE1Y0C0);

labelpCenExpos= "Proportion Censored among the Exposed";

pCenUnexp = (pE0Y1C1+pE0Y0C1)/(pE1Y1C1+pE1Y0C1 + pE1Y1C0+pE1Y0C0);

labelpCenUnexp= "Proportion Censored among the Unexposed";

*probability of censoring by E, U;

pCen_EU =(pE1U1Y1C1+pE1U1Y0C1)/ (pE1U1Y1C1+pE1U1Y0C1+pE1U1Y1C0+pE1U1Y0C0);

labelpCen_EU= "Proportion Censored, E=1, U=1";

pCen_noEU = pE0U1Y1C1+pE0U1Y0C1)/ (pE0U1Y1C1+pE0U1Y0C1+pE0U1Y1C0+pE0U1Y0C0);

labelpCen_noEU= "Proportion Censored, E=0, U=1";

pCen_EnoU =(pE1U0Y1C1+pE1U0Y0C1 / (pE1U0Y1C1+pE1U0Y0C1+pE1U0Y1C0+pE1U0Y0C0);

labelpCen_EnoU= "Proportion Censored, E=1, U=0";

pCen_noEnoU=(pE0U0Y1C1+pE0U0Y0C1)/ (pE1U0Y1C1+pE1U0Y0C1+pE1U0Y1C0+pE1U0Y0C0);

labelpCen_noEnoU= "Proportion Censored, E=0, U=0";

*probability of outcome;

pYobs = (pE1Y1C0+pE0Y1C0)/(pE1Y1C0+pE0Y1C0+pE1Y0C0+pE0Y0C0);

labelpYobs= "Probability of Outcome among Observed";

pYcen = (pE1Y1C1+pE0Y1C1)/(pE1Y1C1+pE0Y1C1+pE1Y0C1+pE0Y0C1);

labelpYcen= "Probability of Outcome among Censored";

*true risk;

pY1 = (pE1Y1C0+pE0Y1C0+pE1Y1C1+pE0Y1C1)/1; label pY1 = "true population risk";

*observable effects in the uncensored;

rYE1C0 = pE1Y1C0/(pE1Y1C0+pE1Y0C0); label rYE1C0 = "observed risk in the exposed";

rYE0C0 = pE0Y1C0/(pE0Y1C0+pE0Y0C0); label rYE0C0 = "observed risk in the unexposed";

RDC0 = rYE1C0-rYE0C0; label RDC0 = "Observed Risk Difference";

RRC0 = rYE1C0/rYE0C0; label RRC0 = "Observed Risk Ratio";

ORC0 = (rYE1C0/(1-rYE1C0))/(rYE0C0/(1-rYE0C0)); label ORC0 = "Observed Odds Ratio";

*true effects;

rYE1 = pE1Y1/(pE1Y1+pE1Y0); label rYE1 = "true risk in the exposed";

rYE0 = pE0Y1/(pE0Y1+pE0Y0); label rYE0 = "true risk in the unexposed";

RDt = rYE1-rYE0; label RDt = "True Risk Difference";

RRt = rYE1/rYE0; label RRt = "True Risk Ratio";

ORt = (rYE1/(1-rYE1))/(rYE0/(1-rYE0)); label ORt = "True Odds Ratio";

* Check to make sure all probabilities are between 0 and 1;

* if not, write a warning to the log;

if (min(of pE1U1Y1C1--pE0Y0C0) > 0) and (max(of pE1U1Y1C1--pE0Y0C0) <1) then output;

else putlog "*** Invalid parameters ***";

drop pE1U1Y1C1--pE0Y0;

formatpCen--ORt5.4;

end;end;end;end;end;end;end;end;end;

run;

quit;

%mend;

/*to call macro*/

%colliderstrat (

out=,/* name to call the dataset being created */

prevE=,/* exposure prevalence (between 0 and 1) */

prevU=,/* U prevalence (between 0 and 1) */

prevY=,/* baseline risk (between 0 and 1) */

prevC=,/* baseline censoring (between 0 and 1) */

a=,/* RD comparing U to no U (between 0 and 1) */

b=,/* U effect on censoring prob (between 0 and 1)*/

c=,/* exposure effect on censoring prob (between 0 and 1)*/

d=,/* interaction effect on censoring prob (between 0 and 1)*/

te=/* true RD with exposure (between 0 and 1) */

);

quit;