/*********************************************************************
Heather J. Hoffman
Purpose: To define macros used to generate likelihood function
accounting for left-censored and missing observations.
IMPORTANT NOTE: Use only one of the two MULTICDF macro definitions
provided below. The first is to be used with the
multivariate code and the second with the
bivariate code.
*********************************************************************/
libname mymacs "C:\sasmacros";
options mstored=yes sasmstore=mymacs;
/*------
MACRO NAME: DUMMY
PURPOSE: To create dummy variables for left-censoring in LIFEREG.
PARAMETERS:
NUM = number of variables in analysis
------*/
%macro Dummy(num) / store;
%do i=1 %to #
if(cen&i eq 1) then
lc&i=.;
else if(cen&i eq 0) then
lc&i=var&i;
%end;
%mend Dummy;
/*------
MACRO NAME: VARS
PURPOSE: To generate list of variables to KEEP in SET statement,
for example.
PARAMETERS:
NUM = number of variables in analysis
------*/
%macro Vars(num) / store;
%do i=1 %to #
var&i
%end;
%mend Vars;
/*------
MACRO NAME: LIFEREG
PURPOSE: To obtain univariate MLE results for each variable by
calling PROC LIFEREG assuming normal distribution.
PARAMETERS:
LEFT = left bound holding left-censor dummy variable value
RIGHT = right bound holding observed value or LOD value
------*/
%macro Lifereg(left, right) / store;
proc lifereg data=moremydata covout outest=compare&i noprint;
model (&left,&right)= / dist=normal nolog;
run;
%mend Lifereg;
/*------
MACRO NAME: NUMVAR
PURPOSE: To call LIFEREG macro for each of the NUM variables.
PARAMETERS:
NUM = number of variables in analysis
------*/
%macro NumVar(num) / store;
%do i=1 %to #
%Lifereg(lc&i, var&i);
%end;
%mend NumVar;
/*------
MACRO NAME: IMPUTE
PURPOSE: To impute one-half of LOD value for left-censored data.
Assuming original data is lognormal, subtract ln(2).
PARAMETERS:
NUM = number of variables in analysis
------*/
%macro Impute(num) / store;
%do i=1 %to #
if(cen&i eq 1) then
var&i=var&i-log(2);
%end;
%mend Impute;
/*------
MACRO NAME: IMPUTEORIG
PURPOSE: To impute one-half of LOD value for left-censored data.
Assuming this data is already lognormal, divide by 2.
PARAMETERS:
NUM = number of variables in analysis
------*/
%macro ImputeOrig(num) / store;
%do i=1 %to #
if(cen&i eq 1) then
var&i=var&i/2;
%end;
%mend ImputeOrig;
/*------
MACRO NAME: GETFILES
PURPOSE: To print COMPARE# for each variable in analysis;
MACRO VARIABLES REFERENCED:
NUMVARS = number of variables in analysis
------*/
%macro GetFiles / store;
%do i=1 %to &numvars;
Compare&i
%end;
%mend GetFiles;
/*------
MACRO NAME: GETINITS
PURPOSE: To edit COMPARE# datasets so that they contain only
means and variances for variable #.
To combine these datasets into one called TRY.
To create dataset PEARSON containing correlations.
MACRO VARIABLES REFERENCED:
NUMVARS = number of variables in analysis
------*/
%macro GetInits / store;
%do i=1 %to &numvars;
data Compare&i;
set Compare&i (keep=intercept _scale_);
if _N_=1;
run;
%end;
data try;
set %GetFiles;
run;
data pearson;
set corr (firstobs=4 keep=%Vars(&numvars));
run;
%mend GetInits;
/*------
MACRO NAME: PRINTPTIMES
PURPOSE: To print names of variables indicated by BASE#.
PARAMETERS:
BASE = name of base word to be printed
------*/
%macro PrintpTimes(base) / store;
%do i=1 %to &numvars;
&base&i
%end;
%mend PrintpTimes;
/*------
MACRO NAME: PRNTRES
PURPOSE: To print names of variables to be written to multivariate
results dataset.
MACRO VARIABLES REFERENCED:
NUMVARS = number of variables in analysis
------*/
%macro PrntRes / store;
%do i=1 %to &numvars;
mu&i sig&i
%end;
%do i=1 %to &numvars-1;
%do j=&i+1 %to &numvars;
rho&i&j
%end;
%end;
%mend PrntRes;
/*------
MACRO NAME: PRNTRES2
PURPOSE: To print names of variables to be written to univariate
results dataset.
MACRO VARIABLES REFERENCED:
NUMVARS = number of variables in analysis
------*/
%macro PrntRes2 / store;
%do i=1 %to &numvars;
mu&i sig&i
%end;
%mend PrntRes2;
/*------
MACRO NAME: USERINPUT
PURPOSE: To create global macro variables within NULL data step
that hold actual names of variables in analysis.
To store data imported from Excel in dataset MYDATA,
rename variables VAR1-VARp, and convert character
values to numeric after removing <.
To create p indicator variables CEN1-CENp within dataset
MYDATA, which are assigned 0 if value is observed,
1 if value is censored, or 2 if value is missing.
MACRO VARIABLES REFERENCED:
LIBNAME = name of SAS library where SAS dataset is stored
DATASET = name of SAS dataset being created
NUMVARS = number of variables in dataset to be analyzed
VARLIST = name of variables to be analyzed (each name
enclosed in quotes, separated by single space)
------*/
%macro UserInput / store;
data _null_;
set &libname..&dataset;
array myvar{&numvars} $ (&varlist);
do i=1 to &numvars;
call symput("actualname"||trim(left(i)),myvar{i});
end;
run;
data mydata (drop=%PrintpTimes(tempvar));
set here(rename=(
%do i=1 %to &numvars;
&actualname&i=tempvar&i
%end;
));
%do i=1 %to &numvars;
if(tempvar&i eq ".") then
cen&i=2;
else if(tempvar&i=:"<") then
do;
tempvar&i=input(substr(tempvar&i,2),best.);
cen&i=1;
end;
else
cen&i=0;
var&i=input(trim(left(tempvar&i)),best.);
%end;
run;
%mend UserInput;
/*------
MACRO NAME: LOGTRANS
PURPOSE: To create new variables holding log-tansformed values.
------*/
%macro LogTrans / store;
%do i=1 %to &numvars;
var&i=log(var&i);
%end;
%mend LogTrans;
/*------
MACRO NAME: BUBBLESORT
PURPOSE: To sort each row of (NR x NC) censor indicator matrix C
(observed (0) -> censored (1) -> missing (2)).
To create (NR x NC) matrix VARS that keeps track of
original variable order.
To store number of observed values for each observation
in (NR x 1) column vector OBS.
To store number of censored values for each observation
in (NR x 1) column vector CEN.
To store number of missing values for each observation
in (NR x 1) column vector MIS.
VARIABLES REFERENCED:
NR = number of rows/observations in dataset (scalar)
NC = number of columns/variables in dataset (scalar)
C = (NR x NC) indicator matrix with values 0, 1, 2.
------*/
%macro BubbleSort / store;
vars=J(nr,nc,0);
do i=1 to nr;
do j=1 to nc;
vars[i,j]=j;
end;
end;
do i=1 to nr;
do k=nc-1 to 1 by -1;
do j=1 to k;
if(c[i,j] > c[i,j+1]) then
do;
temp=c[i,j];
c[i,j]=c[i,j+1];
c[i,j+1]=temp;
temp=vars[i,j];
vars[i,j]=vars[i,j+1];
vars[i,j+1]=temp;
end;
end;
end;
end;
obs=J(nr,1,0);
cen=J(nr,1,0);
mis=J(nr,1,0);
do i=1 to nr;
do j=1 to nc;
if(c[i,j] = 0) then
obs[i,1]=obs[i,1]+1;
else if(c[i,j] = 1) then
cen[i,1]=cen[i,1]+1;
else
mis[i,1]=mis[i,1]+1;
end;
end;
%mend BubbleSort;
/*------
MACRO NAME: MULTIPDF
PURPOSE: To calculate multivariate normal PDF PDFCONTRIBUTION from
given observed values for a single observation. This
value is calculated using TEMPY, TEMPMU, and TEMPSIG,
which are specific to the given observation.
VARIABLES REFERENCED:
CURNOBS = number of observed values for current observation
MU = (NC x 1) vctr of estimated means for each variable
CURVAR = (NC x 1) vctr of variable order (obs, cen, mis)
for current observation
CURY = (NC x 1) vctr of data for current observation
SIG = (NC x NC) mtrx of estimated (co)variances for each
of the NC variables
------*/
%macro MultiPDF / store;
tempmu=J(curnobs,1,0);
tempsig=J(curnobs,curnobs,0);
tempy=J(curnobs,1,0);
do i=1 to curnobs;
tempmu[i,1]=mu[curvar[i,1],1];
tempy[i,1]=cury[curvar[i,1],1];
end;
do i=1 to curnobs;
do j=1 to curnobs;
tempsig[i,j]=sig[curvar[i,1],curvar[j,1]];
end;
end;
pi=4*atan(1);
pdfcontribution=inv(sqrt((2*pi)**curnobs)*sqrt(det(tempsig)))
*exp(-0.5*(tempy-tempmu)`*inv(tempsig)*(tempy-tempmu));
%mend MultiPDF;
/*------
MACRO NAME: CENMUSIG
PURPOSE: To construct temporary mean vector TEMPMU and
(co)variance matrix TEMPSIG for current observation.
VARIABLES REFERENCED:
CURNCEN = number of censored values for current observation
CURNOBS = number of observed values for current observation
MU = (NC x 1) vctr of estimated means for each variable
CURVAR = (NC x 1) vctr of variable order (obs, cen, mis)
for current observation
CURY = (NC x 1) vctr of data for current observation
SIG = (NC x NC) mtrx of estimated (co)variances for each
of the NC variables
------*/
%macro CenMuSig / store;
tempmu=J(curncen,1,0);
tempsig=J(curncen,curncen,0);
tempLOD=J(curncen,1,0);
do i=1 to curncen;
tempi=curnobs+i;
tempmu[i,1]=mu[curvar[tempi,1],1];
tempLOD[i,1]=cury[curvar[tempi,1],1];
end;
do i=1 to curncen;
do j=1 to curncen;
tempi=curnobs+i;
tempj=curnobs+j;
tempsig[i,j]=sig[curvar[tempi,1],curvar[tempj,1]];
end;
end;
%mend CenMuSig;
/***** The following four macros are used by the MULTICDF macro *****/
/*------
MACRO NAME: GENDOSTMTS
PURPOSE: To generate and insert NUM DO statements directly into
program (for loops used to sum function of interest
within MULTICDF macro).
PARAMETERS:
NUM = number of DO statements that we want to generate
VARIABLES REFERENCED:
k = number of abscissas used in quadrature procedure
------*/
%macro GenDoStmts(num) / store;
%do numints=&num %to 1 %by -1;
do index&numints=1 to k;
%end;
%mend GenDoStmts;
/*------
MACRO NAME: GENENDSTMTS
PURPOSE: To generate and insert NUM END statements directly into
program (to close DO loops used to sum function of
interest within MULTICDF macro).
PARAMETERS:
NUM = number of END statements that we want to generate
------*/
%macro GenEndStmts(num) / store;
%do numints=&num %to 1 %by -1;
end;
%end;
%mend GenEndStmts;
/*------
MACRO NAME: GENWGHTS
PURPOSE: To generate a string of NUM weights multiplied by one
another (wght1 * wght2 * ... wghtNUM *).
PARAMETERS:
NUM = number of weights that we want to multiply in function
VARIABLES REFERENCED:
WGHT = (k x 1) column vector of weights
INDEX# = index variable assigned in DO statement above
------*/
%macro GenWghts(num) / store;
%do numints=1 %to #
wght[index&numints,1]*
%end;
%mend GenWghts;
/*------
MACRO NAME: DEFINEE
PURPOSE: To assign correct values to elements in vector E, defined
in Genz paper, based on index values assigned within
DO loop.
PARAMETERS:
NUM = NCEN, number censored values for current observation
VARIABLES REFERENCED:
ABS = (k x 1) column vector of abscissas
CHOL = Cholesky decomposition of (NUM x NUM) TEMPSIG matrix
B = column vector defined in Genz (NUM x 1)
W = column vector defined in Genz (NUM x 1)
D = column vector defined in Genz (NUM x 1)
E = column vector defined in Genz (NUM x 1)
------*/
%macro DefineE(num) / store;
%do count1=2 %to #
%let deccount1=%eval(&count1-1);
temp=0;
%do count2=1 %to &deccount1;
w[&count2,1]=abs[index&count2,1];
temp=temp+chol[&count1,&count2]*probit(d[&count2,1]
+w[&count2,1]*(e[&count2,1]-d[&count2,1]));
%end;
e[&count1,1]=probnorm((b[&count1,1]-temp)
/chol[&count1,&count1]);
%end;
%mend DefineE;
/********** End set of macros used by MULTICDF macro **********/
/*------
MACRO NAME: MULTICDF - For Pseudo-likelihood Program
PURPOSE: To approximate bivariate normal CDF CDFCONTRIBUTION
evaluated at LOD from given censored variables for
a single observation.
VARIABLES REFERENCED:
TEMPSIG = (NCEN x NCEN) (co)variance matrix for current obsn
TEMPLOD = (NCEN x 1) vctr of LOD values for variables censored
by current observation
TEMPMU = (NCEN x 1) vctr of values of mu for current obsn
------*/
%macro MultiCDF / store;
upbd=J(curncen,1,0);
do i=1 to curncen;
upbd[i,1]=(tempLOD[i,1]-tempmu[i,1])/sqrt(tempsig[i,i]);
end;
temprho=J(curncen,curncen,0);
do i=1 to curncen;
do j=1 to curncen;
temprho[i,j]=tempsig[i,j]/sqrt(tempsig[i,i]*tempsig[j,j]);
end;
end;
cdfcontribution=probbnrm(upbd[1,1],upbd[2,1],temprho[1,2]);
%mend MultiCDF;
/*------
MACRO NAME: PARTMU
PURPOSE: To partition mean and data vectors into subvectors
PARTMUO/PARTMUC/PARTYO/PARTYC (observed and censored)
for use in calculation of conditional CDF.
VARIABLES REFERENCED:
CURNCEN = number of censored values for current observation
CURNOBS = number of observed values for current observation
MU = (NC x 1) vctr of estimated means for each variable
CURVAR = (NC x 1) vctr of variable order (obs, cen, mis)
for current observation
CURY = (NC x 1) vctr of data for current observation
------*/
%macro PartMU / store;
partmuc=J(curncen,1,0);
partyc=J(curncen,1,0);
do i=1 to curncen;
tempi=curnobs+i;
partmuc[i,1]=mu[curvar[tempi,1],1];
partyc[i,1]=cury[curvar[tempi,1],1];
end;
partmuo=J(curnobs,1,0);
partyo=J(curnobs,1,0);
do i=1 to curnobs;
partmuo[i,1]=mu[curvar[i,1],1];
partyo[i,1]=cury[curvar[i,1],1];
end;
%mend PartMU;
/*------
MACRO NAME: PARTSIG
PURPOSE: To partition variance/covariance matrix into a submatrix
PARTSIGOO/PARTSIGOC/PARTSIGCO/PARTSIGCC.
PARAMETERS:
CASE1 = 'cen' or 'obs' to be used as outer index
CASE2 = 'cen' or 'obs' to be used as inner index
LET1 = 'c' or 'o' to append onto name of submatrix
LET2 = 'c' or 'o' to append onto name of submatrix
VARIABLES REFERENCED:
CURNCEN = number of censored values for current observation
CURNOBS = number of observed values for current observation
CURVAR = (NC x 1) vctr of variable order (obs, cen, mis) for
current observation
SIG = (NC x NC) mtrx of estimated (co)variances for each
of the NC variables
------*/
%macro PartSIG(case1,case2,let1,let2) / store;
partsig&let1&let2=J(curn&case1,curn&case2,0);
do i=1 to curn&case1;
do j=1 to curn&case2;
tempi=curnobs+i;
tempj=curnobs+j;
if("&case1"="cen" & "&case2"="cen") then
partsig&let1&let2[i,j]=sig[curvar[tempi,1],
curvar[tempj,1]];
else if("&case1"="cen" & "&case2"="obs") then
partsig&let1&let2[i,j]=sig[curvar[tempi,1],
curvar[j,1]];
else if("&case1"="obs" & "&case2"="cen") then
partsig&let1&let2[i,j]=sig[curvar[i,1],
curvar[tempj,1]];
else if("&case1"="obs" & "&case2"="obs") then
partsig&let1&let2[i,j]=sig[curvar[i,1],
curvar[j,1]];
end;
end;
%mend PartSIG;
/*------
MACRO NAME: CONDCDF
PURPOSE: To approximate conditional multivariate normal CDF
CONDCONTRIBUTION evaluated at LOD from given censored
variables for a single observation.
VARIABLES REFERENCED:
CURNCEN = number of censored values for current observation
CURNOBS = number of observed values for current observation
CURVAR = (NC x 1) vctr of variable order (obs, cen, mis)
for current observation
MACROS CALLED: They must be compiled prior to execuion of macro.
PARTMU()
PARTSIG(case1,case2,let1,let2)
MULTICDF()
------*/
%macro CondCDF / store;
%PartMU;
%PartSIG(cen,cen,c,c);
%PartSIG(cen,obs,c,o);
%PartSIG(obs,cen,o,c);
%PartSIG(obs,obs,o,o);
tempmu=partmuc+partsigco*inv(partsigoo)*(partyo-partmuo);
tempsig=partsigcc-partsigco*inv(partsigoo)*partsigoc;
tempLOD=J(curncen,1,0);
do i=1 to curncen;
tempi=curnobs+i;
tempLOD[i,1]=cury[curvar[tempi,1],1];
end;
if(curncen = 1) then
condcontribution=CDF('NORMAL',tempLOD[1,1], tempmu[1,1],
sqrt(tempsig[1,1]));
else
do;
%MultiCDF;
condcontribution=cdfcontribution;
end;
%mend CondCDF;
/*------
MACRO NAME: LIKELIHOOD
PURPOSE: To determine if current observation has:
(1) observed values for all nonmissing variables (none
censored), in which case we call the macro MultiPDF
(2) censored values for all nonmissing variables (none
observed), in which case we call the macro MultiCDF
(3) both observed and censored values for all nonmissing
variables, in which case we call the macro MultiPDF
followed by the macro CondCDF
to calculate contribution to log-likelihood function.
VARIABLES REFERENCED:
CURNOBS = number of observed values for current observation
CURNCEN = number of censored values for current observation
MACROS CALLED: They must be compiled prior to execuion of macro.
MULTIPDF()
CENMUSIG()
MULTICDF()
CONDCDF()
------*/
%macro Likelihood / store;
if(curnobs & ^curncen) then
do;
%MultiPDF;
f=pdfcontribution;
end;
else if(^curnobs & curncen) then
do;
%CenMuSig;
if(curncen = 1) then
f=CDF('NORMAL',tempLOD[1,1], tempmu[1,1],
sqrt(tempsig[1,1]));
else
do;
%MultiCDF;
f=cdfcontribution;
end;
end;
else if(curnobs & curncen) then
do;
%MultiPDF;
%CondCDF;
f=pdfcontribution*condcontribution;
end;
%mend Likelihood;
/*------
MACRO NAME: CONSTRAINTS
PURPOSE: To generate list of upper and lower constraints for
parameters. The constraints are printed assuming the
following order of the parameters: lower bounds
followed by a comma followed by the upper bounds for
means, variances, and finally for correlations
(MU1 SIG11 ... MUp SIGpp RHO12 RHO13 ... RHO23
RHO24 ... RHO(p-1,p)).
MACRO VARIABLES REFERENCED:
NUMVARS = numeric scalar representing total number of