Section 1.2, page 2
/* Section 1.2: read the age sample and summarize it with PROC UNIVARIATE. */
data a;
  /* Trailing @@ holds the input line so each value on it becomes its own observation. */
  input Age @@;
  datalines;
45 55 67 89 78 67 56 43 44 45 57 69 78 76 54 34 23
46 67 65 54 98 78 67 58 49 53 42 74 97 43 45 46 47
48 59 70 89 78 76 65 34 45 36 67 45 47 75 64 61
;
run;

/* Default univariate summary of AGE. */
proc univariate data=a;
  var age;
run;
Section 1.3, page 10
/* Section 1.3 (p. 10): bin AGE into 10-year classes and tabulate them.
   Uses the age data set A built in Section 1.2; the class limits are
   chosen by the researcher. */
data b;
  set a;
  /* The first literal carries a padding blank so that AGECAT is created
     8 characters wide, wide enough for '[90,100)'. */
  select;
    when (20 <= age < 30)  AGECAT = '[20,30 )';
    when (30 <= age < 40)  AGECAT = '[30,40)';
    when (40 <= age < 50)  AGECAT = '[40,50)';
    when (50 <= age < 60)  AGECAT = '[50,60)';
    when (60 <= age < 70)  AGECAT = '[60,70)';
    when (70 <= age < 80)  AGECAT = '[70,80)';
    when (80 <= age < 90)  AGECAT = '[80,90)';
    when (90 <= age < 100) AGECAT = '[90,100)';
    otherwise;             /* ages outside [20,100) keep a blank AGECAT */
  end;
run;

/* One-way frequency table of the age classes. */
proc freq data=b;
  tables agecat;
run;
Section 1.3, page 11
/* Section 1.3 (p. 11): cross-tabulate systolic pressure (below / at-or-above 145)
   against 10-year age classes. */
Data a;
/***Input data***/
/* Each pair on a line is one subject: age followed by systolic pressure. */
Input age syst @@; cards;
45 100 55 125 67 155 89 145 78 187 67 167
56 163 43 158 44 148 45 157 57 169 69 147
78 178 76 156 54 135 34 167 23 124 46 165
67 147 65 148 54 138 98 144 78 136 67 188
58 168 49 148 53 148 42 158 74 144 97 149
43 137 45 128 46 159 47 157 48 147 59 136
70 135 89 167 78 135 76 164 65 132 34 173
45 145 36 156 67 125 45 156 47 145 75 164
64 183 61 128
;
/***end of input data***/
data b;set a;
/* Declare the widths explicitly: a character variable otherwise takes the
   length of the first literal assigned to it, which would truncate the
   longer 'SYST >= 145' label. */
length SYSTD $ 11 AGECAT $ 8;
if syst<145 then SYSTD='SYST < 145';
else if syst>=145 then SYSTD='SYST >= 145';
if 20<=age<30 then AGECAT='[20,30 )';
else if 30<=age<40 then AGECAT='[30,40)';
else if 40<=age<50 then AGECAT='[40,50)';
else if 50<=age<60 then AGECAT='[50,60)';
else if 60<=age<70 then AGECAT='[60,70)';
else if 70<=age<80 then AGECAT='[70,80)';
else if 80<=age<90 then AGECAT='[80,90)';
else if 90<=age<100 then AGECAT='[90,100)';
run;
/* Two-way table: systolic class (rows) by age class (columns). */
proc freq data=b;tables systd*agecat;run;
Section 1.4, page 13
/* Section 1.4 (p. 13): build a design with PROC PLAN. */
data a;   /* bare DATA statement kept from the original; it carries no statements */
run;

proc plan seed=1;   /* test seed for dummy data */
  /* SITE, AGE and BLOCK carry the ORDERED keyword; TRTGRP (3 levels) does not. */
  factors site   = 2 ordered
          age    = 2 ordered
          block  = 6 ordered
          trtgrp = 3;
run;
Section 1.6, page 17
/* Section 1.6 (p. 17): univariate summary of AGE with the NORMAL option
   (normality tests). Runs on the most recently created data set. */
proc univariate normal;
  var age;
run;
Section 2.1.1, page 21
/* Section 2.1.1 (p. 21): sample size so that a (1-alpha) confidence interval
   for one proportion has margin of error d. */
data a;
  /*** input data ***/
  d     = .03;   /* margin of error */
  alpha = .05;   /* confidence level = 0.95 */
  ptilt = .5;    /* assumed proportion */
  /*** end of input data ***/

  zcrit     = probit(1 - alpha/2);   /* two-sided normal critical value */
  targetvar = (d / zcrit)**2;        /* variance the estimate must reach */
  N = floor(ptilt*(1 - ptilt) / targetvar) + 1;
  keep n;
run;

proc print data=a;
run;
Section 2.1.1, page 22, top of the page
/* Section 2.1.1 (p. 22, top): normal-approximation confidence interval
   for a single proportion. */
data a;
  /*** input data ***/
  n     = 1068;   /* sample size */
  n1    = 500;    /* subjects having the disease */
  alpha = .05;    /* corresponds to a 95% CI */
  /*** end of input data ***/

  PHAT   = n1 / n;
  z2side = probit(1 - alpha/2);
  halfwidth = z2side * sqrt(phat*(1 - phat)/n);   /* working value, not stored */
  LOWER_CI = phat - halfwidth;
  UPPER_CI = phat + halfwidth;
  drop halfwidth;
run;

proc print data=a;
  var lower_ci phat upper_ci;
run;
Section 2.1.1, page 22, bottom of the page
/* Section 2.1.1 (p. 22, bottom): expand success/failure counts into one row
   per subject and request PROC FREQ's BINOMIAL statistics. */
data a;
  /*** input data ***/
  n1 = 3;    /* number of successes */
  n  = 20;   /* sample size */
  /*** end of input data ***/
  n2 = n - n1;

  /* n1 rows coded yes=1, followed by n2 rows coded yes=2 */
  do i = 1 to n1;
    yes = 1;
    output;
  end;
  do i = 1 to n2;
    yes = 2;
    output;
  end;
run;

data b;
  set a;
run;

proc freq data=b;
  tables yes / binomial;
run;
Section 2.1.1, page 23, top of the page
/* Section 2.1.1 (p. 23, top): sample size for testing H0: p = p0 against
   p = p1 at level alpha with the requested power.
     n = ( (z_alpha*sqrt(p0*(1-p0)) + z_beta*sqrt(p1*(1-p1))) / (p1-p0) )**2
   The whole ratio is squared; this is the n that makes the power formula
   1 - probnorm((z*sep0 - d)/sep1) equal to the requested power. */
data a;
  /**** input data ***/
  p0    = .23;
  p1    = .35;
  alpha = .05;
  power = .8;   /* power of the test */
  /**** end of input data ***/

  z1side = probit(1 - alpha);     /* if 1 sided */
  z2side = probit(1 - alpha/2);   /* if 2 sided */
  zbeta  = probit(power);

  /* ** binds tighter than /, so the ratio is parenthesized before squaring */
  N1SIDE = floor(((z1side*sqrt(p0*(1-p0)) + zbeta*sqrt(p1*(1-p1))) / (p1-p0))**2) + 1;
  N2SIDE = floor(((z2side*sqrt(p0*(1-p0)) + zbeta*sqrt(p1*(1-p1))) / (p1-p0))**2) + 1;
  keep n1side n2side;
run;

proc print data=a;
run;
Section 2.1.1, page 23, bottom of the page
/* Section 2.1.1 (p. 23, bottom): large-sample z test of H0: p = p0. */
data a;
  /** input data ***/
  n     = 86;
  n1    = 35;
  p0    = .23;
  alpha = .05;
  /*** end of input data **/

  PHAT  = n1 / n;
  senull = sqrt(p0*(1 - p0)/n);       /* standard error under H0 */
  z     = (phat - p0) / senull;
  PVALUE1 = 1 - probnorm(z);            /* upper-tail (one-sided) p-value */
  PVALUE2 = 2*(1 - probnorm(abs(z)));   /* two-sided p-value */
  keep phat pvalue1 pvalue2;
run;

proc print data=a;
run;
Section 2.1.1, page 24, top of the page
/* Section 2.1.1 (p. 24, top): power of the one-sample proportion test
   at the alternative p1 for a given n. */
data a;
  /*** input data ***/
  n     = 50;
  p0    = .23;
  p1    = .35;
  alpha = .05;
  /*** end of input data **/

  shift  = p1 - p0;
  crit2  = probit(1 - alpha/2);       /* two-sided critical value */
  crit1  = probit(1 - alpha);         /* one-sided critical value */
  senull = sqrt(p0*(1 - p0)/n);       /* standard error under H0 */
  sealt  = sqrt(p1*(1 - p1)/n);       /* standard error under the alternative */

  POWER1 = 1 - probnorm((crit1*senull - shift) / sealt);
  POWER2 = 1 - probnorm((crit2*senull - abs(shift)) / sealt);
  keep power1 power2;
run;

proc print data=a;
run;
Section 2.1.2, page 24, bottom of the page
/* Section 2.1.2 (p. 24, bottom): test observed day counts against
   hypothesized percentages. */
data a;
  input days freq @@;
  datalines;
1 200 2 180 3 170 4 150 5 200
;
run;

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a order=data;
  weight freq;   /* each row stands for FREQ observations */
  tables days / nocum testp=(20 20 20 20 20);   /* hypothesized distribution */
run;
Section 2.2.1, page 25
/* Section 2.2.1 (p. 25): sample sizes so that the CI for a difference of two
   proportions has margin of error d; N1 = f * N2. */
data a;
  /*** input data ***/
  d      = .03;   /* margin of error */
  alpha  = .05;
  f      = 1;
  ptilt1 = 0.5;
  ptilt2 = 0.5;
  /*** end of input data ***/

  z2side = probit(1 - alpha/2);
  var    = (d / z2side)**2;   /* variance the difference estimate must reach */
  N2 = floor(((ptilt1*(1 - ptilt1)/f) + (ptilt2*(1 - ptilt2))) / var) + 1;
  N1 = n2 * f;
run;

proc print data=a;
  var n1 n2;
run;
Section 2.2.1, page 26
/* Section 2.2.1 (p. 26): confidence interval for the difference between two
   independent proportions. */
data a;
  /**** input data ****/
  P1hat = 1000/2135;   /* proportion having the disease for 1st sample */
  P2hat = 1012/2135;   /* proportion having the disease for 2nd sample */
  n1    = 2135;        /* sample size for 1st sample */
  n2    = 2135;        /* sample size for 2nd sample */
  alpha = .05;
  /*** end of input data ****/

  z2side   = probit(1 - alpha/2);
  DIFFPROP = p1hat - p2hat;
  /* unpooled standard error of the difference; working value, not stored */
  sediff   = sqrt((p1hat*(1 - p1hat)/n1) + (p2hat*(1 - p2hat)/n2));
  LOWER_CI = diffprop - z2side*sediff;
  UPPER_CI = diffprop + z2side*sediff;
  drop sediff;
run;

proc print data=a;
  var lower_ci diffprop upper_ci;
run;
Section 2.2.1, page 27, top of the page
/* Section 2.2.1 (p. 27, top): sample sizes for testing equality of two
   proportions (both variances bounded by 1/4); N1 = f * N2. */
data a;
  /** input data ***/
  alpha = 0.05;
  Power = 0.8;
  d     = .03;
  f     = 1;
  /** end of input data **/

  z2side = probit(1 - alpha/2);
  z1side = probit(1 - alpha);
  zbeta  = probit(power);

  alloc = (1/(4*f)) + 1/4;   /* variance bound for the two groups; not stored */
  N21SIDE = floor((((z1side + zbeta)/d)**2) * alloc) + 1;
  N22SIDE = floor((((z2side + zbeta)/d)**2) * alloc) + 1;
  N11SIDE = n21side * f;
  N12SIDE = n22side * f;
  drop alloc;
run;

proc print data=a;
  var n11side n21side n12side n22side;
run;
Section 2.2.1, page 27, bottom of the page
/* Section 2.2.1 (p. 27, bottom): pooled z test for equality of two proportions. */
data a;
  /*** input data ***/
  yes1 = 2500;   /* number of yes answers, group 1 */
  yes2 = 2000;   /* number of yes answers, group 2 */
  n1   = 4361;   /* sample size for group 1 */
  n2   = 4361;   /* sample size for group 2 */
  /*** end of input data **/

  p1hat  = yes1 / n1;
  p2hat  = yes2 / n2;
  pcomb  = (yes1 + yes2) / (n1 + n2);   /* pooled proportion under H0 */
  sepool = sqrt((pcomb*(1 - pcomb)/n1) + (pcomb*(1 - pcomb)/n2));
  z      = (p1hat - p2hat) / sepool;

  PVALUE1 = 1 - probnorm(z);            /* one-sided (upper tail) */
  PVALUE2 = 2*(1 - probnorm(abs(z)));   /* two-sided */
  keep pvalue1 pvalue2;
run;

proc print data=a;
run;
Section 2.2.1, page 28
/* Section 2.2.1 (p. 28): chi-square test on a 2x2 table entered as cell counts. */
data a;
  /*** input data ***/
  /* Counts are read in the order (outcome,gender) = (0,0) (0,1) (1,0) (1,1). */
  do OUTCOME = 0 to 1;
    do GENDER = 0 to 1;
      input freq @@;
      output;
    end;
  end;
  datalines;
1861 2361 2500 2000
;
run;
/****** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  tables outcome*gender / chisq;
run;
Section 2.2.1, page 29
/* Section 2.2.1 (p. 29): power of the two-proportion test for a true
   difference d centred on 0.5. */
data a;
  /**** input data ****/
  d      = .03;          /* difference between the proportions */
  p1tilt = .5 + (d/2);
  p2tilt = .5 - (d/2);
  n1     = 2000;         /* sample size for 1st sample */
  n2     = 2000;         /* sample size for 2nd sample */
  alpha  = .05;
  /*** end of input data ****/

  z1side = probit(1 - alpha);
  z2side = probit(1 - alpha/2);
  n      = n1 + n2;

  senull = sqrt((1/4)*((1/n1) + (1/n2)));                        /* SE with p = 1/2 in both groups */
  sealt  = sqrt((p1tilt*(1 - p1tilt)/n1) + (p2tilt*(1 - p2tilt)/n2));   /* SE at the alternative */

  POWER1 = 1 - probnorm((z1side*senull - d) / sealt);
  POWER2 = 1 - probnorm((z2side*senull - abs(d)) / sealt);
  keep power1 power2;
run;

proc print data=a;
run;
Section 2.2.2, page 30
/* Section 2.2.2 (p. 30): relative-risk measures for a 2x2 table. */
data a;
  /*** input data ***/
  do OUTCOME = 0 to 1;
    do GENDER = 0 to 1;
      input freq @@;
      output;
    end;
  end;
  datalines;
1861 2361 2500 2000
;
run;
/****** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  /* The rows are the groups (gender) and the columns are the dichotomous
     response (outcome). */
  tables gender*outcome / chisq relrisk;
run;
Section 2.2.3, page 31
/* Section 2.2.3 (p. 31): logistic regression of OUTCOME on GENDER from cell counts. */
data a;
  /*** input data ***/
  do OUTCOME = 0 to 1;
    do GENDER = 0 to 1;
      input freq @@;
      output;
    end;
  end;
  datalines;
1861 2361 2500 2000
;
run;
/****** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

/* By using the DESCENDING option, the model gives the probability of the
   outcome. If it is not used, the probability obtained is for the
   non-occurrence of the event. */
proc logistic data=a descending;
  freq freq;
  model outcome = gender;
run;
Section 2.2.4, page 33
/* Section 2.2.4 (p. 33): AD by SOLDMORE association, stratified by CITY (CMH). */
data a;
  /*** input data ***/
  /* Counts are read with SOLDMORE varying fastest, then AD, then CITY. */
  do city = 0 to 1;
    do ad = 0 to 1;
      do soldmore = 0 to 1;
        input freq @@;
        output;
      end;
    end;
  end;
  datalines;
8 10 13 5 6 10 10 10
;
run;

data b;
  set a;
run;

proc sort data=b;
  by city;
run;

/* In a three-way request the first variable (CITY) is the stratum. */
proc freq data=b;
  weight freq;
  tables city*ad*soldmore / cmh;
run;
Section 2.2.5, page 35
/* Section 2.2.5 (p. 35): chi-square test of GENDER (2 levels) by BRAND (5 levels). */
data a;
  /*** input data ***/
  /* One row of five brand counts per gender. */
  do GENDER = 0 to 1;
    do BRAND = 1 to 5;
      input count @@;
      output;
    end;
  end;
  datalines;
10 15 10 25 40 5 10 10 40 35
;
run;
/****** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight count;
  tables gender*brand / chisq;
run;
Section 2.3.1, page 37
/* Section 2.3.1 (p. 37): agreement between two instructors on a 2x2 table. */
data a;
  /***** input data **/
  do INST2 = 0 to 1;
    do INST1 = 0 to 1;
      input freq @@;
      output;
    end;
  end;
  datalines;
125 75 275 20
;
run;
/****** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  tables inst2*inst1 / agree;   /* agreement statistics */
  test kappa;                   /* test on the kappa coefficient */
run;
Section 2.3.1, page 38
/* Section 2.3.1 (p. 38): confidence interval for the difference between two
   paired proportions, from the two discordant counts. */
data b;
  /*** input data ***/
  n     = 495;
  alpha = .05;
  passa = 275;   /* instructor a pass and instructor b fail */
  passb = 75;    /* instructor b pass and instructor a fail */
  /*** end of input data ***/

  DIFF   = (passa - passb) / n;
  se     = sqrt(passa + passb) / n;
  z2side = probit(1 - alpha/2);
  margin = se * z2side;   /* half-width of the interval; not stored */
  UPPER_CI = diff + margin;
  LOWER_CI = diff - margin;
  drop margin;
run;

proc print data=b;
  var lower_ci diff upper_CI;
run;
Section 2.4.1, page 41
/* Section 2.4.1 (p. 41): template for an exact test on a two-way table.
   variable1 and variable2 are placeholders for the row and column variables. */
proc freq;
  tables variable1*variable2 / exact;
run;   /* closes the step so it runs without waiting for a following step */
Section 2.4.2, page 42
/* Section 2.4.2 (p. 42): logistic regression of DTH on CITY (3 levels),
   coded with two indicator variables; city 1 is the reference level. */
data a;
  /***** input data **/
  do dth = 0 to 1;
    do city = 1 to 3;
      input freq @@;
      output;
    end;
  end;
  datalines;
15 4 13 9 33 10
;
run;
/****** end of input data ***/

data a1;
  set a;
  select (city);
    when (1) do; Z1 = 0; Z2 = 0; end;
    when (2) do; z1 = 1; z2 = 0; end;
    when (3) do; z1 = 0; z2 = 1; end;
    otherwise;   /* any other city leaves both indicators missing */
  end;
run;

proc logistic data=a1 descending;
  freq freq;
  model dth = z1 z2;
run;
Section 2.4.3, page 43
/* Section 2.4.3 (p. 43): OUTCOME by EXERCISE, stratified by GENDER (CMH). */
data a;
  /*** input data ***/
  /* Counts are read with EXERCISE varying fastest, then OUTCOME, then GENDER. */
  do GENDER = 0 to 1;
    do OUTCOME = 0 to 1;
      do EXERCISE = 0 to 2;
        input freq @@;
        output;
      end;
    end;
  end;
  datalines;
21 23 34 43 32 32 24 33 20 26 27 28
;
run;
/*** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  tables gender*outcome*exercise / cmh;
run;
Section 2.4.4, page 44
/* Section 2.4.4 (p. 44): chi-square test of RACE (3 levels) by BRANDS (5 levels). */
data a;
  /*** input data ***/
  do RACE = 1 to 3;
    do BRANDS = 1 to 5;
      input freq @@;
      output;
    end;
  end;
  datalines;
5 8 25 7 6 6 6 18 5 5 10 20 16 7 7
;
run;
/*** end of input data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  tables race*brands / chisq;
run;
Section 2.5, page 46
/* Section 2.5 (p. 46): four 0/1 ratings per volunteer, compared through a
   four-way AGREE request. */
data a;
  /* Each record is: volunteer id, then the four 0/1 item scores. */
  input volunteer roominess looks gasmileage reliability @@;
  datalines;
1 1 0 1 1 2 0 1 0 0 3 0 1 1 1 4 1 1 0 1
5 0 1 1 0 6 1 1 1 1 7 0 0 1 1 8 1 1 1 0
9 0 0 1 1 10 1 1 0 0
;
run;

proc freq data=a;
  tables roominess*looks*gasmileage*reliability / agree;
run;
Section 2.6.1, page 47
/* Section 2.6.1 (p. 47): CMH statistics for GENDER by a three-level RATING. */
data a;
  /*** input data ***/
  /* One row of three rating counts per gender. */
  do GENDER = 0 to 1;
    do RATING = 0 to 2;
      input freq @@;
      output;
    end;
  end;
  datalines;
45 55 25 55 35 15
;
run;

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  tables gender*rating / cmh;
run;
Section 2.6.2, page 48, top of the page
/* Section 2.6.2 (p. 48, top): trend test of GENDER across the ordered RATING levels. */
data a;
  /*** input data ***/
  /* One pair of gender counts per rating level. */
  do RATING = 0 to 2;
    do GENDER = 0 to 1;
      input freq @@;
      output;
    end;
  end;
  datalines;
45 55 55 35 25 15
;
run;

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc freq data=a;
  weight freq;
  tables rating*gender / trend;
run;
Section 2.6.3, page 48, bottom of the page
/* Section 2.6.3 (p. 48, bottom): measures of association for GENDER by RATING.
   Runs on the most recently created data set, weighted by FREQ. */
proc freq;
  weight freq;
  tables gender*rating / measures;
run;
Section 2.6.4, page 50
/* Section 2.6.4 (p. 50): ridit analysis. The reference sample defines a
   midpoint cumulative count for each category; the mean of those values over
   the test sample (RBAR) is compared with 0.5 by a normal approximation. */
data a;
  /*** input data ***/
  * reference sample;
  input refsamp @@;
  datalines;
45 55 25
;
run;
/*** end of input data ***/

data a1;
  set a;
  count   = _n_;         /* category index, in input order */
  refhalf = refsamp/2;   /* half of the category's own count */
run;

/* PROC SORT requires a BY statement; sorting by COUNT keeps the categories
   in input order, which the running total below relies on. */
proc sort data=a1;
  by count;
run;

data a2;
  set a1;
  /* running total of the reference counts (the sum statement retains REFCUM) */
  if count = 1 then refcum = refsamp;
  else refcum + refsamp;
  reflag = lag(refcum);              /* total of all earlier categories */
  if reflag = . then reflag = 0;     /* first category has nothing before it */
  refsum = reflag + refhalf;         /* earlier categories plus half of this one */
  a = 1;                             /* constant key used only for the merge */
  keep refsamp refsum a;
run;

proc sort data=a2;
  by a;
run;

data b;
  /*** input data ***/
  * Test sample;
  input testsamp @@;
  datalines;
55 35 15
;
run;

data b1;
  set b;
  a = 1;
run;

proc sort data=b1;
  by a;
run;

/* Both inputs have the same key on every row, so rows pair up by position. */
data final;
  merge a2 b1;
  by a;
  drop a;
  prod1 = testsamp*refsum;
  prod2 = (refsamp + testsamp)**3;
run;

/* Column sums: m = reference total, n = test total, o = sum(prod1), p = sum(prod2). */
proc univariate data=final noprint;
  var refsamp testsamp prod1 prod2;
  output out=sum sum=m n o p;
run;

data sum;
  set sum;
  RBAR   = o/(m*n);
  serbar = (1/(2*sqrt(3*n)))
           * sqrt(1 + ((n+1)/m) + (1/(m*(m+n-1))) - (p/(m*(m+n)*(m+n-1))));
  z      = abs((rbar - .5)/serbar);
  PVALUE = 1 - probnorm(z);   /* upper-tail p-value of |z| */
  keep rbar pvalue;
run;

proc print data=sum;
run;
Section 2.6.5, page 51, top of the page
/* Statement fragment: TEST naming KAPPA and WTKAPPA. The enclosing step is
   not shown here - presumably a PROC FREQ step with an AGREE table request;
   confirm against the book page. */
test kappa wtkappa;
Section 2.6.5, page 51, bottom of the page
/* Statement fragment: OUTCOME by GENDER table with AGREE and the WT=FC
   suboption (presumably the weight type used for weighted kappa - confirm).
   The PROC statement that opens this step is not shown here. */
tables outcome*gender /agree (wt=fc);run;
Section 2.6.6.1, page 52
/* Section 2.6.6.1 (p. 52): logistic regression of a three-level response
   CLEAN (0, 1, 2) on CITY, from grouped counts. */
data a;
  /*** input data ***/
  /* Each triple is: clean level, city, frequency. */
  input CLEAN CITY freq @@;
  datalines;
0 0 17 0 1 9 1 0 19 1 1 11 2 0 8 2 1 5
;
run;
/*** end of data ***/

/* Copy of A onto itself, kept from the original listing. */
data a;
  set a;
run;

proc logistic data=a descending;
  model clean = city;
  freq freq;
run;
Section 2.6.6.2, page 53
/* Section 2.6.6.2 (p. 53): subject-level data, one (clean, city) pair per
   observation. The values present are clean in 0-2 and city in 1-4. */
data a;
/***input data***/
/* Trailing @@ holds the line so every pair on it becomes an observation. */
input clean city @@;cards;
0 1 0 2 0 3 0 4 1 1 1 3 1 3 1 4 1 1 1 2 1 3 1 2 1 1 1 2 1 3 1 4 0 1 0 2 0 3 0 1 1 1 1 2 1 3 1 3 0 1 0 2 0 3 0 4 1 1 1 3 1 3 1 4 0 1 1 1 0 1 1 1 1 1 0 2 0 3 0 4 1 2 1 2 1 3 1 1 0 1 0 2 0 3 0 1 1 1 1 2 1 3 1 4 0 1 0 3 0 3 0 1 1 1 1 2 1 3 1 1 0 1 0 2 0 3 0 4 1 4 1 2 1 3 1 4 0 1 1 1 0 1 1 1 0 1 0 2 0 3 0 4 1 1 1 3 1 3 1 1 0 1 0 2 0 3 2 4 2 1 2 2 2 3 2 4 2 2 0 2 0 3 0 4 1 1 1 2 1 3 2 4 2 1 2 2 2 3 2 4 2 2 1 2 1 3 1 4 0 1 1 1 0 1 1 1 2 3 2 3 2 3 2 4 2 3 1 3 1 3 1 4 0 3 1 3 0 3 1 3 2 3 2 3 2 3 2 3 2 1 2 1 2 1 2 2 2 1 2 4 2 1 2 1
;
/***end of input data**/
/* Code CITY (1-4) as three indicator variables with city 1 as the reference
   level, then regress CLEAN on them. */
data a;
  set a;
  select (city);
    when (1) do; Z1 = 0; Z2 = 0; Z3 = 0; end;
    when (2) do; z1 = 1; z2 = 0; z3 = 0; end;
    when (3) do; z1 = 0; z2 = 1; z3 = 0; end;
    when (4) do; z1 = 0; z2 = 0; z3 = 1; end;
    otherwise;   /* any other city leaves the indicators missing */
  end;
run;

proc logistic data=a descending;
  model clean = z1 z2 z3;
run;
Section 3.1, page 58, top of the page
/* Section 3.1 (p. 58, top): sample size so that the CI for a mean has
   margin of error d, given a guessed standard deviation. */
data a;
  /*** input data ***/
  sd    = 10;    /* guessed standard deviation */
  alpha = .05;   /* alpha level */
  d     = 3;     /* assumed error margin */
  /*** end of input data ***/

  zcrit = probit(1 - alpha/2);   /* two-sided normal critical value */
  N = floor((sd*zcrit/d)**2) + 1;
  keep n;
run;

proc print data=a;
run;
Section 3.1, page 58, bottom of the page
/* Section 3.1 (p. 58, bottom): PROC TTEST on a sample of 43 ages. */
data a;
  /*** input data ***/
  input AGE @@;
  datalines;
34 15 45 45 56 41 72 14 16 34 46 23 15 28 28 30 39 23 70 28 10 60 32 24 16 17 18 36 22 24 43 72 45 47 28 49 12 11 57 23 35 37 14
;
run;
/*** end of input data ***/

/* If alpha is not specified it is assumed to be .05. */
proc ttest data=a alpha=.05;
  var age;
run;
Section 3.1, page 59, top of the page
/* Section 3.1 (p. 59, top): sample size for testing a mean (mu0 against mu1)
   at level alpha with the requested power. */
data a;
  /*** input data ***/
  sd    = 10;    /* standard deviation guess */
  mu0   = 35;    /* hypothesized mean */
  mu1   = 40;    /* alternative mean needed to detect */
  alpha = .05;   /* alpha level */
  power = .8;    /* power of the test at mu1 */
  /*** end of input data ***/

  shift = mu1 - mu0;
  crit2 = probit(1 - alpha/2);   /* 2 sided */
  crit1 = probit(1 - alpha);     /* 1 sided */
  zpow  = probit(power);

  N1SIDE = floor((sd*(crit1 + zpow)/shift)**2) + 1;
  N2SIDE = floor((sd*(crit2 + zpow)/shift)**2) + 1;
  keep n1side n2side;
run;

proc print data=a;
run;
Section 3.1, page 59, bottom of the page
/* Section 3.1 (p. 59, bottom): normality tests on a sample of 32 ages. */
data a;
  /*** input data ***/
  input AGE @@;
  datalines;
55 45 49 16 21 62 14 66 34 23 15 39 23 41 28 20
40 32 24 46 37 18 42 25 17 28 49 12 57 23 24 72
;
run;
/*** end of input data ***/

/* No VAR statement: every numeric variable (here only AGE) is analysed. */
proc univariate data=a normal;
run;
Section 3.1, page 60
/* Section 3.1 (p. 60): one-sample t test of mean AGE against H0 = 35. */
data a;
  /*** input data ***/
  input AGE @@;
  datalines;
55 45 49 16 21 62 14 66 34 23 15 39 23 41 28 20
40 32 24 46 37 18 42 25 17 28 49 12 57 23 24 72
;
run;
/*** end of input data ***/

proc ttest data=a alpha=.05 h0=35;
  var age;
run;
Section 3.1, page 61, top of the page
/* Section 3.1 (p. 61, top): chi-square test of the variance of AGE against
   a hypothesized value. */
data a;
  /*** input data ***/
  input AGE @@;
  datalines;
55 45 49 16 21 62 14 66 34 23 15 39 23 41 28 20
40 32 24 46 37 18 42 25 17 28 49 12 57 23 24 72
;
run;
/*** end of input data ***/

/* Store the sample size (n) and sample variance (S2) in STATS. */
proc univariate data=a noprint;
  var age;
  output out=stats n=n var=S2;
run;

data a;
  set stats;
  /*** input data ***/
  Var = 100;   /* null hypothesis of the variance */
  /*** end of input data ***/
  CHISQ  = (n - 1)*s2/var;
  PVALUE = 1 - probchi(chisq, n - 1);   /* upper-tail p-value on n-1 df */
run;

proc print data=a;
  var s2 chisq pvalue;
run;
Section 3.1, page 61, bottom of the page
/* Section 3.1 (p. 61, bottom): power of the test of a mean at the
   alternative mu1 (normal approximation). */
data a;
  /*** input data ***/
  n     = 20;    /* sample size */
  sd    = 10;    /* standard deviation guess */
  alpha = .05;   /* alpha level */
  mu0   = 35;    /* hypothesized mean */
  mu1   = 40;    /* alternative mean needed to detect */
  /*** end of input data ***/

  shift  = mu1 - mu0;
  crit2  = probit(1 - alpha/2);       /* 2 sided */
  crit1  = probit(1 - alpha);         /* 1 sided */
  effect = (shift*sqrt(n))/sd;        /* shift in standard-error units */

  POWER1 = 1 - probnorm(crit1 - effect);
  POWER2 = 1 - probnorm(crit2 - abs(effect));
  keep power1 power2;
run;

proc print data=a;
run;
Section 3.2.1.1, page 62
/* Section 3.2.1.1 (p. 62): inputs and critical value for a two-sample
   calculation. */
Data a;
/**input data**/
sd1=10;* assumed sd for sample 1; sd2=9;* assumed sd for sample 2;
alpha=.05; d=2.5; f=1;
/**end of input data**/
/* PROBIT is the inverse normal CDF and returns the critical value;
   PROBNORM would return a probability instead. */
z2side=probit(1-alpha/2);