ST2137 Computer Aided Data Analysis
Midterm Solutions
Problem 1 (Total 20pts):
(1)F (2pts)
(2)T (2pts)
(3)F (2pts)
(4)F (2pts)
(5)F (2pts)
(6)T (2pts)
(7)F (2pts)
(8)F (2pts)
(9)T (2pts)
(10)T (2pts)
Problem 2 (Total 20pts):
(a)(Total 8pts)
/* Read P2a.txt with column input: id in columns 1-3, gender (character) in
   column 4, height in columns 5-7, weight in columns 8-9. */
data P2a; /* 1pt */
infile "G:\ST2137\P2a.txt"; /* 1pt */
input id 1-3 gender$ 4 height 5-7 weight 8-9; /* 2pt */
/* Equivalent formatted input (note the period that ends the $1. informat): */
/* input @1 id 3.0 @4 gender $1. @5 height 3.0 @8 weight 2.0; */
run;
/* Read P2b.txt with column input: id in columns 1-3, test in columns 4-6. */
data P2b; /* 1pt */
infile "G:\ST2137\P2b.txt" ; /* 1pt */
input id 1-3 test 4-6; /* 1pt */
/* Equivalent formatted input: */
/* input @1 id 3.0 @4 test 3.0;*/
run; /* 1pt */
(b) (Total 7pts)
/* Sort both data sets by id so they can be match-merged with a BY statement. */
proc sort data=P2a; /* 1pt */
by id; /* 1pt */
proc sort data=P2b; /* 1pt */
by id;
/* Match-merge P2a and P2b on id into the combined data set P2ab. */
data P2ab; /* 1pt */
merge P2a P2b; /* 1pt */
by id; /* 1pt */
run; /* 1pt */
(c) (Total 5pts)
/* Add a letter grade to P2ab: A for test>=90, B for 70<=test<90, C otherwise. */
/* Note: a missing test value compares below every number in SAS, so it falls
   through to the final ELSE and receives grade C. */
data P2ab1; /* 1pt */
set P2ab; /* 1pt */
if test>=90 then grade="A"; /* 1pt */
else if test>=70 then grade="B"; /* 1pt */
else grade="C"; /* 1pt */
run;
Problem 3 (Total 20 pts):
(a)(Total 9 pts)
/* Character formats that replace the gender and grade codes with readable labels. */
proc format; /* 1pt */
value $genfmt "M"="male"
"F"="female"; /* 1pt */
value $gradefmt "A"="test>=90"
"B"="70<=test<90"
"C"="test<70"; /* 1pt */
/* Two-way frequency table of gender by grade; CHISQ requests the chi-square tests. */
proc freq data=P2ab1; /* 1pt */
title "Two-way Table for gender by grade"; /* 1pt */
tables gender*grade/chisq; /* 2pt */
format gender $genfmt. /* 1pt */
grade $gradefmt.; /* 1pt */
run;
(b) (Total 4 pts)
/* Descriptive statistics for height, computed separately for each gender level. */
proc univariate data=P2ab1; /* 1pt */
var height; /* 1pt */
class gender; /* 1pt */
run; /* 1pt */
(c)(Total 7 pts)
/* Sort by gender first: the BY statement in PROC GPLOT needs BY-group order. */
proc sort data=P2ab1; /* 1pt */
by gender; /* 1pt */
/* One scatter plot of weight (vertical axis) against height per gender. */
proc gplot data=P2ab1; /* 1pt */
title "Scatter plot for weight and height for Male and Female"; /* 1pt */
by gender; /* 1pt */
plot weight*height; /* 1pt */
run; /* 1pt */
Problem 4 (Total 25 pts)
(a) (6 pts)
# Column names for the four fixed-width fields of P2a.txt.
varnames<-c("id","gender","height","weight"); #(2pts)
# Read the fixed-width file: field widths 3,1,3,2 match id, gender, height, weight.
P4a=read.fwf("G:/ST2137/P2a.txt",header=F, col.names=varnames,widths=c(3,1,3,2)); #(4pts)
## Alternative solution
# Read without names first, then assign the column names afterwards.
varnames<-c("id","gender","height","weight"); ##(2pts)
P4a=read.fwf("G:/ST2137/P2a.txt",header=F, widths=c(3,1,3,2)); ## (3pts)
names(P4a)<-varnames; ##(1pt)
(b)(9pts)
(1) attach(P4a); # (1pt)
c(mean(height),var(height),median(height), quantile(height,.37)); #(4pts)
(2) attach(P4a);
table(gender); #(2pts)
(3) write.table(P4a,"G:/ST2137/P4a.txt") #(2pts)
(c)(10 pts) ## Solution looks like
# Generate x[n+2] = 2*x[n+1] - 3*x[n] from x[1]=0, x[2]=1 and print every term,
# stopping after the first negative term. All cat() output is diverted to P4b.txt.
x<-numeric(2);
sink("G:/ST2137/P4b.txt") # (2pts)
x[1]=0;x[2]=1;test=1; #(1pt)
cat("x[1]=",x[1],"\n");
cat("x[2]=",x[2],"\n"); #(1pt)
count=0; #(1pt)
while (test>=0){
  count<-count+1; #(1pt)
  x<-c(x,0);  # extend the vector by one slot for the new term
  x[count+2]=2*x[count+1]-3*x[count]; #(1pt)
  test=x[count+2]; #(1pt)
  cat("x[",count+2,"]=",test,"\n"); #(2pt)
}
# R is case-sensitive: the function is sink(), and calling it with no
# arguments ends the diversion and closes the output file.
sink(); #(1pt)
Problem 5 (Total 15 pts)
(a)(5pts) Steps
(1) Click ``Data", then click ``Sort cases" %% 1pt
(2) Click ``Subject", then move ``Subject" to the right panel %% 2pts
(3) Click ``Descending" button %% 1pt
(4) Click ``OK" %% 1pt
(b)(5pts) Steps
(1) Click ``Transform", then click ``Compute Variable" %% 1pt
(2) Put ``Average" into the ``Target Variable" panel %%1pt
(3) Write ``(Exam1+Exam2)/2" in the ``Numeric Expression" panel %% 2pts
(4) Click ``OK" %%1pt
(c)(5 pts) Steps
(1) Click ``Graphs", then click ``Legacy Dialogs", then click ``Histogram" %% 1pt
(2) Click ``Variable View" and give the variable ``Risk" the value labels ``1=Average Risk" and ``2=High Risk" %% 1pt
(3) Put ``Return" into the ``Variable" panel %% 1pt
(4) Put ``Risk" into the ``Columns" panel %% 1pt
(5) Click ``OK" %% 1pt