ST2137 Computer Aided Data Analysis

Midterm Solutions

Problem 1 (Total 20pts):

(1)F (2pts)

(2)T (2pts)

(3)F (2pts)

(4)F (2pts)

(5)F (2pts)

(6)T (2pts)

(7)F (2pts)

(8)F (2pts)

(9)T (2pts)

(10)T (2pts)

Problem 2 (Total 20pts):

(a)(Total 8pts)

/* Read P2a.txt with column input: id in columns 1-3, gender (character)
   in column 4, height in columns 5-7, weight in columns 8-9.
   The file path must be enclosed in plain ASCII quotes. */
data P2a; /* 1pt */
    infile "G:\ST2137\P2a.txt"; /* 1pt */
    input id 1-3 gender $ 4 height 5-7 weight 8-9; /* 2pt */
    /* Equivalent formatted input; the character informat needs its trailing
       period, otherwise "$ 1" is taken as column input from column 1:
       input @1 id 3.0 @4 gender $1. @5 height 3.0 @8 weight 2.0; */
run;

/* Read P2b.txt with column input: id in columns 1-3, test in columns 4-6.
   The file path must be enclosed in plain ASCII quotes. */
data P2b; /* 1pt */
    infile "G:\ST2137\P2b.txt"; /* 1pt */
    input id 1-3 test 4-6; /* 1pt */
    /* Equivalent formatted input: input @1 id 3.0 @4 test 3.0; */
run; /* 1pt */

(b) (Total 7pts)

/* A match-merge with BY needs both inputs ordered by the BY variable,
   so each data set is sorted by id first. */
proc sort data=P2a; /* 1pt */
    by id; /* 1pt */
run;

proc sort data=P2b; /* 1pt */
    by id;
run;

/* Combine the two sorted data sets, matching observations on id. */
data P2ab; /* 1pt */
    merge P2a P2b; /* 1pt */
    by id; /* 1pt */
run; /* 1pt */

(c) (Total 5pts)

/* Derive a letter grade from test: "A" when test>=90, "B" when
   70<=test<90, "C" otherwise.
   NOTE: SAS orders a missing numeric value below every number, so an
   observation with a missing test also falls through to "C". */
data P2ab1; /* 1pt */
    set P2ab; /* 1pt */
    if test>=90 then grade="A"; /* 1pt */
    else if test>=70 then grade="B"; /* 1pt */
    else grade="C"; /* 1pt */
run;

Problem 3 (Total 20 pts):

(a)(Total 9 pts)

/* Character formats that replace the stored codes with descriptive labels
   in printed output. All labels are delimited with plain ASCII quotes. */
proc format; /* 1pt */
    value $genfmt "M"="male"
                  "F"="female"; /* 1pt */
    value $gradefmt "A"="test>=90"
                    "B"="70<=test<90"
                    "C"="test<70"; /* 1pt */
run;

/* Two-way frequency table of gender by grade with chi-square statistics,
   displayed using the formats defined above. */
proc freq data=P2ab1; /* 1pt */
    title "Two-way Table for gender by grade"; /* 1pt */
    tables gender*grade/chisq; /* 2pt */
    format gender $genfmt. /* 1pt */
           grade $gradefmt.; /* 1pt */
run;

(b) (Total 4 pts)

/* Descriptive statistics for height, produced separately for each level
   of gender via the CLASS statement. */
proc univariate data=P2ab1; /* 1pt */
    class gender; /* 1pt */
    var height; /* 1pt */
run; /* 1pt */

(c)(Total 7 pts)

/* BY-group processing requires the data to be sorted by the BY variable. */
proc sort data=P2ab1; /* 1pt */
    by gender; /* 1pt */
run;

/* One weight-versus-height scatter plot per gender.
   The title text is delimited with plain ASCII quotes. */
proc gplot data=P2ab1; /* 1pt */
    title "Scatter plot for weight and height for Male and Female"; /* 1pt */
    by gender; /* 1pt */
    plot weight*height; /* 1pt */
run; /* 1pt */

Problem 4 (Total 25 pts)

(a) (6 pts)

## Names for the four fixed-width fields (strings use plain ASCII quotes).
varnames <- c("id", "gender", "height", "weight"); #(2pts)

## Read the fixed-width file: field widths 3, 1, 3 and 2 characters.
## The argument is spelled out as `widths`, the actual parameter name of read.fwf.
P4a <- read.fwf("G:/ST2137/P2a.txt", header=FALSE, col.names=varnames, widths=c(3,1,3,2)); #(4pts)

## Alternative solution

## Names for the four fixed-width fields (strings use plain ASCII quotes).
varnames <- c("id", "gender", "height", "weight"); ##(2pts)

## Read the fixed-width file first, then assign the column names afterwards.
P4a <- read.fwf("G:/ST2137/P2a.txt", header=FALSE, widths=c(3,1,3,2)); ## (3pts)
names(P4a) <- varnames; ##(1pt)

(b)(9pts)

## (1) Mean, variance, median and 37th percentile of height.
attach(P4a); # (1pt)
c(mean(height), var(height), median(height), quantile(height, .37)); #(4pts)

## (2) Frequency table of gender.
## attach() is repeated so that this part runs on its own; if P4a is already
## attached, R only reports that the objects are masked.
attach(P4a);
table(gender); #(2pts)

## (3) Write the data frame to a text file (path in plain ASCII quotes).
write.table(P4a, "G:/ST2137/P4a.txt") #(2pts)

(c)(10 pts) ## Solution looks like

## Generate the sequence x[n+2] = 2*x[n+1] - 3*x[n] starting from x[1]=0,
## x[2]=1, printing every term; the loop ends after the first negative term
## has been printed. All printed output is diverted to P4b.txt.
x <- numeric(2);
sink("G:/ST2137/P4b.txt") # (2pts)
x[1]=0; x[2]=1; test=1; #(1pt)
cat("x[1]=",x[1],"\n");
cat("x[2]=",x[2],"\n"); #(1pt)
count=0; #(1pt)
while (test>=0){
  count<-count+1; #(1pt)
  x<-c(x,0);                            # grow the vector by one slot
  x[count+2]=2*x[count+1]-3*x[count]; #(1pt)
  test=x[count+2]; #(1pt)
  cat("x[",count+2,"]=",test,"\n"); #(2pt)
}
## R is case-sensitive: the function is sink(), not Sink(). Calling it with
## no arguments stops the diversion and restores console output.
sink(); #(1pt)

Problem 5 (Total 15 pts)

(a)(5pts) Steps

(1) Click "Data", then click "Sort Cases" %% 1pt

(2) Click "Subject", then move "Subject" to the right panel %% 2pts

(3) Click the "Descending" button %% 1pt

(4) Click "OK" %% 1pt

(b)(5pts) Steps

(1) Click "Transform", then click "Compute Variable" %% 1pt

(2) Put "Average" into the "Target Variable" panel %% 1pt

(3) Write "(Exam1+Exam2)/2" in the "Numeric Expression" panel %% 2pts

(4) Click "OK" %% 1pt

(c) (5 pts) Steps

(1) Click "Graphs", then click "Legacy Dialogs", then click "Histogram" %% 1pt

(2) Click "Variable View" and give the variable "Risk" the value labels "1=Average Risk" and "2=High Risk" %% 1pt

(3) Put "Return" into the "Variable" panel %% 1pt

(4) Put "Risk" into the "Columns" panel %% 1pt

(5) Click "OK" %% 1pt