run_auc.m (main file)
filename = 'GPL85__GDS253__liver.csv'; % Data file (csv, first row with time points, first col with non-numeric IDs)
ci = 0.8; % Confidence interval for AUC and baseline (separately)
take_log = 1; % Take log of data?
N = 10000; % Number of bootstrap resamplings
[auc_, base, sig, reps, avg, time, pos, neg] = auc(filename, ci, take_log, N);
bi = find(neg(sig)>0.5*pos(sig) & pos(sig)>0.5*neg(sig));
up = find(pos(sig)>neg(sig) & ~(neg(sig)>0.5*pos(sig) & pos(sig)>0.5*neg(sig)));
down = find(pos(sig)<neg(sig) & ~(neg(sig)>0.5*pos(sig) & pos(sig)>0.5*neg(sig)));
auc.m
function [auc_, base, sig, reps, avg, time, pos, neg] = auc(filename, ci, take_log, N)
% Load data
data = importdata(filename, ',');
ids = data.textdata(2:end);
times = data.data(1,:);
data = data.data(2:end,:);
if take_log
data = log(data);
end
% Convenience variables
time = unique(times);
I = size(data,1); % Number of probe sets
T = length(time); % Number of time points
% Put all the data into a cell array so that each entry is a vector with
% the replicates for that probe set (row) and time point (col).
reps = cell(I, T);
avg = zeros(I, T);
for t=1:T
cols = find(times==time(t));
for i=1:I
reps{i,t} = data(i,cols);
avg(i,t) = mean(data(i,cols));
end
end
% Baselines
base.mean = zeros(I, 1);
base.std = zeros(I, 1);
base.ci_min = zeros(I, 1);
base.ci_max = zeros(I, 1);
for i=1:I
v = [reps{i,1}, reps{i,end}]; % Baseline points
% v = [reps{i,1}]; % Baseline points
base.mean(i) = mean(v); % This is the mean y-value
base.std(i) = std(v)*(time(end)-time(1)); % std of baseline AUC
z_base = norminv(1 - (1-ci)/2, 0, 1);
base.ci_min = -z_base*base.std;
base.ci_max = z_base*base.std;
end
% Find the mean and CI of the AUC
[auc_.mean, pos, neg] = auc_bailer(reps, time, base.mean); % For pos, neg
[auc_.ci_min, auc_.ci_max, auc_.std] = auc_bootstrap(N, reps, time, ci, base.mean);
sig = (auc_.ci_min > base.ci_max);
fprintf('Significant: %g of %g\n\n', sum(sig), I);
auc_bailer.m
function [auc_mean, pos, neg, auc_std] = auc_bailer(reps, time, base_mean)
I = size(reps,1); % Number of probe sets
T = size(reps,2); % Number of time points
for i=1:I
for t=1:T
reps{i,t} = reps{i,t} - base_mean(i);
end
end
pos = zeros(I, 1); % Positive area
neg = zeros(I, 1); % Negative area
% Bailer's method
w(1) = 1/2*(time(2)-time(1));
for t=2:T-1
w(t) = 1/2*(time(t+1)-time(t-1));
end
w(T) = 1/2*(time(T)-time(T-1));
auc_std = zeros(I, 1);
auc_mean = zeros(I, 1);
for i=1:I
y = zeros(1, T);
for t=1:T
y(t) = (mean(reps{i,t}));
auc_std(i) = auc_std(i) + w(t)^2 * var(reps{i,t})/length(reps{i,t});
end
[auc_mean(i), pos(i), neg(i)] = trapz_biphasic(time, y);
end
auc_std = sqrt(auc_std);
auc_bootstrap.m
function [ci_min, ci_max, stds] = auc_bootstrap(N, reps, time, ci, base_mean)
I = size(reps,1); % Number of probe sets
T = size(reps,2); % Number of time points
aucs = zeros(I, N);
if matlabpool('size') == 0
matlabpool open;
end
for i=1:I
for t=1:T
reps{i,t} = reps{i,t} - base_mean(i);
end
end
parfor n=1:N
b_reps = cell(I, T); % Bootstrapped version of reps
avg_vals = zeros(I, T); % Average expression values
for i=1:I
for t=1:T
r = reps{i,t};
for k=1:length(r)
b_reps{i,t}(k) = r(ceil(length(r)*rand)); % Random sample
end
avg_vals(i,t) = mean(b_reps{i,t});
end
end
% Numerical integration: trapezoidal rule
temp = zeros(I, 1);
for i=1:I
temp(i) = trapz_biphasic(time, avg_vals(i,:));
end
aucs(:,n) = temp; % For parfor
end
matlabpool close;
means = mean(aucs, 2);
stds = std(aucs, 0, 2);
% Calculate the confidence interval by looking at the distribution of
% bootstrap AUCs
s_aucs = sort(aucs, 2);
edge = (1-ci)/2; % Area on each edge of CI
min_cutoff = ceil(edge * N);
if min_cutoff == 0
min_cutoff = 1;
end
ci_min = s_aucs(:, min_cutoff);
max_cutoff = floor((1-edge) * N);
ci_max = s_aucs(:, max_cutoff);
trapz_biphasic.m
function [z, pos, neg] = trapz_biphasic(x, y)
z = 0;
pos = 0;
neg = 0;
s = sign(y);
temp_x = [];
temp_y = [];
s_cur = s(1);
for i=1:length(s)
if s(i) == s_cur
temp_x = [temp_x, x(i)];
temp_y = [temp_y, y(i)];
else
% Find the intercept
x_int = -y(i-1)*(x(i)-x(i-1))/(y(i)-y(i-1)) + x(i-1);
% Append the intercept to temp_x and temp_y
temp_x = [temp_x, x_int];
temp_y = [temp_y, 0];
% Calculate the partial sum
z_temp = abs(trapz(temp_x, temp_y));
z = z + z_temp;
if s_cur == 1
pos = pos + z_temp;
elseif s_cur == -1
neg = neg + z_temp;
else
error('This should not happen');
end
% Start new temp_x and temp_y with the intercepts first
temp_x = [x_int x(i)];
temp_y = [0 y(i)];
% Switch the signs
s_cur = s(i);
end
end
% Add any remaining points
z_temp = abs(trapz(temp_x, temp_y));
z = z + z_temp;
if s_cur == 1
pos = pos + z_temp;
elseif s_cur == -1
neg = neg + z_temp;
else
error('This should not happen');
end