run_auc.m (main file)

filename = 'GPL85__GDS253__liver.csv'; % Data file (csv, first row with time points, first col with non-numeric IDs)

ci = 0.8; % Confidence interval for AUC and baseline (separately)

take_log = 1; % Take log of data?

N = 10000; % Number of bootstrap resamplings

[auc_, base, sig, reps, avg, time, pos, neg] = auc(filename, ci, take_log, N);

bi = find(neg(sig)>0.5*pos(sig) & pos(sig)>0.5*neg(sig));

up = find(pos(sig)>neg(sig) & ~(neg(sig)>0.5*pos(sig) & pos(sig)>0.5*neg(sig)));

down = find(pos(sig)<neg(sig) & ~(neg(sig)>0.5*pos(sig) & pos(sig)>0.5*neg(sig)));

auc.m

function [auc_, base, sig, reps, avg, time, pos, neg] = auc(filename, ci, take_log, N)

% Load data

data = importdata(filename, ',');

ids = data.textdata(2:end);

times = data.data(1,:);

data = data.data(2:end,:);

if take_log

data = log(data);

end

% Convenience variables

time = unique(times);

I = size(data,1); % Number of probe sets

T = length(time); % Number of time points

% Put all the data into a cell array so that each entry is a vector with

% the replicates for that probe set (row) and time point (col).

reps = cell(I, T);

avg = zeros(I, T);

for t=1:T

cols = find(times==time(t));

for i=1:I

reps{i,t} = data(i,cols);

avg(i,t) = mean(data(i,cols));

end

end

% Baselines

base.mean = zeros(I, 1);

base.std = zeros(I, 1);

base.ci_min = zeros(I, 1);

base.ci_max = zeros(I, 1);

for i=1:I

v = [reps{i,1}, reps{i,end}]; % Baseline points

% v = [reps{i,1}]; % Baseline points

base.mean(i) = mean(v); % This is the mean y-value

base.std(i) = std(v)*(time(end)-time(1)); % std of baseline AUC

z_base = norminv(1 - (1-ci)/2, 0, 1);

base.ci_min = -z_base*base.std;

base.ci_max = z_base*base.std;

end

% Find the mean and CI of the AUC

[auc_.mean, pos, neg] = auc_bailer(reps, time, base.mean); % For pos, neg

[auc_.ci_min, auc_.ci_max, auc_.std] = auc_bootstrap(N, reps, time, ci, base.mean);

sig = (auc_.ci_min > base.ci_max);

fprintf('Significant: %g of %g\n\n', sum(sig), I);

auc_bailer.m

function [auc_mean, pos, neg, auc_std] = auc_bailer(reps, time, base_mean)

I = size(reps,1); % Number of probe sets

T = size(reps,2); % Number of time points

for i=1:I

for t=1:T

reps{i,t} = reps{i,t} - base_mean(i);

end

end

pos = zeros(I, 1); % Positive area

neg = zeros(I, 1); % Negative area

% Bailer's method

w(1) = 1/2*(time(2)-time(1));

for t=2:T-1

w(t) = 1/2*(time(t+1)-time(t-1));

end

w(T) = 1/2*(time(T)-time(T-1));

auc_std = zeros(I, 1);

auc_mean = zeros(I, 1);

for i=1:I

y = zeros(1, T);

for t=1:T

y(t) = (mean(reps{i,t}));

auc_std(i) = auc_std(i) + w(t)^2 * var(reps{i,t})/length(reps{i,t});

end

[auc_mean(i), pos(i), neg(i)] = trapz_biphasic(time, y);

end

auc_std = sqrt(auc_std);

auc_bootstrap.m

function [ci_min, ci_max, stds] = auc_bootstrap(N, reps, time, ci, base_mean)

I = size(reps,1); % Number of probe sets

T = size(reps,2); % Number of time points

aucs = zeros(I, N);

if matlabpool('size') == 0

matlabpool open;

end

for i=1:I

for t=1:T

reps{i,t} = reps{i,t} - base_mean(i);

end

end

parfor n=1:N

b_reps = cell(I, T); % Bootstrapped version of reps

avg_vals = zeros(I, T); % Average expression values

for i=1:I

for t=1:T

r = reps{i,t};

for k=1:length(r)

b_reps{i,t}(k) = r(ceil(length(r)*rand)); % Random sample

end

avg_vals(i,t) = mean(b_reps{i,t});

end

end

% Numerical integration: trapezoidal rule

temp = zeros(I, 1);

for i=1:I

temp(i) = trapz_biphasic(time, avg_vals(i,:));

end

aucs(:,n) = temp; % For parfor

end

matlabpool close;

means = mean(aucs, 2);

stds = std(aucs, 0, 2);

% Calculate the confidence interval by looking at the distribution of

% bootstrap AUCs

s_aucs = sort(aucs, 2);

edge = (1-ci)/2; % Area on each edge of CI

min_cutoff = ceil(edge * N);

if min_cutoff == 0

min_cutoff = 1;

end

ci_min = s_aucs(:, min_cutoff);

max_cutoff = floor((1-edge) * N);

ci_max = s_aucs(:, max_cutoff);

trapz_biphasic.m

function [z, pos, neg] = trapz_biphasic(x, y)

z = 0;

pos = 0;

neg = 0;

s = sign(y);

temp_x = [];

temp_y = [];

s_cur = s(1);

for i=1:length(s)

if s(i) == s_cur

temp_x = [temp_x, x(i)];

temp_y = [temp_y, y(i)];

else

% Find the intercept

x_int = -y(i-1)*(x(i)-x(i-1))/(y(i)-y(i-1)) + x(i-1);

% Append the intercept to temp_x and temp_y

temp_x = [temp_x, x_int];

temp_y = [temp_y, 0];

% Calculate the partial sum

z_temp = abs(trapz(temp_x, temp_y));

z = z + z_temp;

if s_cur == 1

pos = pos + z_temp;

elseif s_cur == -1

neg = neg + z_temp;

else

error('This should not happen');

end

% Start new temp_x and temp_y with the intercepts first

temp_x = [x_int x(i)];

temp_y = [0 y(i)];

% Switch the signs

s_cur = s(i);

end

end

% Add any remaining points

z_temp = abs(trapz(temp_x, temp_y));

z = z + z_temp;

if s_cur == 1

pos = pos + z_temp;

elseif s_cur == -1

neg = neg + z_temp;

else

error('This should not happen');

end