Skip to content
Snippets Groups Projects
Commit d076a7fe authored by Lena Hersemann's avatar Lena Hersemann
Browse files

added updated Matlab script version

parent c0a534f6
No related branches found
No related tags found
No related merge requests found
......@@ -4,42 +4,32 @@ close all;
%% load and process data
[facs_gfp_dat, txt_facs, ~] = xlsread('Data/facs_gfp_intensities.xlsx');
[imaging_gfp, ~, ~] = xlsread('Data/imaging_gfp_intensities.xlsx');
[imaging_live_data, txt_live, raw] = xlsread('Data/live_imaged_cells_gfp_and_info_to_map.xlsx');
[imaging_gfp, ~, ~] = xlsread('New_data_SD4/day1-127_all_gfp-control_all.xlsx');
[rna_seq_data, txt_seq, raw] = xlsread('Data/sequenced_cells_facs_gfp_cluster_info.xlsx');
facs_gfp = facs_gfp_dat(1:end, strcmp(txt_facs(1, :), 'gfp_over_fsc'));
trackName = txt_live(2:end, find(strcmp(txt_live(1, :), 'full_track_name')));
division = imaging_live_data(:, find(strcmp(txt_live(1, :), 'division'))-1);
imaging_live_gfp = imaging_live_data(:, find(strcmp(txt_live(1, :), 'Intensity_Mean_GFP_261'))-1);
imaging_live_speed = imaging_live_data(:, find(strcmp(txt_live(1, :), 'average_speed_fixed'))-1);
imaging_live_gfp = imaging_gfp;
cellName = txt_seq(2:end, find(strcmp(txt_seq(1, :), 'cell_id')));
seq_gfp = rna_seq_data(1:end, find(strcmp(txt_seq(1, :), 'gfp_over_fsc'))-1);
seq_cluster = txt_seq(2:end, find(strcmp(txt_seq(1, :), 'clusters')));
clusterIDs = unique(seq_cluster);
subplot(1,4,1);
subplot(1,3,1);
hist(log10(facs_gfp), 50);
title('FACS');
xlabel('gfp (log10)');
xlabel('GFP (log10)');
subplot(1,4,2);
subplot(1,3,2);
hist(log10(imaging_gfp), 50);
title('Imaging');
xlabel('gfp (log10)');
xlabel('GFP (log10)');
subplot(1,4,3);
hist(log10(imaging_live_gfp), 20);
title('Live imaging');
xlabel('gfp (log10)');
subplot(1,4,4);
subplot(1,3,3);
hist(log10(seq_gfp), 20);
title('RNA Seq');
xlabel('gfp (log10)');
xlabel('GFP (log10)');
%% calculate cluster GFP distributions using Gaussian mixture model
......@@ -63,7 +53,7 @@ for k=1:length(clusterIDs)
figure(2);
subplot(1,length(clusterIDs), k);
bar(b, h); hold on;
xlabel('gfp (log10)');
xlabel('GFP (log10)');
ylabel('Density');
Options.MaxIter = 1000;
......@@ -90,7 +80,7 @@ for k=1:length(clusterIDs)
figure(3);
plot(gfpGrid, pdf(cluster_distributions{k}, gfpGrid), 'LineWidth', 2); hold on;
xlabel('gfp (log10)');
xlabel('GFP (log10)');
fprintf('Clusters required %d\n', minIdx);
end
......@@ -102,12 +92,12 @@ end
quantile_transformed_gfp = [];
validLiveIdx = [];
for n=1:length(imaging_live_gfp)
for n=1:length(imaging_gfp)
currGfp = imaging_live_gfp(n);
currGfp = imaging_gfp(n);
if (~isnan(currGfp))
validIdx = find(currGfp < gfpVecImaging);
validIdx = find(currGfp == gfpVecImaging);
quantileGfp = cdfImagingGFP(validIdx(1));
......@@ -120,20 +110,17 @@ for n=1:length(imaging_live_gfp)
end
%kick out cells which have no GFP measurement
division = division(validLiveIdx);
imaging_live_gfp = imaging_live_gfp(validLiveIdx);
imaging_live_speed = imaging_live_speed(validLiveIdx);
track_name = trackName(validLiveIdx);
imaging_gfp = imaging_gfp(validLiveIdx);
figure;
subplot(1,2,1);
qqplot(facs_gfp, imaging_live_gfp);
xlabel('gfp (log10)');
qqplot(facs_gfp, imaging_gfp);
xlabel('GFP');
ylabel('Live imaging');
subplot(1,2,2);
qqplot(facs_gfp, quantile_transformed_gfp);
xlabel('gfp (log10)');
xlabel('GFP');
ylabel('Live imaging (after transform)');
%% Perform classification using quantile-transformed gfp values and the previously fitted Gaussian Mixtures
......@@ -148,27 +135,7 @@ for n=1:length(quantile_transformed_gfp)
[maxL, clusterId_pred(n)] = max(likelihood);
end
%% Select only non-dividing cells and perform analysis of imaging-based features
nonDividers = find(~division);
for k=1:length(clusterIDs)
figure(19);
subplot(1,2,1);
vals = imaging_live_gfp(nonDividers);
vals = vals(clusterId_pred(nonDividers)==k);
errorbar(k, mean(vals), std(vals)/sqrt(length(vals)), 'or', 'LineWidth', 2); hold on;
ylabel('gfp (log10)');
ylabel('Cluster Idx');
subplot(1,2,2);
vals = imaging_live_speed(nonDividers);
vals = vals(clusterId_pred(nonDividers)==k);
errorbar(k, mean(vals), std(vals)/sqrt(length(vals)), 'or', 'LineWidth', 2); hold on;
ylabel('Speed');
ylabel('Cluster Idx');
figure(20);
......@@ -186,15 +153,15 @@ end
%%Export processed and annotated imaging data in xls format
Dat = [division, imaging_live_gfp, imaging_live_speed, clusterId_pred'];
Dat = [imaging_live_gfp, clusterId_pred'];
cell_idx = validLiveIdx';
quantile_transformed_gfp = quantile_transformed_gfp';
clusterId_pred = clusterId_pred';
T = table(track_name, cell_idx, division, imaging_live_gfp, quantile_transformed_gfp, imaging_live_speed, clusterId_pred);
quantile_transformed_gfp_f = quantile_transformed_gfp';
clusterId_pred_f = clusterId_pred';
T = table(cell_idx, imaging_live_gfp, quantile_transformed_gfp_f, clusterId_pred_f);
writetable(T, 'imaging_data_processed.xlsx');
writetable(T, 'imaging_data_processed_SD4.xlsx');
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment