From cffc889ba782ad6fe6694b8adc6ace1a446414d4 Mon Sep 17 00:00:00 2001
From: Incardona Pietro <incardon@mpi-cbg.de>
Date: Sat, 9 Dec 2023 01:05:37 +0100
Subject: [PATCH] Moving on CNVcaller

---
 example/Sequencing/CNVcaller/Makefile |  12 +--
 example/Sequencing/CNVcaller/main.cu  | 103 +++++++++++++++++++++-----
 script/install_OPENBLAS.sh            |  12 +--
 3 files changed, 97 insertions(+), 30 deletions(-)

diff --git a/example/Sequencing/CNVcaller/Makefile b/example/Sequencing/CNVcaller/Makefile
index 41c562513..821467801 100644
--- a/example/Sequencing/CNVcaller/Makefile
+++ b/example/Sequencing/CNVcaller/Makefile
@@ -4,14 +4,14 @@ CUDA_CC=
 CC=mpic++
 ifdef HIP
 	CUDA_CC=hipcc
-	CUDA_OPTIONS=-D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
+	CUDA_OPTIONS= -D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0
 	LIBS_SELECT=$(LIBS)
 	CC=hipcc
 else
 	ifdef CUDA_ON_CPU
 		CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH)
         	INCLUDE_PATH_NVCC=
-        	CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000
+        	CUDA_OPTIONS= -D__NVCC__ -DCUDART_VERSION=11000
         	LIBS_SELECT=$(LIBS) -lhts -lgsl
 	else
 		ifeq (, $(shell which nvcc))
@@ -32,19 +32,19 @@ OBJ_DEPTH = main_depth.o
 all:
 
 %.o: %.cu
-	$(CUDA_CC) -O0 -g $(CUDA_OPTIONS) -c --std=c++14  -o $@ $< $(INCLUDE_PATH_NVCC)
+	$(CUDA_CC) -O0 -Wno-deprecated-enum-enum-conversion -Wno-interference-size  -g $(CUDA_OPTIONS) -DSE_CLASS1  -c --std=c++20 -I/home/i-bird/openfpm_dependencies_new/EIGEN/  -Iseqan3/include/ -Iseqan3/submodules/sdsl-lite/include -Iseqan3/submodules/cereal/include -DSEQAN3_HAS_ZLIB=1 -DSEQAN3_HAS_BZIP2=1  -o $@ $< $(INCLUDE_PATH_NVCC)
 
 %.o: %.cpp
 	$(CC) -O0 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
 
 CNV_caller: $(OBJ)
-	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) -lz -lbz2 -pthread
 
 CNV_caller_CN2: $(OBJ_CN2)
-	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) -lz -lbz2 -pthread
 
 CNV_caller_depth: $(OBJ_DEPTH)
-	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT)
+	$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) -lz -lbz2 -pthread
 
 all: CNV_caller CNV_caller_CN2 CNV_caller_depth
 
diff --git a/example/Sequencing/CNVcaller/main.cu b/example/Sequencing/CNVcaller/main.cu
index c9b1404b7..3af9f7b43 100644
--- a/example/Sequencing/CNVcaller/main.cu
+++ b/example/Sequencing/CNVcaller/main.cu
@@ -3,6 +3,9 @@
  *
  */
 
+//#define SKIP_LOAD
+#define DEBUG_OUTPUT
+
 //! \cond [using_openmpi] \endcond
 #define OPENMPI
 //! \cond [using_openmpi] \endcond
@@ -12,7 +15,6 @@
 
 int n_count_all = 0;
 
-#include <htslib/sam.h>
 #include "VCluster/VCluster.hpp"
 #include "optimizer.hpp"
 #include <unordered_map>
@@ -23,53 +25,118 @@ int n_count_all = 0;
 
 int main(int argc, char* argv[])
 {
-    openfpm_init(&argc,&argv);
-
-    openfpm::vector<long int> number_or_reads;
+    openfpm_init(&argc,&argv);   
 
     short int chromosome=4;
 
     openfpm::vector<aggregate<short int,int, int>> break_point;
     openfpm::vector<aggregate<short int,int, int>> intervals;
+    openfpm::vector<aggregate<short int[256]>> bkp;
 
-    openfpm::vector<openfpm::vector<aggregate<short int,int, int, int>>> read_splits;
+    openfpm::vector<openfpm::vector<aggregate<short int,int, int, int, int>>> read_splits;
 
     // Read bed file for interval
 
     std::cout << "READING: Bed file" << std::endl;
-    read_bed_file("/nvme/bams/Twist_Exome_Core_RefSeq_Targets_b37_chr.bed",intervals,0);
+    read_bed_file("/nvme/bams/Twist_Comprehensive_Exome_Merged_Probes_hg19_liftover_sorted.bed",intervals,0);
 
     openfpm::vector<std::string> files_bams;
 
-    files_bams.add("/nvme/bams/DE14NGSUKBD138718_97851_cut.bam");
-    files_bams.add("/nvme/bams/DE12NGSUKBD138851_96029_cut.bam");
-    files_bams.add("/nvme/bams/DE06NGSUKBD138862_98760_cut.bam");
-    files_bams.add("/nvme/bams/DE08NGSUKBD138729_77978_cut.bam");
-    files_bams.add("/nvme/bams/DE11NGSUKBD138869_97627_cut.bam");
+    files_bams.add("/nvme/bams/batch_1/DE78NGSUKBD134462_76322.bam");
+    files_bams.add("/nvme/bams/DE14NGSUKBD138718_97851.bam");
+    files_bams.add("/nvme/bams/DE12NGSUKBD138851_96029.bam");
+    files_bams.add("/nvme/bams/DE06NGSUKBD138862_98760.bam");
+    files_bams.add("/nvme/bams/DE08NGSUKBD138729_77978.bam");
+    files_bams.add("/nvme/bams/DE11NGSUKBD138869_97627.bam");
+
+    files_bams.add("/nvme/bams/batch_1/DE47NGSUKBD116105_76321.bam");
+    files_bams.add("/nvme/bams/batch_1/DE06NGSUKBD138183_97987.bam");
+    files_bams.add("/nvme/bams/batch_1/DE07NGSUKBD138165_96729.bam");
+    files_bams.add("/nvme/bams/batch_1/DE12NGSUKBD138172_95188.bam");
+
+    openfpm::vector<openfpm::vector<char>> reference;
+    //read_fasta("/nvme/databases/human_g1k_v37_decoy.fasta",reference);
+    reference.load("reference_vector");
+    openfpm::vector<call_vcf> calls_to_test;
+    read_CNV_hypothesys("/nvme/databases/GRCh37_hg19_variants_2020-02-25.vcf.gz",calls_to_test);
+    openfpm::vector<openfpm::vector<seqs_call>> seqs;
+    CNV_event_test(calls_to_test,reference,seqs,files_bams,6);
 
-    files_bams.add("/nvme/bams/batch_1/DE78NGSUKBD134462_76322_cut.bam");
-    files_bams.add("/nvme/bams/batch_1/DE47NGSUKBD116105_76321_cut.bam");
-    files_bams.add("/nvme/bams/batch_1/DE06NGSUKBD138183_97987_cut.bam");
-    files_bams.add("/nvme/bams/batch_1/DE07NGSUKBD138165_96729_cut.bam");
-    files_bams.add("/nvme/bams/batch_1/DE12NGSUKBD138172_95188_cut.bam");
+    return 0;
 
     int N_samples = files_bams.size();
 
+    #ifndef SKIP_LOAD
+
     // Read break points
     read_splits.resize(N_samples);
     for (int i = 0 ; i < files_bams.size() ; i++) {
         read_break_points(files_bams.get(i),read_splits.get(i));
+        std::cout << "Break intervals: " << intervals.size() << std::endl;
         break_intervals(intervals,read_splits.get(i));
+        std::cout << "New intervals: " << intervals.size() << std::endl;
     }
 
+    #endif
+
     constexpr int N_int_opt = 1;
     constexpr int N_sample_opt = 10;
 
     openfpm::vector<aggregate<float[N_sample_opt+2]>> solutions;
+    openfpm::vector<aggregate<float[N_sample_opt+2]>> solutions2;
     openfpm::vector<aggregate<float[N_sample_opt]>> confidence;
+    openfpm::vector<aggregate<float[N_sample_opt]>> confidence2;
+    openfpm::vector<int> counts;
+
+    call_single_interval_mean_depth<N_int_opt,N_sample_opt>(intervals,files_bams,counts,solutions,confidence);
+    call_single_interval_mean_depth_local<N_int_opt,N_sample_opt>(intervals,files_bams,counts,solutions,solutions2,confidence2);
+
+    openfpm::vector<openfpm::vector<call_vcf>> calls;
+    calls.resize(N_sample_opt);
+
+    // Get only overlapping and consistent calls
+    for (int j = 0 ; j < N_samples ; j++) {
+        for (int i = 0 ; i < solutions.size() ; i++) {
+
+            if (intervals.get<1>(i) == 109650540){
+                int debug = 0;
+                debug++;
+            }
+
+            if (solutions.get<0>(i)[j+1] != 2 && solutions2.get<0>(i)[j+1] == solutions.get<0>(i)[j+1]) {
+                calls.get(j).add();
+                calls.get(j).last().chromosome = intervals.get<0>(i);
+                calls.get(j).last().start = intervals.get<1>(i);
+                calls.get(j).last().stop = intervals.get<2>(i);
+                calls.get(j).last().CN = solutions.get<0>(i)[j+1];
+                calls.get(j).last().num_region = 1;
+                calls.get(j).last().precise_start = intervals.get<1>(i) == (intervals.get<2>(i-1)+1);
+                calls.get(j).last().precise_stop = (intervals.get<2>(i)+1) == intervals.get<1>(i+1);
+                calls.get(j).last().quality = confidence.get<0>(i)[j];
+                calls.get(j).last().efficiency.add(solutions.get<0>(i)[j]);
+                calls.get(j).last().residual.add(solutions.get<0>(i)[N_sample_opt+1]);
+
+                // check how long it extends
+
+                int k = i+1;
+                for ( ; k < solutions.size() ; k++) {
+                    if (solutions.get<0>(k)[j+1] != 2 && solutions.get<0>(k)[j+1] == solutions2.get<0>(k)[j+1]) {
+                        calls.get(j).last().stop = intervals.get<2>(k);
+                        calls.get(j).last().num_region++;
+                        calls.get(j).last().precise_stop = (intervals.get<2>(k)+1) == intervals.get<1>(k+1);
+                        calls.get(j).last().quality *= confidence.get<0>(k)[j];
+                        calls.get(j).last().efficiency.add(solutions.get<0>(i)[j]);
+                        calls.get(j).last().residual.add(solutions.get<0>(i)[N_sample_opt+1]);
+                    } else {
+                        break;
+                    }
+                }
+                i = k-1;
+            }
+        }
+    }
 
-    call_single_interval_mean_depth<N_int_opt,N_sample_opt>(intervals,files_bams,solutions,confidence);
-    //call_single_interval_mean_depth_local<N_int_opt,N_sample_opt>(intervals,files_bams,solutions,confidence)
+    create_vcf_calls<10>(calls,files_bams);
 
 	openfpm_finalize();
 }
diff --git a/script/install_OPENBLAS.sh b/script/install_OPENBLAS.sh
index 0b93190d7..533725a54 100755
--- a/script/install_OPENBLAS.sh
+++ b/script/install_OPENBLAS.sh
@@ -18,11 +18,11 @@ if [ x"$platform" == x"darwin" ]; then
   tar -xf OpenBLAS-0.3.10.tar.gz
   cd OpenBLAS-0.3.10
 else
-  rm -rf OpenBLAS-0.3.13
-  rm -rf OpenBLAS-0.3.13.tar.gz
-  wget http://ppmcore.mpi-cbg.de/upload/OpenBLAS-0.3.13.tar.gz
-  tar -xf OpenBLAS-0.3.13.tar.gz
-  cd OpenBLAS-0.3.13
+  rm -rf OpenBLAS-0.3.24
+  #rm -rf OpenBLAS-0.3.24.tar.gz
+  #wget http://ppmcore.mpi-cbg.de/upload/OpenBLAS-0.3.13.tar.gz
+  tar -xf OpenBLAS-0.3.24.tar.gz
+  cd OpenBLAS-0.3.24
 fi
 
 #wget http://ppmcore.mpi-cbg.de/upload/openblas.diff
@@ -39,7 +39,7 @@ make install PREFIX=$1/OPENBLAS
 if [ ! "$(ls -A $1/OPENBLAS)" ]; then
    rm -rf $1/OPENBLAS
 else
-   rm -rf OpenBLAS-0.3.13
+   rm -rf OpenBLAS-0.3.24
    rm -rf OpenBLAS-0.3.10
    echo 3 > $1/OPENBLAS/version
    exit 0
-- 
GitLab