From cffc889ba782ad6fe6694b8adc6ace1a446414d4 Mon Sep 17 00:00:00 2001 From: Incardona Pietro <incardon@mpi-cbg.de> Date: Sat, 9 Dec 2023 01:05:37 +0100 Subject: [PATCH] Moving on CNVcaller --- example/Sequencing/CNVcaller/Makefile | 12 +-- example/Sequencing/CNVcaller/main.cu | 103 +++++++++++++++++++++----- script/install_OPENBLAS.sh | 12 +-- 3 files changed, 97 insertions(+), 30 deletions(-) diff --git a/example/Sequencing/CNVcaller/Makefile b/example/Sequencing/CNVcaller/Makefile index 41c562513..821467801 100644 --- a/example/Sequencing/CNVcaller/Makefile +++ b/example/Sequencing/CNVcaller/Makefile @@ -4,14 +4,14 @@ CUDA_CC= CC=mpic++ ifdef HIP CUDA_CC=hipcc - CUDA_OPTIONS=-D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0 + CUDA_OPTIONS= -D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0 LIBS_SELECT=$(LIBS) CC=hipcc else ifdef CUDA_ON_CPU CUDA_CC=mpic++ -x c++ $(INCLUDE_PATH) INCLUDE_PATH_NVCC= - CUDA_OPTIONS=-DCUDA_ON_CPU -D__NVCC__ -DCUDART_VERSION=11000 + CUDA_OPTIONS= -D__NVCC__ -DCUDART_VERSION=11000 LIBS_SELECT=$(LIBS) -lhts -lgsl else ifeq (, $(shell which nvcc)) @@ -32,19 +32,19 @@ OBJ_DEPTH = main_depth.o all: %.o: %.cu - $(CUDA_CC) -O0 -g $(CUDA_OPTIONS) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC) + $(CUDA_CC) -O0 -Wno-deprecated-enum-enum-conversion -Wno-interference-size -g $(CUDA_OPTIONS) -DSE_CLASS1 -c --std=c++20 -I/home/i-bird/openfpm_dependencies_new/EIGEN/ -Iseqan3/include/ -Iseqan3/submodules/sdsl-lite/include -Iseqan3/submodules/cereal/include -DSEQAN3_HAS_ZLIB=1 -DSEQAN3_HAS_BZIP2=1 -o $@ $< $(INCLUDE_PATH_NVCC) %.o: %.cpp $(CC) -O0 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH) CNV_caller: $(OBJ) - $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) -lz -lbz2 -pthread CNV_caller_CN2: $(OBJ_CN2) - $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) -lz -lbz2 -pthread CNV_caller_depth: $(OBJ_DEPTH) - $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) + $(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS_SELECT) -lz -lbz2 -pthread all: CNV_caller CNV_caller_CN2 CNV_caller_depth diff --git a/example/Sequencing/CNVcaller/main.cu b/example/Sequencing/CNVcaller/main.cu index c9b1404b7..3af9f7b43 100644 --- a/example/Sequencing/CNVcaller/main.cu +++ b/example/Sequencing/CNVcaller/main.cu @@ -3,6 +3,9 @@ * */ +//#define SKIP_LOAD +#define DEBUG_OUTPUT + //! \cond [using_openmpi] \endcond #define OPENMPI //! \cond [using_openmpi] \endcond @@ -12,7 +15,6 @@ int n_count_all = 0; -#include <htslib/sam.h> #include "VCluster/VCluster.hpp" #include "optimizer.hpp" #include <unordered_map> @@ -23,53 +25,118 @@ int n_count_all = 0; int main(int argc, char* argv[]) { - openfpm_init(&argc,&argv); - - openfpm::vector<long int> number_or_reads; + openfpm_init(&argc,&argv); short int chromosome=4; openfpm::vector<aggregate<short int,int, int>> break_point; openfpm::vector<aggregate<short int,int, int>> intervals; + openfpm::vector<aggregate<short int[256]>> bkp; - openfpm::vector<openfpm::vector<aggregate<short int,int, int, int>>> read_splits; + openfpm::vector<openfpm::vector<aggregate<short int,int, int, int, int>>> read_splits; // Read bed file for interval std::cout << "READING: Bed file" << std::endl; - read_bed_file("/nvme/bams/Twist_Exome_Core_RefSeq_Targets_b37_chr.bed",intervals,0); + read_bed_file("/nvme/bams/Twist_Comprehensive_Exome_Merged_Probes_hg19_liftover_sorted.bed",intervals,0); openfpm::vector<std::string> files_bams; - files_bams.add("/nvme/bams/DE14NGSUKBD138718_97851_cut.bam"); - files_bams.add("/nvme/bams/DE12NGSUKBD138851_96029_cut.bam"); - files_bams.add("/nvme/bams/DE06NGSUKBD138862_98760_cut.bam"); - files_bams.add("/nvme/bams/DE08NGSUKBD138729_77978_cut.bam"); - files_bams.add("/nvme/bams/DE11NGSUKBD138869_97627_cut.bam"); + files_bams.add("/nvme/bams/batch_1/DE78NGSUKBD134462_76322.bam"); + files_bams.add("/nvme/bams/DE14NGSUKBD138718_97851.bam"); + files_bams.add("/nvme/bams/DE12NGSUKBD138851_96029.bam"); + files_bams.add("/nvme/bams/DE06NGSUKBD138862_98760.bam"); + files_bams.add("/nvme/bams/DE08NGSUKBD138729_77978.bam"); + files_bams.add("/nvme/bams/DE11NGSUKBD138869_97627.bam"); + + files_bams.add("/nvme/bams/batch_1/DE47NGSUKBD116105_76321.bam"); + files_bams.add("/nvme/bams/batch_1/DE06NGSUKBD138183_97987.bam"); + files_bams.add("/nvme/bams/batch_1/DE07NGSUKBD138165_96729.bam"); + files_bams.add("/nvme/bams/batch_1/DE12NGSUKBD138172_95188.bam"); + + openfpm::vector<openfpm::vector<char>> reference; + //read_fasta("/nvme/databases/human_g1k_v37_decoy.fasta",reference); + reference.load("reference_vector"); + openfpm::vector<call_vcf> calls_to_test; + read_CNV_hypothesys("/nvme/databases/GRCh37_hg19_variants_2020-02-25.vcf.gz",calls_to_test); + openfpm::vector<openfpm::vector<seqs_call>> seqs; + CNV_event_test(calls_to_test,reference,seqs,files_bams,6); - files_bams.add("/nvme/bams/batch_1/DE78NGSUKBD134462_76322_cut.bam"); - files_bams.add("/nvme/bams/batch_1/DE47NGSUKBD116105_76321_cut.bam"); - files_bams.add("/nvme/bams/batch_1/DE06NGSUKBD138183_97987_cut.bam"); - files_bams.add("/nvme/bams/batch_1/DE07NGSUKBD138165_96729_cut.bam"); - files_bams.add("/nvme/bams/batch_1/DE12NGSUKBD138172_95188_cut.bam"); + return 0; int N_samples = files_bams.size(); + #ifndef SKIP_LOAD + // Read break points read_splits.resize(N_samples); for (int i = 0 ; i < files_bams.size() ; i++) { read_break_points(files_bams.get(i),read_splits.get(i)); + std::cout << "Break intervals: " << intervals.size() << std::endl; break_intervals(intervals,read_splits.get(i)); + std::cout << "New intervals: " << intervals.size() << std::endl; } + #endif + constexpr int N_int_opt = 1; constexpr int N_sample_opt = 10; openfpm::vector<aggregate<float[N_sample_opt+2]>> solutions; + openfpm::vector<aggregate<float[N_sample_opt+2]>> solutions2; openfpm::vector<aggregate<float[N_sample_opt]>> confidence; + openfpm::vector<aggregate<float[N_sample_opt]>> confidence2; + openfpm::vector<int> counts; + + call_single_interval_mean_depth<N_int_opt,N_sample_opt>(intervals,files_bams,counts,solutions,confidence); + call_single_interval_mean_depth_local<N_int_opt,N_sample_opt>(intervals,files_bams,counts,solutions,solutions2,confidence2); + + openfpm::vector<openfpm::vector<call_vcf>> calls; + calls.resize(N_sample_opt); + + // Get only overlapping and consistent calls + for (int j = 0 ; j < N_samples ; j++) { + for (int i = 0 ; i < solutions.size() ; i++) { + + if (intervals.get<1>(i) == 109650540){ + int debug = 0; + debug++; + } + + if (solutions.get<0>(i)[j+1] != 2 && solutions2.get<0>(i)[j+1] == solutions.get<0>(i)[j+1]) { + calls.get(j).add(); + calls.get(j).last().chromosome = intervals.get<0>(i); + calls.get(j).last().start = intervals.get<1>(i); + calls.get(j).last().stop = intervals.get<2>(i); + calls.get(j).last().CN = solutions.get<0>(i)[j+1]; + calls.get(j).last().num_region = 1; + calls.get(j).last().precise_start = intervals.get<1>(i) == (intervals.get<2>(i-1)+1); + calls.get(j).last().precise_stop = (intervals.get<2>(i)+1) == intervals.get<1>(i+1); + calls.get(j).last().quality = confidence.get<0>(i)[j]; + calls.get(j).last().efficiency.add(solutions.get<0>(i)[j]); + calls.get(j).last().residual.add(solutions.get<0>(i)[N_sample_opt+1]); + + // check how long it extends + + int k = i+1; + for ( ; k < solutions.size() ; k++) { + if (solutions.get<0>(k)[j+1] != 2 && solutions.get<0>(k)[j+1] == solutions2.get<0>(k)[j+1]) { + calls.get(j).last().stop = intervals.get<2>(k); + calls.get(j).last().num_region++; + calls.get(j).last().precise_stop = (intervals.get<2>(k)+1) == intervals.get<1>(k+1); + calls.get(j).last().quality *= confidence.get<0>(k)[j]; + calls.get(j).last().efficiency.add(solutions.get<0>(i)[j]); + calls.get(j).last().residual.add(solutions.get<0>(i)[N_sample_opt+1]); + } else { + break; + } + } + i = k-1; + } + } + } - call_single_interval_mean_depth<N_int_opt,N_sample_opt>(intervals,files_bams,solutions,confidence); - //call_single_interval_mean_depth_local<N_int_opt,N_sample_opt>(intervals,files_bams,solutions,confidence) + create_vcf_calls<10>(calls,files_bams); openfpm_finalize(); } diff --git a/script/install_OPENBLAS.sh b/script/install_OPENBLAS.sh index 0b93190d7..533725a54 100755 --- a/script/install_OPENBLAS.sh +++ b/script/install_OPENBLAS.sh @@ -18,11 +18,11 @@ if [ x"$platform" == x"darwin" ]; then tar -xf OpenBLAS-0.3.10.tar.gz cd OpenBLAS-0.3.10 else - rm -rf OpenBLAS-0.3.13 - rm -rf OpenBLAS-0.3.13.tar.gz - wget http://ppmcore.mpi-cbg.de/upload/OpenBLAS-0.3.13.tar.gz - tar -xf OpenBLAS-0.3.13.tar.gz - cd OpenBLAS-0.3.13 + rm -rf OpenBLAS-0.3.24 + #rm -rf OpenBLAS-0.3.24.tar.gz + #wget http://ppmcore.mpi-cbg.de/upload/OpenBLAS-0.3.13.tar.gz + tar -xf OpenBLAS-0.3.24.tar.gz + cd OpenBLAS-0.3.24 fi #wget http://ppmcore.mpi-cbg.de/upload/openblas.diff @@ -39,7 +39,7 @@ make install PREFIX=$1/OPENBLAS if [ ! "$(ls -A $1/OPENBLAS)" ]; then rm -rf $1/OPENBLAS else - rm -rf OpenBLAS-0.3.13 + rm -rf OpenBLAS-0.3.24 rm -rf OpenBLAS-0.3.10 echo 3 > $1/OPENBLAS/version exit 0 -- GitLab