diff --git a/example/Sequencing/CNVcaller/functions.hpp b/example/Sequencing/CNVcaller/functions.hpp index 2b2de26d71cbc5f3dd3a582d0f77f26323f7a4dd..c917f3a73d7580598d4798e3f5fac896d081c875 100644 --- a/example/Sequencing/CNVcaller/functions.hpp +++ b/example/Sequencing/CNVcaller/functions.hpp @@ -2422,11 +2422,6 @@ void collect_reads_for_call_evidence_forward(std::string & bam_file, } } - if (best_sw2 > best_sw1) { - int debug = 0; - debug++; - } - if (best_sw1 >= len-sub_len || best_sw2 >= len-sub_len) { seqs.get<0>(cid).add(); int last = seqs.get<0>(cid).size() - 1; @@ -2640,6 +2635,21 @@ int get_32bases_pattern(int chrom, return 0; } +size_t convert_to_bit_pattern(const char * bases) { + size_t bases_pattern = 0; + for (int k = 0 ; k < 32 ; k++) { + bases_pattern = bases_pattern << 2; + char base_bit; + if (bases[k] == 'A') {base_bit = 0x00;} + else if (bases[k] == 'C') {base_bit = 0x01;} + else if (bases[k] == 'T') {base_bit = 0x02;} + else if (bases[k] == 'G') {base_bit = 0x03;} + bases_pattern |= ((size_t)base_bit); + } + + return bases_pattern; +} + int get_Nbases_pattern(int chrom, int start, openfpm::vector<openfpm::vector<char>> & chr_sequences, @@ -2728,11 +2738,75 @@ size_t add_32bases_pattern(int chrom, return bases_pattern; } +size_t create_seed_variation_from_variant(int chrom, + int rec_pos, + int start, + size_t bases_pattern, + const char * ref, + const char * alt, + openfpm::vector<openfpm::vector<char>> & chr_sequences) { + int pos_inv = 31-(rec_pos - start); + int pos = rec_pos - start; + + // delete the base in bases_pattern + + int n_ref = std::strlen(ref); + + // add the new base + + int n_alt = std::strlen(alt); + + size_t bases_pattern_hole = bases_pattern; + for (int k = 0 ; k < n_ref ; k++) { + bases_pattern_hole &= ~((size_t)0x3 << ((pos_inv-k)*2)); + } + + if (n_ref - n_alt != 0) + { + size_t bases_pattern_2; + int ret = get_32bases_pattern(chrom,rec_pos+n_ref - n_alt + 1,chr_sequences,bases_pattern_2); + bases_pattern_2 = bases_pattern_2 >> (pos+n_alt)*2; + + // nullify the bases + for (int k = pos ; k < 32 ; k++) { + bases_pattern_hole &= ~((size_t)0x3 << ((31-k)*2)); + } + + // merge bases_pattern and bases_pattern_2 + bases_pattern_hole |= bases_pattern_2; + } + + size_t bases_pattern_alt; + for (int s = 0 ; s < n_alt ; s++) { + char base = alt[0]; + size_t base_bit = 0; + if (base == 'A') + {base_bit = 0x00;} + else if (base == 'C') + {base_bit = 0x01;} + else if (base == 'T') + {base_bit = 0x02;} + else if (base == 'G') + {base_bit = 0x03;} + + bases_pattern_alt = bases_pattern_hole | (base_bit << ((pos_inv-s)*2)); + } + + return bases_pattern_alt; +} + +struct variant { + int pos; + std::string ref; + std::string alt; + float freq; +}; + size_t add_32bases_pattern_with_variability(int chrom, int start, openfpm::vector<openfpm::vector<char>> & chr_sequences, tsl::hopscotch_map<size_t, openfpm::vector<CNV_event_link>> & map, - char * database, + const char * database, int call_id, int padding_left, int padding_right) { @@ -2741,14 +2815,6 @@ size_t add_32bases_pattern_with_variability(int chrom, int ret = get_32bases_pattern(chrom,start,chr_sequences,bases_pattern); if (ret != 0) return -1; - std::string str; - str += std::to_string(chrom); + ":" + std::to_string(start+1) + "-" + std::to_string(start+32+1); - - bcf_srs_t *sr = bcf_sr_init(); - - bcf_sr_set_regions(sr,str.c_str(),true); - bcf_sr_add_reader(sr,database); - openfpm::vector<CNV_event_link> & calls = map[bases_pattern]; struct CNV_event_link cel; @@ -2759,11 +2825,135 @@ size_t add_32bases_pattern_with_variability(int chrom, cel.pos_seed = start; calls.add(cel); + std::string str; + if (chrom == 23) { + str += std::to_string("X") + ":" + std::to_string(start+1) + "-" + std::to_string(start+32+1); + } else if (chrom == 24) { + str += std::to_string("MT") + ":" + std::to_string(start+1) + "-" + std::to_string(start+32+1); + } else { + str += std::to_string(chrom+1) + ":" + std::to_string(start+1) + "-" + std::to_string(start+32+1); + } + + bcf_srs_t *sr = bcf_sr_init(); + + bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX); + bcf_sr_set_regions(sr,str.c_str(),false); + bcf_sr_add_reader(sr,database); + + // get the header of the vcf file + bcf_hdr_t *hdr = bcf_sr_get_header(sr,0); + + openfpm::vector<variant> variants; + + while ( bcf_sr_next_line(sr) ) + { + for (int i=0; i<sr->nreaders; i++) + { + if ( !bcf_sr_has_line(sr,i) ) continue; + bcf1_t *rec = bcf_sr_get_line(sr, i); + if (rec->pos >= start && rec->pos <= start+32) { + + for (int j=1; j<rec->n_allele; j++) + { + variants.add(); + + variants.last().pos = rec->pos; + variants.last().ref = std::string(rec->d.allele[0]); + variants.last().alt = std::string(rec->d.allele[j]); + + int n_float = 1; + void ** ptr; + ptr[0] = &variants.last().freq; + + // Get the frequency from AF in info of rec + int err = bcf_get_info_float(hdr, rec, "AF",ptr,&n_float); + } + } + } + } + bcf_sr_destroy(sr); + // Go across the group of variants + + int n_bits = ((variants.size() < 16)?variants.size():16); + int n_var_groups = 2 << (n_bits-1); + + for (short int i = 1 ; i < n_var_groups ; i++) { + double freq = 1.0; + + int n_indel = 0; + int n_snv = 0; + + for (int j = 0 ; j < n_bits ; j++) { + if ((i >> j) & 0x1) { + if (variants.get(j).freq == 0.0) { + freq *= 1e-9; + } else { + freq *= variants.get(j).freq; + } + + if (variants.get(j).ref.size() == 1 && variants.get(j).alt.size() == 1) { + n_snv++; + } + else { + n_indel++; + } + } + } + + if (n_indel > 1) {continue;} + if (n_indel == 1 && n_snv != 0) {continue;} + + if (freq >= 1e-9) { + // construct the seed (indels are single) + + size_t bases_pattern_alt = bases_pattern; + + for (int j = 0 ; j < n_bits ; j++) { + if ((i >> j) & 0x1) { + bases_pattern_alt = create_seed_variation_from_variant(chrom, + variants.get(j).pos, + start, + bases_pattern_alt, + variants.get(j).ref.c_str(), + variants.get(j).alt.c_str(), + chr_sequences); + } + } + + + openfpm::vector<CNV_event_link> & calls = map[bases_pattern_alt]; + + struct CNV_event_link cel; + cel.call_id = call_id; + cel.padding_left = padding_left; + cel.padding_right = padding_right; + cel.path = 0x0; + cel.pos_seed = start; + calls.add(cel); + } + } + return bases_pattern; } +size_t add_32bases_pattern_with_variability_range_shift(int chrom, + int start, + int stop, + int shift, + openfpm::vector<openfpm::vector<char>> & chr_sequences, + tsl::hopscotch_map<size_t, openfpm::vector<CNV_event_link>> & map, + const char * database, + int call_id, + int padding_left, + int padding_right) { + + for (int i = start ; i < stop ; i+= shift) { + add_32bases_pattern_with_variability(chrom,i,chr_sequences,map,database,call_id,padding_left+shift,padding_right); + } +} + size_t add_32bases_pattern_with_gap(int chrom, int start, openfpm::vector<openfpm::vector<char>> & chr_sequences, @@ -2864,7 +3054,7 @@ double insert_probability(int ins, openfpm::vector<double> & bp_insert_prob) { if (abs(ins) < bp_insert_prob.size()) { return bp_insert_prob.get(abs(ins)); } - 1.7e-8; + return 1.7e-8; } double calculate_event_probability(char * seq1, @@ -3026,7 +3216,18 @@ Eigen::ArrayXd Evidence_matrix_log(openfpm::vector<openfpm::vector<char>> & refe // for (int i = -padding ; i <= padding ; i++) { // for (int j = -padding ; j <= padding ; j++) { int id = (i+padding)*(2*padding+1) + (j+padding); - get_sequence(reference,chr,pos,pos+seq.size()+(call.stop+j-call.start-i),call.start+i,call.stop+j,seq1); + + int pos_event; + // check if the start is inside the call + if (pos < call.stop && + pos > call.start ) { + pos_event = call.start -(call.stop - pos); + } + else { + pos_event = pos; + } + + get_sequence(reference,chr,pos_event,pos_event+seq.size()+(call.stop+j-call.start-i),call.start+i,call.stop+j,seq1); std::cout << "--------------- " << i << "," << j << " --------------------------------- event: " << id << std::endl; events(id) = calculate_event_probability(seq2,seq1,seq3,call.stop-call.start,chr,pos,3e-3,chr_ins,reference,bp_insert_prob); std::cout << "EVENT ID: " << id << std::endl; diff --git a/example/Sequencing/CNVcaller/test/DGV_variants_MUC20.vcf.gz b/example/Sequencing/CNVcaller/test/DGV_variants_MUC20.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..4ca21bb13b94e5b7ff3ef3537f90869dc47f56c7 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/DGV_variants_MUC20.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/DGV_variants_MUC20.vcf.gz.tbi b/example/Sequencing/CNVcaller/test/DGV_variants_MUC20.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..680f5773d801cd56b6b0fb7342fd484c706d046c Binary files /dev/null and b/example/Sequencing/CNVcaller/test/DGV_variants_MUC20.vcf.gz.tbi differ diff --git a/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv.bam b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv.bam new file mode 100644 index 0000000000000000000000000000000000000000..2e0c63a089ce1f2f989ecdd5694f4a5931456fc6 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv.bam differ diff --git a/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv.sam b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv.sam new file mode 100644 index 0000000000000000000000000000000000000000..2663ebc9d3aaa7a690f93c3e5bdd037f2b659d9e --- /dev/null +++ b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv.sam @@ -0,0 +1,104 @@ +@HD VN:1.5 SO:coordinate +@SQ SN:1 LN:249250621 +@SQ SN:2 LN:243199373 +@SQ SN:3 LN:198022430 +@SQ SN:4 LN:191154276 +@SQ SN:5 LN:180915260 +@SQ SN:6 LN:171115067 +@SQ SN:7 LN:159138663 +@SQ SN:8 LN:146364022 +@SQ SN:9 LN:141213431 +@SQ SN:10 LN:135534747 +@SQ SN:11 LN:135006516 +@SQ SN:12 LN:133851895 +@SQ SN:13 LN:115169878 +@SQ SN:14 LN:107349540 +@SQ SN:15 LN:102531392 +@SQ SN:16 LN:90354753 +@SQ SN:17 LN:81195210 +@SQ SN:18 LN:78077248 +@SQ SN:19 LN:59128983 +@SQ SN:20 LN:63025520 +@SQ SN:21 LN:48129895 +@SQ SN:22 LN:51304566 +@SQ SN:X LN:155270560 +@SQ SN:Y LN:59373566 +@SQ SN:MT LN:16569 +@SQ SN:GL000207.1 LN:4262 +@SQ SN:GL000226.1 LN:15008 +@SQ SN:GL000229.1 LN:19913 +@SQ SN:GL000231.1 LN:27386 +@SQ SN:GL000210.1 LN:27682 +@SQ SN:GL000239.1 LN:33824 +@SQ SN:GL000235.1 LN:34474 +@SQ SN:GL000201.1 LN:36148 +@SQ SN:GL000247.1 LN:36422 +@SQ SN:GL000245.1 LN:36651 +@SQ SN:GL000197.1 LN:37175 +@SQ SN:GL000203.1 LN:37498 +@SQ SN:GL000246.1 LN:38154 +@SQ SN:GL000249.1 LN:38502 +@SQ SN:GL000196.1 LN:38914 +@SQ SN:GL000248.1 LN:39786 +@SQ SN:GL000244.1 LN:39929 +@SQ SN:GL000238.1 LN:39939 +@SQ SN:GL000202.1 LN:40103 +@SQ SN:GL000234.1 LN:40531 +@SQ SN:GL000232.1 LN:40652 +@SQ SN:GL000206.1 LN:41001 +@SQ SN:GL000240.1 LN:41933 +@SQ SN:GL000236.1 LN:41934 +@SQ SN:GL000241.1 LN:42152 +@SQ SN:GL000243.1 LN:43341 +@SQ SN:GL000242.1 LN:43523 +@SQ SN:GL000230.1 LN:43691 +@SQ SN:GL000237.1 LN:45867 +@SQ SN:GL000233.1 LN:45941 +@SQ SN:GL000204.1 LN:81310 +@SQ SN:GL000198.1 LN:90085 +@SQ SN:GL000208.1 LN:92689 +@SQ SN:GL000191.1 LN:106433 +@SQ SN:GL000227.1 LN:128374 +@SQ SN:GL000228.1 LN:129120 +@SQ SN:GL000214.1 LN:137718 +@SQ SN:GL000221.1 LN:155397 +@SQ SN:GL000209.1 LN:159169 +@SQ SN:GL000218.1 LN:161147 +@SQ SN:GL000220.1 LN:161802 +@SQ SN:GL000213.1 LN:164239 +@SQ SN:GL000211.1 LN:166566 +@SQ SN:GL000199.1 LN:169874 +@SQ SN:GL000217.1 LN:172149 +@SQ SN:GL000216.1 LN:172294 +@SQ SN:GL000215.1 LN:172545 +@SQ SN:GL000205.1 LN:174588 +@SQ SN:GL000219.1 LN:179198 +@SQ SN:GL000224.1 LN:179693 +@SQ SN:GL000223.1 LN:180455 +@SQ SN:GL000195.1 LN:182896 +@SQ SN:GL000212.1 LN:186858 +@SQ SN:GL000222.1 LN:186861 +@SQ SN:GL000200.1 LN:187035 +@SQ SN:GL000193.1 LN:189789 +@SQ SN:GL000194.1 LN:191469 +@SQ SN:GL000225.1 LN:211173 +@SQ SN:GL000192.1 LN:547496 +@SQ SN:NC_007605 LN:171823 +@SQ SN:hs37d5 LN:35477943 +@RG ID:DE78NGSUKBD134462_76322.1 LB:DE78NGSUKBD134462_76322 PL:Illumina SM:DE78NGSUKBD134462_76322 PU:DE78NGSUKBD134462_76322.1 +@PG ID:pbrun fq2bam PN:pbrun fq2bam VN:4.1.0-1 CL:pbrun fq2bam --ref library/human_g1k_v37_decoy.fasta --in-fq /ceph01/projects/Exomes/230224_A00684_0261_BHNJK2DRX2/input/P2023016024EXO/DE78NGSUKBD134462_76322_1.fq.gz /ceph01/projects/Exomes/230224_A00684_0261_BHNJK2DRX2/input/P2023016024EXO/DE78NGSUKBD134462_76322_2.fq.gz --out-bam output/DE78NGSUKBD134462_76322/DE78NGSUKBD134462_76322.bam --out-recal-file output/DE78NGSUKBD134462_76322/DE78NGSUKBD134462_76322.recal_out --out-duplicate-metrics output/DE78NGSUKBD134462_76322/DE78NGSUKBD134462_76322.duplicate_metrics.txt --tmp-dir /ssd/incardona-5287303/tmp.5h60UiwZpr --read-group-sm DE78NGSUKBD134462_76322 --read-group-lb DE78NGSUKBD134462_76322 --read-group-pl Illumina --read-group-id-prefix DE78NGSUKBD134462_76322 --optical-duplicate-pixel-distance 2500 --knownSites library/Mills_and_1000G_gold_standard.indels.b37.vcf --knownSites library/1000G_phase1.indels.b37.vcf --knownSites library/dbsnp_138.b37.vcf +@PG ID:samtools PN:samtools PP:pbrun fq2bam VN:1.15.1 CL:samtools view -H /nvme/bams/batch_1/DE78NGSUKBD134462_76322.bam 1 +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.15.1 CL:samtools view -b test/HG002_one_read_cnv_call.sam +@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.15.1 CL:samtools view -h HG002_one_read_cnv.bam +A00684:261:HNJK2DRX2:1:2278:6008:32690 163 3 112743033 60 101M = 112743048 116 ATTGGTGACAGAGAGATGCAGGATCAAGAAGAGTATTGTTTTGTTTTGTTCTGATTTTAAGGATAGTTTACTAGTCAGGATTCCACCAGCCTGTAGGGGTG FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,F:FFFFFFFF:FFF:FFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MD:Z:101 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:101 XS:i:23 +A00684:261:HNJK2DRX2:1:2278:6008:32690 83 3 112743048 60 101M = 112743033 -116 ATGCAGGATCAAGAAGAGTATTGTTTTGTTTTGTTCTGATTTTAAGGATAGTTTACTAGTCAGGATTCCACCAGCCTGTAGGGGTGGGTTGCCCCTACACA FFFF:FFFFFFFFFFFFFFF:FFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF MD:Z:101 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:101 XS:i:27 +A00684:261:HNJK2DRX2:1:2134:13467:28792 99 3 112743069 60 99M = 112743097 128 TGTTTTGTTTTGTTCTGATTTTAAGGATAGTTTACTAGTCAGGATTCCACCAGCCTGTAGGGGTGGGTTGCCCCTACACACCTGTGGGTGTTTCTCGTA FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MD:Z:99 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:99 XS:i:21 +A00684:261:HNJK2DRX2:1:2134:13467:28792 147 3 112743097 60 100M = 112743069 -128 AGTTTACTAGTCAGGATTCCACCAGCCTGTAGGGGTGGGTTGCCCCTACACACCTGTGGGTGTTTCTCGTAAGGTGGGACGAGAGATTTGGAAAAGAAAA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MD:Z:100 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:100 XS:i:75 +A00684:261:HNJK2DRX2:2:2161:18557:21668 163 3 112751046 0 100M = 112751319 374 AATTGTTCTATTATTTGAAATAGCTTGATTAGATTTTTTGTAGATACTTTAACTCCCCCTCTTTTTAAAAGAATTTTAATAAAGCTGAGATAAGAGGCAT F,FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:F:FF:FFFFFFFFFF,FFFFFFF: MD:Z:100 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:100 XS:i:100 +A00684:261:HNJK2DRX2:2:2161:18557:21668 83 3 112751319 0 101M = 112751046 -374 CAGATGTAGGGGTGGGTTGCCCCTACACACCTGTGGGTGTTTCTCGTAAGGTGGGACGAGAGATTTGGAAAAGAAAAAGACACAGAGACAAAGTATAGAGA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MD:Z:101 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:101 XS:i:101 +A00684:261:HNJK2DRX2:2:2238:17264:30044 163 3 195345844 10 100M = 195345905 162 ACACCCTTTGCACCGATGACATCTCTGAAGAGGCAAAGACACTCACAATGGACATATTGACATTGGCTCACACCTCCACAGAAGCTAAGGGCCTGTCCTC FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF XA:Z:3,+195451896,100M,1; MD:Z:100 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:100 XS:i:95 +A00684:261:HNJK2DRX2:2:2238:17264:30044 83 3 195345905 0 101M = 195345844 -162 ATTGGCTCACACCTCCACAGAAGCTAAGGGCCTGTCCTCAGAGAGCAGCGCCTCTTCCGACAGCCCCCATCCAGTCATCACCCCGTCACGGGCCTCAGAGA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF XA:Z:3,-195451957,101M,0; MD:Z:61G39 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:1 AS:i:96 XS:i:101 +A00684:261:HNJK2DRX2:2:2247:20419:13432 163 3 195345860 0 101M = 195345905 146 TGACAGCTCTGAAGAGGCAAAGACACTCACAATGGACATATTGACATTGGCTCACACCTCCACAGAAGCTAAGGGCCTGTCCTCAGAGAGCAGTGCCTCTT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F XA:Z:3,+195451912,101M,1; MD:Z:5T87C7 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:2 AS:i:91 XS:i:96 +A00684:261:HNJK2DRX2:2:2247:20419:13432 83 3 195345905 9 101M = 195345860 -146 ATTGGCTCACACCTCCACAGAAGCTAAGGGCCTGTCCTCAGAGAGCAGTGCCTCTTCCGACGGCCCCCATCCAGTCATCACCCCGTCACGGGCCTCAGAGA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF XA:Z:3,-195451957,101M,2; MD:Z:48C52 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:1 AS:i:96 XS:i:91 +A00684:261:HNJK2DRX2:1:2105:3278:30436 99 4 64327 60 101M = 64409 183 ATAAAAATAAGGAACTTAATCAAAATAAATGTAAAGTGACCGAGCATAGTGGCTCACACCTGTAATGCCAGCACATTGGGATGCCAAGGTGGGGAGATCAC FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MD:Z:51A49 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:1 AS:i:96 XS:i:34 +A00684:261:HNJK2DRX2:1:2105:3278:30436 147 4 64409 60 101M = 64327 -183 GCCAAGGTGGGGAGATCACTTGAGGCCAGGAGTTCCAGACTAGCCTGGCCATTGTGTCACAAGGGTGCTGCTAGGTCTGTTGTTTAAATGGCATATAGCAG FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MD:Z:101 PG:Z:MarkDuplicates RG:Z:DE78NGSUKBD134462_76322.1 NM:i:0 AS:i:101 XS:i:42 diff --git a/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv_sort.bam b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv_sort.bam new file mode 100644 index 0000000000000000000000000000000000000000..3a959167524c0df96417bedf1281c4a4d9fa507b Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv_sort.bam differ diff --git a/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv_sort.bam.bai b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv_sort.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..d39b290ecb74a3a43b8d1fa80ffc79d98775ac55 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_3calls_reads_cnv_sort.bam.bai differ diff --git a/example/Sequencing/CNVcaller/test/HG002_one_read.bam b/example/Sequencing/CNVcaller/test/HG002_one_read.bam new file mode 100644 index 0000000000000000000000000000000000000000..5ff95d365866693704085856b1e67f8fe1c69c8b Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_one_read.bam differ diff --git a/example/Sequencing/CNVcaller/test/HG002_one_read.bam.bai b/example/Sequencing/CNVcaller/test/HG002_one_read.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..be9c05bf9172fd49012cc13a315dd01effb292e6 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_one_read.bam.bai differ diff --git a/example/Sequencing/CNVcaller/test/HG002_one_read_cnv.bam b/example/Sequencing/CNVcaller/test/HG002_one_read_cnv.bam new file mode 100644 index 0000000000000000000000000000000000000000..c55d3ed2e8090e9a33caedc4c5b56eec46779719 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_one_read_cnv.bam differ diff --git a/example/Sequencing/CNVcaller/test/HG002_one_read_cnv.bam.bai b/example/Sequencing/CNVcaller/test/HG002_one_read_cnv.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..bf18e5b5222023caa1afec138f3bd725db4f3bb5 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_one_read_cnv.bam.bai differ diff --git a/example/Sequencing/CNVcaller/test/HG002_reads_cnv_MUC20.bam b/example/Sequencing/CNVcaller/test/HG002_reads_cnv_MUC20.bam new file mode 100644 index 0000000000000000000000000000000000000000..5cbc37fa737a52096ed0a79b3bcac19eda781b2c Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_reads_cnv_MUC20.bam differ diff --git a/example/Sequencing/CNVcaller/test/HG002_reads_cnv_MUC20.bam.bai b/example/Sequencing/CNVcaller/test/HG002_reads_cnv_MUC20.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..575d7623b527d5c00e5f696dcb112c59d350a7bc Binary files /dev/null and b/example/Sequencing/CNVcaller/test/HG002_reads_cnv_MUC20.bam.bai differ diff --git a/example/Sequencing/CNVcaller/test/gnomad_variants.vcf.gz b/example/Sequencing/CNVcaller/test/gnomad_variants.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..f58447d80a20bf71534bc8182ce3aa755482641f Binary files /dev/null and b/example/Sequencing/CNVcaller/test/gnomad_variants.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/gnomad_variants.vcf.gz.tbi b/example/Sequencing/CNVcaller/test/gnomad_variants.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..2b1cc7a84f32351565d007bab393131ce01f4e2e Binary files /dev/null and b/example/Sequencing/CNVcaller/test/gnomad_variants.vcf.gz.tbi differ diff --git a/example/Sequencing/CNVcaller/test/gnomad_variants_variability_test.vcf.gz b/example/Sequencing/CNVcaller/test/gnomad_variants_variability_test.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a3db0d784c4e51d435805418511929e2be4d25a Binary files /dev/null and b/example/Sequencing/CNVcaller/test/gnomad_variants_variability_test.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/gnomad_variants_variability_test.vcf.gz.tbi b/example/Sequencing/CNVcaller/test/gnomad_variants_variability_test.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..7480579e3e6997373eb5c275186f99219a5c7d9b Binary files /dev/null and b/example/Sequencing/CNVcaller/test/gnomad_variants_variability_test.vcf.gz.tbi differ diff --git a/example/Sequencing/CNVcaller/test/test_multiple_CNV.vcf.gz b/example/Sequencing/CNVcaller/test/test_multiple_CNV.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..21056baa94b935e735157397eb7fa8e1f32ce3c5 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/test_multiple_CNV.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/test_multiple_CNV_8shift.vcf.gz b/example/Sequencing/CNVcaller/test/test_multiple_CNV_8shift.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..0246e5702f0f4f19217e40474a65439c58df34bd Binary files /dev/null and b/example/Sequencing/CNVcaller/test/test_multiple_CNV_8shift.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/test_multiple_CNV_MUC20.vcf.gz b/example/Sequencing/CNVcaller/test/test_multiple_CNV_MUC20.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..74c7c14d2168c827cf96c78747a9b7a1f50beaa8 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/test_multiple_CNV_MUC20.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/test_multiple_CNV_MUC20.vcf.gz.tbi b/example/Sequencing/CNVcaller/test/test_multiple_CNV_MUC20.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..a7f82c25036c56641ff8da96b76ca1535c79d780 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/test_multiple_CNV_MUC20.vcf.gz.tbi differ diff --git a/example/Sequencing/CNVcaller/test/test_one_CNV.vcf.gz b/example/Sequencing/CNVcaller/test/test_one_CNV.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..44d847ea18c4451fd742f2d8f5b4c543e953bf5d Binary files /dev/null and b/example/Sequencing/CNVcaller/test/test_one_CNV.vcf.gz differ diff --git a/example/Sequencing/CNVcaller/test/unseed_3calls_cnv b/example/Sequencing/CNVcaller/test/unseed_3calls_cnv new file mode 100644 index 0000000000000000000000000000000000000000..d7abfa2be904387357ebe43ea4b78f41c53d2325 Binary files /dev/null and b/example/Sequencing/CNVcaller/test/unseed_3calls_cnv differ diff --git a/example/Sequencing/CNVcaller/test/unseed_3calls_cnv_variability b/example/Sequencing/CNVcaller/test/unseed_3calls_cnv_variability new file mode 100644 index 0000000000000000000000000000000000000000..a6faa05f0815fa55f6122410d6b4f8d75719672f Binary files /dev/null and b/example/Sequencing/CNVcaller/test/unseed_3calls_cnv_variability differ diff --git a/example/Sequencing/CNVcaller/testing.cpp b/example/Sequencing/CNVcaller/testing.cpp index 1a1c67dbc9aa7b32ce8e82fb6b058774ef9495fb..4823a4bd60ecc16e45b4288560df8ba84d80914b 100644 --- a/example/Sequencing/CNVcaller/testing.cpp +++ b/example/Sequencing/CNVcaller/testing.cpp @@ -57,31 +57,100 @@ BOOST_AUTO_TEST_CASE( CNV_caller_test_add_32bases_pattern_with_variability ) reference.load("reference_vector"); // chromosome 0 - add_32bases_pattern_with_variability(0,195452030,reference,map,"test/gnomad_variants.vcf.gz",0,-1,NO_PADDING); + add_32bases_pattern_with_variability(2,195452030,reference,map,"test/gnomad_variants.vcf.gz",0,-1,NO_PADDING); - BOOST_REQUIRE_EQUAL(map.size(),1); - auto i = map.begin(); - BOOST_REQUIRE_EQUAL(i->first >> 62,0); - BOOST_REQUIRE_EQUAL(i->first & 0x3,3); - - BOOST_REQUIRE_EQUAL(i->second.size(),1); - - BOOST_REQUIRE_EQUAL(i->second.get(0).padding_left, -1); - BOOST_REQUIRE_EQUAL(i->second.get(0).padding_right, NO_PADDING); + BOOST_REQUIRE_EQUAL(map.size(),8); + + // check that these seed are presents + std::string s1("TCATCACCCCGTCACGGGCCTCAGAGAGCAGC"); // reference + std::string s2("TCATCACCCCCTCACGGGCCTCAGAGAGCAGC"); // variants + std::string s3("TCATCACCCCGTCACGGGTCTCAGAGAGCAGC"); + std::string s4("TCATCACCCCGTCACGGGCCTCAGAGCAGCGC"); // <--- indel + std::string s5("TCATCACCCCGTCACGGGCCTCAGAGAGCAGT"); + + std::string s6("TCATCACCCCCTCACGGGTCTCAGAGAGCAGC"); // 1-2 + std::string s7("TCATCACCCCCTCACGGGCCTCAGAGAGCAGT"); // 1-4 + std::string s8("TCATCACCCCGTCACGGGTCTCAGAGAGCAGT"); // 2-4 + + size_t s1_s = CNVcaller::convert_to_bit_pattern(s1.c_str()); + size_t s2_s = CNVcaller::convert_to_bit_pattern(s2.c_str()); + size_t s3_s = CNVcaller::convert_to_bit_pattern(s3.c_str()); + size_t s4_s = CNVcaller::convert_to_bit_pattern(s4.c_str()); + size_t s5_s = CNVcaller::convert_to_bit_pattern(s5.c_str()); + size_t s6_s = CNVcaller::convert_to_bit_pattern(s6.c_str()); + size_t s7_s = CNVcaller::convert_to_bit_pattern(s7.c_str()); + size_t s8_s = CNVcaller::convert_to_bit_pattern(s8.c_str()); + + + BOOST_REQUIRE(map.find(s1_s) != map.end()); + BOOST_REQUIRE(map.find(s2_s) != map.end()); + BOOST_REQUIRE(map.find(s3_s) != map.end()); + BOOST_REQUIRE(map.find(s4_s) != map.end()); + BOOST_REQUIRE(map.find(s5_s) != map.end()); + BOOST_REQUIRE(map.find(s6_s) != map.end()); + BOOST_REQUIRE(map.find(s7_s) != map.end()); + BOOST_REQUIRE(map.find(s8_s) != map.end()); + + // print all seeds in map + +/* auto i = map.begin(); + + while (i != map.end()) { + std::cout << CNVcaller::string_from_32bases_pattern(i->first) << std::endl; + i++; + }*/ +} - map.clear(); +BOOST_AUTO_TEST_CASE( CNV_caller_test_add_32bases_pattern_with_variability_range_shift ) +{ + openfpm::vector<openfpm::vector<char>> reference; + tsl::hopscotch_map<size_t, openfpm::vector<CNVcaller::CNV_event_link>> map; - add_32bases_pattern_with_variability(0,195452030,reference,map,"test/gnomad_variants.vcf.gz",1,-2,NO_PADDING); + reference.load("reference_vector"); - BOOST_REQUIRE_EQUAL(map.size(),1); - i = map.begin(); - BOOST_REQUIRE_EQUAL(i->first >> 62,3); - BOOST_REQUIRE_EQUAL(i->first & 0x03,2); + // chromosome 0 + add_32bases_pattern_with_variability_range_shift(2,195452030,19545262,8,reference,map,"test/gnomad_variants.vcf.gz",0,-1,NO_PADDING); + BOOST_REQUIRE_EQUAL(map.size(),8); - - BOOST_REQUIRE_EQUAL(i->second.get(0).padding_left, -2); - BOOST_REQUIRE_EQUAL(i->second.get(0).padding_right, NO_PADDING); + // check that these seed are presents + std::string s1("TCATCACCCCGTCACGGGCCTCAGAGAGCAGC"); // reference + std::string s2("TCATCACCCCCTCACGGGCCTCAGAGAGCAGC"); // variants + std::string s3("TCATCACCCCGTCACGGGTCTCAGAGAGCAGC"); + std::string s4("TCATCACCCCGTCACGGGCCTCAGAGCAGCGC"); // <--- indel + std::string s5("TCATCACCCCGTCACGGGCCTCAGAGAGCAGT"); + + std::string s6("TCATCACCCCCTCACGGGTCTCAGAGAGCAGC"); // 1-2 + std::string s7("TCATCACCCCCTCACGGGCCTCAGAGAGCAGT"); // 1-4 + std::string s8("TCATCACCCCGTCACGGGTCTCAGAGAGCAGT"); // 2-4 + + size_t s1_s = CNVcaller::convert_to_bit_pattern(s1.c_str()); + size_t s2_s = CNVcaller::convert_to_bit_pattern(s2.c_str()); + size_t s3_s = CNVcaller::convert_to_bit_pattern(s3.c_str()); + size_t s4_s = CNVcaller::convert_to_bit_pattern(s4.c_str()); + size_t s5_s = CNVcaller::convert_to_bit_pattern(s5.c_str()); + size_t s6_s = CNVcaller::convert_to_bit_pattern(s6.c_str()); + size_t s7_s = CNVcaller::convert_to_bit_pattern(s7.c_str()); + size_t s8_s = CNVcaller::convert_to_bit_pattern(s8.c_str()); + + + BOOST_REQUIRE(map.find(s1_s) != map.end()); + BOOST_REQUIRE(map.find(s2_s) != map.end()); + BOOST_REQUIRE(map.find(s3_s) != map.end()); + BOOST_REQUIRE(map.find(s4_s) != map.end()); + BOOST_REQUIRE(map.find(s5_s) != map.end()); + BOOST_REQUIRE(map.find(s6_s) != map.end()); + BOOST_REQUIRE(map.find(s7_s) != map.end()); + BOOST_REQUIRE(map.find(s8_s) != map.end()); + + // print all seeds in map + +/* auto i = map.begin(); + + while (i != map.end()) { + std::cout << CNVcaller::string_from_32bases_pattern(i->first) << std::endl; + i++; + }*/ } BOOST_AUTO_TEST_CASE( CNV_caller_test_32_bases_to_string ) @@ -674,39 +743,203 @@ BOOST_AUTO_TEST_CASE( CNV_caller_evidence_probability_for_each_event ) // Get the seed number associated to the call int seed_num = 0; - for (int i = 0 ; i < seqs.get<0>(0).get<0>(0).size() ; i++) { + // For rach read + for (int j = 0 ; j < seqs.get<0>(0).size() ; j++) { + for (int i = 0 ; i < seqs.get<0>(0).get<0>(j).size() ; i++) { + + seed_pos = seqs.get<0>(0).get<0>(j).get<4>(i); + + Eigen::ArrayXd eml3 = Evidence_matrix_log(reference, + seqs.get<0>(0).get<0>(j).get<0>(i), + un_seed.get(j), + calls_to_test.get(j), + -6, + seqs.get<0>(0).get<0>(j).get<1>(i), + seqs.get<0>(0).get<0>(j).get<2>(i), + seed_pos, + chr_ins, + bp_insert_prob); + + std::cout << "Probability: " << eml3 << std::endl; + + BOOST_REQUIRE(eml3(59) > -20.0 ); + BOOST_REQUIRE(eml3(126) > -20.0); + } + } + + +} + + +BOOST_AUTO_TEST_CASE( CNV_caller_evidence_probability_for_each_event_with_variability ) +{ + openfpm::vector<openfpm::vector<char>> reference; + reference.load("reference_vector"); + + openfpm::vector<CNVcaller::call_vcf> calls_to_test; + CNVcaller::read_CNV_hypothesys("test/test_multiple_CNV_MUC20.vcf.gz",calls_to_test); + tsl::hopscotch_map<size_t, openfpm::vector<CNVcaller::CNV_event_link>> map; + + All_samples_call_related_relevant_read_type seqs; + + // remeber is 0-based index, call id is 0, padding between call-start and seed is -1 + int offset = 0; + + // call id 1 + add_32bases_pattern_with_variability(2,195452561,reference,map,"test/gnomad_variants_variability_test.vcf.gz",1,195452561-195451991+32,NO_PADDING); + + auto i = map.begin(); + + while (i != map.end()) { + std::cout << CNVcaller::string_from_32bases_pattern(i->first) << std::endl; + i++; + } + + seqs.resize(3); + + std::string test_bam("test/HG002_3calls_reads_cnv_sort.bam"); + collect_reads_for_call_evidence(test_bam,map,calls_to_test,seqs.get<0>(0),reference,6,3); + + // We check that all read are coupled, for all elements in the map + + for (int i = 0 ; i < seqs.size() ; i++) { + for (int j = 0 ; j < seqs.get<0>(i).size() ; j++) { + for (int k = 0 ; k < seqs.get<0>(i).get<0>(j).size() ; k++) { + auto & pair = seqs.get<0>(i).get<0>(j).get<0>(k); + BOOST_REQUIRE((pair.get<1>(3) == 'A' || pair.get<1>(3) == 'C' || pair.get<1>(3) == 'T' || pair.get<1>(3) == 'G')); + } + } + } + + openfpm::vector<CNVcaller::seed_id> seed; + convert_seed_map_to_array(seed,map,calls_to_test); + + // Find all reference points + openfpm::vector<openfpm::vector<aggregate<short int, int, int>>> un_seed; +// un_seed.resize(calls_to_test.size()); +// get_reference_point(seed,reference,un_seed); + un_seed.load("test/unseed_3calls_cnv_variability"); + + // Calculate evidence for each of the read pair + + openfpm::vector<int> chr_ins; + chr_ins.resize(23); + + for (int i = 0 ; i < chr_ins.size(); i++) { + chr_ins.get(i) = 210; + } + + // Load insert size probability + + openfpm::vector<double> bp_insert_prob; + bp_insert_prob.load("insert_probability_distribution"); + + // Get the seed number associated to the call + int seed_num = 0; + + // For rach read + for (int j = 0 ; j < seqs.get<0>(0).size() ; j++) { + for (int i = 0 ; i < seqs.get<0>(0).get<0>(j).size() ; i++) { + + size_t seed_pos = seqs.get<0>(0).get<0>(j).get<4>(i); + + Eigen::ArrayXd eml3 = Evidence_matrix_log(reference, + seqs.get<0>(0).get<0>(j).get<0>(i), + un_seed.get(j), + calls_to_test.get(j), + -6, + seqs.get<0>(0).get<0>(j).get<1>(i), + seqs.get<0>(0).get<0>(j).get<2>(i), + seed_pos, + chr_ins, + bp_insert_prob); + + std::cout << "Probability: " << eml3 << std::endl; + + BOOST_REQUIRE(eml3(59) > -20.0 ); + BOOST_REQUIRE(eml3(121) < -20.0); + } + } + + +} + +// Many read events +BOOST_AUTO_TEST_CASE( CNV_caller_evidence_probability_for_each_event_with_variability_many_reads ) +{ + openfpm::vector<openfpm::vector<char>> reference; + reference.load("reference_vector"); + + openfpm::vector<CNVcaller::call_vcf> calls_to_test; + CNVcaller::read_CNV_hypothesys("test/DGV_variants_MUC20.vcf.gz",calls_to_test); + tsl::hopscotch_map<size_t, openfpm::vector<CNVcaller::CNV_event_link>> map; + + All_samples_call_related_relevant_read_type seqs; + + // remeber is 0-based index, call id is 0, padding between call-start and seed is -1 + int offset = 0; + + // call id 1 + add_32bases_pattern_with_variability(2,195452561,reference,map,"test/gnomad_variants_variability_test.vcf.gz",1,195452561-195451991+32,NO_PADDING); + + auto i = map.begin(); + + while (i != map.end()) { + std::cout << CNVcaller::string_from_32bases_pattern(i->first) << std::endl; + i++; + } + + seqs.resize(3); + + std::string test_bam("test/HG002_reads_cnv_MUC20.bam"); + collect_reads_for_call_evidence(test_bam,map,calls_to_test,seqs.get<0>(0),reference,6,3); + + // We check that all read are coupled, for all elements in the map + + for (int i = 0 ; i < seqs.size() ; i++) { + for (int j = 0 ; j < seqs.get<0>(i).size() ; j++) { + for (int k = 0 ; k < seqs.get<0>(i).get<0>(j).size() ; k++) { + auto & pair = seqs.get<0>(i).get<0>(j).get<0>(k); + BOOST_REQUIRE((pair.get<1>(3) == 'A' || pair.get<1>(3) == 'C' || pair.get<1>(3) == 'T' || pair.get<1>(3) == 'G')); + } + } + } + + openfpm::vector<CNVcaller::seed_id> seed; + convert_seed_map_to_array(seed,map,calls_to_test); + + // Find all reference points + openfpm::vector<openfpm::vector<aggregate<short int, int, int>>> un_seed; +// un_seed.resize(calls_to_test.size()); +// get_reference_point(seed,reference,un_seed); + un_seed.load("test/unseed_3calls_cnv_variability"); - seed_pos = seqs.get<0>(0).get<0>(0).get<4>(i); + // Calculate evidence for each of the read pair - Eigen::ArrayXd eml3 = Evidence_matrix_log(reference, - seqs.get<0>(0).get<0>(0).get<0>(i), - un_seed.get(0), - calls_to_test.get(0), - -6, - seqs.get<0>(0).get<0>(0).get<1>(i), - seqs.get<0>(0).get<0>(0).get<2>(i), - seed_pos, - chr_ins, - bp_insert_prob); + openfpm::vector<int> chr_ins; + chr_ins.resize(23); - BOOST_REQUIRE(eml3(59) > -20.0 ); - BOOST_REQUIRE(eml3(126) > -20.0); + for (int i = 0 ; i < chr_ins.size(); i++) { + chr_ins.get(i) = 210; } - char seq[16384]; - CNVcaller::get_sequence(reference,2,195451991,195451991+100,-1,-1,seq); + // Load insert size probability + + openfpm::vector<double> bp_insert_prob; + bp_insert_prob.load("insert_probability_distribution"); - std::cout << seq << std::endl; + // Get the seed number associated to the call + int seed_num = 0; // For rach read for (int j = 0 ; j < seqs.get<0>(0).size() ; j++) { - for (int i = 0 ; i < seqs.get<0>(0).get<0>(0).size() ; i++) { + for (int i = 0 ; i < seqs.get<0>(0).get<0>(j).size() ; i++) { - seed_pos = seqs.get<0>(0).get<0>(j).get<4>(i); + size_t seed_pos = seqs.get<0>(0).get<0>(j).get<4>(i); Eigen::ArrayXd eml3 = Evidence_matrix_log(reference, seqs.get<0>(0).get<0>(j).get<0>(i), - un_seed.get(0), + un_seed.get(j), calls_to_test.get(j), -6, seqs.get<0>(0).get<0>(j).get<1>(i), @@ -718,13 +951,14 @@ BOOST_AUTO_TEST_CASE( CNV_caller_evidence_probability_for_each_event ) std::cout << "Probability: " << eml3 << std::endl; BOOST_REQUIRE(eml3(59) > -20.0 ); - BOOST_REQUIRE(eml3(126) > -20.0); + BOOST_REQUIRE(eml3(121) < -20.0); } } } + BOOST_AUTO_TEST_SUITE_END() // initialization function: