diff --git a/chipseq_workflow/cs_region_dba.R b/chipseq_workflow/cs_region_dba.R index 7f1a02d6ff4ddf4531085432b8e00db50d4acbbb..4c4f25b4d03d829cdb97bc5835903d7403310674 100755 --- a/chipseq_workflow/cs_region_dba.R +++ b/chipseq_workflow/cs_region_dba.R @@ -3,7 +3,9 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R") #devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R") -devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R") +#devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R") +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/master/R/bio/diffex_commons.R") + require(tidyr) require(knitr) diff --git a/chipseq_workflow/cs_snippets.sh b/chipseq_workflow/cs_snippets.sh index f781a93f1d3886388f1365c8deedf00c57787fb4..b6f55d8e14a0d5937e42847ed32eb40cfb54c966 100755 --- a/chipseq_workflow/cs_snippets.sh +++ b/chipseq_workflow/cs_snippets.sh @@ -1,10 +1,20 @@ ######################################################################################################################## -### Picard ResortSam +### Chromosome Sorting + +## By default the sorting of samtools faidx is not good, but we can resort it, and samtools sort will respect this sorting order +## To fix existing alginments either use ReorderSam or reheader | resort with samtools + +## check consist sorting order +#samtools view /projects/bioinfo/holger/projects/krause_chipseq/alignments_mmfilt/H3HA_Sphere_mmf.bam | cut -f 3 | uniq +#sort -k1,1g -k2,2n $regionModel.bed | cut -f1 | uniq +### --> use reorder sam http://sourceforge.net/p/samtools/mailman/samtools-help/thread/4DB6F0CD.6050403@umdnj.edu/ sort -k1V $bowtieIndex.fa.dict_tmp > $bowtieIndex.dict samtools view -h $bamFile | removeMultiMappers > ${bamBaseName}_mmf.tmp.bam samtools index ${bamBaseName}_mmf.tmp.bam # because ReorderSam runs substantially faster if the input is an indexed BAM file. + +# Use Picard ResortSam for resorting --> this will sort according to chromosome order in reference fasta only picard ReorderSam I=${bamBaseName}_mmf.tmp.bam O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa ## samtools view -h $bamFile | removeMultiMappers | picard ReorderSam I=/dev/stdin O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa samtools index ${bamBaseName}_mmf.bam diff --git a/chipseq_workflow/peak_report.R b/chipseq_workflow/peak_report.R index cb1c5288a452e86326740b13b7cbeeeb3db31c66..005a2775daf3dab3c851f8f25ffdd535803ec899 100755 --- a/chipseq_workflow/peak_report.R +++ b/chipseq_workflow/peak_report.R @@ -35,8 +35,13 @@ b_peaks <- list.files(".", "*.broadPeak.*", full.names=TRUE, recursive=T) %>% peaks <- rbind_list(n_peaks, b_peaks) +## todo remove this later +peaks <- mutate(peaks, file=file %>%str_replace( "_narrow", ".narrow") %>% str_replace("_broad", ".broad")) + +with(peaks, as.data.frame(table(file))) peaks %<>% separate(file, c("sample", "peak_type", "ext"), sep="[.]") %>% select(-ext) + splitProtTime <- . %>% separate(sample, c("protein", "timpoint"), sep="_", remove=F) #peaks %>% splitProtTime %>% head diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 7ef500a9ea427a9022a7434f1e55f181b3a8a5af..0407aac75abefcb6e4f952b74777dbc23328f738 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -19,7 +19,8 @@ export PATH=/home/brandl/bin/spinr:$PATH # which tophat; which bowtie2; which cuffdiff -export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work +## no longer needed because packages are no kept in home +#export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work ## create fastq report for all fastq and fastq.gz files in the current directory