Commit 843116cf authored by Holger Brandl

impl bonobo genome build

parent dc070d46
......@@ -142,6 +142,90 @@ jl wait --email
wait # for star index creation
########################################################################################################################
## bonobo
# Environment for the bonobo (Pan paniscus) genome build: locate the per-user ngs_tools
# checkout, load the DGE helper script and put the workflow scripts on PATH.
#export NGS_TOOLS=/net/mack/lustre/projects/bioinfo/$(whoami)/scripts/ngs_tools
export NGS_TOOLS="/projects/bioinfo/$(whoami)/scripts/ngs_tools"

# files/directories created below: full access for user and group, none for others
umask u=rwx,g=rwx,o=

# abort early if the tools checkout is missing
ls "${NGS_TOOLS}" >/dev/null || { echo "not all project resources are well defined" 1>&2; exit 1; }

# Later steps call helpers (e.g. dge_create_star_index) that are presumably defined in
# dge_utils.sh, so stop here if it cannot be loaded instead of failing halfway through.
source "${NGS_TOOLS}/dge_workflow/dge_utils.sh" || { echo "could not source ${NGS_TOOLS}/dge_workflow/dge_utils.sh" 1>&2; exit 1; }
export PATH="${NGS_TOOLS}/dge_workflow:$PATH"
# Download the soft-masked bonobo toplevel assembly (Ensembl release 91), drop all
# scaffold records and write the remaining sequences to genome.fa.
# todo refactor this to become more independent of the actual species
IGENOME_BASE=/projects/bioinfo/igenomes/Pan_paniscus/Ensembl_v91_custom/panpan1.1

# -p also creates ${IGENOME_BASE} itself. Stop if the directory cannot be entered so the
# download below never lands in whatever directory we happen to be in.
mkdir -p "${IGENOME_BASE}/Sequence/WholeGenomeFasta"
cd "${IGENOME_BASE}/Sequence/WholeGenomeFasta" || { echo "could not cd into ${IGENOME_BASE}/Sequence/WholeGenomeFasta" 1>&2; exit 1; }

wget ftp://ftp.ensembl.org/pub/release-91/fasta/pan_paniscus/dna/Pan_paniscus.panpan1.1.dna_sm.toplevel.fa.gz \
    || { echo "download of the bonobo genome fasta failed" 1>&2; exit 1; }

gunzip Pan_paniscus.panpan1.1.dna_sm.toplevel.fa.gz

# list the sequence headers of the unfiltered assembly
grep -F ">" Pan_paniscus.panpan1.1.dna_sm.toplevel.fa

## remove scaffolds
## just use primary assembly (see https://www.ncbi.nlm.nih.gov/assembly/GCF_000258655.2/) and discard scaffolds
# The kscript snippet skips every record whose description contains "dna_sm:scaffold";
# cut then keeps only the text before the first space on each line, shortening the headers.
kscript - Pan_paniscus.panpan1.1.dna_sm.toplevel.fa <<"EOF" | cut -f1 -d' ' > genome.fa
//DEPS de.mpicbg.scicomp:kutils:0.7
//KOTLIN_OPTS -J-Xmx20g
import de.mpicbg.scicomp.bioinfo.openFasta
import java.io.File
import kotlin.system.exitProcess
openFasta(File(args[0])).
filterNot { it.description!!.contains("dna_sm:scaffold") }.
forEach { print(it.toEntryString()) }
EOF

## check if scaffold-filtering worked
grep -F ">" genome.fa

# build the fasta index in the background; a later `wait` collects it
samtools faidx genome.fa &
# Gene annotation: fetch the Ensembl release-91 GTF, unpack it as genes.gtf and start the
# STAR index build in the background.
# NOTE(review): mcdir is not defined in this section; presumably it creates the directory
# and changes into it - confirm against dge_utils.sh.
mcdir "${IGENOME_BASE}/Annotation/Genes"

# a failed download must not be followed by an index build on a missing/partial annotation
wget ftp://ftp.ensembl.org/pub/release-91/gtf/pan_paniscus/Pan_paniscus.panpan1.1.91.gtf.gz \
    || { echo "download of the bonobo gene annotation failed" 1>&2; exit 1; }
gunzip -c Pan_paniscus.panpan1.1.91.gtf.gz > genes.gtf
#head genes.gtf

# `ll` is usually an interactive-only alias and bash does not expand aliases in
# non-interactive shells, so spell out ls -l to show the genome file
ls -l "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa"

# runs in the background; a later `wait` collects it
dge_create_star_index "${IGENOME_BASE}" &
# Bowtie and Bowtie2 indexes: submit one build job each via jl and link genome.fa next to
# each index prefix.
mcdir "${IGENOME_BASE}/Sequence"

# mkdir -p / ln -sf keep this section re-runnable: plain mkdir and ln -s fail once the
# directory or the link already exists
mkdir -p "${IGENOME_BASE}/Sequence/BowtieIndex"
jl submit -w 10:00 -m 50g "~/bin/bowtie-1.1.2/bowtie-build ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequence/BowtieIndex/genome"
ln -sf "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa" "${IGENOME_BASE}/Sequence/BowtieIndex/genome.fa"

mkdir -p "${IGENOME_BASE}/Sequence/Bowtie2Index"
jl submit -w 10:00 -m 50g "bowtie2-build ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequence/Bowtie2Index/genome"
ln -sf "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa" "${IGENOME_BASE}/Sequence/Bowtie2Index/genome.fa"
# Kallisto index: fetch the Ensembl release-91 cDNA fasta, unpack it as cdna.fasta and
# submit the index build; then block until all submitted and background work is done.
mcdir "${IGENOME_BASE}/Sequence/KallistoIndex"

wget ftp://ftp.ensembl.org/pub/release-91/fasta/pan_paniscus/cdna/Pan_paniscus.panpan1.1.cdna.all.fa.gz \
    || { echo "download of the bonobo cdna fasta failed" 1>&2; exit 1; }

refFasta=cdna.fasta
# name the archive explicitly: a glob would concatenate any other *.cdna.all.fa.gz that
# happens to be in this directory
gunzip -c Pan_paniscus.panpan1.1.cdna.all.fa.gz > "${refFasta}"

jl submit --wait "kallisto index -i ${refFasta}.kallisto.idx ${refFasta}" &

jl wait --email
wait # for samtools and star-index creation
########################################################################################################################
## sync igenomes between bioinfo and cluster
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment