Commit 4c7b1efa authored by Holger Brandl

new mouse ens92 ensembl build

parent c10a6f48
# Build a custom Ensembl v88 / GRCh38 (human) reference tree below IGENOME_BASE:
# scaffold-free genome FASTA, gene annotation GTF, and STAR / Bowtie / Bowtie2 /
# Kallisto indices.
#
# Helpers used below (mcdir, ll, dge_create_star_index, jl) are not defined in
# this script; presumably they come from dge_utils.sh or PATH -- TODO confirm.

#export NGS_TOOLS=/net/mack/lustre/projects/bioinfo/$(whoami)/scripts/ngs_tools
export NGS_TOOLS="/projects/bioinfo/$(whoami)/scripts/ngs_tools"

# Everything created from here on is group-writable and closed to others.
umask u=rwx,g=rwx,o=

# Abort early when the per-user tools checkout is missing.
ls "${NGS_TOOLS}" >/dev/null || { echo "not all project resources are well defined" 1>&2; exit 1; }

source "${NGS_TOOLS}/dge_workflow/dge_utils.sh"
export PATH="${NGS_TOOLS}/dge_workflow:${PATH}"

# todo refactor this to become more independent of the actual species
IGENOME_BASE=/projects/bioinfo/igenomes/Homo_sapiens/Ensembl_v88_custom/GRCh38

## genome sequence
# NOTE(review): mcdir presumably creates the directory and cd's into it -- confirm in dge_utils.sh
mcdir "${IGENOME_BASE}"
mcdir "${IGENOME_BASE}/Sequence/WholeGenomeFasta"

wget ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
#gunzip -c Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz cut -f1 | head

## remove scaffolds
gunzip -c Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz > Homo_sapiens.GRCh38.dna.primary_assembly.fa

# List the sequence headers of the decompressed assembly. This used to grep
# genome_with_scaffolds.fa, a file that is never created by this script.
grep -F ">" Homo_sapiens.GRCh38.dna.primary_assembly.fa

# Drop every entry whose description contains "dna:scaffold". The quoted "EOF"
# delimiter keeps the shell from expanding anything inside the Kotlin script;
# cut then keeps only the first space-separated field of each output line.
kscript - Homo_sapiens.GRCh38.dna.primary_assembly.fa <<"EOF" | cut -f1 -d' ' > genome.fa
//DEPS de.mpicbg.scicomp:kutils:0.7
//KOTLIN_OPTS -J-Xmx20g
import de.mpicbg.scicomp.bioinfo.openFasta
import java.io.File
import kotlin.system.exitProcess
openFasta(File(args[0])).
filterNot { it.description!!.contains("dna:scaffold") }.
forEach { print(it.toEntryString()) }
EOF

# Index the filtered genome in the background; reaped by the final `wait`.
samtools faidx genome.fa &

## gene annotation
mcdir "${IGENOME_BASE}/Annotation/Genes"
wget ftp://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz
gunzip -c Homo_sapiens.GRCh38.88.gtf.gz > genes.gtf
#head genes.gtf

## aligner indices
# NOTE(review): `ll` is not a standard command; it only works if dge_utils.sh
# (or the calling shell) defines it -- confirm.
ll "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa"

# STAR index is built locally in the background; reaped by the final `wait`.
dge_create_star_index "${IGENOME_BASE}" &

# mkdir -p so that re-running the build does not fail on existing directories.
mkdir -p "${IGENOME_BASE}/Sequence/BowtieIndex"
jl submit "~/bin/bowtie-1.1.2/bowtie-build ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequence/BowtieIndex/genome"
ln -s "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa" "${IGENOME_BASE}/Sequence/BowtieIndex/genome.fa"

mkdir -p "${IGENOME_BASE}/Sequence/Bowtie2Index"
jl submit "bowtie2-build ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequence/Bowtie2Index/genome"
ln -s "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa" "${IGENOME_BASE}/Sequence/Bowtie2Index/genome.fa"

# NOTE(review): KallistoIndex is created, but the cDNA download and the index
# below are written to the current directory, not into KallistoIndex -- confirm
# whether a cd into that directory is missing.
mkdir -p "${IGENOME_BASE}/Sequence/KallistoIndex"
wget ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
refFasta=cdna.fasta
# Decompress exactly the file fetched above instead of every *.cdna.all.fa.gz
# that happens to sit in the current directory.
gunzip -c Homo_sapiens.GRCh38.cdna.all.fa.gz > "${refFasta}"
jl submit "kallisto index -i ${refFasta}.kallisto.idx ${refFasta}"

jl wait --email
wait # for star index creation
......@@ -10,23 +10,23 @@ ls "${NGS_TOOLS}" >/dev/null || { echo "not all project resources are well defin
source ${NGS_TOOLS}/dge_workflow/dge_utils.sh
export PATH=${NGS_TOOLS}/dge_workflow:$PATH
IGENOME_BASE=/projects/bioinfo/igenomes/Mus_musculus/Ensembl_v92_custom/GRCm38_p6
# todo refactor this to become more independent of the actual species
#chmod -R u+xw /projects/bioinfo/igenomes/
chmod -R u+w /projects/bioinfo/igenomes/Mus_musculus
IGENOME_BASE=/projects/bioinfo/igenomes/Homo_sapiens/Ensembl_v88_custom/GRCh38
mcdir ${IGENOME_BASE}
mkdir -p ${IGENOME_BASE}/Sequence/
mcdir ${IGENOME_BASE}/Sequence/WholeGenomeFasta
wget ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
wget ftp://ftp.ensembl.org/pub/release-92/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz
#gunzip -c Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz cut -f1 | head
## remove scaffolds
gunzip -c Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz > Homo_sapiens.GRCh38.dna.primary_assembly.fa
grep -F ">" genome_with_scaffolds.fa
gunzip -c Mus_musculus.GRCm38.dna.primary_assembly.fa.gz > Mus_musculus.GRCm38.dna.primary_assembly.fa
grep -F ">" Mus_musculus.GRCm38.dna.primary_assembly.fa
kscript - Homo_sapiens.GRCh38.dna.primary_assembly.fa <<"EOF" | cut -f1 -d' ' > genome.fa
kscript - Mus_musculus.GRCm38.dna.primary_assembly.fa <<"EOF" | cut -f1 -d' ' > genome.fa
//DEPS de.mpicbg.scicomp:kutils:0.7
//KOTLIN_OPTS -J-Xmx20g
......@@ -45,6 +45,7 @@ samtools faidx genome.fa &
mcdir ${IGENOME_BASE}/Annotation/Genes
wget ftp://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz
#wget ftp://ftp.ensembl.org/pub/release-92/gtf/mus_musculus/Mus_musculus.GRCm38.92.gtf.gz
gunzip -c Homo_sapiens.GRCh38.88.gtf.gz > genes.gtf
#head genes.gtf
......@@ -62,7 +63,7 @@ ln -s ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequen
mkdir ${IGENOME_BASE}/Sequence/KallistoIndex
wget ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
wget ftp://ftp.ensembl.org/pub/release-92/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz
refFasta=cdna.fasta
gunzip -c *.cdna.all.fa.gz > ${refFasta}
......@@ -71,3 +72,5 @@ jl submit "kallisto index -i ${refFasta}.kallisto.idx ${refFasta}"
jl wait --email
wait # for star index creation
mailme "igenome build done in $(pwd)"
\ No newline at end of file
......@@ -65,83 +65,6 @@ tar xvf Homo_sapiens_NCBI_GRCh38.tar.gz
# GRCh38 (hg38)
########################################################################################################################
## hsap latest
# Build a custom Ensembl v88 / GRCh38 (human) reference tree below IGENOME_BASE:
# scaffold-free genome FASTA, gene annotation GTF, and STAR / Bowtie / Bowtie2 /
# Kallisto indices.
#
# Helpers used below (mcdir, ll, dge_create_star_index, jl) are not defined in
# this script; presumably they come from dge_utils.sh or PATH -- TODO confirm.

#export NGS_TOOLS=/net/mack/lustre/projects/bioinfo/$(whoami)/scripts/ngs_tools
export NGS_TOOLS="/projects/bioinfo/$(whoami)/scripts/ngs_tools"

# Everything created from here on is group-writable and closed to others.
umask u=rwx,g=rwx,o=

# Abort early when the per-user tools checkout is missing.
ls "${NGS_TOOLS}" >/dev/null || { echo "not all project resources are well defined" 1>&2; exit 1; }

source "${NGS_TOOLS}/dge_workflow/dge_utils.sh"
export PATH="${NGS_TOOLS}/dge_workflow:${PATH}"

# todo refactor this to become more independent of the actual species
IGENOME_BASE=/projects/bioinfo/igenomes/Homo_sapiens/Ensembl_v88_custom/GRCh38

## genome sequence
# NOTE(review): mcdir presumably creates the directory and cd's into it -- confirm in dge_utils.sh
mcdir "${IGENOME_BASE}"
mcdir "${IGENOME_BASE}/Sequence/WholeGenomeFasta"

wget ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
#gunzip -c Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz cut -f1 | head

## remove scaffolds
gunzip -c Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz > Homo_sapiens.GRCh38.dna.primary_assembly.fa

# List the sequence headers of the decompressed assembly. This used to grep
# genome_with_scaffolds.fa, a file that is never created by this script.
grep -F ">" Homo_sapiens.GRCh38.dna.primary_assembly.fa

# Drop every entry whose description contains "dna:scaffold". The quoted "EOF"
# delimiter keeps the shell from expanding anything inside the Kotlin script;
# cut then keeps only the first space-separated field of each output line.
kscript - Homo_sapiens.GRCh38.dna.primary_assembly.fa <<"EOF" | cut -f1 -d' ' > genome.fa
//DEPS de.mpicbg.scicomp:kutils:0.7
//KOTLIN_OPTS -J-Xmx20g
import de.mpicbg.scicomp.bioinfo.openFasta
import java.io.File
import kotlin.system.exitProcess
openFasta(File(args[0])).
filterNot { it.description!!.contains("dna:scaffold") }.
forEach { print(it.toEntryString()) }
EOF

# Index the filtered genome in the background; reaped by the final `wait`.
samtools faidx genome.fa &

## gene annotation
mcdir "${IGENOME_BASE}/Annotation/Genes"
wget ftp://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz
gunzip -c Homo_sapiens.GRCh38.88.gtf.gz > genes.gtf
#head genes.gtf

## aligner indices
# NOTE(review): `ll` is not a standard command; it only works if dge_utils.sh
# (or the calling shell) defines it -- confirm.
ll "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa"

# STAR index is built locally in the background; reaped by the final `wait`.
dge_create_star_index "${IGENOME_BASE}" &

# mkdir -p so that re-running the build does not fail on existing directories.
mkdir -p "${IGENOME_BASE}/Sequence/BowtieIndex"
jl submit "~/bin/bowtie-1.1.2/bowtie-build ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequence/BowtieIndex/genome"
ln -s "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa" "${IGENOME_BASE}/Sequence/BowtieIndex/genome.fa"

mkdir -p "${IGENOME_BASE}/Sequence/Bowtie2Index"
jl submit "bowtie2-build ${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa ${IGENOME_BASE}/Sequence/Bowtie2Index/genome"
ln -s "${IGENOME_BASE}/Sequence/WholeGenomeFasta/genome.fa" "${IGENOME_BASE}/Sequence/Bowtie2Index/genome.fa"

# NOTE(review): KallistoIndex is created, but the cDNA download and the index
# below are written to the current directory, not into KallistoIndex -- confirm
# whether a cd into that directory is missing.
mkdir -p "${IGENOME_BASE}/Sequence/KallistoIndex"
wget ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
refFasta=cdna.fasta
# Decompress exactly the file fetched above instead of every *.cdna.all.fa.gz
# that happens to sit in the current directory.
gunzip -c Homo_sapiens.GRCh38.cdna.all.fa.gz > "${refFasta}"
jl submit "kallisto index -i ${refFasta}.kallisto.idx ${refFasta}"

jl wait --email
wait # for star index creation
########################################################################################################################
## bonobo
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment