Commit 16ef78d7 authored by mirandaa

update main to match new resources, trigger bug

parent fee4d33e
......@@ -3,19 +3,28 @@ Created on 08.11.2018
@author: mirandaa
'''
import re
if __name__ == '__main__':
print('''program to get the fasta files from a list of accession numbers
you will need to replace the fasta file with yours,
and specify the accession numbers in the file below (guess wich one)
then run this script with those files in the same directory
good luck!
''')
if __name__ == '__main__': #A8DYP0
with open('Accessionnumbers.txt', 'r') as accessionnumbers_file:
accessionnumbers = accessionnumbers_file.readlines()
accessionnumbers = [e.strip() for e in accessionnumbers]
print(accessionnumbers)
with open('uniprot-proteome_drosophila_20181030.fasta', 'r') as fasta_file:
with open('protg_uniprot-mouse20170704.fasta', 'r') as fasta_file:
fasta = fasta_file.read()
parts = fasta.split('>')
#parts = fasta.split('>')
parts =re.split('^>', fasta)
partsDict = {}
for part in parts:
part1 = part.split('\n')[0]
......@@ -26,7 +35,11 @@ if __name__ == '__main__': #A8DYP0
cleaner_dict = {}
for k in partsDict:
if k=='': continue
acccessionNumber = k.split('|')[1]
try:
acccessionNumber = k.split('|')[1]
except Exception as e:
print('problemwith ',k)
continue
cleaner_dict[acccessionNumber] = partsDict[k]
result = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment