Commit 89c2ef05 authored by mirandaa's avatar mirandaa

formatting of input files

parent 1ce17479
......@@ -4,22 +4,49 @@ Created on 08.11.2018
@author: mirandaa
'''
import glob
if __name__ == '__main__':
print('''program to get the fasta files from a list of accession numbers
you will need to replace the fasta file with yours,
and specify the accession numbers in the file below (guess wich one)
then run this script with those files in the same directory
print('''Program to get the fasta files from a list of accession numbers
save a *.fasta file in the same directory as the this script
also save a *.txt file, with the accesion numbers, in the same directory
the file should look like this...
P15105
P00186
Q91X77
then to run the program
python main
good luck!
''')
''')
txtFiles = glob.glob('*.txt')
if not txtFiles:
raise Exception('no *.txt file found')
raise SystemExit
txtFile = txtFiles[0]
print('using {} as accesor file'.format(txtFile))
with open('Accessionnumbers.txt', 'r') as accessionnumbers_file:
with open(txtFile, 'r') as accessionnumbers_file:
accessionnumbers = accessionnumbers_file.readlines()
accessionnumbers = [e.strip() for e in accessionnumbers]
print(accessionnumbers)
fastaFiles = glob.glob('*.fasta')
if not fastaFiles:
raise Exception('no *.fasta file found')
raise SystemExit
fastaFile = fastaFiles[0]
with open('protg_uniprot-mouse20170704.fasta', 'r') as fasta_file:
print('using {} as fastaFile file'.format(fastaFile))
with open(fastaFile, 'r') as fasta_file:
fasta = fasta_file.read()
parts = fasta.split('\n>')
......@@ -44,8 +71,11 @@ good luck!
for acs in accessionnumbers:
result.append('>'+acs)
result.append(cleaner_dict[acs])
print('resutl file is result.fasta')
with open('result.fasta','w') as f:
for item in result:
f.write("%s\n" % item)
print('done')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment