main.py 1.93 KB
Newer Older
mirandaa's avatar
mirandaa committed
1 2 3 4 5
'''
Created on 08.11.2018

@author: mirandaa
'''
6

mirandaa's avatar
mirandaa committed
7 8
import glob

9
# Name of the file this script writes; it must never be picked up as input.
RESULT_FILE = 'result.fasta'

# Help text printed at start-up (kept verbatim from the original script).
USAGE = '''Program to get the fasta files from a list of accession numbers

save a *.fasta file in the same directory as the this script
also save a *.txt file, with the accesion numbers, in the same directory

the file should look like this...

P15105
P00186
Q91X77

then to run the program
python main 

good luck!
'''


def pick_input_file(candidates, kind, exclude=()):
    """Return the input file to use among *candidates*.

    Candidates are sorted so the choice is deterministic (glob order is
    unspecified), and any name listed in *exclude* is ignored.

    Raises:
        SystemExit: with the message ``no *.<kind> file found`` when no
            usable candidate remains.
    """
    usable = sorted(name for name in candidates if name not in exclude)
    if not usable:
        raise SystemExit('no *.{} file found'.format(kind))
    return usable[0]


def clean_accessions(lines):
    """Strip whitespace from each line and drop lines that end up empty."""
    stripped = (line.strip() for line in lines)
    return [accession for accession in stripped if accession]


def parse_fasta(text):
    """Map accession number -> sequence for every record in *text*.

    Records are separated by a ``>`` at the start of a line. The accession
    is the second ``|``-separated field of the header line, and the
    sequence is the remaining lines of the record joined without newlines.
    Records whose header has no ``|`` are reported and skipped. If the same
    accession occurs more than once, the last record wins.
    """
    sequences = {}
    for record in text.split('\n>'):
        header, _, body = record.partition('\n')
        # The very first record still carries its leading '>'.
        header = header.lstrip('>')
        if header == '':
            continue
        fields = header.split('|')
        if len(fields) < 2:
            print('problemwith ', header)
            continue
        sequences[fields[1]] = body.replace('\n', '')
    return sequences


def build_result(accessions, sequences):
    """Return the output lines (``>accession`` then sequence) in request order.

    Accessions that are missing from *sequences* are reported and skipped
    instead of aborting the whole run with a ``KeyError``.
    """
    lines = []
    for accession in accessions:
        if accession not in sequences:
            print('accession {} not found in fasta file, skipping'.format(accession))
            continue
        lines.append('>' + accession)
        lines.append(sequences[accession])
    return lines


def main():
    """Extract the requested records from a FASTA file into ``result.fasta``.

    Reads the accession numbers from a ``*.txt`` file and the sequences from
    a ``*.fasta`` file, both looked up in the current working directory, and
    writes the selected records to ``result.fasta`` in that directory.
    """
    print(USAGE)

    txt_file = pick_input_file(glob.glob('*.txt'), 'txt')
    print('using {} as accesor file'.format(txt_file))
    with open(txt_file, 'r') as accessions_file:
        accessions = clean_accessions(accessions_file)

    # The output of a previous run also matches '*.fasta'; never reuse it
    # as the input database.
    fasta_file = pick_input_file(
        glob.glob('*.fasta'), 'fasta', exclude=(RESULT_FILE,))
    print('using {} as fastaFile file'.format(fasta_file))
    with open(fasta_file, 'r') as fasta_handle:
        sequences = parse_fasta(fasta_handle.read())

    result = build_result(accessions, sequences)

    print('resutl file is result.fasta')
    with open(RESULT_FILE, 'w') as out:
        out.writelines('{}\n'.format(item) for item in result)

    print('done')


if __name__ == '__main__':
    main()
mirandaa's avatar
mirandaa committed
81