forked from abretaud/galaxy-apollo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_features_from_gff3.py
99 lines (85 loc) · 4.03 KB
/
create_features_from_gff3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
import sys
import json
import time
import argparse
from webapollo import WebApolloInstance, featuresToFeatureSchema
from webapollo import WAAuth, OrgOrGuess, GuessOrg, AssertUser
from BCBio import GFF
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services')
WAAuth(parser)
parser.add_argument('email', help='User Email')
OrgOrGuess(parser)
parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
args = parser.parse_args()
wa = WebApolloInstance(args.apollo, args.username, args.password)
# User must have an account
gx_user = AssertUser(wa.users.loadUsers(email=args.email))
# Get organism
org_cn = GuessOrg(args, wa)
if isinstance(org_cn, list):
org_cn = org_cn[0]
# TODO: Check user perms on org.
org = wa.organisms.findOrganismByCn(org_cn)
bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID']
sys.stdout.write('# ')
sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages']))
sys.stdout.write('\n')
# print(wa.annotations.getFeatures())
for rec in GFF.parse(args.gff3):
wa.annotations.setSequence(rec.id, org['id'])
for feature in rec.features:
# We can only handle genes right now
if feature.type != 'gene':
continue
# Convert the feature into a presentation that Apollo will accept
featureData = featuresToFeatureSchema([feature])
try:
# We're experiencing a (transient?) problem where gene_001 to
# gene_025 will be rejected. Thus, hardcode to a known working
# gene name and update later.
featureData[0]['name'] = 'gene_000'
# Extract CDS feature from the feature data, this will be used
# to set the CDS location correctly (apollo currently screwing
# this up (2.0.6))
CDS = featureData[0]['children'][0]['children']
CDS = [x for x in CDS if x['type']['name'] == 'CDS'][0]['location']
# Create the new feature
newfeature = wa.annotations.addFeature(featureData, trustme=True)
# Extract the UUIDs that apollo returns to us
mrna_id = newfeature['features'][0]['uniquename']
gene_id = newfeature['features'][0]['parent_id']
# Sleep to give it time to actually persist the feature. Apollo
# is terrible about writing + immediately reading back written
# data.
time.sleep(1)
# Correct the translation start, but with strand specific log
if CDS['strand'] == 1:
wa.annotations.setTranslationStart(mrna_id, min(CDS['fmin'], CDS['fmax']))
else:
wa.annotations.setTranslationStart(mrna_id, max(CDS['fmin'], CDS['fmax']) - 1)
# Finally we set the name, this should be correct.
wa.annotations.setName(mrna_id, feature.qualifiers.get('product', ["Unknown"])[0])
wa.annotations.setName(gene_id, feature.qualifiers.get('product', ["Unknown"])[0])
for (k, v) in feature.qualifiers.items():
if k not in bad_quals:
# set qualifier
pass
sys.stdout.write('\t'.join([
feature.id,
gene_id,
'success',
"Dropped qualifiers: %s" % (json.dumps({k: v for (k, v) in feature.qualifiers.items() if k not in bad_quals})),
]))
except Exception as e:
sys.stdout.write('\t'.join([
feature.id,
'',
'ERROR',
str(e)
]))
sys.stdout.write('\n')