-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_biosample_geo_loc.py
41 lines (33 loc) · 1.27 KB
/
extract_biosample_geo_loc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python3
import requests, sys, os
from xml.etree import ElementTree
# Function to retrieve 'geo_loc_name' from a biosample ID using the NCBI API
def get_geo_loc_name(biosample_id, api_key=None, timeout=30):
    """Look up the 'geo_loc_name' attribute of a BioSample record via NCBI efetch.

    Args:
        biosample_id: BioSample identifier sent as the efetch ``id`` parameter.
        api_key: Optional NCBI API key; left out of the request when falsy.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of the first ``Attribute`` element whose ``attribute_name``
        is ``geo_loc_name``, or ``None`` when the ID is empty, the request
        fails or times out, the response status is not 200, the body is not
        well-formed XML, or no such attribute is present.
    """
    if biosample_id is None or not str(biosample_id).strip():
        # An empty ID cannot identify a record; skip the pointless round trip.
        return None

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "biosample",
        "id": biosample_id,
    }
    if api_key:
        params["api_key"] = api_key

    try:
        # Without a timeout a single stalled connection would hang the caller
        # indefinitely.
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException:
        # Treat transport failures like any other unavailable record.
        return None
    if response.status_code != 200:
        return None

    try:
        tree = ElementTree.fromstring(response.content)
    except ElementTree.ParseError:
        # A 200 response can still carry a non-XML or truncated body.
        return None

    for attribute in tree.findall(".//Attribute"):
        if attribute.get("attribute_name") == "geo_loc_name":
            return attribute.text
    return None
def main(argv=None):
    """Write a tab-separated table of BioSample IDs and their geo_loc_name.

    Args:
        argv: Command-line arguments without the program name, in the order
            API_KEY, BIOSAMPLE_ID_FILE, OUTPUT_FILE. Defaults to
            ``sys.argv[1:]``. Pass an empty string as API_KEY to run without
            a key.

    Returns:
        Process exit status: 0 on success, 2 on a usage error.
    """
    import time  # local import: only this entry point needs it

    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        program = os.path.basename(sys.argv[0])
        sys.stderr.write(
            f"usage: {program} API_KEY BIOSAMPLE_ID_FILE OUTPUT_FILE\n"
        )
        return 2

    # An empty key means "no key" rather than an invalid key.
    api_key = args[0] or None
    file_path = args[1]
    output_file_path = args[2]

    # Read every ID before opening the output so the input is fully consumed
    # first; blank lines are dropped instead of being sent as empty IDs.
    with open(file_path, 'r') as file:
        stripped_lines = [line.strip() for line in file]
    biosample_ids = [entry for entry in stripped_lines if entry]

    # NCBI E-utilities throttle clients to roughly 3 requests/second without
    # an API key and 10 with one; pacing the loop keeps lookups from being
    # rejected and recorded as None.
    delay = 0.11 if api_key else 0.34

    with open(output_file_path, 'w') as output_file:
        for biosample_id in biosample_ids:
            geo_loc_name = get_geo_loc_name(biosample_id, api_key=api_key)
            # A failed lookup is written as the literal text "None".
            output_file.write(f"{biosample_id}\t{geo_loc_name}\n")
            time.sleep(delay)
    return 0


if __name__ == "__main__":
    sys.exit(main())