From 6c33a37f2c91f4348a0e8b0eb59464f87fd30019 Mon Sep 17 00:00:00 2001 From: hadleyking Date: Wed, 11 Oct 2023 13:14:31 -0400 Subject: [PATCH 1/3] Updates to enable fixture for test.portal Changes to be committed: modified: api/apps.py modified: api/model/groups.py modified: api/model/prefix.py modified: api/models.py new file: bcodb/fixtures/test_portal.json new file: token.json --- api/apps.py | 6 +- api/model/groups.py | 5 +- api/model/prefix.py | 5 +- api/models.py | 22 +- bcodb/fixtures/test_portal.json | 6544 +++++++++++++++++++++++++++++++ token.json | 1 + 6 files changed, 6570 insertions(+), 13 deletions(-) create mode 100644 bcodb/fixtures/test_portal.json create mode 100644 token.json diff --git a/api/apps.py b/api/apps.py index a27797b3..f06dd842 100755 --- a/api/apps.py +++ b/api/apps.py @@ -20,5 +20,7 @@ class ApiConfig(AppConfig): def ready(self): """Create the anonymous user if they don't exist.""" - if not 'test' in sys.argv: - post_migrate.connect(populate_models, sender=self) \ No newline at end of file + if 'test' in sys.argv or 'loaddata' in sys.argv or 'flush' in sys.argv: + return + else: + post_migrate.connect(populate_models, sender=self) \ No newline at end of file diff --git a/api/model/groups.py b/api/model/groups.py index fc73cc4b..2bda7bdf 100644 --- a/api/model/groups.py +++ b/api/model/groups.py @@ -448,7 +448,10 @@ def associate_user_group(sender, instance, created, **kwargs): if the user isn't anon or the already existent bco_drafter or bco_publisher. """ - if not 'test' in sys.argv: + if 'test' in sys.argv or 'loaddata' in sys.argv: + return + + else: if created: print(instance) Group.objects.create(name=instance) diff --git a/api/model/prefix.py b/api/model/prefix.py index b2a92c50..2a2713bb 100644 --- a/api/model/prefix.py +++ b/api/model/prefix.py @@ -681,7 +681,10 @@ def create_permissions_for_prefix(sender, instance=None, **kwargs): # owner_user=User.objects.get(username='wheel') # ) - if not 'test' in sys.argv: + if 'test' in sys.argv or 'loaddata' in sys.argv: + return + + else: owner_user = User.objects.get(username=instance.owner_user) owner_group = Group.objects.get(name=instance.owner_group_id) drafters = Group.objects.get(name=instance.prefix.lower() + "_drafter") diff --git a/api/models.py b/api/models.py index a5978f60..276b85a1 100755 --- a/api/models.py +++ b/api/models.py @@ -21,6 +21,7 @@ Source: https://www.django-rest-framework.org/api-guide/authentication/#generating-tokens """ +import sys from django.db import models from django.conf import settings from django.contrib.auth.models import Group, User @@ -125,15 +126,18 @@ def create_auth_token(sender, instance=None, created=False, **kwargs): """Link user creation to token generation. Source: https://www.django-rest-framework.org/api-guide/authentication/#generating-tokens """ - if created: - # The anonymous user's token is hard-coded - # in server.conf. - if instance.username == "anon": - # Create anon's record with the hard-coded key. - Token.objects.create(user=instance, key=settings.ANON_KEY) - else: - # Create a normal user's record. - Token.objects.create(user=instance) + if 'loaddata' in sys.argv: + return + else: + if created: + # The anonymous user's token is hard-coded + # in server.conf. + if instance.username == "anon": + # Create anon's record with the hard-coded key. + Token.objects.create(user=instance, key=settings.ANON_KEY) + else: + # Create a normal user's record. + Token.objects.create(user=instance) # Link object deletion to object permissions deletion. diff --git a/bcodb/fixtures/test_portal.json b/bcodb/fixtures/test_portal.json new file mode 100644 index 00000000..d1db6a0f --- /dev/null +++ b/bcodb/fixtures/test_portal.json @@ -0,0 +1,6544 @@ +[ + + { + "model": "authtoken.token", + "pk": "2f2a599026581c158a07f968c56292c77f4be875", + "fields": { + "user": 2, + "created": "2023-10-11T14:53:20.557Z" + } + }, + { + "model": "authtoken.token", + "pk": "258220666ca9e667e560b42a1eb4d4080fc1c744", + "fields": { + "user": 5, + "created": "2023-10-11T14:53:20.560Z" + } + }, + { + "model": "authtoken.token", + "pk": "44d2669cfca52239ef4d97e1a7cfdd69d4eeb95e", + "fields": { + "user": 4, + "created": "2023-10-11T14:53:20.559Z" + } + }, + { + "model": "authtoken.token", + "pk": "627626823549f787c3ec763ff687169206626149", + "fields": { + "user": 3, + "created": "2023-10-11T14:53:20.558Z" + } + }, + { + "model": "authtoken.token", + "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", + "fields": { + "user": 8, + "created": "2023-10-11T14:53:20.564Z" + } + }, + { + "model": "authtoken.token", + "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", + "fields": { + "user": 7, + "created": "2023-10-11T14:53:20.563Z" + } + }, + { + "model": "authtoken.token", + "pk": "bd97d8cbec1fc7234e11e80957496aefc20c6395", + "fields": { + "user": 6, + "created": "2023-10-11T14:53:20.561Z" + } + }, + { + "model": "authtoken.token", + "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", + "fields": { + "user": 9, + "created": "2023-10-11T14:53:20.565Z" + } + }, + { + "model": "admin.logentry", + "pk": 1, + "fields": { + "action_time": "2022-06-28T23:06:35.693Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 2, + "fields": { + "action_time": "2022-06-28T23:08:10.571Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 3, + "fields": { + "action_time": "2022-06-28T23:09:47.922Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 4, + "fields": { + "action_time": "2022-06-28T23:12:37.828Z", + "user": 6, + "content_type": 10, + "object_id": "2", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 5, + "fields": { + "action_time": "2022-06-28T23:14:01.431Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 6, + "fields": { + "action_time": "2022-06-28T23:16:50.236Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 7, + "fields": { + "action_time": "2022-06-28T23:19:25.710Z", + "user": 6, + "content_type": 10, + "object_id": "3", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 8, + "fields": { + "action_time": "2022-06-28T23:21:05.713Z", + "user": 6, + "content_type": 10, + "object_id": "4", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 9, + "fields": { + "action_time": "2022-06-28T23:21:43.425Z", + "user": 6, + "content_type": 10, + "object_id": "4", + "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 10, + "fields": { + "action_time": "2022-06-28T23:23:00.080Z", + "user": 6, + "content_type": 10, + "object_id": "6", + "object_repr": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 11, + "fields": { + "action_time": "2022-06-28T23:23:13.087Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 12, + "fields": { + "action_time": "2022-06-28T23:41:21.155Z", + "user": 6, + "content_type": 10, + "object_id": "6", + "object_repr": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 13, + "fields": { + "action_time": "2022-06-28T23:43:57.562Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 14, + "fields": { + "action_time": "2022-06-28T23:44:43.690Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "auth.permission", + "pk": 1, + "fields": { + "name": "Can add log entry", + "content_type": 1, + "codename": "add_logentry" + } + }, + { + "model": "auth.permission", + "pk": 2, + "fields": { + "name": "Can change log entry", + "content_type": 1, + "codename": "change_logentry" + } + }, + { + "model": "auth.permission", + "pk": 3, + "fields": { + "name": "Can delete log entry", + "content_type": 1, + "codename": "delete_logentry" + } + }, + { + "model": "auth.permission", + "pk": 4, + "fields": { + "name": "Can view log entry", + "content_type": 1, + "codename": "view_logentry" + } + }, + { + "model": "auth.permission", + "pk": 5, + "fields": { + "name": "Can add permission", + "content_type": 2, + "codename": "add_permission" + } + }, + { + "model": "auth.permission", + "pk": 6, + "fields": { + "name": "Can change permission", + "content_type": 2, + "codename": "change_permission" + } + }, + { + "model": "auth.permission", + "pk": 7, + "fields": { + "name": "Can delete permission", + "content_type": 2, + "codename": "delete_permission" + } + }, + { + "model": "auth.permission", + "pk": 8, + "fields": { + "name": "Can view permission", + "content_type": 2, + "codename": "view_permission" + } + }, + { + "model": "auth.permission", + "pk": 9, + "fields": { + "name": "Can add group", + "content_type": 3, + "codename": "add_group" + } + }, + { + "model": "auth.permission", + "pk": 10, + "fields": { + "name": "Can change group", + "content_type": 3, + "codename": "change_group" + } + }, + { + "model": "auth.permission", + "pk": 11, + "fields": { + "name": "Can delete group", + "content_type": 3, + "codename": "delete_group" + } + }, + { + "model": "auth.permission", + "pk": 12, + "fields": { + "name": "Can view group", + "content_type": 3, + "codename": "view_group" + } + }, + { + "model": "auth.permission", + "pk": 13, + "fields": { + "name": "Can add user", + "content_type": 4, + "codename": "add_user" + } + }, + { + "model": "auth.permission", + "pk": 14, + "fields": { + "name": "Can change user", + "content_type": 4, + "codename": "change_user" + } + }, + { + "model": "auth.permission", + "pk": 15, + "fields": { + "name": "Can delete user", + "content_type": 4, + "codename": "delete_user" + } + }, + { + "model": "auth.permission", + "pk": 16, + "fields": { + "name": "Can view user", + "content_type": 4, + "codename": "view_user" + } + }, + { + "model": "auth.permission", + "pk": 17, + "fields": { + "name": "Can add content type", + "content_type": 5, + "codename": "add_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 18, + "fields": { + "name": "Can change content type", + "content_type": 5, + "codename": "change_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 19, + "fields": { + "name": "Can delete content type", + "content_type": 5, + "codename": "delete_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 20, + "fields": { + "name": "Can view content type", + "content_type": 5, + "codename": "view_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 21, + "fields": { + "name": "Can add session", + "content_type": 6, + "codename": "add_session" + } + }, + { + "model": "auth.permission", + "pk": 22, + "fields": { + "name": "Can change session", + "content_type": 6, + "codename": "change_session" + } + }, + { + "model": "auth.permission", + "pk": 23, + "fields": { + "name": "Can delete session", + "content_type": 6, + "codename": "delete_session" + } + }, + { + "model": "auth.permission", + "pk": 24, + "fields": { + "name": "Can view session", + "content_type": 6, + "codename": "view_session" + } + }, + { + "model": "auth.permission", + "pk": 25, + "fields": { + "name": "Can add Token", + "content_type": 7, + "codename": "add_token" + } + }, + { + "model": "auth.permission", + "pk": 26, + "fields": { + "name": "Can change Token", + "content_type": 7, + "codename": "change_token" + } + }, + { + "model": "auth.permission", + "pk": 27, + "fields": { + "name": "Can delete Token", + "content_type": 7, + "codename": "delete_token" + } + }, + { + "model": "auth.permission", + "pk": 28, + "fields": { + "name": "Can view Token", + "content_type": 7, + "codename": "view_token" + } + }, + { + "model": "auth.permission", + "pk": 29, + "fields": { + "name": "Can add token", + "content_type": 8, + "codename": "add_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 30, + "fields": { + "name": "Can change token", + "content_type": 8, + "codename": "change_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 31, + "fields": { + "name": "Can delete token", + "content_type": 8, + "codename": "delete_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 32, + "fields": { + "name": "Can view token", + "content_type": 8, + "codename": "view_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 33, + "fields": { + "name": "Can add new_users", + "content_type": 9, + "codename": "add_new_users" + } + }, + { + "model": "auth.permission", + "pk": 34, + "fields": { + "name": "Can change new_users", + "content_type": 9, + "codename": "change_new_users" + } + }, + { + "model": "auth.permission", + "pk": 35, + "fields": { + "name": "Can delete new_users", + "content_type": 9, + "codename": "delete_new_users" + } + }, + { + "model": "auth.permission", + "pk": 36, + "fields": { + "name": "Can view new_users", + "content_type": 9, + "codename": "view_new_users" + } + }, + { + "model": "auth.permission", + "pk": 37, + "fields": { + "name": "Can add bco", + "content_type": 10, + "codename": "add_bco" + } + }, + { + "model": "auth.permission", + "pk": 38, + "fields": { + "name": "Can change bco", + "content_type": 10, + "codename": "change_bco" + } + }, + { + "model": "auth.permission", + "pk": 39, + "fields": { + "name": "Can delete bco", + "content_type": 10, + "codename": "delete_bco" + } + }, + { + "model": "auth.permission", + "pk": 40, + "fields": { + "name": "Can view bco", + "content_type": 10, + "codename": "view_bco" + } + }, + { + "model": "auth.permission", + "pk": 41, + "fields": { + "name": "Can add prefix_table", + "content_type": 11, + "codename": "add_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 42, + "fields": { + "name": "Can change prefix_table", + "content_type": 11, + "codename": "change_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 43, + "fields": { + "name": "Can delete prefix_table", + "content_type": 11, + "codename": "delete_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 44, + "fields": { + "name": "Can view prefix_table", + "content_type": 11, + "codename": "view_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 45, + "fields": { + "name": "Can add group info", + "content_type": 12, + "codename": "add_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 46, + "fields": { + "name": "Can change group info", + "content_type": 12, + "codename": "change_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 47, + "fields": { + "name": "Can delete group info", + "content_type": 12, + "codename": "delete_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 48, + "fields": { + "name": "Can view group info", + "content_type": 12, + "codename": "view_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 49, + "fields": { + "name": "Can add prefix", + "content_type": 13, + "codename": "add_prefix" + } + }, + { + "model": "auth.permission", + "pk": 50, + "fields": { + "name": "Can change prefix", + "content_type": 13, + "codename": "change_prefix" + } + }, + { + "model": "auth.permission", + "pk": 51, + "fields": { + "name": "Can delete prefix", + "content_type": 13, + "codename": "delete_prefix" + } + }, + { + "model": "auth.permission", + "pk": 52, + "fields": { + "name": "Can view prefix", + "content_type": 13, + "codename": "view_prefix" + } + }, + { + "model": "auth.permission", + "pk": 53, + "fields": { + "name": "Can add BCOs with prefix BCO", + "content_type": 10, + "codename": "add_BCO" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can change BCOs with prefix BCO", + "content_type": 10, + "codename": "change_BCO" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can delete BCOs with prefix BCO", + "content_type": 10, + "codename": "delete_BCO" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can view BCOs with prefix BCO", + "content_type": 10, + "codename": "view_BCO" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can draft BCOs with prefix BCO", + "content_type": 10, + "codename": "draft_BCO" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can publish BCOs with prefix BCO", + "content_type": 10, + "codename": "publish_BCO" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can add group object permission", + "content_type": 14, + "codename": "add_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can change group object permission", + "content_type": 14, + "codename": "change_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can delete group object permission", + "content_type": 14, + "codename": "delete_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can view group object permission", + "content_type": 14, + "codename": "view_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can add user object permission", + "content_type": 15, + "codename": "add_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can change user object permission", + "content_type": 15, + "codename": "change_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can delete user object permission", + "content_type": 15, + "codename": "delete_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 66, + "fields": { + "name": "Can view user object permission", + "content_type": 15, + "codename": "view_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 67, + "fields": { + "name": "Can add BCOs with prefix TEST", + "content_type": 10, + "codename": "add_TEST" + } + }, + { + "model": "auth.permission", + "pk": 68, + "fields": { + "name": "Can change BCOs with prefix TEST", + "content_type": 10, + "codename": "change_TEST" + } + }, + { + "model": "auth.permission", + "pk": 69, + "fields": { + "name": "Can delete BCOs with prefix TEST", + "content_type": 10, + "codename": "delete_TEST" + } + }, + { + "model": "auth.permission", + "pk": 70, + "fields": { + "name": "Can view BCOs with prefix TEST", + "content_type": 10, + "codename": "view_TEST" + } + }, + { + "model": "auth.permission", + "pk": 71, + "fields": { + "name": "Can draft BCOs with prefix TEST", + "content_type": 10, + "codename": "draft_TEST" + } + }, + { + "model": "auth.permission", + "pk": 72, + "fields": { + "name": "Can publish BCOs with prefix TEST", + "content_type": 10, + "codename": "publish_TEST" + } + }, + { + "model": "auth.permission", + "pk": 73, + "fields": { + "name": "Can add BCOs with prefix OTHER", + "content_type": 10, + "codename": "add_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 74, + "fields": { + "name": "Can change BCOs with prefix OTHER", + "content_type": 10, + "codename": "change_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 75, + "fields": { + "name": "Can delete BCOs with prefix OTHER", + "content_type": 10, + "codename": "delete_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 76, + "fields": { + "name": "Can view BCOs with prefix OTHER", + "content_type": 10, + "codename": "view_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 77, + "fields": { + "name": "Can draft BCOs with prefix OTHER", + "content_type": 10, + "codename": "draft_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 78, + "fields": { + "name": "Can publish BCOs with prefix OTHER", + "content_type": 10, + "codename": "publish_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 79, + "fields": { + "name": "Can add blacklisted token", + "content_type": 16, + "codename": "add_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 80, + "fields": { + "name": "Can change blacklisted token", + "content_type": 16, + "codename": "change_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 81, + "fields": { + "name": "Can delete blacklisted token", + "content_type": 16, + "codename": "delete_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 82, + "fields": { + "name": "Can view blacklisted token", + "content_type": 16, + "codename": "view_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 83, + "fields": { + "name": "Can add authentication", + "content_type": 17, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 84, + "fields": { + "name": "Can change authentication", + "content_type": 17, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 85, + "fields": { + "name": "Can delete authentication", + "content_type": 17, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 86, + "fields": { + "name": "Can view authentication", + "content_type": 17, + "codename": "view_authentication" + } + }, + { + "model": "auth.group", + "pk": 1, + "fields": { + "name": "bco_drafter", + "permissions": [ + 53, + 54, + 55, + 57, + 56 + ] + } + }, + { + "model": "auth.group", + "pk": 2, + "fields": { + "name": "bco_publisher", + "permissions": [ + 53, + 54, + 55, + 57, + 58, + 56 + ] + } + }, + { + "model": "auth.group", + "pk": 3, + "fields": { + "name": "anon", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 4, + "fields": { + "name": "wheel", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 5, + "fields": { + "name": "group_admins", + "permissions": [ + 9, + 10, + 11, + 12 + ] + } + }, + { + "model": "auth.group", + "pk": 6, + "fields": { + "name": "prefix_admins", + "permissions": [ + 49, + 50, + 51, + 52 + ] + } + }, + { + "model": "auth.group", + "pk": 7, + "fields": { + "name": "AnonymousUser", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 8, + "fields": { + "name": "bco_api_user", + "permissions": [ + 73, + 67, + 74, + 68, + 75, + 69, + 77, + 71, + 78, + 72, + 76, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 9, + "fields": { + "name": "test50", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 10, + "fields": { + "name": "hivelab37", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 11, + "fields": { + "name": "jdoe58", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 12, + "fields": { + "name": "test_drafter", + "permissions": [ + 67, + 68, + 69, + 71, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 13, + "fields": { + "name": "test_publisher", + "permissions": [ + 67, + 68, + 69, + 71, + 72, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 14, + "fields": { + "name": "other_drafter", + "permissions": [ + 73, + 74, + 75, + 77, + 76 + ] + } + }, + { + "model": "auth.group", + "pk": 15, + "fields": { + "name": "other_publisher", + "permissions": [ + 73, + 74, + 75, + 77, + 78, + 76 + ] + } + }, + { + "model": "auth.user", + "pk": 1, + "fields": { + "password": "!i7FmD5oJKoZbSswUfPpd5hHZTO1uUL4M26R2DIzb", + "last_login": null, + "is_superuser": false, + "username": "bco_drafter", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.496Z", + "groups": [ + 1 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 2, + "fields": { + "password": "!zwQlrQ6x12cENcNlfEBkImrSqyM1BaC6gZwEdJzm", + "last_login": null, + "is_superuser": false, + "username": "bco_publisher", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.506Z", + "groups": [ + 1, + 2 + ], + "user_permissions": [ + 53, + 54, + 55, + 57, + 58, + 56 + ] + } + }, + { + "model": "auth.user", + "pk": 3, + "fields": { + "password": "!nFpSYz0kD54JC8eO25OIH5sZpPYnjNpYyh5th60k", + "last_login": null, + "is_superuser": false, + "username": "anon", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.517Z", + "groups": [ + 3 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$CYgsYlwKXcRZrLo5HSr4jU$4MmwM6zGNaIzmQyY90oWqP5J3qdrbige5P02T0N0Z60=", + "last_login": null, + "is_superuser": true, + "username": "wheel", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": true, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.528Z", + "groups": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "!eYwmI7Fc6k6AF6TNLEYV9K9BzbyHJEM5EugCKKOU", + "last_login": null, + "is_superuser": false, + "username": "AnonymousUser", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.844Z", + "groups": [ + 1, + 2, + 7 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", + "last_login": "2022-06-28T23:05:37.314Z", + "is_superuser": true, + "username": "bco_api_user", + "first_name": "BioCompute", + "last_name": "Objects", + "email": "object.biocompute@gmail.com", + "is_staff": true, + "is_active": true, + "date_joined": "2022-05-10T20:35:53.381Z", + "groups": [ + 1, + 2, + 6, + 8, + 12, + 13, + 14, + 15 + ], + "user_permissions": [ + 73, + 67, + 74, + 68, + 75, + 69, + 77, + 71, + 78, + 72, + 76, + 70 + ] + } + }, + { + "model": "auth.user", + "pk": 7, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "test50", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39.093Z", + "groups": [ + 1, + 2, + 9, + 12 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 8, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab37", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [ + 1, + 2, + 10 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 9, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe58", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [ + 1, + 2, + 11 + ], + "user_permissions": [] + } + }, + + { + "model": "sessions.session", + "pk": "82y6iptnatolxvvuza5tjpftnjs15ucs", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6Ctw:poy6l1agnmqGeCEBGDUHoPJmt_d7BoLfpQmxeubFgv4", + "expire_date": "2022-07-12T15:13:16.934Z" + } + }, + { + "model": "sessions.session", + "pk": "aa91y2h5pktdnhqqpch0nsyv3kvmr5ff", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWk6:qMUi_RrH827urayQL-oOaSVTQWNGdKP8s6TLThYq0HM", + "expire_date": "2022-05-24T20:46:02.514Z" + } + }, + { + "model": "sessions.session", + "pk": "cuh93ef9py0gyskhvg20jm2tlvfr67u6", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWs5:B78XeRd4tP8TOd87u42iEAAp5wRdfJPTX4V0yufvaIU", + "expire_date": "2022-05-24T20:54:17.321Z" + } + }, + { + "model": "sessions.session", + "pk": "mun3kmvefd3yvouew9h0i5sb5gldyyxp", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6KH3:MzYA7DmMSpebSMTKgHH3V8cqMYWzDFvpcW80C7z_gZY", + "expire_date": "2022-07-12T23:05:37.317Z" + } + }, + { + "model": "api.bco", + "pk": 1, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.0", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:10:17.996Z" + } + }, + { + "model": "api.bco", + "pk": 2, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:13:13.841Z" + } + }, + { + "model": "api.bco", + "pk": 3, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:19:53.937Z" + } + }, + { + "model": "api.bco", + "pk": 4, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:21:56.878Z" + } + }, + { + "model": "api.bco", + "pk": 5, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "owner_group": "test_drafter", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:44:58.149Z" + } + }, + { + "model": "api.bco", + "pk": 6, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": "resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": "0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": "", + "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", + "owner_group": "other_drafter", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:41:49.698Z" + } + }, + { + "model": "api.bco", + "pk": 7, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:10:18.007Z" + } + }, + { + "model": "api.bco", + "pk": 8, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": null, + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/1.0", + "owner_group": "test50", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:13:13.859Z" + } + }, + { + "model": "api.bco", + "pk": 9, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/1.0", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": [ + "0001988" + ], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "9606" + ], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + } + }, + "object_class": null, + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/1.0", + "owner_group": "hivelab37", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:19:53.938Z" + } + }, + { + "model": "api.bco", + "pk": 10, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:13.091Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/1.0", + "owner_group": "jdoe58", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:21:56.879Z" + } + }, + { + "model": "api.bco", + "pk": 11, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": "resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": "0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": null, + "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:41:49.719Z" + } + }, + { + "model": "api.bco", + "pk": 12, + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.2", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.2", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.groupinfo", + "pk": 1, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Group administrators", + "expiration": null, + "group": "group_admins", + "max_n_members": -1, + "owner_user": "wheel" + } + }, + { + "model": "api.groupinfo", + "pk": 2, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Prefix administrators", + "expiration": null, + "group": "prefix_admins", + "max_n_members": -1, + "owner_user": "wheel" + } + }, + { + "model": "api.groupinfo", + "pk": 3, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just a test prefix.", + "expiration": null, + "group": "test_drafter", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 4, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just a test prefix.", + "expiration": null, + "group": "test_publisher", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 5, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just an other prefix.", + "expiration": null, + "group": "other_drafter", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 6, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just an other prefix.", + "expiration": null, + "group": "other_publisher", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.prefix_table", + "pk": 1, + "fields": { + "n_objects": 8, + "prefix": "BCO" + } + }, + { + "model": "api.prefix_table", + "pk": 2, + "fields": { + "n_objects": 3, + "prefix": "TEST" + } + }, + { + "model": "api.prefix_table", + "pk": 3, + "fields": { + "n_objects": 3, + "prefix": "OTHER" + } + }, + { + "model": "api.prefix", + "pk": 1, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T20:35:14.712Z", + "created_by": "bco_publisher", + "description": null, + "expires": null, + "owner_group": "bco_publisher", + "owner_user": "bco_publisher", + "prefix": "BCO" + } + }, + { + "model": "api.prefix", + "pk": 2, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T21:48:32.633Z", + "created_by": "bco_api_user", + "description": "Just a test prefix.", + "expires": null, + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "TEST" + } + }, + { + "model": "api.prefix", + "pk": 3, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T21:48:35.104Z", + "created_by": "bco_api_user", + "description": "Just an other prefix.", + "expires": null, + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER" + } + } +] \ No newline at end of file diff --git a/token.json b/token.json new file mode 100644 index 00000000..50e4e407 --- /dev/null +++ b/token.json @@ -0,0 +1 @@ +[{"model": "authtoken.token", "pk": "00edb7f97f1a100c1d1b463f167908d178158b6d", "fields": {"user": 2, "created": "2023-10-11T16:46:23.774Z"}}, {"model": "authtoken.token", "pk": "627626823549f787c3ec763ff687169206626149", "fields": {"user": 4, "created": "2023-10-11T16:46:23.782Z"}}, {"model": "authtoken.token", "pk": "a8d6fb100e9f46925dda3efe46870d2bb34e03a9", "fields": {"user": 1, "created": "2023-10-11T16:46:23.765Z"}}, {"model": "authtoken.token", "pk": "b64fffb062421c1ea17fe7e0034484e494708f63", "fields": {"user": 3, "created": "2023-10-11T16:46:23.778Z"}}, {"model": "authtoken.token", "pk": "c0d204332435783b4b745eca04a6946d8c124b38", "fields": {"user": 5, "created": "2023-10-11T16:46:23.867Z"}}] \ No newline at end of file From 7da3ce72d3a6549bee5e178062a521ee68a64e3b Mon Sep 17 00:00:00 2001 From: hadleyking Date: Thu, 12 Oct 2023 13:17:27 -0400 Subject: [PATCH 2/3] Fix emails in fixtures Changes to be committed: new file: bcodb/fixtures/local_data.json modified: bcodb/fixtures/test_portal.json --- bcodb/fixtures/local_data.json | 6544 +++++++++++++++++++++++++++++++ bcodb/fixtures/test_portal.json | 6 +- 2 files changed, 6547 insertions(+), 3 deletions(-) create mode 100644 bcodb/fixtures/local_data.json diff --git a/bcodb/fixtures/local_data.json b/bcodb/fixtures/local_data.json new file mode 100644 index 00000000..7550d9ae --- /dev/null +++ b/bcodb/fixtures/local_data.json @@ -0,0 +1,6544 @@ +[ + + { + "model": "authtoken.token", + "pk": "2f2a599026581c158a07f968c56292c77f4be875", + "fields": { + "user": 2, + "created": "2023-10-11T14:53:20.557Z" + } + }, + { + "model": "authtoken.token", + "pk": "258220666ca9e667e560b42a1eb4d4080fc1c744", + "fields": { + "user": 5, + "created": "2023-10-11T14:53:20.560Z" + } + }, + { + "model": "authtoken.token", + "pk": "44d2669cfca52239ef4d97e1a7cfdd69d4eeb95e", + "fields": { + "user": 4, + "created": "2023-10-11T14:53:20.559Z" + } + }, + { + "model": "authtoken.token", + "pk": "627626823549f787c3ec763ff687169206626149", + "fields": { + "user": 3, + "created": "2023-10-11T14:53:20.558Z" + } + }, + { + "model": "authtoken.token", + "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", + "fields": { + "user": 8, + "created": "2023-10-11T14:53:20.564Z" + } + }, + { + "model": "authtoken.token", + "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", + "fields": { + "user": 7, + "created": "2023-10-11T14:53:20.563Z" + } + }, + { + "model": "authtoken.token", + "pk": "bd97d8cbec1fc7234e11e80957496aefc20c6395", + "fields": { + "user": 6, + "created": "2023-10-11T14:53:20.561Z" + } + }, + { + "model": "authtoken.token", + "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", + "fields": { + "user": 9, + "created": "2023-10-11T14:53:20.565Z" + } + }, + { + "model": "admin.logentry", + "pk": 1, + "fields": { + "action_time": "2022-06-28T23:06:35.693Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "http://localhost:8000/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 2, + "fields": { + "action_time": "2022-06-28T23:08:10.571Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "http://localhost:8000/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 3, + "fields": { + "action_time": "2022-06-28T23:09:47.922Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "http://localhost:8000/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 4, + "fields": { + "action_time": "2022-06-28T23:12:37.828Z", + "user": 6, + "content_type": 10, + "object_id": "2", + "object_repr": "http://localhost:8000/BCO_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 5, + "fields": { + "action_time": "2022-06-28T23:14:01.431Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://localhost:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 6, + "fields": { + "action_time": "2022-06-28T23:16:50.236Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://localhost:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 7, + "fields": { + "action_time": "2022-06-28T23:19:25.710Z", + "user": 6, + "content_type": 10, + "object_id": "3", + "object_repr": "http://localhost:8000/BCO_000002/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 8, + "fields": { + "action_time": "2022-06-28T23:21:05.713Z", + "user": 6, + "content_type": 10, + "object_id": "4", + "object_repr": "http://localhost:8000/BCO_000003/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 9, + "fields": { + "action_time": "2022-06-28T23:21:43.425Z", + "user": 6, + "content_type": 10, + "object_id": "4", + "object_repr": "http://localhost:8000/BCO_000003/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 10, + "fields": { + "action_time": "2022-06-28T23:23:00.080Z", + "user": 6, + "content_type": 10, + "object_id": "6", + "object_repr": "http://localhost:8000/OTHER_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 11, + "fields": { + "action_time": "2022-06-28T23:23:13.087Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://localhost:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 12, + "fields": { + "action_time": "2022-06-28T23:41:21.155Z", + "user": 6, + "content_type": 10, + "object_id": "6", + "object_repr": "http://localhost:8000/OTHER_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 13, + "fields": { + "action_time": "2022-06-28T23:43:57.562Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://localhost:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 14, + "fields": { + "action_time": "2022-06-28T23:44:43.690Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://localhost:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "auth.permission", + "pk": 1, + "fields": { + "name": "Can add log entry", + "content_type": 1, + "codename": "add_logentry" + } + }, + { + "model": "auth.permission", + "pk": 2, + "fields": { + "name": "Can change log entry", + "content_type": 1, + "codename": "change_logentry" + } + }, + { + "model": "auth.permission", + "pk": 3, + "fields": { + "name": "Can delete log entry", + "content_type": 1, + "codename": "delete_logentry" + } + }, + { + "model": "auth.permission", + "pk": 4, + "fields": { + "name": "Can view log entry", + "content_type": 1, + "codename": "view_logentry" + } + }, + { + "model": "auth.permission", + "pk": 5, + "fields": { + "name": "Can add permission", + "content_type": 2, + "codename": "add_permission" + } + }, + { + "model": "auth.permission", + "pk": 6, + "fields": { + "name": "Can change permission", + "content_type": 2, + "codename": "change_permission" + } + }, + { + "model": "auth.permission", + "pk": 7, + "fields": { + "name": "Can delete permission", + "content_type": 2, + "codename": "delete_permission" + } + }, + { + "model": "auth.permission", + "pk": 8, + "fields": { + "name": "Can view permission", + "content_type": 2, + "codename": "view_permission" + } + }, + { + "model": "auth.permission", + "pk": 9, + "fields": { + "name": "Can add group", + "content_type": 3, + "codename": "add_group" + } + }, + { + "model": "auth.permission", + "pk": 10, + "fields": { + "name": "Can change group", + "content_type": 3, + "codename": "change_group" + } + }, + { + "model": "auth.permission", + "pk": 11, + "fields": { + "name": "Can delete group", + "content_type": 3, + "codename": "delete_group" + } + }, + { + "model": "auth.permission", + "pk": 12, + "fields": { + "name": "Can view group", + "content_type": 3, + "codename": "view_group" + } + }, + { + "model": "auth.permission", + "pk": 13, + "fields": { + "name": "Can add user", + "content_type": 4, + "codename": "add_user" + } + }, + { + "model": "auth.permission", + "pk": 14, + "fields": { + "name": "Can change user", + "content_type": 4, + "codename": "change_user" + } + }, + { + "model": "auth.permission", + "pk": 15, + "fields": { + "name": "Can delete user", + "content_type": 4, + "codename": "delete_user" + } + }, + { + "model": "auth.permission", + "pk": 16, + "fields": { + "name": "Can view user", + "content_type": 4, + "codename": "view_user" + } + }, + { + "model": "auth.permission", + "pk": 17, + "fields": { + "name": "Can add content type", + "content_type": 5, + "codename": "add_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 18, + "fields": { + "name": "Can change content type", + "content_type": 5, + "codename": "change_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 19, + "fields": { + "name": "Can delete content type", + "content_type": 5, + "codename": "delete_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 20, + "fields": { + "name": "Can view content type", + "content_type": 5, + "codename": "view_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 21, + "fields": { + "name": "Can add session", + "content_type": 6, + "codename": "add_session" + } + }, + { + "model": "auth.permission", + "pk": 22, + "fields": { + "name": "Can change session", + "content_type": 6, + "codename": "change_session" + } + }, + { + "model": "auth.permission", + "pk": 23, + "fields": { + "name": "Can delete session", + "content_type": 6, + "codename": "delete_session" + } + }, + { + "model": "auth.permission", + "pk": 24, + "fields": { + "name": "Can view session", + "content_type": 6, + "codename": "view_session" + } + }, + { + "model": "auth.permission", + "pk": 25, + "fields": { + "name": "Can add Token", + "content_type": 7, + "codename": "add_token" + } + }, + { + "model": "auth.permission", + "pk": 26, + "fields": { + "name": "Can change Token", + "content_type": 7, + "codename": "change_token" + } + }, + { + "model": "auth.permission", + "pk": 27, + "fields": { + "name": "Can delete Token", + "content_type": 7, + "codename": "delete_token" + } + }, + { + "model": "auth.permission", + "pk": 28, + "fields": { + "name": "Can view Token", + "content_type": 7, + "codename": "view_token" + } + }, + { + "model": "auth.permission", + "pk": 29, + "fields": { + "name": "Can add token", + "content_type": 8, + "codename": "add_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 30, + "fields": { + "name": "Can change token", + "content_type": 8, + "codename": "change_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 31, + "fields": { + "name": "Can delete token", + "content_type": 8, + "codename": "delete_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 32, + "fields": { + "name": "Can view token", + "content_type": 8, + "codename": "view_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 33, + "fields": { + "name": "Can add new_users", + "content_type": 9, + "codename": "add_new_users" + } + }, + { + "model": "auth.permission", + "pk": 34, + "fields": { + "name": "Can change new_users", + "content_type": 9, + "codename": "change_new_users" + } + }, + { + "model": "auth.permission", + "pk": 35, + "fields": { + "name": "Can delete new_users", + "content_type": 9, + "codename": "delete_new_users" + } + }, + { + "model": "auth.permission", + "pk": 36, + "fields": { + "name": "Can view new_users", + "content_type": 9, + "codename": "view_new_users" + } + }, + { + "model": "auth.permission", + "pk": 37, + "fields": { + "name": "Can add bco", + "content_type": 10, + "codename": "add_bco" + } + }, + { + "model": "auth.permission", + "pk": 38, + "fields": { + "name": "Can change bco", + "content_type": 10, + "codename": "change_bco" + } + }, + { + "model": "auth.permission", + "pk": 39, + "fields": { + "name": "Can delete bco", + "content_type": 10, + "codename": "delete_bco" + } + }, + { + "model": "auth.permission", + "pk": 40, + "fields": { + "name": "Can view bco", + "content_type": 10, + "codename": "view_bco" + } + }, + { + "model": "auth.permission", + "pk": 41, + "fields": { + "name": "Can add prefix_table", + "content_type": 11, + "codename": "add_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 42, + "fields": { + "name": "Can change prefix_table", + "content_type": 11, + "codename": "change_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 43, + "fields": { + "name": "Can delete prefix_table", + "content_type": 11, + "codename": "delete_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 44, + "fields": { + "name": "Can view prefix_table", + "content_type": 11, + "codename": "view_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 45, + "fields": { + "name": "Can add group info", + "content_type": 12, + "codename": "add_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 46, + "fields": { + "name": "Can change group info", + "content_type": 12, + "codename": "change_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 47, + "fields": { + "name": "Can delete group info", + "content_type": 12, + "codename": "delete_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 48, + "fields": { + "name": "Can view group info", + "content_type": 12, + "codename": "view_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 49, + "fields": { + "name": "Can add prefix", + "content_type": 13, + "codename": "add_prefix" + } + }, + { + "model": "auth.permission", + "pk": 50, + "fields": { + "name": "Can change prefix", + "content_type": 13, + "codename": "change_prefix" + } + }, + { + "model": "auth.permission", + "pk": 51, + "fields": { + "name": "Can delete prefix", + "content_type": 13, + "codename": "delete_prefix" + } + }, + { + "model": "auth.permission", + "pk": 52, + "fields": { + "name": "Can view prefix", + "content_type": 13, + "codename": "view_prefix" + } + }, + { + "model": "auth.permission", + "pk": 53, + "fields": { + "name": "Can add BCOs with prefix BCO", + "content_type": 10, + "codename": "add_BCO" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can change BCOs with prefix BCO", + "content_type": 10, + "codename": "change_BCO" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can delete BCOs with prefix BCO", + "content_type": 10, + "codename": "delete_BCO" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can view BCOs with prefix BCO", + "content_type": 10, + "codename": "view_BCO" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can draft BCOs with prefix BCO", + "content_type": 10, + "codename": "draft_BCO" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can publish BCOs with prefix BCO", + "content_type": 10, + "codename": "publish_BCO" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can add group object permission", + "content_type": 14, + "codename": "add_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can change group object permission", + "content_type": 14, + "codename": "change_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can delete group object permission", + "content_type": 14, + "codename": "delete_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can view group object permission", + "content_type": 14, + "codename": "view_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can add user object permission", + "content_type": 15, + "codename": "add_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can change user object permission", + "content_type": 15, + "codename": "change_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can delete user object permission", + "content_type": 15, + "codename": "delete_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 66, + "fields": { + "name": "Can view user object permission", + "content_type": 15, + "codename": "view_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 67, + "fields": { + "name": "Can add BCOs with prefix TEST", + "content_type": 10, + "codename": "add_TEST" + } + }, + { + "model": "auth.permission", + "pk": 68, + "fields": { + "name": "Can change BCOs with prefix TEST", + "content_type": 10, + "codename": "change_TEST" + } + }, + { + "model": "auth.permission", + "pk": 69, + "fields": { + "name": "Can delete BCOs with prefix TEST", + "content_type": 10, + "codename": "delete_TEST" + } + }, + { + "model": "auth.permission", + "pk": 70, + "fields": { + "name": "Can view BCOs with prefix TEST", + "content_type": 10, + "codename": "view_TEST" + } + }, + { + "model": "auth.permission", + "pk": 71, + "fields": { + "name": "Can draft BCOs with prefix TEST", + "content_type": 10, + "codename": "draft_TEST" + } + }, + { + "model": "auth.permission", + "pk": 72, + "fields": { + "name": "Can publish BCOs with prefix TEST", + "content_type": 10, + "codename": "publish_TEST" + } + }, + { + "model": "auth.permission", + "pk": 73, + "fields": { + "name": "Can add BCOs with prefix OTHER", + "content_type": 10, + "codename": "add_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 74, + "fields": { + "name": "Can change BCOs with prefix OTHER", + "content_type": 10, + "codename": "change_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 75, + "fields": { + "name": "Can delete BCOs with prefix OTHER", + "content_type": 10, + "codename": "delete_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 76, + "fields": { + "name": "Can view BCOs with prefix OTHER", + "content_type": 10, + "codename": "view_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 77, + "fields": { + "name": "Can draft BCOs with prefix OTHER", + "content_type": 10, + "codename": "draft_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 78, + "fields": { + "name": "Can publish BCOs with prefix OTHER", + "content_type": 10, + "codename": "publish_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 79, + "fields": { + "name": "Can add blacklisted token", + "content_type": 16, + "codename": "add_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 80, + "fields": { + "name": "Can change blacklisted token", + "content_type": 16, + "codename": "change_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 81, + "fields": { + "name": "Can delete blacklisted token", + "content_type": 16, + "codename": "delete_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 82, + "fields": { + "name": "Can view blacklisted token", + "content_type": 16, + "codename": "view_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 83, + "fields": { + "name": "Can add authentication", + "content_type": 17, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 84, + "fields": { + "name": "Can change authentication", + "content_type": 17, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 85, + "fields": { + "name": "Can delete authentication", + "content_type": 17, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 86, + "fields": { + "name": "Can view authentication", + "content_type": 17, + "codename": "view_authentication" + } + }, + { + "model": "auth.group", + "pk": 1, + "fields": { + "name": "bco_drafter", + "permissions": [ + 53, + 54, + 55, + 57, + 56 + ] + } + }, + { + "model": "auth.group", + "pk": 2, + "fields": { + "name": "bco_publisher", + "permissions": [ + 53, + 54, + 55, + 57, + 58, + 56 + ] + } + }, + { + "model": "auth.group", + "pk": 3, + "fields": { + "name": "anon", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 4, + "fields": { + "name": "wheel", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 5, + "fields": { + "name": "group_admins", + "permissions": [ + 9, + 10, + 11, + 12 + ] + } + }, + { + "model": "auth.group", + "pk": 6, + "fields": { + "name": "prefix_admins", + "permissions": [ + 49, + 50, + 51, + 52 + ] + } + }, + { + "model": "auth.group", + "pk": 7, + "fields": { + "name": "AnonymousUser", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 8, + "fields": { + "name": "bco_api_user", + "permissions": [ + 73, + 67, + 74, + 68, + 75, + 69, + 77, + 71, + 78, + 72, + 76, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 9, + "fields": { + "name": "test50", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 10, + "fields": { + "name": "hivelab37", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 11, + "fields": { + "name": "jdoe58", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 12, + "fields": { + "name": "test_drafter", + "permissions": [ + 67, + 68, + 69, + 71, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 13, + "fields": { + "name": "test_publisher", + "permissions": [ + 67, + 68, + 69, + 71, + 72, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 14, + "fields": { + "name": "other_drafter", + "permissions": [ + 73, + 74, + 75, + 77, + 76 + ] + } + }, + { + "model": "auth.group", + "pk": 15, + "fields": { + "name": "other_publisher", + "permissions": [ + 73, + 74, + 75, + 77, + 78, + 76 + ] + } + }, + { + "model": "auth.user", + "pk": 1, + "fields": { + "password": "!i7FmD5oJKoZbSswUfPpd5hHZTO1uUL4M26R2DIzb", + "last_login": null, + "is_superuser": false, + "username": "bco_drafter", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.496Z", + "groups": [ + 1 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 2, + "fields": { + "password": "!zwQlrQ6x12cENcNlfEBkImrSqyM1BaC6gZwEdJzm", + "last_login": null, + "is_superuser": false, + "username": "bco_publisher", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.506Z", + "groups": [ + 1, + 2 + ], + "user_permissions": [ + 53, + 54, + 55, + 57, + 58, + 56 + ] + } + }, + { + "model": "auth.user", + "pk": 3, + "fields": { + "password": "!nFpSYz0kD54JC8eO25OIH5sZpPYnjNpYyh5th60k", + "last_login": null, + "is_superuser": false, + "username": "anon", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.517Z", + "groups": [ + 3 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$CYgsYlwKXcRZrLo5HSr4jU$4MmwM6zGNaIzmQyY90oWqP5J3qdrbige5P02T0N0Z60=", + "last_login": null, + "is_superuser": true, + "username": "wheel", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": true, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.528Z", + "groups": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "!eYwmI7Fc6k6AF6TNLEYV9K9BzbyHJEM5EugCKKOU", + "last_login": null, + "is_superuser": false, + "username": "AnonymousUser", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.844Z", + "groups": [ + 1, + 2, + 7 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", + "last_login": "2022-06-28T23:05:37.314Z", + "is_superuser": true, + "username": "bco_api_user", + "first_name": "BioCompute", + "last_name": "Objects", + "email": "object.biocompute@gmail.com", + "is_staff": true, + "is_active": true, + "date_joined": "2022-05-10T20:35:53.381Z", + "groups": [ + 1, + 2, + 6, + 8, + 12, + 13, + 14, + 15 + ], + "user_permissions": [ + 73, + 67, + 74, + 68, + 75, + 69, + 77, + 71, + 78, + 72, + 76, + 70 + ] + } + }, + { + "model": "auth.user", + "pk": 7, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "test50", + "first_name": "", + "last_name": "", + "email": "test@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39.093Z", + "groups": [ + 1, + 2, + 9, + 12 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 8, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab37", + "first_name": "HIVE", + "last_name": "Lab", + "email": "hivelab@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [ + 1, + 2, + 10 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 9, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe58", + "first_name": "John", + "last_name": "Doe", + "email": "jdoe@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [ + 1, + 2, + 11 + ], + "user_permissions": [] + } + }, + + { + "model": "sessions.session", + "pk": "82y6iptnatolxvvuza5tjpftnjs15ucs", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6Ctw:poy6l1agnmqGeCEBGDUHoPJmt_d7BoLfpQmxeubFgv4", + "expire_date": "2022-07-12T15:13:16.934Z" + } + }, + { + "model": "sessions.session", + "pk": "aa91y2h5pktdnhqqpch0nsyv3kvmr5ff", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWk6:qMUi_RrH827urayQL-oOaSVTQWNGdKP8s6TLThYq0HM", + "expire_date": "2022-05-24T20:46:02.514Z" + } + }, + { + "model": "sessions.session", + "pk": "cuh93ef9py0gyskhvg20jm2tlvfr67u6", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWs5:B78XeRd4tP8TOd87u42iEAAp5wRdfJPTX4V0yufvaIU", + "expire_date": "2022-05-24T20:54:17.321Z" + } + }, + { + "model": "sessions.session", + "pk": "mun3kmvefd3yvouew9h0i5sb5gldyyxp", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6KH3:MzYA7DmMSpebSMTKgHH3V8cqMYWzDFvpcW80C7z_gZY", + "expire_date": "2022-07-12T23:05:37.317Z" + } + }, + { + "model": "api.bco", + "pk": 1, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.0", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://localhost:8000/BCO_000000/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:10:17.996Z" + } + }, + { + "model": "api.bco", + "pk": 2, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://localhost:8000/BCO_000001/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:13:13.841Z" + } + }, + { + "model": "api.bco", + "pk": 3, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://localhost:8000/BCO_000002/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:19:53.937Z" + } + }, + { + "model": "api.bco", + "pk": 4, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://localhost:8000/BCO_000003/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:21:56.878Z" + } + }, + { + "model": "api.bco", + "pk": 5, + "fields": { + "contents": { + "object_id": "http://localhost:8000/TEST_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://localhost:8000/TEST_000001/DRAFT", + "owner_group": "test_drafter", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:44:58.149Z" + } + }, + { + "model": "api.bco", + "pk": 6, + "fields": { + "contents": { + "object_id": "http://localhost:8000/OTHER_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": "resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": "0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": "", + "object_id": "http://localhost:8000/OTHER_000001/DRAFT", + "owner_group": "other_drafter", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:41:49.698Z" + } + }, + { + "model": "api.bco", + "pk": 7, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://localhost:8000/BCO_000000/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:10:18.007Z" + } + }, + { + "model": "api.bco", + "pk": 8, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": null, + "object_id": "http://localhost:8000/BCO_000001/1.0", + "owner_group": "test50", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:13:13.859Z" + } + }, + { + "model": "api.bco", + "pk": 9, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000002/1.0", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": [ + "0001988" + ], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "9606" + ], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + } + }, + "object_class": null, + "object_id": "http://localhost:8000/BCO_000002/1.0", + "owner_group": "hivelab37", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:19:53.938Z" + } + }, + { + "model": "api.bco", + "pk": 10, + "fields": { + "contents": { + "object_id": "http://localhost:8000/BCO_000003/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:13.091Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://localhost:8000/BCO_000003/1.0", + "owner_group": "jdoe58", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:21:56.879Z" + } + }, + { + "model": "api.bco", + "pk": 11, + "fields": { + "contents": { + "object_id": "http://localhost:8000/OTHER_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": "resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": "0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": null, + "object_id": "http://localhost:8000/OTHER_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:41:49.719Z" + } + }, + { + "model": "api.bco", + "pk": 12, + "fields": { + "contents": { + "object_id": "http://localhost:8000/TEST_000001/1.2", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://localhost:8000/TEST_000001/1.2", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.groupinfo", + "pk": 1, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Group administrators", + "expiration": null, + "group": "group_admins", + "max_n_members": -1, + "owner_user": "wheel" + } + }, + { + "model": "api.groupinfo", + "pk": 2, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Prefix administrators", + "expiration": null, + "group": "prefix_admins", + "max_n_members": -1, + "owner_user": "wheel" + } + }, + { + "model": "api.groupinfo", + "pk": 3, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just a test prefix.", + "expiration": null, + "group": "test_drafter", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 4, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just a test prefix.", + "expiration": null, + "group": "test_publisher", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 5, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just an other prefix.", + "expiration": null, + "group": "other_drafter", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 6, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just an other prefix.", + "expiration": null, + "group": "other_publisher", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.prefix_table", + "pk": 1, + "fields": { + "n_objects": 8, + "prefix": "BCO" + } + }, + { + "model": "api.prefix_table", + "pk": 2, + "fields": { + "n_objects": 3, + "prefix": "TEST" + } + }, + { + "model": "api.prefix_table", + "pk": 3, + "fields": { + "n_objects": 3, + "prefix": "OTHER" + } + }, + { + "model": "api.prefix", + "pk": 1, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T20:35:14.712Z", + "created_by": "bco_publisher", + "description": null, + "expires": null, + "owner_group": "bco_publisher", + "owner_user": "bco_publisher", + "prefix": "BCO" + } + }, + { + "model": "api.prefix", + "pk": 2, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T21:48:32.633Z", + "created_by": "bco_api_user", + "description": "Just a test prefix.", + "expires": null, + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "TEST" + } + }, + { + "model": "api.prefix", + "pk": 3, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T21:48:35.104Z", + "created_by": "bco_api_user", + "description": "Just an other prefix.", + "expires": null, + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER" + } + } +] \ No newline at end of file diff --git a/bcodb/fixtures/test_portal.json b/bcodb/fixtures/test_portal.json index d1db6a0f..486e8b66 100644 --- a/bcodb/fixtures/test_portal.json +++ b/bcodb/fixtures/test_portal.json @@ -1397,9 +1397,9 @@ "last_login": null, "is_superuser": false, "username": "hivelab37", - "first_name": "", - "last_name": "", - "email": "", + "first_name": "HIVE", + "last_name": "Lab", + "email": "hivelab@testing.com", "is_staff": false, "is_active": true, "date_joined": "2022-05-10T20:53:42.499Z", From 41ef299c3ee6accd973bc77ea0ebaca29338a765 Mon Sep 17 00:00:00 2001 From: hadleyking Date: Thu, 12 Oct 2023 14:47:50 -0400 Subject: [PATCH 3/3] Fixes for publishing with fixtures Changes to be committed: modified: api/model/prefix.py modified: api/scripts/utilities/DbUtils.py --- api/model/prefix.py | 4 +++- api/scripts/utilities/DbUtils.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/api/model/prefix.py b/api/model/prefix.py index 2a2713bb..496dc922 100644 --- a/api/model/prefix.py +++ b/api/model/prefix.py @@ -723,7 +723,9 @@ def create_counter_for_prefix(sender, instance=None, created=False, **kwargs): instance: api.model.prefix.Prefix created: bool """ - if not 'test' in sys.argv: + if 'test' in sys.argv or 'loaddata' in sys.argv or 'flush' in sys.argv: + return + else: if created: prefix_table.objects.create(n_objects=1, prefix=instance.prefix) diff --git a/api/scripts/utilities/DbUtils.py b/api/scripts/utilities/DbUtils.py index 18eefb05..398e86e1 100755 --- a/api/scripts/utilities/DbUtils.py +++ b/api/scripts/utilities/DbUtils.py @@ -731,7 +731,10 @@ def messages(self, parameters, p_content=False): "status_code": "409", "message": "The provided object " + parameters["object_id"] - + " has already been created on this server.", + + " has already been created on this server." + + " If you wish to publish a new version of this BCO try" + + " to save the DRAFT with a different version number, and" + + " then resubmit.", }, "409_draft_object_id_conflict": { "request_status": "FAILURE",