
Commit 462afdd

upload to aws added
baminou committed Mar 12, 2018
1 parent f542130 commit 462afdd
Showing 5 changed files with 120 additions and 13 deletions.
30 changes: 30 additions & 0 deletions workflow/main.yaml
@@ -160,6 +160,7 @@ workflow:
sv_padding: sv_padding
snv_padding: snv_padding
indel_padding: indel_padding
associated_vcfs: associated_vcfs
depends_on:
- completed@download_files

@@ -170,6 +171,9 @@ workflow:
normal_bam: normal_bam
tumor_bams: tumor_bams
experiment: experiment
indel_padding: indel_padding
snv_padding: snv_padding
sv_padding: sv_padding
depends_on:
- completed@generate_minibam

@@ -182,6 +186,14 @@ workflow:
depends_on:
- completed@generate_song_payloads

upload_to_aws:
tool: upload_to_aws
input:
input_directory: directory@download_files
payloads: payloads@generate_song_payloads
study_id: study_id


clean_directory:
tool: clean_directory
input:
@@ -237,6 +249,14 @@ tools:
type: array
experiment:
type: object
indel_padding:
type: object
snv_padding:
type: object
sv_padding:
type: object
associated_vcfs:
type: array
output:
payloads:
type: array
@@ -251,6 +271,16 @@ tools:
study_id:
type: string

upload_to_aws:
command: upload_to_aws.py
input:
input_directory:
type: string
payloads:
type: array
study_id:
type: string

clean_directory:
command: clean_directory.py
input:
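For reference, a rough sketch (in Python, with hypothetical values) of the resolved task input that the new upload_to_aws step would hand to workflow/tools/upload_to_aws.py, following the wiring above:

# Hypothetical resolved input for the new upload_to_aws step.
# Keys follow the tool schema above; the concrete values are made up.
upload_to_aws_task = {
    'input': {
        'input_directory': '/data/some_run_dir',          # directory@download_files
        'payloads': ['normal_minibam.json',               # payloads@generate_song_payloads
                     'tumor_minibam_0.json'],
        'study_id': 'PACA-CA'
    }
}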
2 changes: 1 addition & 1 deletion workflow/tools/generate_minibam.py
@@ -41,7 +41,7 @@
tmp_json['oxoQScore'] = task_dict.get('input').get('tumor_bams')[i].get('oxog_score')
tmp_json['associatedVcfs'] = []
for j in range(0,len(task_dict.get('input').get('vcf_files'))):
tmp_json['associatedVcfs'].append(task_dict.get('input').get('vcf_files')[i].get('file_name'))
tmp_json['associatedVcfs'].append(task_dict.get('input').get('vcf_files')[j].get('file_name'))
json_input['tumours'].append(tmp_json)

json_file = 'run.json'
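The one-line fix above replaces the outer loop index i with the inner index j, so each tumour entry now lists every VCF file name instead of repeating (or mis-indexing) a single one. An equivalent, more idiomatic form of the corrected inner loop, sketch only, with names as in the diff:

# task_dict is whatever get_task_dict(sys.argv[1]) returned.
vcf_files = task_dict.get('input').get('vcf_files')
tmp_json['associatedVcfs'] = [vcf.get('file_name') for vcf in vcf_files]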
24 changes: 21 additions & 3 deletions workflow/tools/generate_song_payloads.py
@@ -20,10 +20,14 @@
normal_bam = task_dict.get('input').get('normal_bam')
tumor_bams = task_dict.get('input').get('tumor_bams')
experiment = task_dict.get('input').get('experiment')
indel_padding = task_dict.get('input').get('indel_padding')
snv_padding = task_dict.get('input').get('snv_padding')
sv_padding = task_dict.get('input').get('sv_padding')
associated_vcfs = task_dict.get('input').get('associated_vcfs')

save_output_json(task_dict)

def create_payload_json(bam, experiment, input_directory, output_file):
def create_payload_json(bam, experiment, input_directory, output_file, associated_vcfs):
donor_payload = DonorPayload(donor_gender=bam.get('sample').get('donor').get('gender'),donor_submitter_id=bam.get('sample').get('donor').get('submitter_id'))
experiment_payload = ExperimentPayload(aligned=experiment.get('aligned'),library_strategy=experiment.get('library_strategy'),reference_genome=experiment.get('reference_genome'))

@@ -46,17 +50,31 @@ def create_payload_json(bam, experiment, input_directory, output_file):
song_payload.add_file_payload(minibam_payload)
song_payload.add_file_payload(minibai_payload)
song_payload.add_sample_payload(sample_payload)


song_payload.add_info('minibam_generator',{
'git_url': "https://github.com/ICGC-TCGA-PanCancer/pcawg-minibam",
"dockstore": "https://dockstore.org/workflows/ICGC-TCGA-PanCancer/pcawg-minibam",
"release": "1.0.0"
})
song_payload.add_info('snv_padding', snv_padding)
song_payload.add_info('sv_padding', sv_padding)
song_payload.add_info('isPcawg',True)
song_payload.add_info('indelPadding',indel_padding)
song_payload.add_info('full_size_bam', bam)
song_payload.add_info('vcf_files',associated_vcfs)

song_payload.to_json_file(output_file)


payloads = []

create_payload_json(normal_bam, experiment, input_directory, os.path.join(input_directory, 'normal_minibam.json'))

create_payload_json(normal_bam, experiment, input_directory, os.path.join(input_directory, 'normal_minibam.json'), associated_vcfs)
payloads.append('normal_minibam.json')

for i in range(0,len(tumor_bams)):
create_payload_json(tumor_bams[i], experiment, input_directory, os.path.join(input_directory, 'tumor_minibam_'+str(i)+'.json'))
create_payload_json(tumor_bams[i], experiment, input_directory, os.path.join(input_directory, 'tumor_minibam_'+str(i)+'.json'), associated_vcfs)
payloads.append( 'tumor_minibam_'+str(i)+'.json')

save_output_json({
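With these changes the SONG payload also records the padding values, the full-size BAM metadata, and the associated VCFs via add_info. Roughly, the extra info entries would look like the dict below; the exact JSON layout depends on SongPayload.to_json_file, which is not part of this diff:

# Illustration only: approximate content of the added "info" entries.
info = {
    'minibam_generator': {
        'git_url': 'https://github.com/ICGC-TCGA-PanCancer/pcawg-minibam',
        'dockstore': 'https://dockstore.org/workflows/ICGC-TCGA-PanCancer/pcawg-minibam',
        'release': '1.0.0'
    },
    'snv_padding': snv_padding,        # from task_dict['input']
    'sv_padding': sv_padding,
    'isPcawg': True,
    'indelPadding': indel_padding,
    'full_size_bam': bam,              # original BAM metadata dict
    'vcf_files': associated_vcfs       # new associated_vcfs input
}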
19 changes: 10 additions & 9 deletions workflow/tools/upload.py
@@ -6,28 +6,29 @@
from utils import get_task_dict, save_output_json, get_md5
import sys

import shutil

def upload_file(input_directory, study_id, payload):
upload_container = "quay.io/baminou/dckr_song_upload"
song_server = 'http://142.1.177.168:8080'

subprocess.check_output(['docker', 'pull', upload_container])

subprocess.check_output(['docker','run','-e','ACCESSTOKEN',
'-v', input_directory+':/app',upload_container, 'upload','-s',study_id,
'-u', song_server, '-p', '/app/'+payload,
'-o','manifest.txt','-j','manifest.json',
subprocess.check_output(['docker', 'run',
'--net=host',
'-e', 'ACCESSTOKEN',
'-e', 'STORAGEURL=' + os.environ.get('STORAGEURL_COLLAB'),
'-e', 'METADATAURL=' + os.environ.get('METADATAURL_COLLAB'),
'-v', input_directory + ':/app', upload_container,
'upload', '-s', study_id,
'-u', song_server, '-p', '/app/' + payload,
'-o', 'manifest.txt', '-j', 'manifest.json',
'-d', '/app/'])

return json.load(open(os.path.join(input_directory,'manifest.json')))


task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()

save_output_json(task_dict)


payloads = task_dict.get('input').get('payloads')
input_directory = task_dict.get('input').get('input_directory')
study_id = task_dict.get('input').get('study_id')
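The rewritten docker invocation now runs with --net=host and forwards STORAGEURL_COLLAB and METADATAURL_COLLAB from the caller's environment; if either variable is unset, os.environ.get returns None and the string concatenation raises a TypeError before docker is even started. A defensive pre-check, not part of this commit, could look like:

# Sketch only: fail fast if the required environment variables are missing.
required = ['ACCESSTOKEN', 'STORAGEURL_COLLAB', 'METADATAURL_COLLAB']
missing = [name for name in required if not os.environ.get(name)]
if missing:
    sys.exit('Missing required environment variables: ' + ', '.join(missing))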
58 changes: 58 additions & 0 deletions workflow/tools/upload_to_aws.py
@@ -0,0 +1,58 @@
#!/usr/bin/env python

import os
import sys
from utils import get_md5, get_task_dict, save_output_json
import subprocess
import time
import json


allowed_codes = { 'LIRI-JP', 'PACA-CA' , 'PRAD-CA', 'RECA-EU', 'PAEN-AU', 'PACA-AU',
'BOCA-UK','OV-AU', 'MELA-AU', 'BRCA-UK', 'PRAD-UK', 'CMDI-UK', 'LINC-JP',
'ORCA-IN', 'BTCA-SG', 'LAML-KR', 'LICA-FR', 'CLLE-ES', 'ESAD-UK', 'PAEN-IT'}


task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()

subprocess.check_output(['docker','pull','mesosphere/aws-cli'])

def upload_file(input_directory, study_id, payload):
upload_container = "quay.io/baminou/dckr_song_upload"
song_server = 'http://142.1.177.168:8080'

subprocess.check_output(['docker', 'pull', upload_container])

subprocess.check_output(['docker', 'run',
'--net=host',
'-e', 'ACCESSTOKEN',
'-e', 'STORAGEURL=' + os.environ.get('STORAGEURL_AWS'),
'-e', 'METADATAURL=' + os.environ.get('METADATAURL_AWS'),
'-v', input_directory + ':/app', upload_container,
'upload', '-s', study_id,
'-u', song_server, '-p', '/app/' + payload,
'-o', 'manifest.txt', '-j', 'manifest.json',
'-d', '/app/'])

return json.load(open(os.path.join(input_directory,'manifest.json')))


task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()

payloads = task_dict.get('input').get('payloads')
input_directory = task_dict.get('input').get('input_directory')
study_id = task_dict.get('input').get('study_id')

task_start = int(time.time())
run = study_id in allowed_codes

manifests = []

for i in range(0,len(payloads)):
manifests.append(upload_file(input_directory, study_id, payloads[i]))

save_output_json({
'manifests': manifests
})

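As committed, upload_to_aws.py pulls mesosphere/aws-cli and computes run = study_id in allowed_codes, but neither is referenced afterwards, and the task_dict / cwd assignments appear twice. Presumably the flag is meant to gate the upload loop; one way that gating might look, sketch only and not part of this commit:

# Sketch: only upload when the study's project code is in allowed_codes.
manifests = []
if run:
    for payload in payloads:
        manifests.append(upload_file(input_directory, study_id, payload))

save_output_json({'manifests': manifests})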