Skip to content

Commit

Permalink
Fix cluster name handling in atac-seq script (#118)
Browse files Browse the repository at this point in the history
* Update handling of cluster names

* Bump version

* Update
  • Loading branch information
keller-mark authored Mar 6, 2023
1 parent 7a20064 commit 0b94a44
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
2 changes: 1 addition & 1 deletion containers/scatac-csv-to-arrow/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.2
0.0.3
19 changes: 15 additions & 4 deletions containers/scatac-csv-to-arrow/context/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ def arrow_to_csv(arrow_file, csv_file):
df = pa.ipc.open_file(arrow_file).read_pandas()
df.to_csv(csv_file)

def try_str_to_int(val):
    """Best-effort conversion of a label to an integer.

    All digit characters found in ``val`` are concatenated, in order, and
    parsed as a single integer (for example ``"cluster_12"`` becomes ``12``
    and ``"a1b2"`` becomes ``12``).

    Args:
        val: The label to convert; normally a string.

    Returns:
        The parsed ``int`` when ``val`` contains at least one digit
        character that ``int`` can parse. Otherwise ``val`` itself is
        returned unchanged: when it has no digits, when it is not a string
        (e.g. it is already an ``int``), or when the digits cannot be parsed.

    Note:
        When used as a ``sorted`` key over inputs that mix digit-bearing and
        digit-free labels, the results mix ``int`` and ``str``, which Python
        cannot order against each other.
    """
    try:
        # Keep only numeric characters.
        val_numeric = "".join(filter(str.isdigit, val))
    except TypeError:
        # val is not an iterable of strings (e.g. an int); leave it as-is.
        return val

    if not val_numeric:
        # No digits at all: fall back to the original value rather than
        # implicitly returning None.
        return val

    try:
        return int(val_numeric)
    except ValueError:
        # str.isdigit accepts some characters (e.g. superscript digits)
        # that int() refuses to parse.
        return val

# Big TODO: deduplicate this with h5ad-to-arrow
def arrow_to_json(arrow_file, **kwargs):
Expand All @@ -31,22 +39,25 @@ def arrow_to_json(arrow_file, **kwargs):
df = pa.ipc.open_file(arrow_file).read_pandas()
df_items = df.T.to_dict().items()

# It is possible for the cluster names to not be integers.
df['leiden'] = df['leiden'].astype(str)
leiden_clusters = sorted(df['leiden'].unique(), key=try_str_to_int)

id_to_umap = {
k: {
"mappings": {"UMAP": [v['umap_x'], v['umap_y']]},
"factors": {"Leiden Cluster": str(int(v['leiden']))}
"factors": {"Leiden Cluster": str(v['leiden'])}
}
for (k,v) in df_items
}
pretty_json_umap = json.dumps(id_to_umap).replace('}},', '}},\n')
with open(umap_json, 'w') as f:
f.write(pretty_json_umap)

leiden_clusters = sorted(df['leiden'].unique().astype('uint8'))
id_to_factors = {
'Leiden Cluster': {
'map': [str(cluster) for cluster in leiden_clusters],
'cells': { k: v['leiden'] for (k,v) in df_items }
'map': leiden_clusters,
'cells': { k: leiden_clusters.index(str(v['leiden'])) for (k,v) in df_items }
}
}
pretty_json_factors = json.dumps(id_to_factors).replace('}},', '}},\n')
Expand Down
2 changes: 1 addition & 1 deletion scatac-csv-to-arrow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class: CommandLineTool
baseCommand: ['python', '/main.py', '--output_dir', './output', '--input_dir']
hints:
DockerRequirement:
dockerPull: hubmap/portal-container-scatac-csv-to-arrow:0.0.1
dockerPull: hubmap/portal-container-scatac-csv-to-arrow:0.0.3
inputs:
input_directory:
type: Directory
Expand Down

0 comments on commit 0b94a44

Please sign in to comment.