Skip to content

Commit

Permalink
split column_type out of SignaturePicklist a bit
Browse files: browse the repository at this point in the history
  • Loading branch information
ctb committed Jun 12, 2021
1 parent 1bdf88e commit 3c05f95
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def extract(args):

notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")

n_empty_val, dup_vals = picklist.load(picklist.pickfile)
n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)

notify(f"loaded {len(picklist.pickset)} distinct values into picklist.")
if n_empty_val:
Expand Down
18 changes: 13 additions & 5 deletions src/sourmash/sig/picklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class SignaturePicklist:
"""
def __init__(self, pickfile, column_name, coltype):
self.pickfile = pickfile # note: can be None
self.column_name = column_name
self.column_name = column_name # note: can be None
self.coltype = coltype

if coltype not in ('md5', 'md5prefix8', 'name', 'ident', 'ident.'):
Expand Down Expand Up @@ -79,7 +79,12 @@ def _get_sig_attribute(self, ss):

return q

def load(self, pickfile):
def init(self, values=()):
    """Initialize the pickset from an iterable of values.

    Args:
        values: iterable of hashable values to place in the pickset;
            duplicates collapse because the values are stored in a set.
            Defaults to an empty, immutable tuple so no mutable object
            is shared between calls.

    Raises:
        ValueError: if ``self.pickset`` has already been set.
    """
    # Refuse to overwrite an existing pickset rather than silently
    # discarding values that were already loaded or added.
    if self.pickset is not None:
        raise ValueError("already initialized?")
    self.pickset = set(values)

def load(self, pickfile, column_name):
"load pickset, return num empty vals, and set of duplicate vals."
pickset = self.pickset
if pickset is None:
Expand All @@ -90,12 +95,12 @@ def load(self, pickfile):
with open(pickfile, newline='') as csvfile:
r = csv.DictReader(csvfile)

if self.column_name not in r.fieldnames:
raise ValueError("column '{self.column_name}' not in pickfile '{pickfile}'")
if column_name not in r.fieldnames:
raise ValueError("column '{column_name}' not in pickfile '{pickfile}'")

for row in r:
# pick out values from column
col = row[self.column_name]
col = row[column_name]
if not col:
n_empty_val += 1
continue
Expand All @@ -111,6 +116,9 @@ def load(self, pickfile):
self.pickset = pickset
return n_empty_val, dup_vals

def add(self, value):
    """Add a single value to the pickset.

    If the pickset has not been created yet (it is still ``None``), an
    empty set is created first, so calling this before any values have
    been loaded does not fail with an AttributeError on ``None``.

    Args:
        value: hashable value to add to the pickset.
    """
    if self.pickset is None:
        self.pickset = set()
    self.pickset.add(value)

def __contains__(self, ss):
"does this signature match anything in the picklist?"
q = self._get_sig_attribute(ss)
Expand Down

0 comments on commit 3c05f95

Please sign in to comment.