Skip to content

Commit

Permalink
Fixed seeds in certain methods to provide deterministic background co…
Browse files Browse the repository at this point in the history
…mposition and stable results.
  • Loading branch information
dmitrymyl committed Oct 22, 2021
1 parent 5ee5a3e commit bd9b48a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 13 deletions.
6 changes: 3 additions & 3 deletions ortho2align/cli_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
bg_from_inter_ranges_processing_group.add_argument('-seed',
type=int,
nargs='?',
default=123,
default=0,
help='random seed number for sampling intergenic regions (default: %(default)s).')
bg_from_inter_ranges_output_group = bg_from_inter_ranges_parser.add_argument_group('Output')
bg_from_inter_ranges_output_group.add_argument('-output',
Expand Down Expand Up @@ -82,7 +82,7 @@
bg_from_shuffled_ranges_processing_group.add_argument('-seed',
type=int,
nargs='?',
default=123,
default=0,
help='random seed number for sampling intergenic regions (default: %(default)s).')
bg_from_shuffled_ranges_output_group = bg_from_shuffled_ranges_parser.add_argument_group('Output')
bg_from_shuffled_ranges_output_group.add_argument('-output',
Expand Down Expand Up @@ -161,7 +161,7 @@
estimate_background_processing_group.add_argument('-seed',
type=int,
nargs='?',
default=123,
default=0,
help='random seed for sampling scores (default: %(default)s).')
estimate_background_processing_group.add_argument('--silent',
action='store_true',
Expand Down
8 changes: 6 additions & 2 deletions ortho2align/genomicranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -1762,14 +1762,16 @@ def inter_ranges(self, distance=0, verbose=False):

def shuffle_inside_chrom(self, seed=0):
shuffled_granges = []
local_random = random.Random()
local_random.seed(seed)
for grange in self:
chromsize = self.sequence_file.chromsizes.get(grange.chrom)
if chromsize is None:
end = grange.end
else:
end = chromsize.size - len(grange)
try:
shuffled_start = random.randint(0, end)
shuffled_start = local_random.randint(0, end)
except ValueError:
shuffled_start = 0
shuffled_end = shuffled_start + len(grange)
Expand All @@ -1786,7 +1788,9 @@ def shuffle_inside_chrom(self, seed=0):
def sample_granges(self, n, seed=0):
if n > len(self):
raise ValueError(f'Value of n={n} is greater than number of genomic ranges: {len(self)}.')
sample = random.sample(self, n)
local_random = random.Random()
local_random.seed(seed)
sample = local_random.sample(self, n)
used_args = {'collection', }
kwargs = {attr: getattr(self, attr)
for attr in set(self.init_args) - used_args}
Expand Down
17 changes: 10 additions & 7 deletions ortho2align/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def bg_from_inter_ranges(genes_filename,
seed (int): a random seed (default: 0).
silent (bool): if True, will suppress a progress bar (default: False).
"""
random.seed(seed)
local_random = random.Random()
local_random.seed(seed)

cmd_hints = ['parsing the annotation...',
'sampling intergenic regions...',
Expand All @@ -103,8 +104,8 @@ def bg_from_inter_ranges(genes_filename,
if len(inter_genes) < sample_size:
raise ValueError(f'The number of observations ({sample_size}) '
'must be less than the number of intergenic '
f'regions ({len(inter_genes)}) derived from genes')
samples = BaseGenomicRangesList(random.sample(inter_genes, k=sample_size))
f'regions ({len(inter_genes)}) derived from genes.')
samples = BaseGenomicRangesList(local_random.sample(inter_genes, k=sample_size))

pbar.update()

Expand Down Expand Up @@ -226,8 +227,9 @@ def _estimate_bg_for_single_query_blast(query, bg_ranges, word_size, output_name
scores += alignment_scores
score_size = len(scores)
if score_size > observations:
random.seed(seed)
scores = random.sample(scores, observations)
local_random = random.Random()
local_random.seed(seed)
scores = local_random.sample(scores, observations)
with open(output_name, 'w') as outfile:
json.dump(scores, outfile)
return output_name, score_size
Expand Down Expand Up @@ -264,8 +266,9 @@ def _estimate_bg_for_single_query_seqfile(query, seqfile, word_size, output_name
scores = [hsp.score for hsp in alignment.HSPs]
score_size = len(scores)
if score_size > observations:
random.seed(seed)
scores = random.sample(scores, observations)
local_random = random.Random()
local_random.seed(seed)
scores = local_random.sample(scores, observations)
with open(output_name, 'w') as outfile:
json.dump(scores, outfile)
return score_size
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


setup(name='ortho2align',
version='0.9',
version='1.0.1',
description='A lncRNA ortholog discovery tool based on syntenic regions and statistical assessment of alignment nonrandomness.',
url='http://github.com/dmitrymyl/ortho2align',
author='Dmitry Mylarshchikov',
Expand Down

0 comments on commit bd9b48a

Please sign in to comment.