From bd9b48ad049583193fc055e009562047ddf5f1b3 Mon Sep 17 00:00:00 2001 From: dmitrymyl Date: Fri, 22 Oct 2021 18:00:41 +0300 Subject: [PATCH] Fixed seeds in certain methods to provide deterministic background composition and stable results. --- ortho2align/cli_scripts.py | 6 +++--- ortho2align/genomicranges.py | 8 ++++++-- ortho2align/pipeline.py | 17 ++++++++++------- setup.py | 2 +- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/ortho2align/cli_scripts.py b/ortho2align/cli_scripts.py index 0cfe97d..91b4032 100644 --- a/ortho2align/cli_scripts.py +++ b/ortho2align/cli_scripts.py @@ -38,7 +38,7 @@ bg_from_inter_ranges_processing_group.add_argument('-seed', type=int, nargs='?', - default=123, + default=0, help='random seed number for sampling intergenic regions (default: %(default)s).') bg_from_inter_ranges_output_group = bg_from_inter_ranges_parser.add_argument_group('Output') bg_from_inter_ranges_output_group.add_argument('-output', @@ -82,7 +82,7 @@ bg_from_shuffled_ranges_processing_group.add_argument('-seed', type=int, nargs='?', - default=123, + default=0, help='random seed number for sampling intergenic regions (default: %(default)s).') bg_from_shuffled_ranges_output_group = bg_from_shuffled_ranges_parser.add_argument_group('Output') bg_from_shuffled_ranges_output_group.add_argument('-output', @@ -161,7 +161,7 @@ estimate_background_processing_group.add_argument('-seed', type=int, nargs='?', - default=123, + default=0, help='random seed for sampling scores (default: %(default)s).') estimate_background_processing_group.add_argument('--silent', action='store_true', diff --git a/ortho2align/genomicranges.py b/ortho2align/genomicranges.py index c4f38cb..4f4e0c1 100644 --- a/ortho2align/genomicranges.py +++ b/ortho2align/genomicranges.py @@ -1762,6 +1762,8 @@ def inter_ranges(self, distance=0, verbose=False): def shuffle_inside_chrom(self, seed=0): shuffled_granges = [] + local_random = random.Random() + local_random.seed(seed) for grange in self: 
chromsize = self.sequence_file.chromsizes.get(grange.chrom) if chromsize is None: @@ -1769,7 +1771,7 @@ def shuffle_inside_chrom(self, seed=0): else: end = chromsize.size - len(grange) try: - shuffled_start = random.randint(0, end) + shuffled_start = local_random.randint(0, end) except ValueError: shuffled_start = 0 shuffled_end = shuffled_start + len(grange) @@ -1786,7 +1788,9 @@ def shuffle_inside_chrom(self, seed=0): def sample_granges(self, n, seed=0): if n > len(self): raise ValueError(f'Value of n={n} is greater than number of genomic ranges: {len(self)}.') - sample = random.sample(self, n) + local_random = random.Random() + local_random.seed(seed) + sample = local_random.sample(self, n) used_args = {'collection', } kwargs = {attr: getattr(self, attr) for attr in set(self.init_args) - used_args} diff --git a/ortho2align/pipeline.py b/ortho2align/pipeline.py index bc3a062..335b3c7 100644 --- a/ortho2align/pipeline.py +++ b/ortho2align/pipeline.py @@ -84,7 +84,8 @@ def bg_from_inter_ranges(genes_filename, seed (int): a random seed (default: 0). silent (bool): if True, will suppress a progress bar (default: False). 
""" - random.seed(seed) + local_random = random.Random() + local_random.seed(seed) cmd_hints = ['parsing the annotation...', 'sampling intergenic regions...', @@ -103,8 +104,8 @@ def bg_from_inter_ranges(genes_filename, if len(inter_genes) < sample_size: raise ValueError(f'The number of observations ({sample_size}) ' 'must be less than the number of intergenic ' - f'regions ({len(inter_genes)}) derived from genes') - samples = BaseGenomicRangesList(random.sample(inter_genes, k=sample_size)) + f'regions ({len(inter_genes)}) derived from genes.') + samples = BaseGenomicRangesList(local_random.sample(inter_genes, k=sample_size)) pbar.update() @@ -226,8 +227,9 @@ def _estimate_bg_for_single_query_blast(query, bg_ranges, word_size, output_name scores += alignment_scores score_size = len(scores) if score_size > observations: - random.seed(seed) - scores = random.sample(scores, observations) + local_random = random.Random() + local_random.seed(seed) + scores = local_random.sample(scores, observations) with open(output_name, 'w') as outfile: json.dump(scores, outfile) return output_name, score_size @@ -264,8 +266,9 @@ def _estimate_bg_for_single_query_seqfile(query, seqfile, word_size, output_name scores = [hsp.score for hsp in alignment.HSPs] score_size = len(scores) if score_size > observations: - random.seed(seed) - scores = random.sample(scores, observations) + local_random = random.Random() + local_random.seed(seed) + scores = local_random.sample(scores, observations) with open(output_name, 'w') as outfile: json.dump(scores, outfile) return score_size diff --git a/setup.py b/setup.py index a3cdcbe..f4bbd55 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup(name='ortho2align', - version='0.9', + version='1.0.1', description='A lncRNA ortholog discovery tool based on syntenic regions and statistical assessment of alignment nonrandomness.', url='http://github.com/dmitrymyl/ortho2align', author='Dmitry Mylarshchikov',