From 976e385c1d9df92c075575125475b22c423205b9 Mon Sep 17 00:00:00 2001 From: Ramy Date: Sat, 28 Sep 2024 15:37:00 +0200 Subject: [PATCH] Implemented Suffix Tree Data Structure (#11554) * Implemented KD-Tree Data Structure * Implemented KD-Tree Data Structure. updated DIRECTORY.md. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Create __init__.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Replaced legacy `np.random.rand` call with `np.random.Generator` in kd_tree/example_usage.py * Replaced legacy `np.random.rand` call with `np.random.Generator` in kd_tree/hypercube_points.py * added typehints and docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstring for search() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added tests. Updated docstrings/typehints * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated tests and used | for type annotations * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * E501 for build_kdtree.py, hypercube_points.py, nearest_neighbour_search.py * I001 for example_usage.py and test_kdtree.py * I001 for example_usage.py and test_kdtree.py * Update data_structures/kd_tree/build_kdtree.py Co-authored-by: Christian Clauss * Update data_structures/kd_tree/example/hypercube_points.py Co-authored-by: Christian Clauss * Update data_structures/kd_tree/example/hypercube_points.py Co-authored-by: Christian Clauss * Added new test cases requested in Review. Refactored the test_build_kdtree() to include various checks. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considered ruff errors * Considered ruff errors * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update kd_node.py * imported annotations from __future__ * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Implementation of the suffix tree data structure * Adding data to DIRECTORY.md * Minor file renaming * minor correction * renaming in DIRECTORY.md * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-2 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-3 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-4 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-5 * Implemented Suffix Tree Data Structure. Added some comments to my files in #11532, #11554. * updating DIRECTORY.md * Implemented Suffix Tree Data Structure. Added some comments to my files in #11532, #11554. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss Co-authored-by: Ramy-Badr-Ahmed --- DIRECTORY.md | 7 ++ data_structures/kd_tree/build_kdtree.py | 8 +++ .../kd_tree/example/example_usage.py | 8 +++ .../kd_tree/example/hypercube_points.py | 8 +++ data_structures/kd_tree/kd_node.py | 8 +++ .../kd_tree/nearest_neighbour_search.py | 8 +++ data_structures/kd_tree/tests/test_kdtree.py | 8 +++ data_structures/suffix_tree/__init__.py | 0 .../suffix_tree/example/__init__.py | 0 .../suffix_tree/example/example_usage.py | 37 +++++++++++ data_structures/suffix_tree/suffix_tree.py | 66 +++++++++++++++++++ .../suffix_tree/suffix_tree_node.py | 36 ++++++++++ data_structures/suffix_tree/tests/__init__.py | 0 .../suffix_tree/tests/test_suffix_tree.py | 59 +++++++++++++++++ 14 files changed, 253 insertions(+) create mode 100644 data_structures/suffix_tree/__init__.py create mode 100644 data_structures/suffix_tree/example/__init__.py create mode 100644 data_structures/suffix_tree/example/example_usage.py create mode 100644 data_structures/suffix_tree/suffix_tree.py create mode 100644 data_structures/suffix_tree/suffix_tree_node.py create mode 100644 data_structures/suffix_tree/tests/__init__.py create mode 100644 data_structures/suffix_tree/tests/test_suffix_tree.py diff --git a/DIRECTORY.md b/DIRECTORY.md index e965d3b32ccf..955001e2aa23 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -291,6 +291,13 @@ * [Stack With Doubly Linked List](data_structures/stacks/stack_with_doubly_linked_list.py) * [Stack With Singly Linked List](data_structures/stacks/stack_with_singly_linked_list.py) * [Stock Span Problem](data_structures/stacks/stock_span_problem.py) + * Suffix Tree + * Example + * [Example Usage](data_structures/suffix_tree/example/example_usage.py) + * [Suffix Tree](data_structures/suffix_tree/suffix_tree.py) + * [Suffix Tree Node](data_structures/suffix_tree/suffix_tree_node.py) + * Tests + * [Test Suffix Tree](data_structures/suffix_tree/tests/test_suffix_tree.py) * Trie * [Radix Tree](data_structures/trie/radix_tree.py) * [Trie](data_structures/trie/trie.py) diff --git a/data_structures/kd_tree/build_kdtree.py b/data_structures/kd_tree/build_kdtree.py index c5b800a2c992..074a5dac4d42 100644 --- a/data_structures/kd_tree/build_kdtree.py +++ b/data_structures/kd_tree/build_kdtree.py @@ -1,3 +1,11 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11532 +# https://github.com/TheAlgorithms/Python/pull/11532 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + from data_structures.kd_tree.kd_node import KDNode diff --git a/data_structures/kd_tree/example/example_usage.py b/data_structures/kd_tree/example/example_usage.py index e270f0cdd245..892c3b8c4a2a 100644 --- a/data_structures/kd_tree/example/example_usage.py +++ b/data_structures/kd_tree/example/example_usage.py @@ -1,3 +1,11 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11532 +# https://github.com/TheAlgorithms/Python/pull/11532 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + import numpy as np from data_structures.kd_tree.build_kdtree import build_kdtree diff --git a/data_structures/kd_tree/example/hypercube_points.py b/data_structures/kd_tree/example/hypercube_points.py index 2d8800ac9338..66744856e6d5 100644 --- a/data_structures/kd_tree/example/hypercube_points.py +++ b/data_structures/kd_tree/example/hypercube_points.py @@ -1,3 +1,11 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11532 +# https://github.com/TheAlgorithms/Python/pull/11532 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + import numpy as np diff --git a/data_structures/kd_tree/kd_node.py b/data_structures/kd_tree/kd_node.py index e1011027938d..5a22ef609077 100644 --- a/data_structures/kd_tree/kd_node.py +++ b/data_structures/kd_tree/kd_node.py @@ -1,3 +1,11 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11532 +# https://github.com/TheAlgorithms/Python/pull/11532 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + from __future__ import annotations diff --git a/data_structures/kd_tree/nearest_neighbour_search.py b/data_structures/kd_tree/nearest_neighbour_search.py index d9727736f21c..8104944c08f0 100644 --- a/data_structures/kd_tree/nearest_neighbour_search.py +++ b/data_structures/kd_tree/nearest_neighbour_search.py @@ -1,3 +1,11 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11532 +# https://github.com/TheAlgorithms/Python/pull/11532 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + from data_structures.kd_tree.kd_node import KDNode diff --git a/data_structures/kd_tree/tests/test_kdtree.py b/data_structures/kd_tree/tests/test_kdtree.py index 81f2cc990074..dce5e4f34ff4 100644 --- a/data_structures/kd_tree/tests/test_kdtree.py +++ b/data_structures/kd_tree/tests/test_kdtree.py @@ -1,3 +1,11 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11532 +# https://github.com/TheAlgorithms/Python/pull/11532 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + import numpy as np import pytest diff --git a/data_structures/suffix_tree/__init__.py b/data_structures/suffix_tree/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/data_structures/suffix_tree/example/__init__.py b/data_structures/suffix_tree/example/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/data_structures/suffix_tree/example/example_usage.py b/data_structures/suffix_tree/example/example_usage.py new file mode 100644 index 000000000000..724ac57e8bfb --- /dev/null +++ b/data_structures/suffix_tree/example/example_usage.py @@ -0,0 +1,37 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11554 +# https://github.com/TheAlgorithms/Python/pull/11554 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + +from data_structures.suffix_tree.suffix_tree import SuffixTree + + +def main() -> None: + """ + Demonstrate the usage of the SuffixTree class. + + - Initializes a SuffixTree with a predefined text. + - Defines a list of patterns to search for within the suffix tree. + - Searches for each pattern in the suffix tree. + + Patterns tested: + - "ana" (found) --> True + - "ban" (found) --> True + - "na" (found) --> True + - "xyz" (not found) --> False + - "mon" (found) --> True + """ + text = "monkey banana" + suffix_tree = SuffixTree(text) + + patterns = ["ana", "ban", "na", "xyz", "mon"] + for pattern in patterns: + found = suffix_tree.search(pattern) + print(f"Pattern '{pattern}' found: {found}") + + +if __name__ == "__main__": + main() diff --git a/data_structures/suffix_tree/suffix_tree.py b/data_structures/suffix_tree/suffix_tree.py new file mode 100644 index 000000000000..ad54fb0ba009 --- /dev/null +++ b/data_structures/suffix_tree/suffix_tree.py @@ -0,0 +1,66 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11554 +# https://github.com/TheAlgorithms/Python/pull/11554 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + +from data_structures.suffix_tree.suffix_tree_node import SuffixTreeNode + + +class SuffixTree: + def __init__(self, text: str) -> None: + """ + Initializes the suffix tree with the given text. + + Args: + text (str): The text for which the suffix tree is to be built. + """ + self.text: str = text + self.root: SuffixTreeNode = SuffixTreeNode() + self.build_suffix_tree() + + def build_suffix_tree(self) -> None: + """ + Builds the suffix tree for the given text by adding all suffixes. + """ + text = self.text + n = len(text) + for i in range(n): + suffix = text[i:] + self._add_suffix(suffix, i) + + def _add_suffix(self, suffix: str, index: int) -> None: + """ + Adds a suffix to the suffix tree. + + Args: + suffix (str): The suffix to add. + index (int): The starting index of the suffix in the original text. + """ + node = self.root + for char in suffix: + if char not in node.children: + node.children[char] = SuffixTreeNode() + node = node.children[char] + node.is_end_of_string = True + node.start = index + node.end = index + len(suffix) - 1 + + def search(self, pattern: str) -> bool: + """ + Searches for a pattern in the suffix tree. + + Args: + pattern (str): The pattern to search for. + + Returns: + bool: True if the pattern is found, False otherwise. + """ + node = self.root + for char in pattern: + if char not in node.children: + return False + node = node.children[char] + return True diff --git a/data_structures/suffix_tree/suffix_tree_node.py b/data_structures/suffix_tree/suffix_tree_node.py new file mode 100644 index 000000000000..e5b628645063 --- /dev/null +++ b/data_structures/suffix_tree/suffix_tree_node.py @@ -0,0 +1,36 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11554 +# https://github.com/TheAlgorithms/Python/pull/11554 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + +from __future__ import annotations + + +class SuffixTreeNode: + def __init__( + self, + children: dict[str, SuffixTreeNode] | None = None, + is_end_of_string: bool = False, + start: int | None = None, + end: int | None = None, + suffix_link: SuffixTreeNode | None = None, + ) -> None: + """ + Initializes a suffix tree node. + + Parameters: + children (dict[str, SuffixTreeNode] | None): The children of this node. + is_end_of_string (bool): Indicates if this node represents + the end of a string. + start (int | None): The start index of the suffix in the text. + end (int | None): The end index of the suffix in the text. + suffix_link (SuffixTreeNode | None): Link to another suffix tree node. + """ + self.children = children or {} + self.is_end_of_string = is_end_of_string + self.start = start + self.end = end + self.suffix_link = suffix_link diff --git a/data_structures/suffix_tree/tests/__init__.py b/data_structures/suffix_tree/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/data_structures/suffix_tree/tests/test_suffix_tree.py b/data_structures/suffix_tree/tests/test_suffix_tree.py new file mode 100644 index 000000000000..45c6790ac48a --- /dev/null +++ b/data_structures/suffix_tree/tests/test_suffix_tree.py @@ -0,0 +1,59 @@ +# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) +# in Pull Request: #11554 +# https://github.com/TheAlgorithms/Python/pull/11554 +# +# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request +# addressing bugs/corrections to this file. +# Thank you! + +import unittest + +from data_structures.suffix_tree.suffix_tree import SuffixTree + + +class TestSuffixTree(unittest.TestCase): + def setUp(self) -> None: + """Set up the initial conditions for each test.""" + self.text = "banana" + self.suffix_tree = SuffixTree(self.text) + + def test_search_existing_patterns(self) -> None: + """Test searching for patterns that exist in the suffix tree.""" + patterns = ["ana", "ban", "na"] + for pattern in patterns: + with self.subTest(pattern=pattern): + assert self.suffix_tree.search( + pattern + ), f"Pattern '{pattern}' should be found." + + def test_search_non_existing_patterns(self) -> None: + """Test searching for patterns that do not exist in the suffix tree.""" + patterns = ["xyz", "apple", "cat"] + for pattern in patterns: + with self.subTest(pattern=pattern): + assert not self.suffix_tree.search( + pattern + ), f"Pattern '{pattern}' should not be found." + + def test_search_empty_pattern(self) -> None: + """Test searching for an empty pattern.""" + assert self.suffix_tree.search(""), "An empty pattern should be found." + + def test_search_full_text(self) -> None: + """Test searching for the full text.""" + assert self.suffix_tree.search( + self.text + ), "The full text should be found in the suffix tree." + + def test_search_substrings(self) -> None: + """Test searching for substrings of the full text.""" + substrings = ["ban", "ana", "a", "na"] + for substring in substrings: + with self.subTest(substring=substring): + assert self.suffix_tree.search( + substring + ), f"Substring '{substring}' should be found." + + +if __name__ == "__main__": + unittest.main()