-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: string operations on cells (#791)
### Summary of Changes

Add a first batch of string operations on cells:

* `contains`
* `ends_with`
* `index_of`
* `length`
* `replace`
* `starts_with`
* `substring`
* `to_date`
* `to_datetime`
* `to_float`
* `to_int`
* `to_lowercase`
* `to_uppercase`
* `trim`
* `trim_end`
* `trim_start`

---------

Co-authored-by: megalinter-bot <[email protected]>
- Loading branch information
1 parent
4137131
commit 4a17f76
Showing
72 changed files
with
2,166 additions
and
267 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,6 @@ | ||
{ "a": { "0": 1, "1": 2, "2": 3 }, "b": { "0": 4, "1": 5, "2": 6 } } | ||
{ | ||
"columns": [ | ||
{ "name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3] }, | ||
{ "name": "b", "datatype": "Int64", "bit_settings": "", "values": [4, 5, 6] } | ||
] | ||
} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,6 @@ | ||
{ "a": { "0": 1, "1": 2, "2": 3 }, "b": { "0": 4, "1": 5, "2": 6 } } | ||
{ | ||
"columns": [ | ||
{ "name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3] }, | ||
{ "name": "b", "datatype": "Int64", "bit_settings": "", "values": [4, 5, 6] } | ||
] | ||
} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
101 changes: 101 additions & 0 deletions
101
src/safeds/data/tabular/containers/_lazy_string_cell.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
from safeds._utils import _structural_hash | ||
from safeds._validation import _check_bounds, _ClosedBound | ||
|
||
from ._lazy_cell import _LazyCell | ||
from ._string_cell import StringCell | ||
|
||
if TYPE_CHECKING: | ||
import datetime | ||
|
||
import polars as pl | ||
|
||
from ._cell import Cell | ||
|
||
|
||
class _LazyStringCell(StringCell):
    """
    Implementation of `StringCell` that is backed by a polars expression.

    Each string operation derives a new expression from the stored one and hands it to `_LazyCell`.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, expression: pl.Expr) -> None:
        # The expression all string operations of this cell are derived from.
        self._expression: pl.Expr = expression

    def __hash__(self) -> int:
        # Hash the serialized form of the expression rather than the expression object itself.
        serialized = self._expression.meta.serialize()
        return _structural_hash(serialized)

    def __sizeof__(self) -> int:
        # Report the size of the wrapped expression.
        expression = self._expression
        return expression.__sizeof__()

    # ------------------------------------------------------------------------------------------------------------------
    # String operations
    # ------------------------------------------------------------------------------------------------------------------

    def contains(self, substring: str) -> Cell[bool]:
        # literal=True: the substring is matched verbatim, not interpreted as a regular expression.
        matches = self._expression.str.contains(substring, literal=True)
        return _LazyCell(matches)

    def length(self, optimize_for_ascii: bool = False) -> Cell[int]:
        strings = self._expression.str
        # Counting bytes instead of characters is only requested when the caller opts in via `optimize_for_ascii`.
        count = strings.len_bytes() if optimize_for_ascii else strings.len_chars()
        return _LazyCell(count)

    def ends_with(self, suffix: str) -> Cell[bool]:
        has_suffix = self._expression.str.ends_with(suffix)
        return _LazyCell(has_suffix)

    def index_of(self, substring: str) -> Cell[int | None]:
        # literal=True: search for the substring verbatim, not as a regular expression.
        position = self._expression.str.find(substring, literal=True)
        return _LazyCell(position)

    def replace(self, old: str, new: str) -> Cell[str]:
        # Replaces every occurrence of `old` (replace_all), treating it as literal text.
        replaced = self._expression.str.replace_all(old, new, literal=True)
        return _LazyCell(replaced)

    def starts_with(self, prefix: str) -> Cell[bool]:
        has_prefix = self._expression.str.starts_with(prefix)
        return _LazyCell(has_prefix)

    def substring(self, start: int = 0, length: int | None = None) -> Cell[str]:
        # Check `length` against a closed lower bound of 0 before building the slice expression.
        _check_bounds("length", length, lower_bound=_ClosedBound(0))

        sliced = self._expression.str.slice(start, length)
        return _LazyCell(sliced)

    def to_date(self) -> Cell[datetime.date | None]:
        # "%F" is the ISO 8601 date format (year-month-day); parsing is non-strict (strict=False).
        parsed = self._expression.str.to_date(format="%F", strict=False)
        return _LazyCell(parsed)

    def to_datetime(self) -> Cell[datetime.datetime | None]:
        # "%+" is the ISO 8601 / RFC 3339 date-and-time format; parsing is non-strict (strict=False).
        parsed = self._expression.str.to_datetime(format="%+", strict=False)
        return _LazyCell(parsed)

    def to_int(self, *, base: int = 10) -> Cell[int | None]:
        # Parse the string as an integer in the given base; parsing is non-strict (strict=False).
        parsed = self._expression.str.to_integer(base=base, strict=False)
        return _LazyCell(parsed)

    def to_float(self) -> Cell[float | None]:
        # polars is imported at module level only under TYPE_CHECKING, so it has to be imported here for runtime use.
        import polars as pl

        converted = self._expression.cast(pl.Float64, strict=False)
        return _LazyCell(converted)

    def to_lowercase(self) -> Cell[str]:
        lowered = self._expression.str.to_lowercase()
        return _LazyCell(lowered)

    def to_uppercase(self) -> Cell[str]:
        uppered = self._expression.str.to_uppercase()
        return _LazyCell(uppered)

    def trim(self) -> Cell[str]:
        # Strip from both ends, using polars' default character set.
        stripped = self._expression.str.strip_chars()
        return _LazyCell(stripped)

    def trim_end(self) -> Cell[str]:
        # Strip from the end only, using polars' default character set.
        stripped = self._expression.str.strip_chars_end()
        return _LazyCell(stripped)

    def trim_start(self) -> Cell[str]:
        # Strip from the start only, using polars' default character set.
        stripped = self._expression.str.strip_chars_start()
        return _LazyCell(stripped)

    # ------------------------------------------------------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------------------------------------------------------

    def _equals(self, other: object) -> bool:
        # Identity implies equality; `self` is always a `_LazyStringCell`, so this shortcut needs no type check.
        if self is other:
            return True
        # Two lazy string cells are equal when polars considers their expressions equal.
        if isinstance(other, _LazyStringCell):
            return self._expression.meta.eq(other._expression.meta)
        # Any other type: defer to the other operand.
        return NotImplemented
Oops, something went wrong.