Skip to content

Commit

Permalink
refactor: replace the way the birth_names data is generated
Browse files Browse the repository at this point in the history
  • Loading branch information
ofekisr committed Dec 30, 2021
1 parent 8ebec60 commit 0a5d2ae
Show file tree
Hide file tree
Showing 8 changed files with 365 additions and 88 deletions.
25 changes: 25 additions & 0 deletions tests/common/example_data_generator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
34 changes: 34 additions & 0 deletions tests/common/example_data_generator/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable


class ExampleDataGenerator(ABC):
    """Abstract interface for objects that produce example data rows."""

    @abstractmethod
    def generate(self) -> Iterable[Dict[Any, Any]]:
        """Return an iterable of rows, each row being a dictionary.

        Subclasses must override this method; a subclass that does not
        cannot be instantiated.
        """
80 changes: 80 additions & 0 deletions tests/common/example_data_generator/birth_names_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from datetime import datetime
from random import choice, randint
from typing import Any, Dict, Iterable

from tests.common.example_data_generator.base import ExampleDataGenerator
from tests.common.example_data_generator.consts import US_STATES
from tests.common.example_data_generator.string_generator import StringGenerator

# Values used for the "gender" field of the rows produced by BirthNamesGenerator.
GIRL = "girl"
BOY = "boy"


class BirthNamesGenerator(ExampleDataGenerator):
    """Generate random rows for the ``birth_names`` example data.

    For every year in ``[start_year, start_year + years_amount)`` the
    generator yields ``rows_per_year`` rows, each dated January 1st of
    that year.
    """

    _names_generator: StringGenerator
    _start_year: int
    _until_not_include_year: int  # exclusive upper bound of the year range
    _rows_per_year: int

    def __init__(
        self,
        names_generator: StringGenerator,
        start_year: int,
        years_amount: int,
        rows_per_year: int,
    ) -> None:
        """Store the generation parameters after validating the year range.

        :param names_generator: source of the random ``name`` values
        :param start_year: first year to generate rows for
        :param years_amount: number of consecutive years to cover
        :param rows_per_year: number of rows yielded for each year
        :raises AssertionError: if the requested years cannot be represented
            by ``datetime`` or ``years_amount`` is not positive
        """
        # Validate against the bounds of ``datetime`` up front: an
        # out-of-range year would otherwise only fail later, in the middle
        # of ``generate()``.
        assert (
            start_year >= datetime.min.year
        ), "start_year is before the earliest supported year"
        assert years_amount > 0, "years_amount is not positive"
        assert (
            start_year + years_amount - 1 <= datetime.max.year
        ), "the requested years exceed the latest supported year"
        self._names_generator = names_generator
        self._start_year = start_year
        self._until_not_include_year = start_year + years_amount
        self._rows_per_year = rows_per_year

    def generate(self) -> Iterable[Dict[Any, Any]]:
        """Lazily yield ``rows_per_year`` rows for each configured year."""
        for year in range(self._start_year, self._until_not_include_year):
            ds = self._make_year(year)
            for _ in range(self._rows_per_year):
                yield self.generate_row(ds)

    def _make_year(self, year: int) -> datetime:
        """Return midnight of January 1st of ``year``."""
        return datetime(year, 1, 1, 0, 0, 0)

    def generate_row(self, dt: datetime) -> Dict[Any, Any]:
        """Build one random row dated ``dt``.

        ``num`` is a random integer between 1 and 100000 (inclusive); it is
        repeated in ``num_boys`` or ``num_girls`` according to the randomly
        chosen gender, while the other counter is 0.
        """
        gender = choice([BOY, GIRL])
        num = randint(1, 100000)
        return {
            "ds": dt,
            "gender": gender,
            "name": self._names_generator.generate(),
            "num": num,
            "state": choice(US_STATES),
            "num_boys": num if gender == BOY else 0,
            "num_girls": num if gender == GIRL else 0,
        }
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from abc import ABC, abstractmethod

from tests.common.example_data_generator.birth_names_generator import (
BirthNamesGenerator,
)
from tests.common.example_data_generator.string_generator_factory import (
StringGeneratorFactory,
)


class BirthNamesGeneratorFactory(ABC):
    """Single access point for building ``BirthNamesGenerator`` objects.

    One factory instance is shared by the whole class hierarchy. It can be
    replaced with ``set_instance``; when none has been set, a
    ``BirthNamesGeneratorFactoryImpl`` is created on first use.
    """

    # The shared factory. It is always read and written through the base
    # class (never through ``cls``) so that calling ``set_instance`` or
    # ``make`` on a subclass affects the same slot.
    __factory: BirthNamesGeneratorFactory | None = None

    @abstractmethod
    def _make(self) -> BirthNamesGenerator:
        """Build and return a new generator; implemented by concrete factories."""

    @classmethod
    def make(cls) -> BirthNamesGenerator:
        """Return a generator built by the currently registered factory."""
        return cls._get_instance()._make()

    @classmethod
    def set_instance(cls, factory: BirthNamesGeneratorFactory) -> None:
        """Register ``factory`` as the shared factory used by ``make``."""
        BirthNamesGeneratorFactory.__factory = factory

    @classmethod
    def _get_instance(cls) -> BirthNamesGeneratorFactory:
        """Return the shared factory, creating the default one if needed."""
        factory = BirthNamesGeneratorFactory.__factory
        if factory is None:
            factory = BirthNamesGeneratorFactoryImpl()
            BirthNamesGeneratorFactory.__factory = factory
        return factory


# Default arguments BirthNamesGeneratorFactoryImpl uses to build its generator.
MIN_NAME_LEN = 3  # minimum length of a generated name
MAX_NAME_SIZE = 10  # maximum length of a generated name
START_YEAR = 1960  # first year rows are generated for
YEARS_AMOUNT = 60  # number of consecutive years covered
ROW_PER_YEAR = 20  # rows generated for each year


class BirthNamesGeneratorFactoryImpl(BirthNamesGeneratorFactory):
    """Factory that builds generators from the module-level defaults."""

    def _make(self) -> BirthNamesGenerator:
        """Return a generator whose names are random lowercase strings."""
        string_generator = StringGeneratorFactory.make_lowercase_based(
            MIN_NAME_LEN, MAX_NAME_SIZE
        )
        # Keyword arguments keep the three integer parameters from being
        # silently swapped.
        return BirthNamesGenerator(
            names_generator=string_generator,
            start_year=START_YEAR,
            years_amount=YEARS_AMOUNT,
            rows_per_year=ROW_PER_YEAR,
        )
69 changes: 69 additions & 0 deletions tests/common/example_data_generator/consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Two-letter codes of the 50 US states, followed by the catch-all "other".
# fmt: off
US_STATES = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
    "other",
]
# fmt: on
42 changes: 42 additions & 0 deletions tests/common/example_data_generator/string_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from random import choices, randint


class StringGenerator:
    """Produce random strings built from a fixed set of letters.

    The arguments are stored as given; this class performs no validation.
    """

    _seed_letters: str
    _min_length: int
    _max_length: int

    def __init__(self, seed_letters: str, min_length: int, max_length: int) -> None:
        """Remember the alphabet and the inclusive length bounds.

        :param seed_letters: characters the generated strings are drawn from
        :param min_length: minimum length of a generated string
        :param max_length: maximum length of a generated string
        """
        self._seed_letters = seed_letters
        self._min_length = min_length
        self._max_length = max_length

    def generate(self) -> str:
        """Return a new random string.

        The length is picked with ``randint``, so both bounds are inclusive;
        the characters are then drawn with replacement from the seed letters.
        """
        length = randint(self._min_length, self._max_length)
        return "".join(choices(self._seed_letters, k=length))
46 changes: 46 additions & 0 deletions tests/common/example_data_generator/string_generator_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import string

from tests.common.example_data_generator.string_generator import StringGenerator


class StringGeneratorFactory:
    """Build validated ``StringGenerator`` instances."""

    @classmethod
    def make(
        cls, seed_letters: str, min_length: int, max_length: int
    ) -> StringGenerator:
        """Validate the arguments and return a generator configured with them.

        :param seed_letters: characters the generated strings are drawn from
        :param min_length: minimum length of a generated string
        :param max_length: maximum length; must be greater than ``min_length``
        :raises AssertionError: if any argument fails validation
        """
        cls.__validate_arguments(seed_letters, min_length, max_length)
        return StringGenerator(seed_letters, min_length, max_length)

    @classmethod
    def make_lowercase_based(cls, min_length: int, max_length: int) -> StringGenerator:
        """Return a generator drawing from ``string.ascii_lowercase``."""
        return cls.make(string.ascii_lowercase, min_length, max_length)

    @classmethod
    def make_ascii_letters_based(
        cls, min_length: int, max_length: int
    ) -> StringGenerator:
        """Return a generator drawing from ``string.ascii_letters``."""
        return cls.make(string.ascii_letters, min_length, max_length)

    @staticmethod
    def __validate_arguments(
        seed_letters: str, min_length: int, max_length: int
    ) -> None:
        """Assert that the alphabet is non-empty and the length bounds are sane.

        ``min_length`` may be 0; ``max_length`` must be strictly greater than
        ``min_length``.
        """
        assert seed_letters, "seed_letters is empty"
        assert min_length > -1, "min_length is negative"
        assert max_length > min_length, "max_length is not bigger then min_length"
Loading

0 comments on commit 0a5d2ae

Please sign in to comment.