From e857da3955e542c0b6419f5b518ac56db413c975 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Sun, 20 Jun 2021 08:50:54 -0700 Subject: [PATCH 1/9] Allow fakers to build on previously populated fields. --- snowfakery/data_generator_runtime.py | 13 +- .../data_generator_runtime_object_model.py | 5 +- snowfakery/fakedata/fake_data_generator.py | 50 +++++-- snowfakery/plugins.py | 10 +- snowfakery/utils/template_utils.py | 24 ++- tests/test_data_generator_runtime_dom.py | 3 - tests/test_faker.py | 139 ++++++++++++++++++ tests/test_locales.py | 2 +- 8 files changed, 214 insertions(+), 32 deletions(-) diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index 80b37d5d..33ab1ae4 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -14,6 +14,7 @@ from .data_gen_exceptions import DataGenSyntaxError, DataGenNameError import snowfakery # noQA from snowfakery.object_rows import NicknameSlot, SlotState, ObjectRow +from snowfakery.plugins import PluginContext, SnowfakeryPlugin OutputStream = "snowfakery.output_streams.OutputStream" VariableDefinition = "snowfakery.data_generator_runtime_object_model.VariableDefinition" @@ -296,6 +297,8 @@ def __init__( raise DataGenNameError( f"No template creating {stop_table_name}", ) + faker_plugin = SnowfakeryPlugin(self) + self.faker_plugin_context = PluginContext(faker_plugin) self.faker_template_libraries = {} @@ -318,7 +321,11 @@ def execute(self): def faker_template_library(self, locale): rc = self.faker_template_libraries.get(locale) if not rc: - rc = FakerTemplateLibrary(self.faker_providers, locale) + rc = FakerTemplateLibrary( + self.faker_providers, + locale, + self.faker_plugin_context, + ) self.faker_template_libraries[locale] = rc return rc @@ -359,6 +366,7 @@ class RuntimeContext: obj: Optional[ObjectRow] = None template_evaluator_recipe = JinjaTemplateEvaluatorFactory() current_template = None + local_vars = None def __init__( self, @@ -454,6 +462,9 @@ def field_vars(self): return self.evaluation_namespace.field_vars() def context_vars(self, plugin_namespace): + """ "Variables which are inherited by child scopes""" + # This looks like a candidate for optimization. + # An unconditional object copy? local_plugin_vars = self._plugin_context_vars.get(plugin_namespace, {}).copy() self._plugin_context_vars[plugin_namespace] = local_plugin_vars return local_plugin_vars diff --git a/snowfakery/data_generator_runtime_object_model.py b/snowfakery/data_generator_runtime_object_model.py index a0f7741a..fae3a22e 100644 --- a/snowfakery/data_generator_runtime_object_model.py +++ b/snowfakery/data_generator_runtime_object_model.py @@ -136,7 +136,9 @@ def exception_handling(self, message: str): except DataGenError: raise except Exception as e: - raise DataGenError(f"{message} : {str(e)}", self.filename, self.line_num) + raise DataGenError( + f"{message} : {str(e)}", self.filename, self.line_num + ) from e def _evaluate_count(self, context: RuntimeContext) -> int: """Evaluate the count expression to an integer""" @@ -185,6 +187,7 @@ def _generate_row( def _generate_fields(self, context: RuntimeContext, row: Dict) -> None: """Generate all of the fields of a row""" + context.local_vars = {} for field in self.fields: with self.exception_handling("Problem rendering value"): row[field.name] = field.generate_value(context) diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py index 41802566..11b51834 100644 --- a/snowfakery/fakedata/fake_data_generator.py +++ b/snowfakery/fakedata/fake_data_generator.py @@ -1,29 +1,41 @@ from difflib import get_close_matches from faker import Faker import typing as T +from snowfakery.plugins import PluginContext class FakeNames(T.NamedTuple): f: Faker + faker_context: PluginContext = None + + def Username(self, matching: bool = True): + already_created = self._already_have(("firstname", "lastname"), matching) + if all(already_created): + return f"{already_created[0]}.{already_created[1]}_{self.f.uuid4()}@{self.f.safe_domain_name()}" - def Username(self): return f"{self.f.first_name()}_{self.f.last_name()}_{self.f.uuid4()}@{self.f.hostname()}" def Alias(self): return self.f.first_name()[0:8] - def FirstName(self): - return self.f.first_name() - - def LastName(self): - return self.f.last_name() - - def Email(self): + def Email(self, matching: bool = True): + already_created = self._already_have(("firstname", "lastname"), matching) + if all(already_created): + return ( + f"{already_created[0]}.{already_created[1]}@{self.f.safe_domain_name()}" + ) return self.f.ascii_safe_email() def RealisticMaybeRealEmail(self): return self.f.email() + def _already_have(self, names: T.Sequence[str], matching: bool): + if not matching: + return + already_created = self.faker_context.local_vars() + vals = [already_created.get(name) for name in names] + return vals + # we will use this to exclude Faker's internal book-keeping methods # from our faker interface @@ -33,8 +45,20 @@ def RealisticMaybeRealEmail(self): class FakeData: """Wrapper for Faker which adds Salesforce names and case insensitivity.""" - def __init__(self, faker: Faker): - fake_names = FakeNames(faker) + def __init__( + self, + faker_providers: T.Sequence[object], + locale: str = None, + faker_context: PluginContext = None, + ): + # access to persistent state + self.faker_context = faker_context + + faker = Faker(locale, use_weighting=False) + for provider in faker_providers: + faker.add_provider(provider) + + fake_names = FakeNames(faker, faker_context) def obj_to_func_list(obj: object, canonicalizer: T.Callable, ignore_list: set): return { @@ -57,13 +81,17 @@ def obj_to_func_list(obj: object, canonicalizer: T.Callable, ignore_list: set): } def _get_fake_data(self, origname, *args, **kwargs): + local_faker_vars = self.faker_context.local_vars() + # faker names are all lower-case name = origname.lower() meth = self.fake_names.get(name) if meth: - return meth(*args, **kwargs) + ret = meth(*args, **kwargs) + local_faker_vars[name.replace("_", "")] = ret + return ret msg = f"No fake data type named {origname}." match_list = get_close_matches(name, self.fake_names.keys(), n=1) diff --git a/snowfakery/plugins.py b/snowfakery/plugins.py index 03256cc1..33a97cf3 100644 --- a/snowfakery/plugins.py +++ b/snowfakery/plugins.py @@ -101,8 +101,16 @@ def context_vars(self): self.plugin.__class__.__name__ ) + def local_vars(self): + return self.interpreter.current_context.local_vars.setdefault( + self.plugin.__class__.__name__, {} + ) + def unique_context_identifier(self) -> str: - "An identifier that will be unique across iterations (but not portion invocations)" + """An identifier representing a template context that will be + unique across iterations (but not portion invocations). It + allows templates that do counting or iteration for a particular + template context.""" return self.interpreter.current_context.unique_context_identifier def evaluate_raw(self, field_definition): diff --git a/snowfakery/utils/template_utils.py b/snowfakery/utils/template_utils.py index 1ef845a2..6507f68f 100644 --- a/snowfakery/utils/template_utils.py +++ b/snowfakery/utils/template_utils.py @@ -1,11 +1,9 @@ -from functools import lru_cache from typing import Sequence import string - -from faker import Faker -from jinja2 import Template from snowfakery.fakedata.fake_data_generator import FakeData +from snowfakery.plugins import PluginContext + class StringGenerator: """Sometimes in templates you want a reference to a variable to @@ -43,15 +41,16 @@ def __radd__(self, other): class FakerTemplateLibrary: """A Jinja template library to add the faker.xyz objects to templates""" - def __init__(self, faker_providers: Sequence[object], locale=None): + def __init__( + self, + faker_providers: Sequence[object], + locale: str = None, + context: PluginContext = None, + ): self.locale = locale + self.context = context - # TODO: Push this all down into FakeData - faker = Faker(self.locale, use_weighting=False) - for provider in faker_providers: - faker.add_provider(provider) - - self.fake_data = FakeData(faker) + self.fake_data = FakeData(faker_providers, locale, self.context) def _get_fake_data(self, name): return self.fake_data._get_fake_data(name) @@ -62,9 +61,6 @@ def __getattr__(self, name): ) -Template = lru_cache(512)(Template) - - number_chars = set(string.digits + ".") diff --git a/tests/test_data_generator_runtime_dom.py b/tests/test_data_generator_runtime_dom.py index e0cb5db5..8b0cd286 100644 --- a/tests/test_data_generator_runtime_dom.py +++ b/tests/test_data_generator_runtime_dom.py @@ -19,9 +19,6 @@ from snowfakery.output_streams import DebugOutputStream -from snowfakery.utils.template_utils import FakerTemplateLibrary - -ftl = FakerTemplateLibrary([]) line = {"filename": "abc.yml", "line_num": 42} diff --git a/tests/test_faker.py b/tests/test_faker.py index af959ea9..38ba87e9 100644 --- a/tests/test_faker.py +++ b/tests/test_faker.py @@ -217,3 +217,142 @@ def test_faker_internals_are_invisible(self): with pytest.raises(exc.DataGenError) as e: generate(StringIO(yaml), {}, None) assert "seed" in str(e.value) + + def test_context_aware(self, generated_rows): + yaml = """ + - object: X + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + Email: + fake: Email + """ + generate(StringIO(yaml)) + assert generated_rows.table_values( + "X", 0, "FirstName" + ) in generated_rows.table_values("X", 0, "Email") + assert generated_rows.table_values( + "X", 0, "LastName" + ) in generated_rows.table_values("X", 0, "Email") + + def test_context_username(self, generated_rows): + yaml = """ + - object: X + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + Username: + fake: Username + """ + generate(StringIO(yaml)) + assert generated_rows.table_values( + "X", 0, "FirstName" + ) in generated_rows.table_values("X", 0, "Username") + assert generated_rows.table_values( + "X", 0, "LastName" + ) in generated_rows.table_values("X", 0, "Username") + + def test_context_aware_multiple_values(self, generated_rows): + yaml = """ + - object: X + count: 3 + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + Email: + fake: Email + """ + generate(StringIO(yaml)) + assert generated_rows.table_values( + "X", 2, "FirstName" + ) in generated_rows.table_values("X", 2, "Email") + assert ( + generated_rows.table_values("X", 2)["LastName"] + in generated_rows.table_values("X", 2)["Email"] + ) + + @mock.patch("faker.providers.person.en_US.Provider.first_name") + @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") + def test_context_aware_order_matters(self, email, first_name, generated_rows): + yaml = """ + - object: X + count: 3 + fields: + Email: + fake: Email + FirstName: + fake: FirstName + LastName: + fake: LastName + """ + generate(StringIO(yaml)) + assert first_name.mock_calls + assert email.mock_calls + + @mock.patch("faker.providers.person.en_US.Provider.first_name") + @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") + def test_context_aware_no_leakage_count(self, email, first_name, generated_rows): + yaml = """ + - object: X + count: 3 + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + Email: + fake: Email + """ + generate(StringIO(yaml)) + assert first_name.mock_calls + assert not email.mock_calls + + @mock.patch("faker.providers.person.en_US.Provider.first_name") + @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") + def test_context_aware_no_leakage_templates( + self, email, first_name, generated_rows + ): + + # no leakage between templ + yaml = """ + - object: X + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + Email: + fake: Email + - object: Y + fields: + Email: + fake: Email + """ + generate(StringIO(yaml)) + assert first_name.mock_calls + assert email.mock_calls + + @mock.patch("faker.providers.person.en_US.Provider.first_name") + @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") + def test_context_aware_alernate_names(self, email, first_name, generated_rows): + + # no leakage between templ + yaml = """ + - object: X + fields: + FirstName: + fake: first_name + LastName: + fake: last_name + Email: + fake: Email + """ + generate(StringIO(yaml)) + assert first_name.mock_calls + assert not email.mock_calls diff --git a/tests/test_locales.py b/tests/test_locales.py index 5adaa22f..764215eb 100644 --- a/tests/test_locales.py +++ b/tests/test_locales.py @@ -21,7 +21,7 @@ def test_locales(self, generated_rows): name: fake: name """ - with mock.patch("snowfakery.utils.template_utils.Faker") as f: + with mock.patch("snowfakery.fakedata.fake_data_generator.Faker") as f: class FakeFaker(Faker): def name(self): From 7463c39bf105cda96ef1f1c9ea28352819045aa5 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Sun, 20 Jun 2021 10:52:50 -0700 Subject: [PATCH 2/9] Add more templates --- snowfakery/fakedata/fake_data_generator.py | 18 ++++++++++++++++-- tests/test_faker.py | 6 ------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py index 11b51834..f25f06b6 100644 --- a/snowfakery/fakedata/fake_data_generator.py +++ b/snowfakery/fakedata/fake_data_generator.py @@ -1,9 +1,18 @@ from difflib import get_close_matches from faker import Faker import typing as T +import random from snowfakery.plugins import PluginContext +email_templates = [ # .format language doesn't allow slicing. :( + f"{first_name}{first_name_separator}{{lastname}}{year}@{{domain}}" + for first_name in ("{firstname}", "{firstname[0]}", "{firstname[0]}{firstname[1]}") + for first_name_separator in ("", ".", "-", "_", "+") + for year in ("{year}", "{year[2]}{year[3]}", "{year[3]}", "") +] + + class FakeNames(T.NamedTuple): f: Faker faker_context: PluginContext = None @@ -21,8 +30,13 @@ def Alias(self): def Email(self, matching: bool = True): already_created = self._already_have(("firstname", "lastname"), matching) if all(already_created): - return ( - f"{already_created[0]}.{already_created[1]}@{self.f.safe_domain_name()}" + template = random.choice(email_templates) + + return template.format( + firstname=already_created[0], + lastname=already_created[1], + domain=self.f.safe_domain_name(), + year=str(random.randint(1955, 2020)), ) return self.f.ascii_safe_email() diff --git a/tests/test_faker.py b/tests/test_faker.py index 38ba87e9..880aec25 100644 --- a/tests/test_faker.py +++ b/tests/test_faker.py @@ -230,9 +230,6 @@ def test_context_aware(self, generated_rows): fake: Email """ generate(StringIO(yaml)) - assert generated_rows.table_values( - "X", 0, "FirstName" - ) in generated_rows.table_values("X", 0, "Email") assert generated_rows.table_values( "X", 0, "LastName" ) in generated_rows.table_values("X", 0, "Email") @@ -269,9 +266,6 @@ def test_context_aware_multiple_values(self, generated_rows): fake: Email """ generate(StringIO(yaml)) - assert generated_rows.table_values( - "X", 2, "FirstName" - ) in generated_rows.table_values("X", 2, "Email") assert ( generated_rows.table_values("X", 2)["LastName"] in generated_rows.table_values("X", 2)["Email"] From ba1c4fb652b65d5d15fbc2b6b25400d4bc333242 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Sun, 20 Jun 2021 11:29:30 -0700 Subject: [PATCH 3/9] Improve coverage --- snowfakery/plugins.py | 4 +-- tests/test_custom_plugins_and_providers.py | 31 +++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/snowfakery/plugins.py b/snowfakery/plugins.py index 33a97cf3..910934fe 100644 --- a/snowfakery/plugins.py +++ b/snowfakery/plugins.py @@ -252,8 +252,8 @@ def __init__(self, name, typ): def convert(self, value): try: return self.type(value) - except TypeError as e: - raise TypeError( + except (TypeError, ValueError) as e: + raise exc.DataGenTypeError( f"{self.name} option is wrong type {type(value)} rather than {self.type}", *e.args, ) diff --git a/tests/test_custom_plugins_and_providers.py b/tests/test_custom_plugins_and_providers.py index ace34444..b02869fd 100644 --- a/tests/test_custom_plugins_and_providers.py +++ b/tests/test_custom_plugins_and_providers.py @@ -4,7 +4,7 @@ from base64 import b64decode from snowfakery import SnowfakeryPlugin, lazy -from snowfakery.plugins import PluginResult +from snowfakery.plugins import PluginResult, PluginOption from snowfakery.data_gen_exceptions import ( DataGenError, DataGenTypeError, @@ -25,6 +25,15 @@ def row_values(write_row_mock, index, value): class SimpleTestPlugin(SnowfakeryPlugin): + allowed_options = [ + PluginOption( + "tests.test_custom_plugins_and_providers.SimpleTestPlugin.option_str", str + ), + PluginOption( + "tests.test_custom_plugins_and_providers.SimpleTestPlugin.option_int", int + ), + ] + class Functions: def double(self, value): return value * 2 @@ -200,6 +209,26 @@ def test_binary(self, generated_rows): assert rawdata.startswith(b"%PDF-1.3") assert b"Helvetica" in rawdata + def test_option__simple(self, generated_rows): + yaml = """- plugin: tests.test_custom_plugins_and_providers.SimpleTestPlugin""" + + generate_data(StringIO(yaml), plugin_options={"option_str": "AAA"}) + + def test_option__unknown(self, generated_rows): + yaml = """- plugin: tests.test_custom_plugins_and_providers.SimpleTestPlugin""" + + generate_data(StringIO(yaml), plugin_options={"option_str": "zzz"}) + + def test_option__bad_type(self, generated_rows): + yaml = """- plugin: tests.test_custom_plugins_and_providers.SimpleTestPlugin""" + with pytest.raises(DataGenTypeError): + generate_data(StringIO(yaml), plugin_options={"option_int": "abcd"}) + + def test_option_type_coercion_needed(self, generated_rows): + yaml = """- plugin: tests.test_custom_plugins_and_providers.SimpleTestPlugin""" + + generate_data(StringIO(yaml), plugin_options={"option_int": "5"}) + class PluginThatNeedsState(SnowfakeryPlugin): class Functions: From d0658c48fc142deb5cc7091649aba988c936c8ab Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Sun, 20 Jun 2021 11:48:06 -0700 Subject: [PATCH 4/9] Improve coverage --- snowfakery/fakedata/fake_data_generator.py | 2 +- tests/test_faker.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py index f25f06b6..af23d37b 100644 --- a/snowfakery/fakedata/fake_data_generator.py +++ b/snowfakery/fakedata/fake_data_generator.py @@ -45,7 +45,7 @@ def RealisticMaybeRealEmail(self): def _already_have(self, names: T.Sequence[str], matching: bool): if not matching: - return + return [None] already_created = self.faker_context.local_vars() vals = [already_created.get(name) for name in names] return vals diff --git a/tests/test_faker.py b/tests/test_faker.py index 880aec25..06c9648c 100644 --- a/tests/test_faker.py +++ b/tests/test_faker.py @@ -350,3 +350,21 @@ def test_context_aware_alernate_names(self, email, first_name, generated_rows): generate(StringIO(yaml)) assert first_name.mock_calls assert not email.mock_calls + + @mock.patch("faker.providers.person.en_US.Provider.first_name") + @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") + def test_disable_matching(self, email, first_name, generated_rows): + + # no leakage between templ + yaml = """ + - object: X + fields: + FirstName: + fake: FirstName + LastName: + fake: last_name + Email: ${{fake.email(matching=False)}} + """ + generate(StringIO(yaml)) + assert first_name.mock_calls + assert email.mock_calls From 3710f99c9cfe2b168f1bb26cdfc885bd4edb822d Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Tue, 27 Jul 2021 01:50:27 -0700 Subject: [PATCH 5/9] Docs fixes --- snowfakery/data_generator_runtime.py | 2 +- tools/faker_docs_utils/faker_markdown.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index 33ab1ae4..bd377891 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -462,7 +462,7 @@ def field_vars(self): return self.evaluation_namespace.field_vars() def context_vars(self, plugin_namespace): - """ "Variables which are inherited by child scopes""" + """Variables which are inherited by child scopes""" # This looks like a candidate for optimization. # An unconditional object copy? local_plugin_vars = self._plugin_context_vars.get(plugin_namespace, {}).copy() diff --git a/tools/faker_docs_utils/faker_markdown.py b/tools/faker_docs_utils/faker_markdown.py index 87dd2c8e..957dda3d 100644 --- a/tools/faker_docs_utils/faker_markdown.py +++ b/tools/faker_docs_utils/faker_markdown.py @@ -68,7 +68,7 @@ def generate_markdown_for_fakers(outfile, locale: str, header: str = standard_he "Generate the Markdown page for a locale" faker = Faker(locale) language = language_codes[locale.split("_")[0]] - fd = FakeData(faker) + fd = FakeData([], locale) all_fakers = summarize_all_fakers(fd) From 357cc99190f35be6687180221eab130ed2b5fa44 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Tue, 27 Jul 2021 14:00:22 -0700 Subject: [PATCH 6/9] More reliable local variables --- snowfakery/data_generator_runtime.py | 2 ++ snowfakery/data_generator_runtime_object_model.py | 1 + 2 files changed, 3 insertions(+) diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index bd377891..85fe9bd0 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -319,6 +319,7 @@ def execute(self): return self.globals def faker_template_library(self, locale): + """Create a faker template library for locale, or retrieve it from a cache""" rc = self.faker_template_libraries.get(locale) if not rc: rc = FakerTemplateLibrary( @@ -386,6 +387,7 @@ def __init__( self._plugin_context_vars = ChainMap() locale = self.variable_definitions().get("snowfakery_locale") self.faker_template_library = self.interpreter.faker_template_library(locale) + self.local_vars = {} # TODO: move this into the interpreter object def check_if_finished(self): diff --git a/snowfakery/data_generator_runtime_object_model.py b/snowfakery/data_generator_runtime_object_model.py index fae3a22e..f1920068 100644 --- a/snowfakery/data_generator_runtime_object_model.py +++ b/snowfakery/data_generator_runtime_object_model.py @@ -258,6 +258,7 @@ def render(self, context: RuntimeContext) -> FieldValue: except jinja2.exceptions.UndefinedError as e: raise DataGenNameError(e.message, self.filename, self.line_num) from e except Exception as e: + raise raise DataGenValueError(str(e), self.filename, self.line_num) from e else: val = self.definition From fdc3f7ce79c1e7d163ace406f6dce3906088650c Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Wed, 28 Jul 2021 17:25:21 -0700 Subject: [PATCH 7/9] Minor cleanups --- snowfakery/data_generator_runtime.py | 7 ++++--- snowfakery/data_generator_runtime_object_model.py | 2 -- snowfakery/fakedata/fake_data_generator.py | 1 + snowfakery/plugins.py | 6 ++---- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index 85fe9bd0..628a62b5 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -297,8 +297,9 @@ def __init__( raise DataGenNameError( f"No template creating {stop_table_name}", ) - faker_plugin = SnowfakeryPlugin(self) - self.faker_plugin_context = PluginContext(faker_plugin) + + # make a plugin context for our Faker stuff to act like a plugin + self.faker_plugin_context = PluginContext(SnowfakeryPlugin(self)) self.faker_template_libraries = {} @@ -466,7 +467,7 @@ def field_vars(self): def context_vars(self, plugin_namespace): """Variables which are inherited by child scopes""" # This looks like a candidate for optimization. - # An unconditional object copy? + # An unconditional object copyseems expensive. local_plugin_vars = self._plugin_context_vars.get(plugin_namespace, {}).copy() self._plugin_context_vars[plugin_namespace] = local_plugin_vars return local_plugin_vars diff --git a/snowfakery/data_generator_runtime_object_model.py b/snowfakery/data_generator_runtime_object_model.py index f1920068..13cf9b2d 100644 --- a/snowfakery/data_generator_runtime_object_model.py +++ b/snowfakery/data_generator_runtime_object_model.py @@ -187,7 +187,6 @@ def _generate_row( def _generate_fields(self, context: RuntimeContext, row: Dict) -> None: """Generate all of the fields of a row""" - context.local_vars = {} for field in self.fields: with self.exception_handling("Problem rendering value"): row[field.name] = field.generate_value(context) @@ -258,7 +257,6 @@ def render(self, context: RuntimeContext) -> FieldValue: except jinja2.exceptions.UndefinedError as e: raise DataGenNameError(e.message, self.filename, self.line_num) from e except Exception as e: - raise raise DataGenValueError(str(e), self.filename, self.line_num) from e else: val = self.definition diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py index 7ac7d826..1f1d7047 100644 --- a/snowfakery/fakedata/fake_data_generator.py +++ b/snowfakery/fakedata/fake_data_generator.py @@ -51,6 +51,7 @@ def realistic_maybe_real_email(self): return self.f.email() def _already_have(self, names: T.Sequence[str], matching: bool): + """Get a list of field values that we've already generated""" if not matching: return [None] already_created = self.faker_context.local_vars() diff --git a/snowfakery/plugins.py b/snowfakery/plugins.py index 9de1b413..16f4555c 100644 --- a/snowfakery/plugins.py +++ b/snowfakery/plugins.py @@ -97,13 +97,11 @@ def field_vars(self): return self.interpreter.current_context.field_vars() def context_vars(self): - return self.interpreter.current_context.context_vars( - self.plugin.__class__.__name__ - ) + return self.interpreter.current_context.context_vars(id(self.plugin)) def local_vars(self): return self.interpreter.current_context.local_vars.setdefault( - self.plugin.__class__.__name__, {} + id(self.plugin), {} ) def unique_context_identifier(self) -> str: From 6e369d4fe2a8f26b48489a71ffcd3190c157f0ab Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Fri, 6 Aug 2021 05:38:13 -0700 Subject: [PATCH 8/9] Suggestions from PR review --- snowfakery/data_generator_runtime.py | 2 +- snowfakery/fakedata/fake_data_generator.py | 37 ++++++++++++++-------- tests/test_faker.py | 11 ++----- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index 628a62b5..337f3a74 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -467,7 +467,7 @@ def field_vars(self): def context_vars(self, plugin_namespace): """Variables which are inherited by child scopes""" # This looks like a candidate for optimization. - # An unconditional object copyseems expensive. + # An unconditional object copy seems expensive. local_plugin_vars = self._plugin_context_vars.get(plugin_namespace, {}).copy() self._plugin_context_vars[plugin_namespace] = local_plugin_vars return local_plugin_vars diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py index 25adc24a..87112c7d 100644 --- a/snowfakery/fakedata/fake_data_generator.py +++ b/snowfakery/fakedata/fake_data_generator.py @@ -2,26 +2,37 @@ import typing as T import random from snowfakery.plugins import PluginContext +from itertools import product +from datetime import datetime +from faker import Faker, Generator + +# .format language doesn't allow slicing. :( +first_name_patterns = ("{firstname}", "{firstname[0]}", "{firstname[0]}{firstname[1]}") +first_name_separators = ("", ".", "-", "_", "+") +year_patterns = ("{year}", "{year[2]}{year[3]}", "{year[3]}", "") -email_templates = [ # .format language doesn't allow slicing. :( +email_templates = [ f"{first_name}{first_name_separator}{{lastname}}{year}@{{domain}}" - for first_name in ("{firstname}", "{firstname[0]}", "{firstname[0]}{firstname[1]}") - for first_name_separator in ("", ".", "-", "_", "+") - for year in ("{year}", "{year[2]}{year[3]}", "{year[3]}", "") + for first_name, first_name_separator, year in product( + first_name_patterns, first_name_separators, year_patterns + ) ] -from faker import Faker, Generator +this_year = datetime.today().year class FakeNames(T.NamedTuple): f: Faker faker_context: PluginContext = None + # "matching" allows us to turn off the behaviour of + # trying to incorporate one field into another if we + # need to. def user_name(self, matching: bool = True): "Salesforce-style username in the form of an email address" - already_created = self._already_have(("firstname", "lastname"), matching) - if all(already_created): + already_created = self._already_have(("firstname", "lastname")) + if matching and all(already_created): return f"{already_created[0]}.{already_created[1]}_{self.f.uuid4()}@{self.f.safe_domain_name()}" return f"{self.f.first_name()}_{self.f.last_name()}_{self.f.uuid4()}@{self.f.hostname()}" @@ -33,15 +44,15 @@ def alias(self): def email(self, matching: bool = True): """Email address using one of the "example" domains""" - already_created = self._already_have(("firstname", "lastname"), matching) - if all(already_created): + already_created = self._already_have(("firstname", "lastname")) + if matching and all(already_created): template = random.choice(email_templates) return template.format( - firstname=already_created[0], + firstname=already_created[0].ljust(2, "_"), lastname=already_created[1], domain=self.f.safe_domain_name(), - year=str(random.randint(1955, 2020)), + year=str(random.randint(this_year - 80, this_year - 10)), ) return self.f.ascii_safe_email() @@ -52,10 +63,8 @@ def realistic_maybe_real_email(self): """ return self.f.email() - def _already_have(self, names: T.Sequence[str], matching: bool): + def _already_have(self, names: T.Sequence[str]): """Get a list of field values that we've already generated""" - if not matching: - return [None] already_created = self.faker_context.local_vars() vals = [already_created.get(name) for name in names] return vals diff --git a/tests/test_faker.py b/tests/test_faker.py index ee9a8b54..cfcf9048 100644 --- a/tests/test_faker.py +++ b/tests/test_faker.py @@ -246,7 +246,7 @@ def test_context_aware(self, generated_rows): "X", 0, "LastName" ) in generated_rows.table_values("X", 0, "Email") - def test_context_username(self, generated_rows): + def test_context_username_incorporates_fakes(self, generated_rows): yaml = """ - object: X fields: @@ -324,8 +324,7 @@ def test_context_aware_no_leakage_count(self, email, first_name, generated_rows) def test_context_aware_no_leakage_templates( self, email, first_name, generated_rows ): - - # no leakage between templ + # no leakage between templates yaml = """ - object: X fields: @@ -342,13 +341,11 @@ def test_context_aware_no_leakage_templates( """ generate(StringIO(yaml)) assert first_name.mock_calls - assert email.mock_calls + email.assert_called_once() @mock.patch("faker.providers.person.en_US.Provider.first_name") @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") def test_context_aware_alernate_names(self, email, first_name, generated_rows): - - # no leakage between templ yaml = """ - object: X fields: @@ -366,8 +363,6 @@ def test_context_aware_alernate_names(self, email, first_name, generated_rows): @mock.patch("faker.providers.person.en_US.Provider.first_name") @mock.patch("faker.providers.internet.en_US.Provider.ascii_safe_email") def test_disable_matching(self, email, first_name, generated_rows): - - # no leakage between templ yaml = """ - object: X fields: From cfffe65d12be976fc391b25bc773c94006bbe967 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Fri, 6 Aug 2021 05:49:51 -0700 Subject: [PATCH 9/9] Minor typo --- snowfakery/utils/template_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snowfakery/utils/template_utils.py b/snowfakery/utils/template_utils.py index 6507f68f..29b7c1a3 100644 --- a/snowfakery/utils/template_utils.py +++ b/snowfakery/utils/template_utils.py @@ -39,7 +39,7 @@ def __radd__(self, other): class FakerTemplateLibrary: - """A Jinja template library to add the faker.xyz objects to templates""" + """A Jinja template library to add the fake.xyz objects to templates""" def __init__( self,