Skip to content

Commit

Permalink
refactor: move the logic into try/except block
Browse files: browse the repository at this point in the history
  • Loading branch information
mehmetcanay committed Oct 23, 2024
1 parent 848bd30 commit fa37f1d
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 52 deletions.
55 changes: 29 additions & 26 deletions datastew/repository/sqllite.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,32 +38,35 @@ def store_all(self, model_object_instances: List[Union[Terminology, Concept, Map
self.session.commit()

def import_data_dictionary(self, data_dictionary: DataDictionarySource, terminology_name: str, embedding_model: Optional[EmbeddingModel] = None):
# Builds one Terminology named after terminology_name, plus a Concept and a Mapping for each
# variable of the data dictionary, and hands them to the repository's store methods.
# NOTE(review): this span is a rendered diff without +/- markers and without indentation.
# The first body below looks like the pre-commit version and the `try:` body like its
# replacement (the file header reports 29 additions & 26 deletions) - confirm in the repository.
#
# --- first body: presumably the lines this commit removes ---
terminology = Terminology(terminology_name, terminology_name)
# The terminology is stored immediately, before the data frame and embeddings are read.
self.store(terminology)
data_frame = data_dictionary.to_dataframe()
descriptions = data_frame["description"].tolist()

if embedding_model is None:
# Name recorded on each Mapping (sentence_embedder) when no embedding model is passed in.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
else:
embedding_model_name = embedding_model.get_model_name()

variable_to_embedding = data_dictionary.get_embeddings(embedding_model)

# zip pairs the embedding keys with the descriptions by position;
# assumes both sequences are in the same order - TODO confirm.
for variable, description in zip(variable_to_embedding.keys(), descriptions):
# Concept identifiers have the form "<terminology_name>:<variable>".
concept_id = f"{terminology_name}:{variable}"
concept = Concept(
terminology=terminology,
pref_label=variable,
concept_identifier=concept_id
)
mapping = Mapping(
concept=concept,
text=description,
embedding=variable_to_embedding[variable],
sentence_embedder=embedding_model_name
)
# Presumably inside the loop: one store_all call per concept/mapping pair.
self.store_all([concept, mapping])
# --- second body: presumably the lines this commit adds ---
try:
# Accumulates every object so that they can be passed to store_all together.
model_object_instances: List[Union[Terminology, Concept, Mapping]] = []
data_frame = data_dictionary.to_dataframe()
descriptions = data_frame["description"].tolist()
if embedding_model is None:
# Name recorded on each Mapping (sentence_embedder) when no embedding model is passed in.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
else:
embedding_model_name = embedding_model.get_model_name()
variable_to_embedding = data_dictionary.get_embeddings(embedding_model)
# Unlike the first body, the terminology is only queued here, not stored on its own.
terminology = Terminology(terminology_name, terminology_name)
model_object_instances.append(terminology)
# zip pairs the embedding keys with the descriptions by position;
# assumes both sequences are in the same order - TODO confirm.
for variable, description in zip(variable_to_embedding.keys(), descriptions):
concept_id = f"{terminology_name}:{variable}"
concept = Concept(
terminology=terminology,
pref_label=variable,
concept_identifier=concept_id
)
mapping = Mapping(
concept=concept,
text=description,
embedding=variable_to_embedding[variable],
sentence_embedder=embedding_model_name
)
model_object_instances.append(concept)
model_object_instances.append(mapping)
# Presumably after the loop: a single store_all call for the terminology and all pairs.
self.store_all(model_object_instances)
except Exception as e:
# Any exception from the steps above is re-raised as RuntimeError with its message embedded.
# NOTE(review): there is no `from e`, so the original exception is attached only implicitly;
# consider explicit chaining.
raise RuntimeError(f"Failed to import data dictionary source: {e}")

def get_all_concepts(self) -> List[Concept]:
    """Return the result of querying the session for all Concept objects."""
    concept_query = self.session.query(Concept)
    return concept_query.all()
Expand Down
55 changes: 29 additions & 26 deletions datastew/repository/weaviate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,32 +65,35 @@ def _create_schema_if_not_exists(self, schema):
raise RuntimeError(f"Failed to check/create schema for {class_name}: {e}")

def import_data_dictionary(self, data_dictionary: DataDictionarySource, terminology_name: str, embedding_model: Optional[EmbeddingModel] = None):
# Builds one Terminology named after terminology_name, plus a Concept and a Mapping for each
# variable of the data dictionary, and hands them to the repository's store methods.
# NOTE(review): this span is a rendered diff without +/- markers and without indentation.
# The first body below looks like the pre-commit version and the `try:` body like its
# replacement (the file header reports 29 additions & 26 deletions) - confirm in the repository.
#
# --- first body: presumably the lines this commit removes ---
terminology = Terminology(terminology_name, terminology_name)
# The terminology is stored immediately, before the data frame and embeddings are read.
self.store(terminology)
data_frame = data_dictionary.to_dataframe()
descriptions = data_frame["description"].tolist()

if embedding_model is None:
# Name recorded on each Mapping (sentence_embedder) when no embedding model is passed in.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
else:
embedding_model_name = embedding_model.get_model_name()

variable_to_embedding = data_dictionary.get_embeddings(embedding_model)

# zip pairs the embedding keys with the descriptions by position;
# assumes both sequences are in the same order - TODO confirm.
for variable, description in zip(variable_to_embedding.keys(), descriptions):
# Concept identifiers have the form "<terminology_name>:<variable>".
concept_id = f"{terminology_name}:{variable}"
concept = Concept(
terminology=terminology,
pref_label=variable,
concept_identifier=concept_id
)
mapping = Mapping(
concept=concept,
text=description,
embedding=variable_to_embedding[variable],
sentence_embedder=embedding_model_name
)
# Presumably inside the loop: one store_all call per concept/mapping pair.
self.store_all([concept, mapping])
# --- second body: presumably the lines this commit adds ---
try:
# Accumulates every object so that they can be passed to store_all together.
model_object_instances: List[Union[Terminology, Concept, Mapping]] = []
data_frame = data_dictionary.to_dataframe()
descriptions = data_frame["description"].tolist()
if embedding_model is None:
# Name recorded on each Mapping (sentence_embedder) when no embedding model is passed in.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
else:
embedding_model_name = embedding_model.get_model_name()
variable_to_embedding = data_dictionary.get_embeddings(embedding_model)
# Unlike the first body, the terminology is only queued here, not stored on its own.
terminology = Terminology(terminology_name, terminology_name)
model_object_instances.append(terminology)
# zip pairs the embedding keys with the descriptions by position;
# assumes both sequences are in the same order - TODO confirm.
for variable, description in zip(variable_to_embedding.keys(), descriptions):
concept_id = f"{terminology_name}:{variable}"
concept = Concept(
terminology=terminology,
pref_label=variable,
concept_identifier=concept_id
)
mapping = Mapping(
concept=concept,
text=description,
embedding=variable_to_embedding[variable],
sentence_embedder=embedding_model_name
)
model_object_instances.append(concept)
model_object_instances.append(mapping)
# Presumably after the loop: a single store_all call for the terminology and all pairs.
self.store_all(model_object_instances)
except Exception as e:
# Any exception from the steps above is re-raised as RuntimeError with its message embedded.
# NOTE(review): there is no `from e`, so the original exception is attached only implicitly;
# consider explicit chaining.
raise RuntimeError(f"Failed to import data dictionary source: {e}")

def store_all(self, model_object_instances: List[Union[Terminology, Concept, Mapping]]):
for instance in model_object_instances:
Expand Down

0 comments on commit fa37f1d

Please sign in to comment.