Skip to content

Commit

Permalink
refactor: Generate JSON schema when missing (#3533)
Browse files Browse the repository at this point in the history
* removed unused script

* print info logs when generating openapi schema

* create json schema only when needed

* fix tests

* Remove leftover

Co-authored-by: ZanSara <[email protected]>
  • Loading branch information
2 people authored and bogdankostic committed Nov 17, 2022
1 parent 1be13df commit 893d2d4
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 49 deletions.
13 changes: 0 additions & 13 deletions .github/utils/generate_json_schema.py

This file was deleted.

7 changes: 6 additions & 1 deletion .github/utils/generate_openapi_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
import sys
import shutil

import logging

logging.basicConfig(level=logging.INFO)


sys.path.append(".")
from rest_api.utils import get_openapi_specs, get_app, get_pipelines # pylint: disable=wrong-import-position
from haystack import __version__ # pylint: disable=wrong-import-position
Expand All @@ -17,7 +22,7 @@

os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH

print(f"Loading OpenAPI specs from {APP_PATH} with pipeline at {PIPELINE_PATH}")
logging.info("Loading OpenAPI specs from %s with pipeline at %s", APP_PATH, PIPELINE_PATH)

# To initialize the app and the pipelines
get_app()
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ saved_models
*_build
rest_api/file-upload/*
**/feedback_squad_direct.json
haystack/json-schemas

.DS_Store

# http cache (requests-cache)
Expand Down
3 changes: 0 additions & 3 deletions haystack/json-schemas/.gitignore

This file was deleted.

23 changes: 0 additions & 23 deletions haystack/json-schemas/generate_schema.py

This file was deleted.

24 changes: 23 additions & 1 deletion haystack/nodes/_json_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union

import os
import sys
import json
import inspect
Expand Down Expand Up @@ -176,7 +177,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[

node_name = getattr(node_class, "__name__")

logger.info("Creating schema for '%s'", node_name)
logger.debug("Creating schema for '%s'", node_name)

# Read the relevant init parameters from __init__'s signature
init_method = getattr(node_class, "__init__", None)
Expand Down Expand Up @@ -405,6 +406,26 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
return schema


def load_schema():
    """
    Generate the JSON schema if it doesn't exist yet, then load it and return it.

    The schema is expected at `JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"`.
    If that file is missing, `update_json_schema(main_only=True)` is called to create it
    before it is read.

    Returns the content of the schema file as parsed by `json.load`.

    Any exception raised while generating the schema is re-raised unchanged, after the
    schema file (if one was left behind) has been deleted.
    """
    schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"

    if not schema_file_path.exists():
        # Use the module-level logger (as the rest of this module does) rather than the root logger,
        # so that the message honours the logging configuration of this module.
        logger.info("Json schema not found, generating one at: %s", schema_file_path)
        try:
            update_json_schema(main_only=True)
        except Exception:
            # Don't leave an empty or partially written file behind: it would be picked up
            # as an existing schema on the next call and fail to parse.
            if schema_file_path.exists():
                schema_file_path.unlink()
            # This error is not recoverable: propagate it with its original traceback.
            raise

    with open(schema_file_path, "r") as schema_file:
        return json.load(schema_file)


def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
"""
Create (or update) a new schema.
Expand All @@ -413,6 +434,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bo
# commit from `main` or a release branch
filename = f"haystack-pipeline-main.schema.json"

os.makedirs(destination_path, exist_ok=True)
with open(destination_path / filename, "w") as json_file:
json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)

Expand Down
6 changes: 3 additions & 3 deletions haystack/pipelines/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from haystack import __version__
from haystack.nodes.base import BaseComponent, RootNode
from haystack.nodes._json_schema import inject_definition_in_schema, JSON_SCHEMAS_PATH
from haystack.nodes._json_schema import load_schema, inject_definition_in_schema
from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError


Expand Down Expand Up @@ -295,8 +295,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
"and fix your configuration accordingly."
)

with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-main.schema.json", "r") as schema_file:
schema = json.load(schema_file)
# Load the json schema, and create one if it doesn't exist yet
schema = load_schema()

# Remove the version value from the schema to prevent validation errors on it - a version only has to be present.
del schema["properties"]["version"]["const"]
Expand Down
4 changes: 0 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,6 @@ packages = [
"haystack",
]

[tool.hatch.build.targets.wheel.hooks.autorun]
dependencies = ["hatch-autorun"]
file = "haystack/json-schemas/generate_schema.py"

[tool.black]
line-length = 120
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
Expand Down
2 changes: 1 addition & 1 deletion test/pipelines/test_pipeline_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def mock_json_schema(request, monkeypatch, tmp_path):
lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)],
)
# Point the JSON schema path to tmp_path
monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path)
monkeypatch.setattr(haystack.nodes._json_schema, "JSON_SCHEMAS_PATH", tmp_path)

# Generate mock schema in tmp_path
filename = f"haystack-pipeline-main.schema.json"
Expand Down

0 comments on commit 893d2d4

Please sign in to comment.