diff --git a/compose-configs/airflow-marquez/Dockerfile b/compose-configs/airflow-marquez/Dockerfile
new file mode 100644
index 0000000..021afff
--- /dev/null
+++ b/compose-configs/airflow-marquez/Dockerfile
@@ -0,0 +1,19 @@
+USER root
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ wget libssl-dev libsasl2-dev libcurl4-openssl-dev \
+ software-properties-common gcc make libz-dev \
+ libzstd-dev g++ \
+ && apt-get autoremove -yqq --purge \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
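+# Build librdkafka (the Kafka C client library) from source so Python Kafka
+# clients such as confluent-kafka can link against it.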
+WORKDIR /root
+RUN wget -qO - https://github.com/confluentinc/librdkafka/archive/refs/tags/v2.0.2.tar.gz | tar -xz
+WORKDIR /root/librdkafka-2.0.2
+RUN ./configure
+RUN make && make check && make install
+WORKDIR /opt/airflow
+
+USER airflow
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/airflow.md b/compose-configs/airflow-marquez/airflow.md
new file mode 100644
index 0000000..3f8cb50
--- /dev/null
+++ b/compose-configs/airflow-marquez/airflow.md
@@ -0,0 +1,312 @@
+# Getting Started with Airflow and OpenLineage+Marquez
+
+> **Note:** For a modified version of this guide that uses [Astro](https://www.astronomer.io/try-astro/?referral=docs-what-astro-banner) instead of vanilla Airflow, see the OpenLineage [docs](https://openlineage.io/docs/guides/airflow-quickstart).
+
+In this example, we'll walk you through how to enable Airflow DAGs to send lineage metadata to [Marquez](https://marquezproject.ai/) using OpenLineage.
+
+### You’ll learn how to:
+
+* enable OpenLineage in Airflow
+* write your very first OpenLineage-enabled DAG
+* troubleshoot a failing DAG using Marquez
+
+# Prerequisites
+
+Before you begin, make sure you have installed:
+
+* [Docker 17.05](https://docs.docker.com/install)+
+* [Docker Compose](https://docs.docker.com/compose/install)
+
+> **Note:** We recommend that you have allocated at least **2 CPUs** and **8 GB** of memory to Docker.
+
+# Step 1: Setup
+
+First, if you haven't already, clone the Marquez repository and change into the [`examples/airflow`](https://github.com/MarquezProject/marquez/tree/main/examples/airflow) directory:
+
+```bash
+git clone https://github.com/MarquezProject/marquez && cd marquez/examples/airflow
+```
+
+To make sure the latest OpenLineage Airflow provider ([`apache-airflow-providers-openlineage`](https://pypi.org/project/apache-airflow-providers-openlineage)) is downloaded and installed when starting Airflow, you'll need to create a `requirements.txt` file with the following content:
+
+```
+apache-airflow-providers-openlineage
+```
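+
+Later, once Airflow is running (Step 3), you can optionally confirm that the provider was installed. A minimal sketch, run inside one of the Airflow containers:
+
+```python
+# Print the installed version of the OpenLineage provider package.
+from importlib.metadata import PackageNotFoundError, version
+
+try:
+    print(version("apache-airflow-providers-openlineage"))
+except PackageNotFoundError:
+    print("OpenLineage provider is not installed")
+```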
+
+Next, we'll need to specify where we want Airflow to send DAG metadata. To do so, create a config file named `openlineage.env` with the following environment variables and values:
+
+```bash
+OPENLINEAGE_URL=http://marquez:5000 # The URL of the HTTP backend
+OPENLINEAGE_NAMESPACE=example # The namespace associated with the dataset, job, and run metadata collected
+```
+> **Note:** The `openlineage.env` config file will be used by the `airflow`, `airflow_scheduler`, and `airflow_worker` containers to send lineage metadata to Marquez.
+
+> **Note:** The namespace referred to above is the namespace of the jobs that model your DAG runs. The datasets themselves reside in namespaces tied to their data sources, following the [OpenLineage naming conventions](https://github.com/OpenLineage/OpenLineage/blob/main/spec/Naming.md).
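+
+> **Tip:** To see what OpenLineage sends over this transport, you can emit a hand-built test event with the [`openlineage-python`](https://pypi.org/project/openlineage-python) client. A minimal sketch (the `smoke_test` job name is ours for illustration, and it assumes Marquez is reachable at `http://localhost:5000` from wherever you run it):
+
+```python
+# Emit a single COMPLETE run event for a made-up job so it shows up in Marquez.
+from datetime import datetime, timezone
+from uuid import uuid4
+
+from openlineage.client import OpenLineageClient
+from openlineage.client.run import Job, Run, RunEvent, RunState
+
+client = OpenLineageClient(url="http://localhost:5000")
+client.emit(
+    RunEvent(
+        eventType=RunState.COMPLETE,
+        eventTime=datetime.now(timezone.utc).isoformat(),
+        run=Run(runId=str(uuid4())),
+        job=Job(namespace="example", name="smoke_test"),
+        producer="manual-smoke-test",
+    )
+)
+```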
+
+Your `examples/airflow/` directory should now contain the following:
+
+```
+.
+├── README.md
+├── docker
+├── docker-compose.yml
+├── docs
+├── openlineage.env
+└── requirements.txt
+```
+
+# Step 2: Write Airflow DAGs using OpenLineage
+
+In this step, we'll create two new Airflow DAGs that perform simple tasks. The `counter` DAG generates a random number every minute, while the `sum` DAG calculates a sum every five minutes. This will result in a simple pipeline containing two jobs and two datasets.
+
+First, let's create the `dags/` folder where our example DAGs will be located:
+
+```bash
+$ mkdir dags
+```
+
+When writing our DAGs, we'll use [`openlineage-airflow`](https://pypi.org/project/openlineage-airflow), enabling OpenLineage to observe the DAG and automatically collect task-level metadata. If you're using Airflow 2.3+, no further changes to your DAG code or configuration are needed. If you're using an older version of Airflow, please read [this guide](https://github.com/OpenLineage/OpenLineage/blob/main/integration/airflow/README.md#setup) to understand how to configure the Airflow integration.
+
+## Step 2.1: Create `counter` DAG
+
+Under `dags/`, create a file named `counter.py` and add the following code:
+
+```python
+import random
+
+from airflow import DAG
+from airflow.providers.postgres.operators.postgres import PostgresOperator
+from airflow.utils.dates import days_ago
+
+default_args = {
+ 'owner': 'datascience',
+ 'depends_on_past': False,
+ 'start_date': days_ago(1),
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'email': ['datascience@example.com']
+}
+
+dag = DAG(
+ 'counter',
+ schedule_interval='*/1 * * * *',
+ catchup=False,
+ is_paused_upon_creation=False,
+ max_active_runs=1,
+ default_args=default_args,
+ description='DAG that generates a new count value between 1-10.'
+)
+
+t1 = PostgresOperator(
+ task_id='if_not_exists',
+ postgres_conn_id='example_db',
+ sql='''
+ CREATE TABLE IF NOT EXISTS counts (
+ value INTEGER
+ );''',
+ dag=dag
+)
+
+t2 = PostgresOperator(
+ task_id='inc',
+ postgres_conn_id='example_db',
+ sql='''
+ INSERT INTO counts (value)
+ VALUES (%(value)s)
+ ''',
+ parameters={
+ 'value': random.randint(1, 10)
+ },
+ dag=dag
+)
+
+t1 >> t2
+```
+
+## Step 2.2: Create `sum` DAG
+
+In `dags/`, create a file named `sum.py` and add the following code:
+
+```python
+from airflow import DAG
+from airflow.providers.postgres.operators.postgres import PostgresOperator
+from airflow.utils.dates import days_ago
+
+default_args = {
+ 'owner': 'datascience',
+ 'depends_on_past': False,
+ 'start_date': days_ago(1),
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'email': ['datascience@example.com']
+}
+
+dag = DAG(
+ 'sum',
+ schedule_interval='*/5 * * * *',
+ catchup=False,
+ is_paused_upon_creation=False,
+ max_active_runs=1,
+ default_args=default_args,
+ description='DAG that sums the total of generated count values.'
+)
+
+t1 = PostgresOperator(
+ task_id='if_not_exists',
+ postgres_conn_id='example_db',
+ sql='''
+ CREATE TABLE IF NOT EXISTS sums (
+ value INTEGER
+ );''',
+ dag=dag
+)
+
+t2 = PostgresOperator(
+ task_id='total',
+ postgres_conn_id='example_db',
+ sql='''
+ INSERT INTO sums (value)
+ SELECT SUM(c.value) FROM counts AS c;
+ ''',
+ dag=dag
+)
+
+t1 >> t2
+```
+
+At this point, your `examples/airflow/` directory should look like this:
+
+```
+.
+├── README.md
+├── dags
+│ ├── counter.py
+│ └── sum.py
+├── docker/
+├── docker-compose.yml
+├── docs/
+├── openlineage.env
+└── requirements.txt
+```
+
+# Step 3: Start Airflow with Marquez
+
+Now that we have our DAGs defined and OpenLineage is enabled in Airflow, we can run the example! To start Airflow, run:
+
+```bash
+$ docker-compose up
+```
+
+> **Tip:** Use `-d` to run in detached mode.
+
+**The above command will:**
+
+* start Airflow and install `openlineage-airflow`
+* start Marquez
+* start Postgres
+
+To view the Airflow UI and verify it's running, open [http://localhost:8080](http://localhost:8080). Then, log in using the username and password `airflow` / `airflow`. You can also browse to [http://localhost:3000](http://localhost:3000) to view the Marquez UI.
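+
+You can also confirm that the Marquez HTTP API itself is up by listing its namespaces. A minimal sketch using `requests` (assumes Marquez's API port is published to the host on `localhost:5000`):
+
+```python
+# List the namespaces Marquez knows about; "example" should appear once
+# lineage events start arriving from Airflow.
+import requests
+
+resp = requests.get("http://localhost:5000/api/v1/namespaces", timeout=10)
+resp.raise_for_status()
+for ns in resp.json()["namespaces"]:
+    print(ns["name"])
+```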
+
+# Step 4: View Collected Metadata
+
+To ensure that Airflow is executing `counter` and `sum`, navigate to the DAGs tab in Airflow and verify that they are both enabled and are in a _running_ state:
+
+![](./docs/airflow-view-dag.png)
+
+To view DAG metadata collected by Marquez from Airflow, browse to the Marquez UI by visiting [http://localhost:3000](http://localhost:3000). Then, use the _search_ bar in the upper right side of the page to search for the `counter.inc` job. To view lineage metadata for `counter.inc`, click on the job in the drop-down list:
+
+> **Note:** If the `counter.inc` job is not in the drop-down list, check to see if Airflow has successfully executed the DAG.
+
+![](./docs/current-search-count.png)
+
+![](./docs/current-lineage-view-job.png)
+
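+If the job doesn't appear in the UI, you can also query the Marquez API directly for the jobs collected in the `example` namespace. A minimal sketch (same `localhost:5000` assumption as above):
+
+```python
+# List jobs collected in the "example" namespace; the counter and sum tasks
+# should show up after a few DAG runs.
+import requests
+
+resp = requests.get("http://localhost:5000/api/v1/namespaces/example/jobs", timeout=10)
+resp.raise_for_status()
+for job in resp.json()["jobs"]:
+    print(job["name"])
+```
+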
+If you take a quick look at the lineage graph for `counter.if_not_exists`, you should see `example.public.counts` as an output dataset and `sum.total` as a downstream job!
+
+![](./docs/lineage-view-job.png)
+
+# Step 5: Troubleshoot a Failing DAG with Marquez
+
+In this step, let's quickly walk through a simple troubleshooting scenario where DAG `sum` begins to fail as the result of an upstream schema change for table `counts`. So, let's get to it!
+
+> **Tip:** It's helpful to also apply the same code changes outlined below to your Airflow DAGs defined in **Step 2**.
+
+Let's say team `A` owns the DAG `counter`. Team `A` decides to update the `t1` task in `counter` to rename the `value` column in the `counts` table to `value_1_to_10` (without properly communicating the schema change!):
+
+```diff
+t1 = PostgresOperator(
+- task_id='if_not_exists',
++ task_id='alter_name_of_column',
+ postgres_conn_id='example_db',
+ sql='''
+- CREATE TABLE IF NOT EXISTS counts (
+- value INTEGER
+- );''',
++ DO $$
++ BEGIN
++ IF EXISTS(SELECT *
++ FROM information_schema.columns
++ WHERE table_name='counts' and column_name='value')
++ THEN
++ ALTER TABLE "counts" RENAME COLUMN "value" TO "value_1_to_10";
++ END IF;
++ END $$;
+ ''',
+ dag=dag
+)
+```
+
+```diff
+t2 = PostgresOperator(
+ task_id='inc',
+ postgres_conn_id='example_db',
+ sql='''
+- INSERT INTO counts (value)
++ INSERT INTO counts (value_1_to_10)
+ VALUES (%(value)s)
+ ''',
+ parameters={
+ 'value': random.randint(1, 10)
+ },
+ dag=dag
+)
+```
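+
+At this point you can verify the rename directly against the example database. A minimal sketch using `psycopg2` (connection details are assumptions; adjust host, port, and credentials to however the `example` database is exposed in your setup):
+
+```python
+# Print the current column names of the "counts" table.
+import psycopg2
+
+conn = psycopg2.connect(
+    host="localhost", port=5432, dbname="example",
+    user="example", password="example",
+)
+with conn, conn.cursor() as cur:
+    cur.execute(
+        """SELECT column_name
+           FROM information_schema.columns
+           WHERE table_name = 'counts';"""
+    )
+    for (column_name,) in cur.fetchall():
+        print(column_name)  # expect value_1_to_10 after the rename
+conn.close()
+```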
+
+Team `B`, unaware of the schema change, owns DAG `sum` and begins to see DAG run metadata with _failed_ run states:
+
+![](./docs/search-job-failure.png)
+
+But team `B` isn't sure what caused the DAG failure, as no recent code changes have been made to DAG `sum`. So, team `B` decides to check the schema of the input dataset:
+
+![](./docs/lineage-view-dataset.png)
+
+Team `B` soon realizes that the schema has changed recently for the `counts` table! To fix the DAG `sum`, team `B` updates the `t2` task that calculates the count total to use the new column name:
+
+```diff
+t2 = PostgresOperator(
+ task_id='total',
+ postgres_conn_id='example_db',
+ sql='''
+ INSERT INTO sums (value)
+- SELECT SUM(c.value) FROM counts AS c;
++ SELECT SUM(c.value_1_to_10) FROM counts AS c;
+ ''',
+ dag=dag
+)
+```
+
+With the code change, the DAG `sum` begins to run successfully:
+
+![](./docs/lineage-view-job-successful.png)
+
+_Congrats_! You've successfully stepped through a troubleshooting scenario for a failing DAG using metadata collected by Marquez! You can now add your own DAGs to `dags/` to build more complex data lineage graphs.
+
+# Next Steps
+
+* Review the Marquez [HTTP API](https://marquezproject.github.io/marquez/openapi.html) used to collect Airflow DAG metadata and learn how to build your own integrations using OpenLineage
+* Take a look at the [`openlineage-spark`](https://openlineage.io/docs/integrations/spark/) integration, which can also be used with Airflow
+
+# Feedback
+
+What did you think of this example? You can reach out to us on [slack](https://bit.ly/Marquez_Slack_invite) and leave us feedback, or [open a pull request](https://github.com/MarquezProject/marquez/blob/main/CONTRIBUTING.md#submitting-a-pull-request) with your suggestions!
diff --git a/compose-configs/airflow-marquez/dags/counter.py b/compose-configs/airflow-marquez/dags/counter.py
new file mode 100644
index 0000000..09b80ae
--- /dev/null
+++ b/compose-configs/airflow-marquez/dags/counter.py
@@ -0,0 +1,49 @@
+import random
+
+from airflow import DAG
+from airflow.providers.postgres.operators.postgres import PostgresOperator
+from airflow.utils.dates import days_ago
+
+default_args = {
+ 'owner': 'datascience',
+ 'depends_on_past': False,
+ 'start_date': days_ago(1),
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'email': ['datascience@example.com']
+}
+
+dag = DAG(
+ 'counter',
+ schedule_interval='*/1 * * * *',
+ catchup=False,
+ is_paused_upon_creation=False,
+ max_active_runs=1,
+ default_args=default_args,
+ description='DAG that generates a new count value between 1-10.'
+)
+
+t1 = PostgresOperator(
+ task_id='if_not_exists',
+ postgres_conn_id='example_db',
+ sql='''
+ CREATE TABLE IF NOT EXISTS counts (
+ value INTEGER
+ );''',
+ dag=dag
+)
+
+t2 = PostgresOperator(
+ task_id='inc',
+ postgres_conn_id='example_db',
+ sql='''
+ INSERT INTO counts (value)
+ VALUES (%(value)s)
+ ''',
+ parameters={
+ 'value': random.randint(1, 10)
+ },
+ dag=dag
+)
+
+t1 >> t2
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/dags/first_test.py b/compose-configs/airflow-marquez/dags/first_test.py
new file mode 100644
index 0000000..0952778
--- /dev/null
+++ b/compose-configs/airflow-marquez/dags/first_test.py
@@ -0,0 +1,18 @@
+from datetime import datetime
+
+from airflow import DAG
+from airflow.decorators import task
+from airflow.operators.bash import BashOperator
+
+# A DAG represents a workflow, a collection of tasks
+with DAG(dag_id="demo", start_date=datetime(2022, 1, 1), schedule="0 0 * * *") as dag:
+
+ # Tasks are represented as operators
+ hello = BashOperator(task_id="hello", bash_command="echo hello")
+
+ @task()
+ def airflow():
+ print("airflow")
+
+ # Set dependencies between tasks
+ hello >> airflow()
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/dags/produce_consume_treats.py b/compose-configs/airflow-marquez/dags/produce_consume_treats.py
new file mode 100644
index 0000000..5c491b9
--- /dev/null
+++ b/compose-configs/airflow-marquez/dags/produce_consume_treats.py
@@ -0,0 +1,110 @@
+"""
+### DAG which produces to and consumes from a Kafka cluster
+
+This DAG will produce messages consisting of several elements to a Kafka cluster and consume
+them.
+"""
+
+from airflow.decorators import dag, task
+from pendulum import datetime
+from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator
+from airflow.providers.apache.kafka.operators.consume import ConsumeFromTopicOperator
+import json
+import random
+
+YOUR_NAME = "dan"
+YOUR_PET_NAME = "rover"
+NUMBER_OF_TREATS = 5
+KAFKA_TOPIC = "my test"
+
+
+def prod_function(num_treats, pet_name):
+ """Produces `num_treats` messages containing the pet's name, a randomly picked
+ pet mood post treat and whether or not it was the last treat in a series."""
+
+ for i in range(num_treats):
+ final_treat = False
+ pet_mood_post_treat = random.choices(
+ ["content", "happy", "zoomy", "bouncy"], weights=[2, 2, 1, 1], k=1
+ )[0]
+ if i + 1 == num_treats:
+ final_treat = True
+        # ProduceToTopicOperator expects (key, value) pairs from the producer
+        # function; log separately instead of embedding print() (which returns
+        # None) as a third tuple element.
+        print("did prod")
+        yield (
+            json.dumps(i),
+            json.dumps(
+                {
+                    "pet_name": pet_name,
+                    "pet_mood_post_treat": pet_mood_post_treat,
+                    "final_treat": final_treat,
+                }
+            ),
+        )
+
+
+def consume_function(message, name):
+    """Takes in a consumed message and prints its contents to the logs."""
+
+ key = json.loads(message.key())
+ message_content = json.loads(message.value())
+ pet_name = message_content["pet_name"]
+ pet_mood_post_treat = message_content["pet_mood_post_treat"]
+ print(
+ f"Message #{key}: Hello {name}, your pet {pet_name} has consumed another treat and is now {pet_mood_post_treat}!"
+ )
+
+
+@dag(
+ start_date=datetime(2023, 4, 1),
+ schedule=None,
+ catchup=False,
+    render_template_as_native_obj=True,  # render Jinja/XCom templates as native Python objects, not strings
+)
+def produce_consume_treats():
+ @task
+ def get_your_pet_name(pet_name=None):
+ return pet_name
+
+ @task
+ def get_number_of_treats(num_treats=None):
+ return num_treats
+
+ @task
+ def get_pet_owner_name(your_name=None):
+ return your_name
+
+ produce_treats = ProduceToTopicOperator(
+ task_id="produce_treats",
+ kafka_config_id="kafka_default",
+ topic=KAFKA_TOPIC,
+ producer_function=prod_function,
+ producer_function_args=["{{ ti.xcom_pull(task_ids='get_number_of_treats')}}"],
+ producer_function_kwargs={
+ "pet_name": "{{ ti.xcom_pull(task_ids='get_your_pet_name')}}"
+ },
+ poll_timeout=10,
+ )
+
+ consume_treats = ConsumeFromTopicOperator(
+ task_id="consume_treats",
+ kafka_config_id="kafka_default",
+ topics=[KAFKA_TOPIC],
+ apply_function=consume_function,
+ apply_function_kwargs={
+ "name": "{{ ti.xcom_pull(task_ids='get_pet_owner_name')}}"
+ },
+ poll_timeout=20,
+ max_messages=20,
+ max_batch_size=20,
+ )
+
+ [
+ get_your_pet_name(YOUR_PET_NAME),
+ get_number_of_treats(NUMBER_OF_TREATS),
+ ] >> produce_treats
+ get_pet_owner_name(YOUR_NAME) >> consume_treats
+
+ produce_treats >> consume_treats
+
+
+produce_consume_treats()
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/dags/second.py b/compose-configs/airflow-marquez/dags/second.py
new file mode 100644
index 0000000..54f5a12
--- /dev/null
+++ b/compose-configs/airflow-marquez/dags/second.py
@@ -0,0 +1,19 @@
+from datetime import datetime
+from pyegeria import Platform
+from airflow import DAG
+from airflow.decorators import task
+from airflow.operators.bash import BashOperator
+
+
+# A DAG represents a workflow, a collection of tasks
+with DAG(dag_id="demo-e", start_date=datetime(2024, 8, 19), schedule="0 0 * * *") as dag:
+
+ # Tasks are represented as operators
+ hello = BashOperator(task_id="hello", bash_command="echo hello")
+
+ @task()
+ def origin():
+ p = Platform("active-metadata-store","https://laz.local:9443","garygeeke")
+ print(p.get_platform_origin())
+ # Set dependencies between tasks
+ hello >> origin()
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/dags/sum.py b/compose-configs/airflow-marquez/dags/sum.py
new file mode 100644
index 0000000..e48c65f
--- /dev/null
+++ b/compose-configs/airflow-marquez/dags/sum.py
@@ -0,0 +1,44 @@
+from airflow import DAG
+from airflow.providers.postgres.operators.postgres import PostgresOperator
+from airflow.utils.dates import days_ago
+
+default_args = {
+ 'owner': 'datascience',
+ 'depends_on_past': False,
+ 'start_date': days_ago(1),
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'email': ['datascience@example.com']
+}
+
+dag = DAG(
+ 'sum',
+ schedule_interval='*/5 * * * *',
+ catchup=False,
+ is_paused_upon_creation=False,
+ max_active_runs=1,
+ default_args=default_args,
+ description='DAG that sums the total of generated count values.'
+)
+
+t1 = PostgresOperator(
+ task_id='if_not_exists',
+ postgres_conn_id='example_db',
+ sql='''
+ CREATE TABLE IF NOT EXISTS sums (
+ value INTEGER
+ );''',
+ dag=dag
+)
+
+t2 = PostgresOperator(
+ task_id='total',
+ postgres_conn_id='example_db',
+ sql='''
+ INSERT INTO sums (value)
+ SELECT SUM(c.value) FROM counts AS c;
+ ''',
+ dag=dag
+)
+
+t1 >> t2
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/docker-compose.yml b/compose-configs/airflow-marquez/docker-compose.yml
new file mode 100644
index 0000000..3896a95
--- /dev/null
+++ b/compose-configs/airflow-marquez/docker-compose.yml
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+services:
+ airflow:
+ image: bitnami/airflow:2
+ ports:
+ - "8070:8070"
+ env_file:
+ - openlineage.env
+ environment:
+ - AIRFLOW_USERNAME=airflow
+ - AIRFLOW_PASSWORD=airflow
+ - AIRFLOW_EMAIL=airflow@example.com
+ - AIRFLOW_FERNET_KEY=Z2uDm0ZL60fXNkEXG8LW99Ki2zf8wkmIltaTz1iQPDU=
+ - AIRFLOW_DATABASE_HOST=postgres
+# - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5436/airflow
+ - AIRFLOW_DATABASE_NAME=airflow
+ - AIRFLOW_DATABASE_USERNAME=airflow
+ - AIRFLOW_DATABASE_PASSWORD=airflow
+ - AIRFLOW_EXECUTOR=CeleryExecutor
+ - AIRFLOW_LOAD_EXAMPLES=no
+ - AIRFLOW_CONN_EXAMPLE_DB=postgres://example:example@postgres:5432/example
+ - AIRFLOW_WEBSERVER_PORT_NUMBER=8070
+ - OPENLINEAGE_CONFIG=/opt/bitnami/airflow/openlineage.yml
+
+ volumes:
+ - ./dags:/opt/bitnami/airflow/dags
+ - ${PWD}/whl:/whl
+ - type: bind
+ source: ${PWD}/requirements.txt
+ target: /bitnami/python/requirements.txt
+ - type: bind
+ source: ${PWD}/openlineage.yml
+ target: /opt/bitnami/airflow/openlineage.yml
+
+ airflow_scheduler:
+ image: bitnami/airflow-scheduler:2
+ env_file:
+ - openlineage.env
+ environment:
+ - AIRFLOW_FERNET_KEY=Z2uDm0ZL60fXNkEXG8LW99Ki2zf8wkmIltaTz1iQPDU=
+ - AIRFLOW_DATABASE_HOST=postgres
+# - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5436/airflow
+ - AIRFLOW_DATABASE_NAME=airflow
+ - AIRFLOW_DATABASE_USERNAME=airflow
+ - AIRFLOW_DATABASE_PASSWORD=airflow
+ - AIRFLOW_EXECUTOR=CeleryExecutor
+ - AIRFLOW_LOAD_EXAMPLES=no
+ - AIRFLOW_CONN_EXAMPLE_DB=postgres://example:example@postgres:5432/example
+ - AIRFLOW_WEBSERVER_HOST=airflow
+ - AIRFLOW_WEBSERVER_PORT_NUMBER=8070
+ - OPENLINEAGE_CONFIG=/opt/bitnami/airflow/openlineage.yml
+
+ volumes:
+ - ./dags:/opt/bitnami/airflow/dags
+ - ${PWD}/whl:/whl
+ - type: bind
+ source: ${PWD}/requirements.txt
+ target: /bitnami/python/requirements.txt
+ - type: bind
+ source: ${PWD}/openlineage.yml
+ target: /opt/bitnami/airflow/openlineage.yml
+
+ airflow_worker:
+ image: bitnami/airflow-worker:2
+ env_file:
+ - openlineage.env
+ environment:
+ - AIRFLOW_FERNET_KEY=Z2uDm0ZL60fXNkEXG8LW99Ki2zf8wkmIltaTz1iQPDU=
+ - AIRFLOW_DATABASE_HOST=postgres
+# - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5436/airflow
+ - AIRFLOW_DATABASE_NAME=airflow
+ - AIRFLOW_DATABASE_USERNAME=airflow
+ - AIRFLOW_DATABASE_PASSWORD=airflow
+ - AIRFLOW_EXECUTOR=CeleryExecutor
+ - AIRFLOW_LOAD_EXAMPLES=no
+ - AIRFLOW_CONN_EXAMPLE_DB=postgres://example:example@postgres:5432/example
+ - AIRFLOW_WEBSERVER_HOST=airflow
+ - AIRFLOW_WEBSERVER_PORT_NUMBER=8070
+      - AIRFLOW__OPENLINEAGE__CONFIG_PATH=/opt/bitnami/airflow/openlineage.yml
+
+
+ volumes:
+ - ./dags:/opt/bitnami/airflow/dags
+ - ${PWD}/whl:/whl
+ - type: bind
+ source: ${PWD}/requirements.txt
+ target: /bitnami/python/requirements.txt
+ - type: bind
+ source: ${PWD}/openlineage.yml
+ target: /opt/bitnami/airflow/openlineage.yml
+
+ marquez:
+ image: marquezproject/marquez:latest
+ environment:
+ - MARQUEZ_HOST=marquez
+ - MARQUEZ_PORT=5050
+ - MARQUEZ_ADMIN_PORT=5051
+
+ ports:
+ - "5050:5050"
+ - "5051:5051"
+ volumes:
+ - ./docker/wait-for-it.sh:/usr/src/app/wait-for-it.sh
+ depends_on:
+ - postgres
+ entrypoint: ["./wait-for-it.sh", "postgres:5436", "--", "./entrypoint.sh"]
+ # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT)
+ # command: ["postgres", "-c", "log_statement=all"]
+
+ marquez_web:
+ image: marquezproject/marquez-web:latest
+ environment:
+ - MARQUEZ_HOST=marquez
+ - MARQUEZ_PORT=5050
+ - REACT_APP_ADVANCED_SEARCH=false
+ ports:
+ - "3000:3000"
+ stdin_open: true
+ tty: true
+ depends_on:
+ - marquez
+
+ postgres:
+ image: bitnami/postgresql:12.1.0
+ ports:
+ - "543:5432"
+# command: postgres -p 5436 --config-file=/var/lib/postgresql/data/pgdata/postgresql.conf
+ environment:
+ - POSTGRES_USER=postgres
+ - POSTGRES_PASSWORD=postgres
+ - AIRFLOW_USER=airflow
+ - AIRFLOW_PASSWORD=airflow
+ - AIRFLOW_DB=airflow
+ - MARQUEZ_USER=marquez
+ - MARQUEZ_PASSWORD=marquez
+ - MARQUEZ_DB=marquez
+ - EXAMPLE_USER=example
+ - EXAMPLE_PASSWORD=example
+ - EXAMPLE_DB=example
+ - ALLOW_EMPTY_PASSWORD=yes
+# - PGDATA=/var/lib/postgresql/data/pgdata
+
+ volumes:
+ - ./docker/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh
+# - ../../runtime-volumes/airflow-pg:/var/lib/postgresql/data/pgdata
+
+ redis:
+ image: bitnami/redis:6.0.6
+ environment:
+ - ALLOW_EMPTY_PASSWORD=yes
+
+#networks:
+# egeria_network:
+## driver: bridge
+# external: true
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/docker/build.sh b/compose-configs/airflow-marquez/docker/build.sh
new file mode 100755
index 0000000..6d04e66
--- /dev/null
+++ b/compose-configs/airflow-marquez/docker/build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# Copyright 2018-2023 contributors to the Marquez project
+# SPDX-License-Identifier: Apache-2.0
+
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd ../../../ &> /dev/null && pwd )"
+cd $REPO_DIR/integrations/common && pip wheel --wheel-dir=$REPO_DIR/examples/airflow/tmp/whl .
+mkdir -p $REPO_DIR/examples/airflow/whl/
+cp $REPO_DIR/examples/airflow/tmp/whl/marquez_integration_common* $REPO_DIR/examples/airflow/whl/
+rm -rf $REPO_DIR/examples/airflow/tmp
+
+cd $REPO_DIR/integrations/airflow && pip wheel -e $REPO_DIR/integrations/common --wheel-dir=$REPO_DIR/examples/airflow/whl .
+ls $REPO_DIR/examples/airflow/whl | sed -e 's/^/\/whl\//' > $REPO_DIR/examples/airflow/requirements.txt
diff --git a/compose-configs/airflow-marquez/docker/init-db.sh b/compose-configs/airflow-marquez/docker/init-db.sh
new file mode 100755
index 0000000..1c8dc98
--- /dev/null
+++ b/compose-configs/airflow-marquez/docker/init-db.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Copyright 2018-2023 contributors to the Marquez project
+# SPDX-License-Identifier: Apache-2.0
+#
+# Usage: $ ./init-db.sh
+
+set -eu
+
+psql -v ON_ERROR_STOP=1 --username "${POSTGRES_USER}" > /dev/null <<-EOSQL
+ CREATE USER ${AIRFLOW_USER};
+ ALTER USER ${AIRFLOW_USER} WITH PASSWORD '${AIRFLOW_PASSWORD}';
+ CREATE DATABASE ${AIRFLOW_DB};
+ GRANT ALL PRIVILEGES ON DATABASE ${AIRFLOW_DB} TO ${AIRFLOW_USER};
+ CREATE USER ${MARQUEZ_USER};
+ ALTER USER ${MARQUEZ_USER} WITH PASSWORD '${MARQUEZ_PASSWORD}';
+ CREATE DATABASE ${MARQUEZ_DB};
+ GRANT ALL PRIVILEGES ON DATABASE ${MARQUEZ_DB} TO ${MARQUEZ_USER};
+ CREATE USER ${EXAMPLE_USER};
+ ALTER USER ${EXAMPLE_USER} WITH PASSWORD '${EXAMPLE_PASSWORD}';
+ CREATE DATABASE ${EXAMPLE_DB};
+ GRANT ALL PRIVILEGES ON DATABASE ${EXAMPLE_DB} TO ${EXAMPLE_USER};
+EOSQL
diff --git a/compose-configs/airflow-marquez/docker/wait-for-it.sh b/compose-configs/airflow-marquez/docker/wait-for-it.sh
new file mode 100755
index 0000000..a21bdc7
--- /dev/null
+++ b/compose-configs/airflow-marquez/docker/wait-for-it.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+#
+# Copyright 2018-2023 contributors to the Marquez project
+# SPDX-License-Identifier: Apache-2.0
+#
+# see: https://github.com/vishnubob/wait-for-it
+
+WAITFORIT_cmdname=${0##*/}
+
+echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
+
+usage()
+{
+ cat << USAGE >&2
+Usage:
+ $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]
+ -h HOST | --host=HOST Host or IP under test
+ -p PORT | --port=PORT TCP port under test
+ Alternatively, you specify the host and port as host:port
+ -s | --strict Only execute subcommand if the test succeeds
+ -q | --quiet Don't output any status messages
+ -t TIMEOUT | --timeout=TIMEOUT
+ Timeout in seconds, zero for no timeout
+ -- COMMAND ARGS Execute command with args after the test finishes
+USAGE
+ exit 1
+}
+
+wait_for()
+{
+ if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
+ echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
+ else
+ echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
+ fi
+ WAITFORIT_start_ts=$(date +%s)
+ while :
+ do
+ if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
+ nc -z $WAITFORIT_HOST $WAITFORIT_PORT
+ WAITFORIT_result=$?
+ else
+ (echo > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1
+ WAITFORIT_result=$?
+ fi
+ if [[ $WAITFORIT_result -eq 0 ]]; then
+ WAITFORIT_end_ts=$(date +%s)
+ echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds"
+ break
+ fi
+ sleep 1
+ done
+ return $WAITFORIT_result
+}
+
+wait_for_wrapper()
+{
+ # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
+ if [[ $WAITFORIT_QUIET -eq 1 ]]; then
+ timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT &
+ else
+ timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT &
+ fi
+ WAITFORIT_PID=$!
+ trap "kill -INT -$WAITFORIT_PID" INT
+ wait $WAITFORIT_PID
+ WAITFORIT_RESULT=$?
+ if [[ $WAITFORIT_RESULT -ne 0 ]]; then
+ echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
+ fi
+ return $WAITFORIT_RESULT
+}
+
+# process arguments
+while [[ $# -gt 0 ]]
+do
+ case "$1" in
+ *:* )
+ WAITFORIT_hostport=(${1//:/ })
+ WAITFORIT_HOST=${WAITFORIT_hostport[0]}
+ WAITFORIT_PORT=${WAITFORIT_hostport[1]}
+ shift 1
+ ;;
+ --child)
+ WAITFORIT_CHILD=1
+ shift 1
+ ;;
+ -q | --quiet)
+ WAITFORIT_QUIET=1
+ shift 1
+ ;;
+ -s | --strict)
+ WAITFORIT_STRICT=1
+ shift 1
+ ;;
+ -h)
+ WAITFORIT_HOST="$2"
+ if [[ $WAITFORIT_HOST == "" ]]; then break; fi
+ shift 2
+ ;;
+ --host=*)
+ WAITFORIT_HOST="${1#*=}"
+ shift 1
+ ;;
+ -p)
+ WAITFORIT_PORT="$2"
+ if [[ $WAITFORIT_PORT == "" ]]; then break; fi
+ shift 2
+ ;;
+ --port=*)
+ WAITFORIT_PORT="${1#*=}"
+ shift 1
+ ;;
+ -t)
+ WAITFORIT_TIMEOUT="$2"
+ if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi
+ shift 2
+ ;;
+ --timeout=*)
+ WAITFORIT_TIMEOUT="${1#*=}"
+ shift 1
+ ;;
+ --)
+ shift
+ WAITFORIT_CLI=("$@")
+ break
+ ;;
+ --help)
+ usage
+ ;;
+ *)
+ echoerr "Unknown argument: $1"
+ usage
+ ;;
+ esac
+done
+
+if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then
+ echoerr "Error: you need to provide a host and port to test."
+ usage
+fi
+
+WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15}
+WAITFORIT_STRICT=${WAITFORIT_STRICT:-0}
+WAITFORIT_CHILD=${WAITFORIT_CHILD:-0}
+WAITFORIT_QUIET=${WAITFORIT_QUIET:-0}
+
+# check to see if timeout is from busybox?
+WAITFORIT_TIMEOUT_PATH=$(type -p timeout)
+WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH)
+if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then
+ WAITFORIT_ISBUSY=1
+ WAITFORIT_BUSYTIMEFLAG="-t"
+
+else
+ WAITFORIT_ISBUSY=0
+ WAITFORIT_BUSYTIMEFLAG=""
+fi
+
+if [[ $WAITFORIT_CHILD -gt 0 ]]; then
+ wait_for
+ WAITFORIT_RESULT=$?
+ exit $WAITFORIT_RESULT
+else
+ if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
+ wait_for_wrapper
+ WAITFORIT_RESULT=$?
+ else
+ wait_for
+ WAITFORIT_RESULT=$?
+ fi
+fi
+
+if [[ $WAITFORIT_CLI != "" ]]; then
+ if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
+ echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
+ exit $WAITFORIT_RESULT
+ fi
+ exec "${WAITFORIT_CLI[@]}"
+else
+ exit $WAITFORIT_RESULT
+fi
diff --git a/compose-configs/airflow-marquez/docs/airflow-view-dag.png b/compose-configs/airflow-marquez/docs/airflow-view-dag.png
new file mode 100644
index 0000000..50d0e85
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/airflow-view-dag.png differ
diff --git a/compose-configs/airflow-marquez/docs/current-lineage-view-job.png b/compose-configs/airflow-marquez/docs/current-lineage-view-job.png
new file mode 100644
index 0000000..4d78984
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/current-lineage-view-job.png differ
diff --git a/compose-configs/airflow-marquez/docs/current-search-count.png b/compose-configs/airflow-marquez/docs/current-search-count.png
new file mode 100644
index 0000000..c3cc14b
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/current-search-count.png differ
diff --git a/compose-configs/airflow-marquez/docs/lineage-view-dataset.png b/compose-configs/airflow-marquez/docs/lineage-view-dataset.png
new file mode 100644
index 0000000..945c5bc
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/lineage-view-dataset.png differ
diff --git a/compose-configs/airflow-marquez/docs/lineage-view-job-successful.png b/compose-configs/airflow-marquez/docs/lineage-view-job-successful.png
new file mode 100644
index 0000000..f1812f2
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/lineage-view-job-successful.png differ
diff --git a/compose-configs/airflow-marquez/docs/lineage-view-job.png b/compose-configs/airflow-marquez/docs/lineage-view-job.png
new file mode 100644
index 0000000..aa0757e
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/lineage-view-job.png differ
diff --git a/compose-configs/airflow-marquez/docs/search-job-failure.png b/compose-configs/airflow-marquez/docs/search-job-failure.png
new file mode 100644
index 0000000..c8b9113
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/search-job-failure.png differ
diff --git a/compose-configs/airflow-marquez/docs/search.png b/compose-configs/airflow-marquez/docs/search.png
new file mode 100644
index 0000000..a7c0b3a
Binary files /dev/null and b/compose-configs/airflow-marquez/docs/search.png differ
diff --git a/compose-configs/airflow-marquez/openlineage.env b/compose-configs/airflow-marquez/openlineage.env
new file mode 100644
index 0000000..5551fb1
--- /dev/null
+++ b/compose-configs/airflow-marquez/openlineage.env
@@ -0,0 +1,12 @@
+OPENLINEAGE_URL=http://marquez:5050 # The URL of the HTTP backend
+# OPENLINEAGE_URL=https://host.docker.internal:9443/servers/integration-daemon/open-metadata/integration-services/lineage-integrator/users/erinoverview/api/v1/lineage
+
+#OPENLINEAGE_URL=http://host.docker.internal:6000
+OPENLINEAGE_NAMESPACE=for_egeria
+OPENLINEAGE_AIRFLOW_LOGGING=DEBUG
+AIRFLOW_DATABASE_PORT=5436
+AIRFLOW__OPENLINEAGE__NAMESPACE=from_airflow
+#AIRFLOW__OPENLINEAGE__TRANSPORT='{"type" : "kafka", "topic": "open-lineage-events", "config": {"bootstrap.servers": "localhost:9192", "acks" : "all", "retries": 3}, "flush" : true}'
+#AIRFLOW__OPENLINEAGE__CONFIG_PATH=/opt/bitnami/airflow/openlineage.yml
+AIRFLOW__OPENLINEAGE__INCLUDE_FULL_TASK_INFO=True
+
diff --git a/compose-configs/airflow-marquez/openlineage.yml b/compose-configs/airflow-marquez/openlineage.yml
new file mode 100644
index 0000000..b832583
--- /dev/null
+++ b/compose-configs/airflow-marquez/openlineage.yml
@@ -0,0 +1,15 @@
+transport:
+# type: kafka
+# topic: open-lineage-events
+# config:
+# bootstrap.servers: kafka:9192
+# acks: all
+# retries: 3
+# flush: true
+ type: http
+ url: http://host.docker.internal:6000
+ endpoint: api/v1/lineage
+
+
+#transport:
+# type: console
\ No newline at end of file
diff --git a/compose-configs/airflow-marquez/requirements.txt b/compose-configs/airflow-marquez/requirements.txt
new file mode 100644
index 0000000..3d95439
--- /dev/null
+++ b/compose-configs/airflow-marquez/requirements.txt
@@ -0,0 +1,7 @@
+apache-airflow-providers-openlineage
+apache-airflow-providers-postgres
+confluent-kafka>=2.3.0
+openlineage-python[kafka]
+pyegeria
+apache-airflow-providers-apache-kafka
+asgiref
diff --git a/compose-configs/egeria-platform-jupyter-compose/Dockerfile-jupyter b/compose-configs/egeria-platform-jupyter-compose/Dockerfile-jupyter
index 6dfdf97..18217b3 100644
--- a/compose-configs/egeria-platform-jupyter-compose/Dockerfile-jupyter
+++ b/compose-configs/egeria-platform-jupyter-compose/Dockerfile-jupyter
@@ -45,12 +45,13 @@ ARG py_ver=3.12
# echo conda activate "${env_name}" >> "/home/${NB_USER}/.bashrc"
#
#USER ${NB_UID}
-RUN pip install --no-cache-dir 'pyegeria>=0.8.1'
+RUN pip install --no-cache-dir 'pyegeria>=0.8.4.29'
RUN pip install --no-cache-dir 'rich'
RUN pip install --no-cache-dir 'unitycatalog'
RUN pip install --no-cache-dir 'pipx'
RUN pip install --no-cache-dir 'textual'
RUN pip install --no-cache-dir 'textual-dev'
+RUN pip install --no-cache-dir 'apache-airflow-client'
RUN pipx ensurepath
RUN pipx install pyegeria
diff --git a/compose-configs/egeria-platform-jupyter-compose/egeria-platform-jupyter-compose.yaml b/compose-configs/egeria-platform-jupyter-compose/egeria-platform-jupyter-compose.yaml
index 9e43536..493305e 100644
--- a/compose-configs/egeria-platform-jupyter-compose/egeria-platform-jupyter-compose.yaml
+++ b/compose-configs/egeria-platform-jupyter-compose/egeria-platform-jupyter-compose.yaml
@@ -46,15 +46,20 @@ services:
image: 'bitnami/kafka:latest'
ports:
- '9192:9192'
-
+ - '9194:9194'
+ networks:
+ - egeria_network
environment:
- KAFKA_CFG_NODE_ID=0
- KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true
- KAFKA_CFG_PROCESS_ROLES=controller,broker
- KAFKA_CFG_LISTENERS=PLAINTEXT://:9192,CONTROLLER://:9193
+ - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9192
- KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
- KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka:9193
+
- KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER
+
egeria-main:
depends_on:
@@ -63,6 +68,8 @@ services:
image: 'docker.io/odpi/egeria-platform:latest'
ports:
- '9443:9443'
+ networks:
+ - egeria_network
environment:
- XTDB_ENABLE_BYTEUTILS_SHA1=True
- XTDB_DISABLE_LIBCRYPTO=True
@@ -87,6 +94,8 @@ services:
- ../../work:/deployments/work
+
+
# - "YOUR EXTRA LIBRARY DIRECTORY"/extra:/deployments/extra
# - "ANOTHER DIRECTOR TO MOUNT"/:/deployments/user_mount
@@ -101,3 +110,8 @@ volumes:
external: false
db_home:
external: false
+
+networks:
+ egeria_network:
+# driver: bridge
+ external: true
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/.env b/compose-configs/egeria-platform-jupyter-ol-pg-compose/.env
new file mode 100644
index 0000000..b87c2a9
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/.env
@@ -0,0 +1 @@
+ CONFIG_JSON=$(cat config.json)
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/Dockerfile-jupyter b/compose-configs/egeria-platform-jupyter-ol-pg-compose/Dockerfile-jupyter
new file mode 100644
index 0000000..18217b3
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/Dockerfile-jupyter
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright Contributors to the Egeria project.
+
+# This build script corrects some permission issues needed to run
+# on some enterprise k8s environments. see https://github.com/odpi/egeria-jupyter-notebooks/issues/9
+
+# The published image tag is taken from the numerical version of
+# our base image, and appended with the contents of .tag-append (file)
+FROM quay.io/jupyter/scipy-notebook
+ARG env_name=python312
+ARG py_ver=3.12
+
+# Much of the following is commented out as it is a bit unstable - but it does allow you to install the latest
+# python in the jupyter container - which can then be selected as an alternate kernel within Jupyter.
+
+#RUN mamba create --yes -p "${CONDA_DIR}/envs/${env_name}" \
+# python=${py_ver} \
+# 'ipykernel' \
+# 'jupyterlab' && \
+# mamba clean --all -f -y
+
+#COPY --chown=${NB_UID}:${NB_GID} environment.yml /tmp/
+#RUN mamba env create -p "${CONDA_DIR}/envs/${env_name}" -f /tmp/environment.yml && \
+# mamba clean --all -f -y
+
+
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/python" -m ipykernel install --user --name="${env_name}" && \
+# fix-permissions "${CONDA_DIR}" && \
+# fix-permissions "/home/${NB_USER}"
+#
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/pip3" install --no-cache-dir 'pyegeria>=0.5.5.16'
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/pip3" install --no-cache-dir 'rich'
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/pip3" install --no-cache-dir 'unitycatalog'
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/pip3" install --no-cache-dir 'pipx'
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/pipx" install pyegeria
+#RUN "${CONDA_DIR}/envs/${env_name}/bin/pipx" ensurepath
+#
+#USER root
+#RUN \
+# # This changes a startup hook, which will activate the custom environment for the process
+# echo conda activate "${env_name}" >> /usr/local/bin/before-notebook.d/10activate-conda-env.sh && \
+# # This makes the custom environment default in Jupyter Terminals for all users which might be created later
+# echo conda activate "${env_name}" >> /etc/skel/.bashrc && \
+# # This makes the custom environment default in Jupyter Terminals for already existing NB_USER
+# echo conda activate "${env_name}" >> "/home/${NB_USER}/.bashrc"
+#
+#USER ${NB_UID}
+RUN pip install --no-cache-dir 'pyegeria>=0.8.4.29'
+RUN pip install --no-cache-dir 'rich'
+RUN pip install --no-cache-dir 'unitycatalog'
+RUN pip install --no-cache-dir 'pipx'
+RUN pip install --no-cache-dir 'textual'
+RUN pip install --no-cache-dir 'textual-dev'
+RUN pip install --no-cache-dir 'apache-airflow-client'
+
+RUN pipx ensurepath
+RUN pipx install pyegeria
+RUN pipx ensurepath
+
+RUN mkdir -p /home/jovyan/workbooks
+RUN chmod +xrw /home/jovyan/workbooks
+COPY ../Start-Here.md /home/jovyan
+COPY ../custom.css /home/.jupyter
+
+
+#RUN echo "export EGERIA_PLATFORM_URL= \'https://host.docker.internal:9443\'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_METADATA_STORE = 'active-metadata-store'" >> "/home/$(NB_USER}/.bashrc"
+#
+#RUN echo "export EGERIA_KAFKA_ENDPOINT = 'localhost:9092'" >> "/home/$(NB_USER}/.bashrc"
+#
+#RUN echo "export EGERIA_VIEW_SERVER = 'view-server'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_VIEW_SERVER_URL = 'https://localhost:9443'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_INTEGRATION_DAEMON = 'integration-daemon'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_INTEGRATION_DAEMON_URL = 'https://localhost:9443'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_ADMIN_USER = 'garygeeke'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_ADMIN_PASSWORD = 'secret'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_USER = 'erinoverview'" >> "/home/$(NB_USER}/.bashrc"
+#RUN echo "export EGERIA_USER_PASSWORD = 'secret'" >> "/home/$(NB_USER}/.bashrc"
+
+# && \
+# fix-permissions "${CONDA_DIR}" && \
+# fix-permissions "/home/${NB_USER}" \
+
+#ENTRYPOINT ["jupyter","notebook"]
+#
+#CMD ["python3", "/config_coco_core.py"]
+#USER root
+
+# Needed to dynamically add the selected user on startup - see link below
+#RUN chmod g+w /etc/passwd
+#
+#RUN chown -R $NB_UID:$NB_GID $HOME
+#
+## https://cloud.redhat.com/blog/jupyter-on-openshift-part-6-running-as-an-assigned-user-id
+#RUN chgrp -Rf root /home/$NB_USER && chmod -Rf g+w /home/$NB_USER && chgrp -Rf root /opt/conda && chmod -Rf g+w /opt/conda
+#
+#USER 1000
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/Dockerfile-proxy b/compose-configs/egeria-platform-jupyter-ol-pg-compose/Dockerfile-proxy
new file mode 100644
index 0000000..1d7fb73
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/Dockerfile-proxy
@@ -0,0 +1,21 @@
+FROM eclipse-temurin:11 AS base
+WORKDIR /usr/src/app
+COPY gradle gradle
+COPY gradle.properties gradle.properties
+COPY gradlew gradlew
+COPY settings.gradle settings.gradle
+RUN ./gradlew --version
+
+FROM base AS build
+WORKDIR /usr/src/app
+COPY src ./src
+COPY build.gradle build.gradle
+RUN ./gradlew --no-daemon shadowJar
+
+FROM eclipse-temurin:11
+WORKDIR /usr/src/app
+COPY --from=build /usr/src/app/build/libs/openlineage-proxy-*.jar /usr/src/app
+COPY proxy.dev.yml proxy.dev.yml
+COPY docker/entrypoint.sh entrypoint.sh
+EXPOSE 6000 6001
+ENTRYPOINT ["/usr/src/app/entrypoint.sh"]
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/README.md b/compose-configs/egeria-platform-jupyter-ol-pg-compose/README.md
new file mode 100644
index 0000000..0f519f8
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/README.md
@@ -0,0 +1,122 @@
+
+
+
+# Overview
+This directory contains sample Docker Compose scripts to support the deployment of Egeria for experimentation,
+development, and learning. Rather than having to install Egeria, prerequisites and tools separately, these scripts make
+it easy to get a stack running quickly. This deployment extends the **egeria-platform-compose** deployment by adding a
+[Jupyter](https://jupyter.org/) container where users can use the **pyegeria** python client to work with Egeria.
+
+The git repo is called **egeria-workspaces** because in addition to the core configuration, it contains sample and demonstration content and a place for you
+to do your own experimentation. We've found it convenient to define a number of external mount points for your
+docker volumes to simplify loading sample data, viewing sample results and sharing code. It is easy to tailor the
+configuration for your own needs.
+
+These are not meant for production use. Please see the [Planning Guide](https://egeria-project.org/guides/planning/)
+for more information about designing Egeria deployments. The Egeria community has also created samples for other
+deployment styles, such as Cloud Native approaches and the use of Helm charts to configure Kubernetes clusters. These
+options may be better starting points for production deployments - depending upon your requirements.
+Please feel free to engage with the community on our slack channel - we'd love your feedback and participation.
+
+For a quick and simple environment to explore some of Egeria's base capabilities, the **egeria-platform-jupyter.yaml** Docker Compose
+deployment may be a good starting point. Once this script executes successfully, you will have three docker containers running:
+one for the Egeria platform, one for Kafka, and one for Jupyter. With this running configuration, you can work with any of Egeria's
+standard interfaces (Java APIs, Python APIs, or plain RESTful HTTP calls), and of course make use of tools and interfaces that have been built using these APIs.
+
+The set of **Docker Compose** configurations will grow and evolve over time to cover additional scenarios. For example,
+the folder `egeria-platform-postgres-compose` contains a docker compose configuration that adds a Postgres
+database along with the Egeria OMAG platform and Kafka servers. This sets the stage for emerging scenarios that
+utilize a relational database to collect Egeria derived information such as Audit logs for additional analysis and dashboarding.
+Please see the embedded README.md files for more details.
+
+The docker compose script is called **egeria-platform-jupyter-compose.yaml**. After running this script, you will have a running environment
+that consists of a single Egeria runtime platform, the Apache Kafka event system, and a Jupyter server. Information about configuring
+Egeria can be found at [Configuring Egeria](https://egeria-project.org/guides/admin/configuring-the-omag-server-platform/).
+We use standard, out-of-the-box configurations, with a minimal amount of setup for each:
+
+## Egeria Platform - Default Configuration
+We use the Egeria platform docker image - [egeria-platform](https://hub.docker.com/r/odpi/egeria-platform).
+
+* Port - By default the platform uses port 9443 and exposes this port to the host environment. This means that Egeria requests
+can be made to the platform URL **https://localhost:9443** or, if your environment is configured to support it, to
+the domain name of your host machine.
+* SSL - By default strict SSL is set to false
+* Content Packs - pre-constructed information sets that can be used to configure Egeria and pre-load metadata, reference data and glossary data. See [Content Packs](https://egeria-project.org/content-packs/).
+* Out-of-the-box Connectors - descriptions of the integration connectors can be found at [Integration Connectors](https://egeria-project.org/connectors/).
+
+* Auto-Started Servers - by default a useful set of Egeria Operational Metadata and Governance (OMAG) servers are pre-installed
+and started when the Egeria platform is started. A description of these servers can be found at [sample configs](open-metadata-resources/open-metadata-deployment/sample-configs/README.md).
+The pre-configured and started servers are:
+
+ * simple-metadata-store
+ * active-metadata-store
+ * engine-host
+ * integration-daemon
+ * view-server
+
+
+* Mounted volumes for:
+ * **distribution-hub**: an area where information created by Egeria (such as logs and survey information) can be easily exposed.
+ * **egeria-platform-data**: this is a default location to hold your metadata repository when using the out of the box repository configuration. This has been externalized so that you can easily preserve your repository independently of docker.
+ * **landing-area**: a convenient drop off point for files and folders you want to survey, analyze and perhaps catalog with Egeria.
+ * **landing-bay**:
+
+
+
+## Kafka - configured for Egeria
+We use the bitnami/kafka image described at [kafka](https://hub.docker.com/r/bitnami/kafka)
+* Port - We use port 9192 for Kafka. This port is also exposed in the host environment. Changing this port also requires corresponding changes to the Egeria configuration.
+* Other configuration can be seen in the *egeria-platform.yaml* file.
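+
+From the host, you can run a quick smoke test against this broker with the `confluent-kafka` Python client. A minimal sketch (the `smoke-test` topic name is ours; auto topic creation is enabled in this configuration, and the broker is assumed reachable on `localhost:9192`):
+
+```python
+# Produce one test message to verify the broker is reachable from the host.
+from confluent_kafka import Producer
+
+producer = Producer({"bootstrap.servers": "localhost:9192"})
+producer.produce("smoke-test", key="k1", value="hello egeria")
+producer.flush(10)  # wait up to 10 seconds for delivery
+print("message sent")
+```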
+
+## Jupyter - configured for Egeria
+A standard Jupyter data science docker image is extended to pre-install **pyegeria** and simplify using Egeria from Jupyter notebooks.
+* Mounted volumes for:
+ * **landing-area**: a convenient drop off point for files and folders you want to survey, analyze and perhaps catalog with Egeria.
+ * **distribution-hub**: an area where information created by Egeria (such as logs and survey information) can be easily exposed.
+ * **work**: a place for you to put your code and other artifacts.
+ * **workbooks**: an area where we have put some Jupyter notebooks and related information to help you complete common tasks with Egeria.
+
+# Usage
+Follow these steps to use Docker Compose.
+
+1. Install and Configure Docker and Docker Compose.
+ * Docker and Docker compose must be installed and running - see https://docs.docker.com/install/
+ * Configure docker with at least 8GB memory
+2. Download or clone the egeria-workspaces repo at [**egeria-workspaces**](https://github.com/odpi/egeria-workspaces.git)
+3. In a terminal window, change directory to `/egeria-workspaces/egeria-platform-jupyter-compose`
+4. At the command line issue:
+
+ `docker compose -f egeria-platform-jupyter-compose.yaml up --build`
+
+   This will:
+   a. build a Jupyter image that is pre-configured to work with Egeria
+   b. download the docker images for Kafka and Egeria, then create and start the containers. Both Kafka and Egeria will automatically configure themselves.
+   For Egeria, this means not only starting up the initial set of servers, but then loading the **CoreContentPack.omarchive** into the metadata repository, and then configuring all the servers. This can take several minutes the first time the containers are created. Subsequent startups will be much faster.
+   c. start the Jupyter container
+5. Using either the **Docker Desktop** application or the docker command line, you can see the new containers running. To do this with the docker command line, issue:
+
+`docker ps`
+
+6. The environment is ready to be used.
+
+7. You can control the containers with docker compose commands - see [docker compose](https://docs.docker.com/reference/cli/docker/compose/). These commands can be used to administer and use the docker containers.
+8. To access Jupyter, open a browser to `http://localhost:8888`. At the password prompt, enter `egeria`. This should open up your notebook environment.
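+
+From a notebook in that environment, you can do a quick connectivity check with **pyegeria**. A minimal sketch (the server name, platform URL, and user are assumptions based on this configuration's defaults; adjust them to your setup):
+
+```python
+# Verify the Egeria platform is reachable and print its origin/version string.
+from pyegeria import Platform
+
+p = Platform("active-metadata-store", "https://host.docker.internal:9443", "garygeeke")
+print(p.get_platform_origin())
+```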
+
+## Next Steps
+
+Now that your Egeria environment is running and configured, it is waiting for you to make requests.
+Some tutorials for working with Egeria can be found at [Tutorials](https://egeria-project.org/education/tutorials/). For those who want to try the new python client, you can find a quick introduction at [pyegeria](https://getting-started-with-egeria.pdr-associates.com/recipe-6-charming-python.html).
+
+As always, your feedback and participation are welcome.
+
+
+----
+License: [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/),
+Copyright Contributors to the ODPi Egeria project.
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/Start-Here.md b/compose-configs/egeria-platform-jupyter-ol-pg-compose/Start-Here.md
new file mode 100644
index 0000000..4b85d22
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/Start-Here.md
@@ -0,0 +1,16 @@
+
+
+# Using the Egeria-Platform-Jupyter-Compose Environment
+
+If you are viewing this file in a Jupyter server, then you are ready to explore and use this basic Egeria environment.
+
+If you are running Jupyter, on the left hand navigation panel you should see two folders:
+* work - for your own notebooks.
+* workbooks - contains some starter jupyter notebooks and demos.
+
+These folders reside outside of the container and are by default located in the `egeria-workspaces` directory that contains the scripts used to start up this environment. These locations can be changed by altering the **Docker Compose** script
+`egeria-platform-jupyter-compose.yaml` using a text editor.
+
+
+
+For more information, please see [Egeria Docker Compose](https://egeria-project.org/education/open-metadata-labs/overview)
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/application.properties b/compose-configs/egeria-platform-jupyter-ol-pg-compose/application.properties
new file mode 100755
index 0000000..5ce5b75
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/application.properties
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright Contributors to the ODPi Egeria project.
+
+# ========================================================================================================
+# The application.properties file is used to configure the OMAG Server Platform which is implemented as
+# a spring boot application.
+
+###############################################
+### Default port for the OMAG Server Platform
+################################################
+server.port=9443
+
+###############################################
+### Set up the configuration document store for the OMAG Server Platform
+### The values below will set up the clear text config document store
+### (the default is the encrypted config document store).
+################################################
+#platform.configstore.provider=org.odpi.openmetadata.adapters.adminservices.configurationstore.file.FileBasedServerConfigStoreProvider
+#platform.configstore.endpoint=data/servers/{0}/config/{0}.config
+
+###############################################
+### Set up the platform metadata security connector that provides authorization
+### for platform administration, server operations and diagnostic calls.
+### By default, there is no platform metadata security connector.
+### The values below are for a sample platform metadata security connector where the only userId
+### that is permitted to use the administration and platform services is `garygeeke`.
+###############################################
+#platform.security.provider=org.odpi.openmetadata.metadatasecurity.samples.CocoPharmaPlatformSecurityProvider
+#platform.security.name=Coco Pharmaceuticals Platform
+
+###############################################
+### Set up the default configuration document for any new OMAG Server configurations.
+###############################################
+#platform.default.config.document=\
+# {\
+# "class": "OMAGServerConfig",\
+# "organizationName": "myOrg",\
+# "maxPageSize": 1200,\
+# "eventBusConfig": \
+# {\
+# "class": "EventBusConfig",\
+# "topicURLRoot": "egeria.omag",\
+# "configurationProperties":\
+# {\
+# "producer": {"bootstrap.servers": "{{kafkaEndpoint}}"},\
+# "consumer": {"bootstrap.servers": "{{kafkaEndpoint}}"}\
+# }\
+# }\
+# }
+
+################################################
+### Placeholder variables are added to field values in the configuration document
+### when the server is being configured using double curly braces. They are replaced by
+### the values specified in platform.placeholder.variables each time the server starts up.
+###
+### The "kafkaEndpoint" value is the Apache Kafka endpoint, and it is used in the active-metadata-store
+### sample configuration. If your Apache Kafka broker is listening on a different endpoint
+### and you want to use active-metadata-store, change this variable to your Apache Kafka's address.
+################################################
+platform.placeholder.variables=\
+ {\
+ "kafkaEndpoint" : "localhost:9092",\
+ "egeriaEndpoint" : "https://localhost:9443"\
+ }
+
+################################################
+### startup servers configuration
+################################################
+# userId used to start up the list of configured servers; the default is 'system'
+startup.user=system
+# Comma separated names of servers to be started. The server names should be unquoted.
+#startup.server.list=active-metadata-store,engine-host,integration-daemon,view-server,simple-metadata-store
+
+################################################
+### SSL security.
+# The keystore determines the information sent out by the server to identify itself.
+# The truststore is where the certificates of trusted servers the platform is calling are located.
+# (Note SSL certificate checking is performed on client-side only.)
+################################################
+server.ssl.key-store=keystore.p12
+server.ssl.key-store-password=egeria
+server.ssl.keyStoreType=PKCS12
+server.ssl.keyAlias=egeriaserverchassis
+
+server.ssl.trust-store=truststore.p12
+server.ssl.trust-store-password=egeria
+
+# WARNING! Setting 'strict.ssl=false' allows java clients to open https connections without checking the validity of
+# certificates from the servers they are calling.
+# Alternatively, you can import self-signed certificates into the java truststore, or set up a truststore just for
+# this app by adding the store to the server.ssl.trust-store parameter.
+strict.ssl=true
+
+
+################################################
+# User security
+################################################
+
+# Authentication source (possible values: demo, ldap, ad)
+authentication.source=demo
+# Authentication mode (possible values: session, token, redis)
+authentication.mode=token
+
+# Token timeouts in minutes
+token.timeout=15
+token.absolute.timeout=720
+token.secret=doNotTell
+
+#LDAP authentication
+
+ldap.domain=
+ldap.user.search.base=ou=people,dc=egeria,dc=com
+ldap.user.search.filter=uid={0}
+ldap.group.search.base=ou=Groups,dc=egeria,dc=com
+ldap.group.search.filter=member={0}
+ldap.url=ldap://localhost:389
+ldap.group.role.attribute=
+ldap.npa.dn=
+ldap.npa.password=
+# ldap.user.dn.patterns is a list of values separated by ";", as a comma is used within each ldap pattern
+ldap.user.dn.patterns=
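+# e.g. (illustrative pattern only): ldap.user.dn.patterns=uid={0},ou=people;cn={0},ou=services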
+
+# Redis configuration
+#redis.host=localhost
+#redis.port=6379
+
+################################################
+### Additional demo users configuration for when authentication.source=demo
+### This file is located in the resources folder of the user-authn module and built into its runtime jar
+################################################
+spring.config.import=classpath:demo-users.yml
+
+################################################
+### Comma-separated list of header names to extract from incoming HTTP requests and add to thread local.
+### The default value is null, which adds no headers.
+### Setting the list to * means all headers are captured.
+### Otherwise, list the header names in use.
+################################################
+authn.header.name.list=
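+# e.g. (illustrative header names only): authn.header.name.list=x-tenant-id,x-request-id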
+
+################################################
+### CORS
+################################################
+# Comma-separated list of origins.
+# The example configuration below is for setting up a local development environment where egeria-ui is hosted on one of the two URLs.
+# cors.allowed-origins=http://localhost,http://localhost:8081
+cors.allowed-origins=*
+
+################################################
+# landing page (/api/public/app/info)
+################################################
+app.description=Have a question? || Get in touch via our Slack community https://slack.lfai.foundation/ @@What is Open Metadata? || Find out more on our website https://egeria-project.org/ @@Have more cool ideas? || Feel free to let us know your ideas so we can make it better.
+app.title=Egeria Open Metadata | Find the right data with governance
+
+# ##############################################################
+# Component visibility for Role based access ###################
+# ##############################################################
+# How does it work?
+#
+# The roles are defined in the external authentication source (provider) configured with `authentication.source`.
+# For demo purposes, we provide a simple file-based authentication provider. See demo-users.yml.
+# The matrix controlling which components are visible in the UI views for a specific role is defined in properties
+# prefixed with 'role.visibleComponents', as follows:
+#
+# role.visibleComponents.{ROLE-1}={component-name-1}
+# role.visibleComponents.{ROLE-2}={component-name-1},{component-name-2}
+#
+# This configures the application to show the component named 'component-name-1' to all users assigned 'ROLE-1'.
+# In the same way, users assigned 'ROLE-2' can see both 'component-name-1' and 'component-name-2'.
+# It is also possible to use the wildcard '*' to give users in a given role full visibility of all components.
+#
+# Complete list of component names that can be used:
+#
+# about
+# asset-catalog
+# asset-details
+# asset-details-print
+# glossary
+# repository-old-explorer
+# type-explorer
+# asset-lineage
+# asset-lineage-print
+# end-to-end
+# ultimate-source
+# ultimate-destination
+# vertical-lineage
+#
+# Below is the default configuration for the two COCO_PHARMA roles we use for the demo:
+
+role.visibleComponents.COCO_PHARMA_USER=about,asset-catalog,asset-details,asset-details-print,asset-lineage,asset-lineage-print,end-to-end,ultimate-source,ultimate-destination,vertical-lineage,glossary,repository-explorer
+role.visibleComponents.COCO_PHARMA_ADMIN=*
+
+################################################
+### Which java packages should be scanned to locate the Spring resource definitions that define the REST APIs?
+################################################
+scan.packages=org.odpi.openmetadata.*
+
+################################################
+### Logging
+################################################
+logging.level.root=OFF
+logging.level.org.springframework=ERROR
+logging.level.org.springframework.boot.web.embedded.tomcat=INFO
+logging.level.org.odpi.openmetadata.platformchassis.springboot=INFO
+#tracing REST calls
+#logging.level.org.odpi.openmetadata.commonservices.ffdc.RESTCallLogger=DEBUG
+
+################################################
+### Swagger Docs
+################################################
+springdoc.version='@springdoc.version@'
+springdoc.api-docs.enabled=true
+springdoc.api-docs.path=/v3/api-docs
+springdoc.swagger-ui.path=/swagger-ui.html
+springdoc.swagger-ui.displayRequestDuration=true
+springdoc.swagger-ui.tagsSorter=alpha
+springdoc.swagger-ui.operationsSorter=alpha
+springdoc.swagger-ui.docExpansion=none
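+# With the settings above, the OpenAPI document is served at /v3/api-docs and the UI at /swagger-ui.html
+# (e.g. https://localhost:9443/swagger-ui.html, assuming the default server.port configured earlier).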
+
+################################################
+### Spring Boot Actuator
+################################################
+# Endpoints web configuration
+#management.endpoints.web.exposure.include=*
+management.health.cassandra.enabled=false
+management.health.redis.enabled=false
+management.health.ldap.enabled=false
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/before-notebook.d/config-jupyter.sh b/compose-configs/egeria-platform-jupyter-ol-pg-compose/before-notebook.d/config-jupyter.sh
new file mode 100755
index 0000000..978287e
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/before-notebook.d/config-jupyter.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: Apache-2.0
+# Copyright Contributors to the ODPi Egeria project.
+#
+# Coco Pharmaceuticals Lab Docker Compose configuration
+#
+# This shell script is automatically invoked when the Jupyter container is initiated as part of the Docker Compose
+# configuration. The script configures and activates the Egeria OMAG servers on each of the three Egeria OMAG Server
+# Platforms that are part of the sample Coco Pharmaceuticals deployment environment.
+#
+#
+# The following line is useful if you want to configure an alternate version of python - note that you need to make
+# corresponding changes in the Dockerfile-jupyter file.
+#
+#/opt/conda/bin/activate python312
+
+export EGERIA_METADATA_STORE="active-metadata-store"
+export EGERIA_KAFKA_ENDPOINT='host.docker.internal:9192'
+export EGERIA_PLATFORM_URL='https://host.docker.internal:9443'
+export EGERIA_VIEW_SERVER='view-server'
+export EGERIA_VIEW_SERVER_URL='https://host.docker.internal:9443'
+export EGERIA_INTEGRATION_DAEMON='integration-daemon'
+export EGERIA_INTEGRATION_DAEMON_URL='https://host.docker.internal:9443'
+export EGERIA_ENGINE_HOST='engine-host'
+export EGERIA_ENGINE_HOST_URL='https://host.docker.internal:9443'
+export EGERIA_ADMIN_USER='garygeeke'
+export EGERIA_ADMIN_PASSWORD='secret'
+export EGERIA_USER='erinoverview'
+export EGERIA_USER_PASSWORD='secret'
+export EGERIA_JUPYTER='True'
+export EGERIA_WIDTH='200'
+
+
+
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/build.gradle b/compose-configs/egeria-platform-jupyter-ol-pg-compose/build.gradle
new file mode 100644
index 0000000..3fd0cac
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/build.gradle
@@ -0,0 +1,221 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+import com.github.jengelman.gradle.plugins.shadow.transformers.ServiceFileTransformer
+import org.apache.tools.ant.filters.*
+
+plugins {
+ id 'application'
+ id 'jacoco'
+ id 'java'
+ id 'maven-publish'
+ id 'signing'
+ id 'com.adarshr.test-logger' version '2.1.0'
+ id 'com.diffplug.spotless' version '6.12.0'
+ id 'com.github.johnrengelman.shadow' version '8.1.1'
+ id "pmd"
+ id 'io.github.gradle-nexus.publish-plugin' version '2.0.0'
+}
+
+group = "io.openlineage"
+
+pmd {
+ consoleOutput = true
+ toolVersion = "6.46.0"
+ rulesMinimumPriority = 5
+ ruleSetFiles = rootProject.files("pmd-openlineage.xml")
+ ruleSets = []
+ ignoreFailures = true
+}
+
+pmdMain {
+ reports {
+ html.required = true
+ }
+}
+
+java {
+ sourceCompatibility = JavaVersion.VERSION_11
+ targetCompatibility = JavaVersion.VERSION_11
+}
+
+repositories {
+ mavenLocal()
+ mavenCentral()
+}
+
+ext {
+ dropwizardVersion = '2.0.28'
+ jacocoVersion = '0.8.12'
+ lombokVersion = '1.18.22'
+ kafkaVersion = '2.8.0'
+ jacksonVersion = '2.12.2'
+ isReleaseVersion = !version.endsWith('SNAPSHOT')
+}
+
+dependencies {
+ implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}"
+ implementation "io.dropwizard:dropwizard-json-logging:${dropwizardVersion}"
+ implementation 'com.google.guava:guava:30.1.1-jre'
+ implementation "org.projectlombok:lombok:${lombokVersion}"
+ implementation("org.apache.kafka:kafka-clients:2.8.0")
+ implementation("com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}")
+ implementation("com.fasterxml.jackson.core:jackson-annotations:${jacksonVersion}")
+ implementation("com.fasterxml.jackson.core:jackson-core:${jacksonVersion}")
+ annotationProcessor "org.projectlombok:lombok:${lombokVersion}"
+
+ testImplementation "io.dropwizard:dropwizard-testing:${dropwizardVersion}"
+}
+
+sourceSets {
+ main {
+ java {
+ srcDirs = ['src/main/java']
+ }
+ }
+ test {
+ java {
+ srcDirs = ['src/test/java']
+ }
+ }
+}
+
+compileJava {
+ options.incremental = true
+ options.compilerArgs << '-parameters'
+ options.encoding = 'UTF-8'
+}
+
+compileTestJava {
+ options.incremental = true
+ options.compilerArgs << '-parameters'
+ options.encoding = 'UTF-8'
+}
+
+task sourceJar(type: Jar) {
+ archiveClassifier = 'sources'
+ from sourceSets.main.allJava
+}
+
+task javadocJar(type: Jar, dependsOn: javadoc) {
+ archiveClassifier = 'javadoc'
+ from javadoc.destinationDir
+}
+
+publishing {
+ publications {
+ mavenJava(MavenPublication) {
+ groupId = 'io.openlineage'
+ artifactId = 'openlineage-proxy'
+
+ from components.java
+
+ artifact sourceJar
+ artifact javadocJar
+
+ pom {
+ name = 'openlineage-proxy'
+ description = 'Proxy backend for OpenLineage'
+ url = 'https://github.com/OpenLineage/OpenLineage'
+ licenses {
+ license {
+ name = 'The Apache License, Version 2.0'
+ url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
+ }
+ }
+ developers {
+ developer {
+ id = 'openlineage'
+ name = 'OpenLineage Project'
+ }
+ }
+ scm {
+ connection = 'scm:git:git://github.com/OpenLineage/OpenLineage.git'
+ developerConnection = 'scm:git:ssh://github.com:OpenLineage/OpenLineage.git'
+ url = 'https://github.com/OpenLineage/OpenLineage'
+ }
+ }
+ }
+ }
+
+ processResources {
+ filter ReplaceTokens, tokens: [
+ "version": project.property("version")
+ ]
+ }
+}
+
+nexusPublishing {
+ repositories {
+ sonatype {
+ username = System.getenv('RELEASE_USERNAME')
+ password = System.getenv('RELEASE_PASSWORD')
+ }
+ }
+}
+
+signing {
+ required { isReleaseVersion }
+ def signingKey = findProperty("signingKey")
+ def signingPassword = findProperty("signingPassword")
+ useInMemoryPgpKeys(signingKey, signingPassword)
+ sign publishing.publications.mavenJava
+}
+
+mainClassName = 'io.openlineage.proxy.ProxyApp'
+
+shadowJar {
+ archiveClassifier = ''
+ version = project.version
+ transform(ServiceFileTransformer)
+ dependsOn(distTar, distZip)
+ manifest {
+ attributes(
+ 'Created-By': "Gradle ${gradle.gradleVersion}",
+ 'Built-By': System.getProperty('user.name'),
+ 'Build-Jdk': System.getProperty('java.version'),
+ 'Implementation-Title': project.name,
+ 'Implementation-Version': project.version,
+ 'Main-Class': mainClassName)
+ }
+}
+
+tasks.named("assemble") {
+ dependsOn(tasks.named("shadowJar"))
+}
+
+tasks.named("shadowJar") {
+ dependsOn(tasks.named("jar"))
+}
+
+runShadow {
+ args = ['server', 'proxy.yml']
+}
+
+spotless {
+ java {
+ googleJavaFormat()
+ removeUnusedImports()
+ }
+}
+
+def reportsDir = "${buildDir}/reports"
+def coverageDir = "${reportsDir}/coverage"
+
+// Uses the Gradle 8 report API (reportsDirectory / required / outputLocation), matching the
+// Gradle 8.9 wrapper configured in this directory.
+jacoco {
+    toolVersion = "${jacocoVersion}"
+    reportsDirectory = file(coverageDir)
+}
+
+jacocoTestReport {
+    reports {
+        xml {
+            required = true
+        }
+        html {
+            required = true
+            outputLocation = file(coverageDir)
+        }
+    }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/custom.css b/compose-configs/egeria-platform-jupyter-ol-pg-compose/custom.css
new file mode 100644
index 0000000..e4c7f2f
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/custom.css
@@ -0,0 +1,3 @@
+.container {
+ width: 90% !important;
+}
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql
new file mode 100644
index 0000000..34696ec
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql
@@ -0,0 +1,161 @@
+create user egeria_admin with superuser login password 'admin4egeria';
+create user egeria_user with login password 'user4egeria';
+create database egeria_observations;
+
+grant all privileges on database egeria_observations to egeria_admin, egeria_user;
+
+
+\c egeria_observations;
+create schema open_metadata;
+create schema audit_log;
+create schema surveys;
+grant all on schema open_metadata, audit_log, surveys to egeria_admin, egeria_user;
+
+
+DROP TABLE IF EXISTS audit_log.al_api_calls;
+CREATE TABLE audit_log.al_api_calls (thread_id BIGINT NOT NULL, server_name TEXT NOT NULL, user_name TEXT NOT NULL, operation_name TEXT NOT NULL, service_name TEXT NOT NULL, call_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL);
+COMMENT ON TABLE audit_log.al_api_calls IS 'Calls to Egeria REST APIs';
+COMMENT ON COLUMN audit_log.al_api_calls.thread_id IS 'Thread running the request';
+COMMENT ON COLUMN audit_log.al_api_calls.server_name IS 'Name of the called server';
+COMMENT ON COLUMN audit_log.al_api_calls.user_name IS 'Identifier of calling user';
+COMMENT ON COLUMN audit_log.al_api_calls.operation_name IS 'Name of the called method';
+COMMENT ON COLUMN audit_log.al_api_calls.service_name IS 'The service supporting the called method';
+COMMENT ON COLUMN audit_log.al_api_calls.call_time IS 'Time that the request was made.';
+DROP TABLE IF EXISTS audit_log.al_asset_activity;
+CREATE TABLE audit_log.al_asset_activity (thread_id BIGINT NOT NULL, server_name TEXT NOT NULL, call_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, asset_operation TEXT NOT NULL, asset_guid TEXT NOT NULL, asset_type TEXT NOT NULL, operation_name TEXT NOT NULL, service_name TEXT NOT NULL, user_name TEXT NOT NULL);
+COMMENT ON TABLE audit_log.al_asset_activity IS 'User activity around an om_asset';
+COMMENT ON COLUMN audit_log.al_asset_activity.thread_id IS 'Thread where the request ran';
+COMMENT ON COLUMN audit_log.al_asset_activity.server_name IS 'Name of the called server';
+COMMENT ON COLUMN audit_log.al_asset_activity.call_time IS 'Time that the request was made';
+COMMENT ON COLUMN audit_log.al_asset_activity.asset_operation IS 'Create, Update, Delete, Attachment, Feedback';
+COMMENT ON COLUMN audit_log.al_asset_activity.asset_guid IS 'Unique identifier of the om_asset';
+COMMENT ON COLUMN audit_log.al_asset_activity.asset_type IS 'Type of the om_asset';
+COMMENT ON COLUMN audit_log.al_asset_activity.operation_name IS 'Called method';
+COMMENT ON COLUMN audit_log.al_asset_activity.service_name IS 'Name of the called service';
+COMMENT ON COLUMN audit_log.al_asset_activity.user_name IS 'Name of the requesting user.';
+DROP TABLE IF EXISTS audit_log.al_audit_events;
+CREATE TABLE audit_log.al_audit_events (message_ts TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, server_name TEXT, action_description TEXT, severity_code TEXT, severity TEXT, message_id TEXT, message_text TEXT, message_parameters TEXT, system_action TEXT, user_action TEXT, exception_class_name TEXT, exception_message TEXT, exception_stacktrace TEXT, organization TEXT, component_name TEXT, additional_info TEXT, log_record_id TEXT NOT NULL, thread_id BIGINT, CONSTRAINT audit_events_pk PRIMARY KEY (log_record_id, message_ts));
+COMMENT ON COLUMN audit_log.al_audit_events.thread_id IS 'Thread where the request ran';
+DROP TABLE IF EXISTS audit_log.al_egeria_components;
+CREATE TABLE audit_log.al_egeria_components (component_id INTEGER NOT NULL, development_status CHARACTER VARYING(20), component_name TEXT, component_description TEXT, component_wiki_url TEXT, CONSTRAINT egeriacomponents_ix1 UNIQUE (component_id));
+DROP TABLE IF EXISTS audit_log.al_egeria_exceptions;
+CREATE TABLE audit_log.al_egeria_exceptions (exception_class_name TEXT NOT NULL, exception_message TEXT NOT NULL, system_action TEXT NOT NULL, user_action TEXT NOT NULL, message_ts TEXT NOT NULL, log_record_id TEXT NOT NULL, CONSTRAINT egeria_exceptions_pk PRIMARY KEY (log_record_id));
+COMMENT ON COLUMN audit_log.al_egeria_exceptions.message_ts IS 'Timestamp of log record';
+COMMENT ON COLUMN audit_log.al_egeria_exceptions.log_record_id IS 'Unique identifier of the reporting log record.';
+DROP TABLE IF EXISTS audit_log.al_omag_servers;
+CREATE TABLE audit_log.al_omag_servers (server_name TEXT NOT NULL, server_type TEXT, organization TEXT, metadata_collection_id TEXT, CONSTRAINT omag_servers_pk PRIMARY KEY (server_name));
+COMMENT ON COLUMN audit_log.al_omag_servers.server_name IS 'Name of the server';
+COMMENT ON COLUMN audit_log.al_omag_servers.server_type IS 'Type of server';
+COMMENT ON COLUMN audit_log.al_omag_servers.organization IS 'Name of the organization that runs this server.';
+COMMENT ON COLUMN audit_log.al_omag_servers.metadata_collection_id IS 'Identifier for the metadata collection being maintained by this server';
+
+DROP TABLE IF EXISTS open_metadata.om_asset;
+CREATE TABLE open_metadata.om_asset (resource_name TEXT, resource_description TEXT, version_id TEXT, display_name TEXT, display_description TEXT, asset_guid TEXT NOT NULL, qualified_name TEXT NOT NULL, display_summary TEXT, abbrev TEXT, usage TEXT, additional_properties TEXT, owner_guid TEXT, owner_type TEXT, origin_org_guid TEXT, origin_biz_cap_guid TEXT, zone_names TEXT, asset_type TEXT NOT NULL, resource_loc_guid TEXT, confidentiality INTEGER, confidence INTEGER, criticality INTEGER, metadata_collection_id TEXT NOT NULL, license_guid TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, last_update_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, last_updated_by TEXT, creation_timestamp TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, created_by TEXT, maintained_by TEXT, archived TIMESTAMP(6) WITHOUT TIME ZONE, tags TEXT, semantic_term TEXT, PRIMARY KEY (asset_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_asset IS 'Assets catalogued in the Egeria ecosystem';
+COMMENT ON COLUMN open_metadata.om_asset.sync_time IS 'The time at which Egeria updated this row.';
+DROP TABLE IF EXISTS open_metadata.om_asset_types;
+CREATE TABLE open_metadata.om_asset_types (leaf_type TEXT NOT NULL, type_description TEXT, super_types JSONB, CONSTRAINT asset_types_pk PRIMARY KEY (leaf_type));
+DROP TABLE IF EXISTS open_metadata.om_certification_type;
+CREATE TABLE open_metadata.om_certification_type (certification_type_guid TEXT NOT NULL, certification_title TEXT NOT NULL, certification_summary TEXT, PRIMARY KEY (certification_type_guid));
+COMMENT ON TABLE open_metadata.om_certification_type IS 'map certification guids to names';
+DROP TABLE IF EXISTS open_metadata.om_certifications;
+CREATE TABLE open_metadata.om_certifications (referenceable_guid TEXT NOT NULL, certification_guid TEXT NOT NULL, certification_type_guid TEXT NOT NULL, start_date DATE, end_date DATE, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (certification_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_certifications IS 'om_certifications associated with assets';
+DROP TABLE IF EXISTS open_metadata.om_collaboration_activity;
+CREATE TABLE open_metadata.om_collaboration_activity (element_guid TEXT NOT NULL, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, num_comments INTEGER, num_ratings INTEGER, avg_rating INTEGER, num_tags INTEGER, num_likes INTEGER, PRIMARY KEY (element_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_collaboration_activity IS 'Track the user feedback over time';
+COMMENT ON COLUMN open_metadata.om_collaboration_activity.element_guid IS 'Either an om_asset or glossary element';
+COMMENT ON COLUMN open_metadata.om_collaboration_activity.sync_time IS 'The last time that the information was updated in the database by Egeria.';
+DROP TABLE IF EXISTS open_metadata.om_context_event_types;
+CREATE TABLE open_metadata.om_context_event_types (guid TEXT NOT NULL, qualified_name TEXT NOT NULL, ce_type_name TEXT NOT NULL, description TEXT);
+DROP TABLE IF EXISTS open_metadata.om_context_events;
+CREATE TABLE open_metadata.om_context_events (guid TEXT NOT NULL, qualified_name TEXT NOT NULL, display_name TEXT NOT NULL, description TEXT, event_effect TEXT, context_event_type TEXT NOT NULL, planned_start_date DATE, planned_duration NUMERIC, actual_duration NUMERIC, repeat_interval NUMERIC, planned_completion_date DATE, actual_completion_date DATE, reference_effective_from DATE, reference_effective_to DATE, additional_properties TEXT);
+DROP TABLE IF EXISTS open_metadata.om_contributions;
+CREATE TABLE open_metadata.om_contributions (user_guid TEXT NOT NULL, snapshot_timestamp TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, karma_points BIGINT, PRIMARY KEY (user_guid, snapshot_timestamp));
+COMMENT ON TABLE open_metadata.om_contributions IS 'This reflects the om_contributions per user over time.';
+DROP TABLE IF EXISTS open_metadata.om_correlation_properties;
+CREATE TABLE open_metadata.om_correlation_properties (external_identifier TEXT NOT NULL, last_updated_by TEXT, last_update_time TIMESTAMP(6) WITHOUT TIME ZONE, created_by TEXT, version BIGINT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE, type_name TEXT, egeria_owned BOOLEAN NOT NULL, additional_properties TEXT, element_guid TEXT NOT NULL, external_source_guid TEXT NOT NULL, last_confirmed_sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (external_identifier, element_guid, external_source_guid, last_confirmed_sync_time));
+COMMENT ON TABLE open_metadata.om_correlation_properties IS 'Most of the information comes from the external_id entity that represents an instance in a 3rd party catalog. This includes the user information from that third party.';
+DROP TABLE IF EXISTS open_metadata.om_data_fields;
+CREATE TABLE open_metadata.om_data_fields (data_field_guid TEXT NOT NULL, data_field_name TEXT, version_id CHARACTER VARYING(80), semantic_term CHARACTER VARYING(80), has_profile BOOLEAN, confidentiality_level INTEGER, asset_qualified_name TEXT, asset_guid TEXT NOT NULL, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (data_field_guid, sync_time));
+DROP TABLE IF EXISTS open_metadata.om_department;
+CREATE TABLE open_metadata.om_department (dep_id CHARACTER VARYING(40) NOT NULL, dep_name TEXT, manager CHARACTER VARYING(40), parent_department TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (dep_id, sync_time));
+COMMENT ON TABLE open_metadata.om_department IS 'Maps Department codes to Department names';
+DROP TABLE IF EXISTS open_metadata.om_external_audit_logs;
+CREATE TABLE open_metadata.om_external_audit_logs (metadata_collection_id TEXT NOT NULL, external_identifier TEXT NOT NULL, activity_timestamp TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, activity_type TEXT, user_id TEXT, event_key TEXT, PRIMARY KEY (metadata_collection_id, external_identifier, activity_timestamp));
+DROP TABLE IF EXISTS open_metadata.om_external_user;
+CREATE TABLE open_metadata.om_external_user (metadata_collection_id TEXT NOT NULL, external_user TEXT NOT NULL, user_id_guid TEXT, start_time TIMESTAMP(6) WITHOUT TIME ZONE, end_time TIMESTAMP(6) WITHOUT TIME ZONE, PRIMARY KEY (metadata_collection_id, external_user));
+COMMENT ON TABLE open_metadata.om_external_user IS 'Capture the user information from external systems that may or may not have mapped identities with Egeria.';
+DROP TABLE IF EXISTS open_metadata.om_glossary;
+CREATE TABLE open_metadata.om_glossary (glossary_name TEXT, glossary_language TEXT, classifications TEXT, glossary_description TEXT, glossary_guid TEXT NOT NULL, qualified_name TEXT NOT NULL, number_terms BIGINT, number_categories INTEGER, num_linked_terms BIGINT, usage TEXT, additional_properties TEXT, owner_guid TEXT, owner_type TEXT, metadata_collection_id TEXT, license_guid TEXT, last_update_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, creation_timestamp TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (glossary_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_glossary IS 'Glossaries catalogued in the Egeria ecosystem';
+COMMENT ON COLUMN open_metadata.om_glossary.last_update_timestamp IS 'This is the last update time from the glossary element.';
+DROP TABLE IF EXISTS open_metadata.om_license;
+CREATE TABLE open_metadata.om_license (license_guid TEXT NOT NULL, license_name TEXT, license_description TEXT, PRIMARY KEY (license_guid));
+DROP TABLE IF EXISTS open_metadata.om_location;
+CREATE TABLE open_metadata.om_location (location_guid TEXT NOT NULL, location_name TEXT, location_type TEXT, PRIMARY KEY (location_guid));
+DROP TABLE IF EXISTS open_metadata.om_metadata_collection;
+CREATE TABLE open_metadata.om_metadata_collection (metadata_collection_id TEXT NOT NULL, metadata_collection_name TEXT, metadata_collection_type CHARACTER VARYING(40), deployed_impl_type TEXT, PRIMARY KEY (metadata_collection_id));
+COMMENT ON COLUMN open_metadata.om_metadata_collection.metadata_collection_type IS 'local cohort vs external source etc - instance provenance type';
+COMMENT ON COLUMN open_metadata.om_metadata_collection.deployed_impl_type IS 'This is the type of system (postgres vs db2 vs atlas)';
+DROP TABLE IF EXISTS open_metadata.om_reference_levels;
+CREATE TABLE open_metadata.om_reference_levels (identifier INTEGER NOT NULL, classification_name TEXT NOT NULL, display_name TEXT, text TEXT, PRIMARY KEY (identifier, classification_name));
+COMMENT ON TABLE open_metadata.om_reference_levels IS 'A table to hold the different reference levels for confidentiality, confidence, criticality etc.';
+DROP TABLE IF EXISTS open_metadata.om_related_assets;
+CREATE TABLE open_metadata.om_related_assets (end1_guid TEXT NOT NULL, end2_guid TEXT NOT NULL, end1_attribute_nm TEXT, end2_attribute_nm TEXT, rel_type_nm TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, relationship_guid TEXT NOT NULL, PRIMARY KEY (relationship_guid, sync_time));
+DROP TABLE IF EXISTS open_metadata.om_role;
+CREATE TABLE open_metadata.om_role (role_guid TEXT NOT NULL, role_name TEXT, role_type TEXT, headcount INTEGER, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (role_guid, sync_time));
+DROP TABLE IF EXISTS open_metadata.om_role2user;
+CREATE TABLE open_metadata.om_role2user (role_guid TEXT NOT NULL, user_guid TEXT NOT NULL, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, start_date TIMESTAMP(6) WITHOUT TIME ZONE, end_date TIMESTAMP(6) WITHOUT TIME ZONE, relationship_guid TEXT NOT NULL, PRIMARY KEY (relationship_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_role2user IS 'Mapping of roles to users';
+DROP TABLE IF EXISTS open_metadata.om_term_activity;
+CREATE TABLE open_metadata.om_term_activity (term_name TEXT, term_guid TEXT NOT NULL, qualified_name TEXT NOT NULL, term_summary TEXT, version_id TEXT, owner_guid TEXT, owner_type TEXT, confidentiality INTEGER, confidence INTEGER, criticality INTEGER, last_feedback_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, creation_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, number_linked_element INTEGER, last_link_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, glossary_guid TEXT NOT NULL, PRIMARY KEY (term_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_term_activity IS 'Term activity - ';
+COMMENT ON COLUMN open_metadata.om_term_activity.last_feedback_timestamp IS 'Time of last feedback update on this term';
+COMMENT ON COLUMN open_metadata.om_term_activity.glossary_guid IS 'This is the owning glossary rather than where the term might show up.';
+DROP TABLE IF EXISTS open_metadata.om_todo;
+CREATE TABLE open_metadata.om_todo (todo_guid TEXT NOT NULL, qualified_name TEXT, display_name TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE, todo_type TEXT, priority INTEGER, due_time TIMESTAMP(6) WITHOUT TIME ZONE, completion_time TIMESTAMP(6) WITHOUT TIME ZONE, status TEXT, todo_source_guid TEXT, todo_source_type TEXT, last_reviewed_time TIMESTAMP(6) WITHOUT TIME ZONE, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, actor_guid TEXT, actor_type TEXT, CONSTRAINT om_todo_pk PRIMARY KEY (sync_time, todo_guid));
+COMMENT ON TABLE open_metadata.om_todo IS 'Represent all todos independent of source. The actual implementation of the todo might be in an external system, in which case we would map their external identifiers to our todos through the om_correlation_properties table.';
+COMMENT ON COLUMN open_metadata.om_todo.actor_guid IS 'The unique identifier of the actor assigned to perform this ToDo. An Actor is either a UserId, Profile, or PersonRole.';
+COMMENT ON COLUMN open_metadata.om_todo.actor_type IS 'Type name of actor';
+DROP TABLE IF EXISTS open_metadata.om_user_identity;
+CREATE TABLE open_metadata.om_user_identity (employee_num CHARACTER VARYING(80), user_id CHARACTER VARYING(80) NOT NULL, preferred_name CHARACTER VARYING(80), org_name CHARACTER VARYING(80), resident_country CHARACTER VARYING(80), location CHARACTER VARYING(80), distinguished_name CHARACTER VARYING(80), user_id_guid CHARACTER VARYING(80) NOT NULL, profile_guid CHARACTER VARYING(80), department_id CHARACTER VARYING(40), sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, PRIMARY KEY (user_id_guid, sync_time));
+COMMENT ON TABLE open_metadata.om_user_identity IS 'registered users';
+DROP TABLE IF EXISTS open_metadata.rd_file_classifiers;
+CREATE TABLE open_metadata.rd_file_classifiers (sr_guid TEXT NOT NULL, filename TEXT NOT NULL, file_extension TEXT, pathname TEXT NOT NULL, file_type TEXT, asset_type TEXT, deployed_implementation_type TEXT, encoding TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, CONSTRAINT rd_file_classifiers_pk PRIMARY KEY (pathname, sync_time));
+
+
+DROP TABLE IF EXISTS surveys.sr_database_measurements;
+CREATE TABLE surveys.sr_database_measurements (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, subject_type TEXT);
+COMMENT ON TABLE surveys.sr_database_measurements IS 'Information about a database and its use.';
+COMMENT ON COLUMN surveys.sr_database_measurements.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS surveys.sr_file_measurements;
+CREATE TABLE surveys.sr_file_measurements (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, file_system TEXT, filename TEXT NOT NULL, pathname TEXT NOT NULL, file_extension TEXT, file_type TEXT, deployed_implementation_type TEXT, encoding TEXT, asset_type_name TEXT, can_read BOOLEAN, can_write BOOLEAN, can_execute BOOLEAN, is_sym_link BOOLEAN, file_creation_time TIMESTAMP(6) WITHOUT TIME ZONE, last_modified_time TIMESTAMP(6) WITHOUT TIME ZONE, last_accessed_time TIMESTAMP(6) WITHOUT TIME ZONE, file_size NUMERIC, record_count NUMERIC, is_hidden BOOLEAN, subject_type TEXT, PRIMARY KEY (sr_guid, creation_time, annotation_guid, pathname));
+COMMENT ON TABLE surveys.sr_file_measurements IS 'Capturing details about a specific file';
+COMMENT ON COLUMN surveys.sr_file_measurements.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS surveys.sr_folder_measurements;
+CREATE TABLE surveys.sr_folder_measurements (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, file_system TEXT, directory_name TEXT NOT NULL, file_count NUMERIC, total_file_size NUMERIC, sub_directory_count NUMERIC, readable_file_count NUMERIC, writeable_file_count NUMERIC, executable_file_count NUMERIC, sym_link_file_count NUMERIC, hidden_file_count NUMERIC, file_name_count NUMERIC, file_extension_count NUMERIC, file_type_count NUMERIC, asset_type_count NUMERIC, deployed_implementation_type_count NUMERIC, unclassified_file_count NUMERIC, inaccessible_file_count NUMERIC, last_file_creation_time TIMESTAMP(6) WITHOUT TIME ZONE, last_file_modification_time TIMESTAMP(6) WITHOUT TIME ZONE, last_file_accessed_time TIMESTAMP(6) WITHOUT TIME ZONE, subject_type TEXT, PRIMARY KEY (sr_guid, annotation_guid, directory_name, creation_time));
+COMMENT ON TABLE surveys.sr_folder_measurements IS 'Measurements describing a directory (file folder).';
+COMMENT ON COLUMN surveys.sr_folder_measurements.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS surveys.sr_missing_file_classifiers;
+CREATE TABLE surveys.sr_missing_file_classifiers (sr_guid TEXT NOT NULL, file_system TEXT, filename TEXT NOT NULL, file_extension TEXT, pathname TEXT NOT NULL, file_type TEXT, asset_type TEXT, deployed_implementation_type TEXT, file_encoding TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, CONSTRAINT rd_file_classifiers_pk PRIMARY KEY (pathname, sync_time));
+COMMENT ON TABLE surveys.sr_missing_file_classifiers IS 'Different kinds of classifiers for files';
+DROP TABLE IF EXISTS surveys.sr_profile_measures;
+CREATE TABLE surveys.sr_profile_measures (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT NOT NULL, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, measurement_name TEXT NOT NULL, measurement_category TEXT NOT NULL, subject_type TEXT, measurement_value NUMERIC, json_properties JSON, PRIMARY KEY (sr_guid, annotation_guid, measurement_category));
+COMMENT ON TABLE surveys.sr_profile_measures IS 'Holds statistics that classify or describe elements within the resource.';
+COMMENT ON COLUMN surveys.sr_profile_measures.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS surveys.sr_report;
+CREATE TABLE surveys.sr_report (metadata_collection_id TEXT NOT NULL, sr_guid TEXT NOT NULL, qualified_name TEXT NOT NULL, asset_guid TEXT NOT NULL, asset_type TEXT NOT NULL, end_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, start_timestamp TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, engine_action_guid TEXT NOT NULL, initiator TEXT, governance_engine_name TEXT, display_name TEXT, description TEXT, purpose TEXT, request_type TEXT, engine_host_user_id TEXT, CONSTRAINT sr_report_pk PRIMARY KEY (sr_guid));
+COMMENT ON TABLE surveys.sr_report IS 'Core information about a survey report';
+COMMENT ON COLUMN surveys.sr_report.sr_guid IS 'Unique identifier of a survey report.';
+COMMENT ON COLUMN surveys.sr_report.initiator IS 'We are assuming that this is the user_id of the requestor.';
+DROP TABLE IF EXISTS surveys.sr_request_for_action;
+CREATE TABLE surveys.sr_request_for_action (metadata_collection_id TEXT NOT NULL, sr_guid TEXT NOT NULL, subject_guid TEXT NOT NULL, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, action_request_name TEXT NOT NULL, action_target_guid TEXT NOT NULL, subject_type TEXT, action_target_type TEXT, CONSTRAINT sr_request_for_action_pk PRIMARY KEY (annotation_guid, sr_guid));
+COMMENT ON TABLE surveys.sr_request_for_action IS 'Describes a request for action annotation generated by a survey report. The result of this annotation will link to triage prioritization and further activity.';
+COMMENT ON COLUMN surveys.sr_request_for_action.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS surveys.sr_resource_measurement;
+CREATE TABLE surveys.sr_resource_measurement (metadata_collection_id TEXT NOT NULL, sr_guid TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, measurement_name TEXT NOT NULL, subject_type TEXT, measurement_category TEXT NOT NULL, measurement_value NUMERIC, measurement_display_value TEXT, resource_creation_time TIMESTAMP(6) WITHOUT TIME ZONE, last_modified_time TIMESTAMP(6) WITHOUT TIME ZONE, resource_size NUMERIC, PRIMARY KEY (sr_guid, annotation_guid, measurement_category, metadata_collection_id));
+COMMENT ON TABLE surveys.sr_resource_measurement IS 'Holds summary statistics about the whole resource surveyed.';
+COMMENT ON COLUMN surveys.sr_resource_measurement.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+COMMENT ON COLUMN surveys.sr_resource_measurement.measurement_display_value IS 'String version of the display value';
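+
+-- Illustrative example query (not part of the schema setup; assumes survey data has been loaded):
+-- SELECT asset_guid, asset_type, max(start_timestamp) AS last_survey
+--   FROM surveys.sr_report
+--  GROUP BY asset_guid, asset_type;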
+
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/build-and-push-proxy.sh b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/build-and-push-proxy.sh
new file mode 100755
index 0000000..4e15d10
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/build-and-push-proxy.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# Copyright 2018-2024 contributors to the OpenLineage project
+# SPDX-License-Identifier: Apache-2.0
+#
+# Usage: $ ./build-and-push-proxy.sh <version>
+
+set -eu
+
+readonly SEMVER_REGEX="^[0-9]+(\.[0-9]+){2}(-rc\.[0-9]+)?$" # X.Y.Z
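+# e.g. versions accepted by the pattern (illustrative): 1.23.0, 1.23.0-rc.1; rejected: 1.23, v1.0.0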
+readonly ORG="openlineage"
+
+# Change working directory to proxy module
+project_root=$(git rev-parse --show-toplevel)
+cd "${project_root}/proxy/backend"
+
+# Version X.Y.Z of proxy image to build
+version="${1}"
+
+# Ensure valid version
+if [[ ! "${version}" =~ ${SEMVER_REGEX} ]]; then
+ echo "Version must match ${SEMVER_REGEX}"
+ exit 1
+fi
+
+echo "Building image (tag: ${version})..."
+
+# Build, tag and push proxy image
+docker build --no-cache --tag "${ORG}/proxy:${version}" .
+docker tag "${ORG}/proxy:${version}" "${ORG}/proxy:latest"
+
+docker push "${ORG}/proxy:${version}"
+docker push "${ORG}/proxy:latest"
+
+echo "DONE!"
+
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/entrypoint.sh b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/entrypoint.sh
new file mode 100755
index 0000000..d1a5c53
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/entrypoint.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright 2018-2024 contributors to the OpenLineage project
+# SPDX-License-Identifier: Apache-2.0
+#
+# Usage: $ ./entrypoint.sh
+
+set -e
+
+if [[ -z "${OPENLINEAGE_PROXY_CONFIG}" ]]; then
+ OPENLINEAGE_PROXY_CONFIG='orig_proxy.yml'
+ echo "WARNING 'OPENLINEAGE_PROXY_CONFIG' not set, using development configuration."
+fi
+
+# Adjust java options for the http server
+JAVA_OPTS="${JAVA_OPTS} -Duser.timezone=UTC -Dlog4j2.formatMsgNoLookups=true"
+
+# Start http server with java options (if any) and configuration
+java ${JAVA_OPTS} -jar openlineage-proxy-*.jar server ${OPENLINEAGE_PROXY_CONFIG}
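+
+# Illustrative override (path assumed from the compose file): point the proxy at a custom configuration:
+#   OPENLINEAGE_PROXY_CONFIG=/usr/src/app/proxy.yml ./entrypoint.sh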
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/init-db.sh b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/init-db.sh
new file mode 100755
index 0000000..7d90122
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/init-db.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# Copyright 2018-2024 contributors to the OpenLineage project
+# SPDX-License-Identifier: Apache-2.0
+#
+# Usage: $ ./init-db.sh
+
+set -eu
+
+psql -v ON_ERROR_STOP=1 --username "${POSTGRES_USER}" > /dev/null <<-EOSQL
+ CREATE USER ${MARQUEZ_USER};
+ ALTER USER ${MARQUEZ_USER} WITH PASSWORD '${MARQUEZ_PASSWORD}';
+ CREATE DATABASE ${MARQUEZ_DB};
+ GRANT ALL PRIVILEGES ON DATABASE ${MARQUEZ_DB} TO ${MARQUEZ_USER};
+EOSQL
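+
+# Illustrative invocation (values assumed; the variables are normally supplied by the postgres container):
+#   POSTGRES_USER=postgres MARQUEZ_USER=marquez MARQUEZ_PASSWORD=marquez MARQUEZ_DB=marquez ./init-db.sh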
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/login.sh b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/login.sh
new file mode 100755
index 0000000..ea14099
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/login.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+# Copyright 2018-2024 contributors to the OpenLineage project
+# SPDX-License-Identifier: Apache-2.0
+#
+# Usage: $ ./login.sh
+
+set -eu
+
+docker login --username "${DOCKER_LOGIN}" --password "${DOCKER_PASSWORD}"
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/wait-for-it.sh b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/wait-for-it.sh
new file mode 100755
index 0000000..87325c6
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/docker/wait-for-it.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+#
+# Copyright 2018-2024 contributors to the OpenLineage project
+# SPDX-License-Identifier: Apache-2.0
+#
+# see: https://github.com/vishnubob/wait-for-it
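+#
+# Illustrative invocation (service name assumed): block until Kafka is reachable, then run a command:
+#   ./wait-for-it.sh kafka:9192 -t 60 -- echo "kafka is up"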
+
+WAITFORIT_cmdname=${0##*/}
+
+echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
+
+usage()
+{
+ cat << USAGE >&2
+Usage:
+ $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]
+ -h HOST | --host=HOST Host or IP under test
+ -p PORT | --port=PORT TCP port under test
+ Alternatively, you specify the host and port as host:port
+ -s | --strict Only execute subcommand if the test succeeds
+ -q | --quiet Don't output any status messages
+ -t TIMEOUT | --timeout=TIMEOUT
+ Timeout in seconds, zero for no timeout
+ -- COMMAND ARGS Execute command with args after the test finishes
+USAGE
+ exit 1
+}
+
+wait_for()
+{
+ if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
+ echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
+ else
+ echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
+ fi
+ WAITFORIT_start_ts=$(date +%s)
+ while :
+ do
+ if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
+ nc -z $WAITFORIT_HOST $WAITFORIT_PORT
+ WAITFORIT_result=$?
+ else
+ (echo > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1
+ WAITFORIT_result=$?
+ fi
+ if [[ $WAITFORIT_result -eq 0 ]]; then
+ WAITFORIT_end_ts=$(date +%s)
+ echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds"
+ break
+ fi
+ sleep 1
+ done
+ return $WAITFORIT_result
+}
+
+wait_for_wrapper()
+{
+ # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
+ if [[ $WAITFORIT_QUIET -eq 1 ]]; then
+ timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT &
+ else
+ timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT &
+ fi
+ WAITFORIT_PID=$!
+ trap "kill -INT -$WAITFORIT_PID" INT
+ wait $WAITFORIT_PID
+ WAITFORIT_RESULT=$?
+ if [[ $WAITFORIT_RESULT -ne 0 ]]; then
+ echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
+ fi
+ return $WAITFORIT_RESULT
+}
+
+# process arguments
+while [[ $# -gt 0 ]]
+do
+ case "$1" in
+ *:* )
+ WAITFORIT_hostport=(${1//:/ })
+ WAITFORIT_HOST=${WAITFORIT_hostport[0]}
+ WAITFORIT_PORT=${WAITFORIT_hostport[1]}
+ shift 1
+ ;;
+ --child)
+ WAITFORIT_CHILD=1
+ shift 1
+ ;;
+ -q | --quiet)
+ WAITFORIT_QUIET=1
+ shift 1
+ ;;
+ -s | --strict)
+ WAITFORIT_STRICT=1
+ shift 1
+ ;;
+ -h)
+ WAITFORIT_HOST="$2"
+ if [[ $WAITFORIT_HOST == "" ]]; then break; fi
+ shift 2
+ ;;
+ --host=*)
+ WAITFORIT_HOST="${1#*=}"
+ shift 1
+ ;;
+ -p)
+ WAITFORIT_PORT="$2"
+ if [[ $WAITFORIT_PORT == "" ]]; then break; fi
+ shift 2
+ ;;
+ --port=*)
+ WAITFORIT_PORT="${1#*=}"
+ shift 1
+ ;;
+ -t)
+ WAITFORIT_TIMEOUT="$2"
+ if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi
+ shift 2
+ ;;
+ --timeout=*)
+ WAITFORIT_TIMEOUT="${1#*=}"
+ shift 1
+ ;;
+ --)
+ shift
+ WAITFORIT_CLI=("$@")
+ break
+ ;;
+ --help)
+ usage
+ ;;
+ *)
+ echoerr "Unknown argument: $1"
+ usage
+ ;;
+ esac
+done
+
+if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then
+ echoerr "Error: you need to provide a host and port to test."
+ usage
+fi
+
+WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-25}
+WAITFORIT_STRICT=${WAITFORIT_STRICT:-0}
+WAITFORIT_CHILD=${WAITFORIT_CHILD:-0}
+WAITFORIT_QUIET=${WAITFORIT_QUIET:-0}
+
+# check to see if timeout is from busybox?
+WAITFORIT_TIMEOUT_PATH=$(type -p timeout)
+WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH)
+if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then
+ WAITFORIT_ISBUSY=1
+ WAITFORIT_BUSYTIMEFLAG="-t"
+
+else
+ WAITFORIT_ISBUSY=0
+ WAITFORIT_BUSYTIMEFLAG=""
+fi
+
+if [[ $WAITFORIT_CHILD -gt 0 ]]; then
+ wait_for
+ WAITFORIT_RESULT=$?
+ exit $WAITFORIT_RESULT
+else
+ if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
+ wait_for_wrapper
+ WAITFORIT_RESULT=$?
+ else
+ wait_for
+ WAITFORIT_RESULT=$?
+ fi
+fi
+
+if [[ $WAITFORIT_CLI != "" ]]; then
+ if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
+ echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
+ exit $WAITFORIT_RESULT
+ fi
+ exec "${WAITFORIT_CLI[@]}"
+else
+ exit $WAITFORIT_RESULT
+fi
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/egeria-platform-jupyter-proxy-pg-compose.yaml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/egeria-platform-jupyter-proxy-pg-compose.yaml
new file mode 100644
index 0000000..731a0be
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/egeria-platform-jupyter-proxy-pg-compose.yaml
@@ -0,0 +1,138 @@
+---
+# SPDX-License-Identifier: Apache-2.0
+# Copyright Contributors to the Egeria project
+
+
+# To run
+# * Ensure Docker is installed and running
+# * Start the Egeria Platform stack from the 'egeria-platform-jupyter-ol-pg-compose' folder using:
+# 'docker compose -f ./egeria-platform-jupyter-proxy-pg-compose.yaml up --build'
+# * Subsequent startups can be performed without the '--build' at the end
+# * Uninstall the containers by issuing 'docker compose -f ./egeria-platform-jupyter-proxy-pg-compose.yaml down'
+#
+# Assumptions:
+# * Ports 9443, 8888, 9192, 9194, 6000, 6001 and 5442 are available on the host system - these are the
+#   ports used below for Egeria, Jupyter, Kafka, the OpenLineage proxy and PostgreSQL.
+# * By default, the jupyter notebooks in the 'coco-jupyter-labs' folder are mounted and available for use within JupyterLab.
+# * By default, strict SSL validation is turned off.
+#
+#
+
+services:
+ jupyter-hub:
+ depends_on:
+ egeria-main:
+ condition: service_healthy
+
+ image: quay.io/jupyter/scipy-notebook
+ container_name: jupyter-work-c
+ ports:
+ - 8888:8888
+ environment:
+ JUPYTER_ENABLE_LAB: "yes"
+ JUPYTER_TOKEN: "egeria"
+ build:
+ dockerfile: Dockerfile-jupyter
+
+ volumes:
+ - ./before-notebook.d:/usr/local/bin/before-notebook.d
+ - ../../work:/home/jovyan/work
+ - ../../workspaces:/home/jovyan/workbooks
+ - ../../exchange/distribution-hub:/home/jovyan/distribution-hub
+ - ../../exchange/loading-bay:/home/jovyan/loading-bay
+ - ../../exchange/landing-area:/home/jovyan/landing-area
+
+
+ kafka:
+ image: 'bitnami/kafka:latest'
+ ports:
+ - '9192:9192'
+ - '9194:9194'
+ environment:
+ - KAFKA_CFG_NODE_ID=0
+ - KAFKA_CFG_PROCESS_ROLES=controller,broker
+ - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka:9193
+ - KAFKA_CFG_LISTENERS=PLAINTEXT://:9192,CONTROLLER://:9193,EXTERNAL://:9194
+ - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9192,EXTERNAL://localhost:9194
+ - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,EXTERNAL:PLAINTEXT,PLAINTEXT:PLAINTEXT
+ - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER
+
+
+ # Proxy service for OpenLineage
+ proxy:
+ build:
+ dockerfile: Dockerfile-proxy
+ container_name: openlineage-proxy-backend
+ depends_on:
+ - kafka
+ ports:
+ - "6000:6000"
+ - "6001:6001"
+ environment:
+# - MARQUEZ_HTTP_ENDPOINT=http://marquez-api:5050/api/v1/lineage
+ - OPENLINEAGE_PROXY_CONFIG=/usr/src/app/proxy.yml
+# - OPENLINEAGE_PROXY_SOURCE=/usr/src/app/proxy.yml
+ volumes:
+ - ./usr/src/app/build/libs:/usr/src/app/build/libs
+ - ./proxy.yml:/usr/src/app/proxy.yml
+
+
+
+ egeria-main:
+ depends_on:
+ - kafka
+ image: 'docker.io/odpi/egeria-platform:latest'
+ ports:
+ - '9443:9443'
+ environment:
+ - XTDB_ENABLE_BYTEUTILS_SHA1=True
+ - XTDB_DISABLE_LIBCRYPTO=True
+ - startup.server.list=active-metadata-store,engine-host,integration-daemon,view-server,simple-metadata-store
+ - server.port=9443
+
+ healthcheck:
+ test: curl -k -X GET "https://localhost:9443/open-metadata/platform-services/users/garygeeke/server-platform/origin" || exit 1
+ interval: 20s
+ timeout: 10s
+ retries: 3
+ start_period: 10s
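+ # The same check can be run from the host to verify the platform (illustrative):
+ #   curl -k "https://localhost:9443/open-metadata/platform-services/users/garygeeke/server-platform/origin"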
+
+
+ volumes:
+ - ../../exchange/landing-area:/deployments/landing-area
+ - ../../exchange/distribution-hub:/deployments/distribution-hub
+ - ../../exchange/distribution-hub/surveys:/deployments/surveys
+ - ../../exchange/distribution-hub/logs:/deployments/logs
+ - ../../runtime-volumes/egeria-platform-data/data:/deployments/data
+ - ../../exchange/loading-bay:/deployments/loading-bay
+ - ../../work:/deployments/work
+ postgres:
+ image: postgres:16.4
+ container_name: postgres_for_egeria
+ restart: always
+ ports:
+ - "127.0.0.1:5442:5442"
+ shm_size: 128mb
+ environment:
+ POSTGRES_USER: postgres
+ POSTGRES_PASSWORD: egeria
+ PGDATA: /var/lib/postgresql/data/pgdata
+ volumes:
+ - ../../runtime-volumes/egeria-pg:/var/lib/postgresql/data/pgdata
+ - ./docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d
+
+#
+# Change external to true and create volumes manually if you wish to persist between runs
+#
+volumes:
+ zookeeper-data:
+ external: false
+ kafka-data:
+ external: false
+ db_home:
+ external: false
+
+#networks:
+# egeria_network:
+## driver: bridge
+# name: egeria_network
+# external: true
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradle.properties b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradle.properties
new file mode 100644
index 0000000..a5beacd
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradle.properties
@@ -0,0 +1 @@
+version=1.23.0-SNAPSHOT
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradle/wrapper/gradle-wrapper.properties b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..19cfad9
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradlew b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradlew
new file mode 100755
index 0000000..744e882
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MSYS* | MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradlew.bat b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradlew.bat
new file mode 100644
index 0000000..107acd3
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/marquez-example.yml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/marquez-example.yml
new file mode 100644
index 0000000..39bbe63
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/marquez-example.yml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: Apache-2.0
+
+### HTTP SERVER CONFIG ###
+
+# Enables HTTP server configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#servers)
+server:
+ applicationConnectors:
+ - type: http
+ port: ${MARQUEZ_PORT:-5050}
+ httpCompliance: RFC7230_LEGACY
+ adminConnectors:
+ - type: http
+ port: ${MARQUEZ_ADMIN_PORT:-5051}
+ # Enables access logs formatted as JSON
+ # requestLog:
+ # appenders:
+ # - type: console
+ # layout:
+ # type: access-json
+
+### DATABASE CONFIG ###
+
+# Enables database configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#database)
+db:
+ driverClass: org.postgresql.Driver
+ url: jdbc:postgresql://${POSTGRES_HOST:-localhost}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
+ user: ${POSTGRES_USER}
+ password: ${POSTGRES_PASSWORD}
+
+# Adjusts retention policy
+# dbRetention:
+ # Apply retention policy at a frequency of every 'X' minutes (default: 15)
+ # frequencyMins: ${DB_RETENTION_FREQUENCY_MINS:-15}
+ # Maximum number of rows deleted per batch (default: 1000)
+ # numberOfRowsPerBatch: ${DB_RETENTION_NUMBER_OF_ROWS_PER_BATCH:-1000}
+ # Maximum retention days (default: 7)
+ # retentionDays: ${DB_RETENTION_DAYS:-7}
+
+# Enables flyway configuration overrides (see: https://flywaydb.org/documentation/configfiles)
+# flyway:
+# connectRetries: 3
+# cleanDisabled: true
+
+# Enables database migration on startup (default: true)
+migrateOnStartup: ${MIGRATE_ON_STARTUP:-true}
+
+# Enables the GraphQL endpoint
+graphql:
+ enabled: ${GRAPHQL_ENABLED:-true}
+
+### LOGGING CONFIG ###
+
+# Enables logging configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#logging)
+logging:
+ # Levels: TRACE, DEBUG, INFO, WARN, ERROR, FATAL, ALL, OFF
+ level: ${LOG_LEVEL:-INFO}
+ appenders:
+ - type: console
+ # Enables app logs formatted as JSON
+ # layout:
+ # type: json
+ # Enables capturing app error logs with sentry (see: https://github.com/dhatim/dropwizard-sentry#configuration)
+ # - type: sentry
+ # threshold: ${SENTRY_THRESHOLD:-ERROR}
+ # dsn: ${SENTRY_DSN}
+ # environment: ${SENTRY_ENVIRONMENT}
+ # stacktraceAppPackages: ['marquez']
+
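+# Enables OpenSearch-backed search (expects the marquez-opensearch service at the host/port below)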
+search:
+ enabled: true
+ scheme: http
+ host: marquez-opensearch
+ port: 9200
+ username: admin
+ password: admin
+
+### TRACING ###
+
+# Enables tracing with sentry (see: https://docs.sentry.io/product/sentry-basics/tracing/distributed-tracing)
+# sentry:
+# environment: ${SENTRY_ENVIRONMENT}
+# tracesSampleRate: ${SENTRY_TRACES_SAMPLE_RATE}
+# dsn: ${SENTRY_DSN}
+# debug: ${SENTRY_DEBUG}
+
+### METRICS CONFIG ###
+
+# Enables metric reporting to the console (see: https://www.dropwizard.io/en/stable/manual/configuration.html#metrics)
+# metrics:
+# frequency: ${METRICS_FREQUENCY:-1 minute}
+# reporters:
+# - type: console
+# timeZone: UTC
+# output: stdout
+
+### CUSTOMIZATION ###
+
+# Fill in with custom tags. Below, we have provided some sample tags to get started.
+tags:
+ - name: PII
+ description: Personally identifiable information
+ - name: SENSITIVE
+ description: Contains sensitive information
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/marquez.yml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/marquez.yml
new file mode 100644
index 0000000..32941e0
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/marquez.yml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: Apache-2.0
+
+### HTTP SERVER CONFIG ###
+
+# Enables HTTP server configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#servers)
+server:
+ applicationConnectors:
+ - type: http
+ port: ${MARQUEZ_PORT:-5050}
+ httpCompliance: RFC7230_LEGACY
+ adminConnectors:
+ - type: http
+ port: ${MARQUEZ_ADMIN_PORT:-5051}
+ # Enables access logs formatted as JSON
+ # requestLog:
+ # appenders:
+ # - type: console
+ # layout:
+ # type: access-json
+
+### DATABASE CONFIG ###
+
+# Enables database configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#database)
+db:
+ driverClass: org.postgresql.Driver
+ url: jdbc:postgresql://${POSTGRES_HOST:-localhost}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
+ user: ${POSTGRES_USER}
+ password: ${POSTGRES_PASSWORD}
+
+# Adjusts retention policy
+# dbRetention:
+ # Apply retention policy at a frequency of every 'X' minutes (default: 15)
+ # frequencyMins: ${DB_RETENTION_FREQUENCY_MINS:-15}
+ # Maximum number of rows deleted per batch (default: 1000)
+ # numberOfRowsPerBatch: ${DB_RETENTION_NUMBER_OF_ROWS_PER_BATCH:-1000}
+ # Maximum retention days (default: 7)
+ # retentionDays: ${DB_RETENTION_DAYS:-7}
+
+# Enables flyway configuration overrides (see: https://flywaydb.org/documentation/configfiles)
+# flyway:
+# connectRetries: 3
+# cleanDisabled: true
+
+# Enables database migration on startup (default: true)
+migrateOnStartup: ${MIGRATE_ON_STARTUP:-true}
+
+# Enables the GraphQL endpoint
+graphql:
+ enabled: ${GRAPHQL_ENABLED:-true}
+
+### LOGGING CONFIG ###
+
+# Enables logging configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#logging)
+logging:
+ # Levels: TRACE, DEBUG, INFO, WARN, ERROR, FATAL, ALL, OFF
+ level: ${LOG_LEVEL:-INFO}
+ appenders:
+ - type: console
+ # Enables app logs formatted as JSON
+ # layout:
+ # type: json
+ # Enables capturing app error logs with sentry (see: https://github.com/dhatim/dropwizard-sentry#configuration)
+ # - type: sentry
+ # threshold: ${SENTRY_THRESHOLD:-ERROR}
+ # dsn: ${SENTRY_DSN}
+ # environment: ${SENTRY_ENVIRONMENT}
+ # stacktraceAppPackages: ['marquez']
+
+#search:
+# enabled: true
+# scheme: http
+# host: marquez-opensearch
+# port: 9200
+# username: admin
+# password: admin
+
+### TRACING ###
+
+# Enables tracing with sentry (see: https://docs.sentry.io/product/sentry-basics/tracing/distributed-tracing)
+# sentry:
+# environment: ${SENTRY_ENVIRONMENT}
+# tracesSampleRate: ${SENTRY_TRACES_SAMPLE_RATE}
+# dsn: ${SENTRY_DSN}
+# debug: ${SENTRY_DEBUG}
+
+### METRICS CONFIG ###
+
+# Enables metric reporting to the console (see: https://www.dropwizard.io/en/stable/manual/configuration.html#metrics)
+# metrics:
+# frequency: ${METRICS_FREQUENCY:-1 minute}
+# reporters:
+# - type: console
+# timeZone: UTC
+# output: stdout
+
+### CUSTOMIZATION ###
+
+# Fill in with custom tags. Below, we have provided some sample tags to get started.
+tags:
+ - name: PII
+ description: Personally identifiable information
+ - name: SENSITIVE
+ description: Contains sensitive information
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/orig_proxy.yml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/orig_proxy.yml
new file mode 100644
index 0000000..83cb1cc
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/orig_proxy.yml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0.
+
+### HTTP SERVER CONFIG ###
+
+# Enables HTTP server configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#servers)
+server:
+ applicationConnectors:
+ - type: http
+ port: ${OPENLINEAGE_PROXY_PORT:-6000}
+ adminConnectors:
+ - type: http
+ port: ${OPENLINEAGE_PROXY_ADMIN_PORT:-6001}
+# Enables access logs formatted as JSON
+# requestLog:
+# appenders:
+# - type: console
+# layout:
+# type: access-json
+
+### LOGGING CONFIG ###
+
+logging:
+ level: ${LOG_LEVEL:-INFO}
+ loggers:
+ "io.dropwizard": INFO
+ appenders:
+ - type: console
+
+
+# Enables logging configuration overrides (see: https://www.dropwizard.io/en/stable/manual/configuration.html#logging)
+# logging:
+# Levels: TRACE, DEBUG, INFO, WARN, ERROR, FATAL, ALL, OFF
+# level: ${LOG_LEVEL:-INFO}
+# appenders:
+# - type: console
+# Enables app logs formatted as JSON
+# layout:
+# type: json
+
+### PROXY CONFIG ###
+
+proxy:
+ streams:
+ - type: Http
+ url: http://marquez-api:5050/api/v1/lineage
+ # The proxy source is used to identify the lineage source in the different distribution mechanisms. It can be used in
+ # further routing of the lineage events without needing to unpack the event contents. In this example, it identifies
+ # that the lineage event has gone through the proxy backend. However, if a different proxy backend is serving each engine
+ # that is producing lineage events, this value can be set to represent the source engine.
+ # source: ${OPENLINEAGE_PROXY_SOURCE:-openLineageProxyBackend}
+# streams:
+# - type: Console
+# Enables proxying OpenLineage events to a Kafka topic
+# - type: Kafka
+# Kafka topic (note: avoid mixing underscores ("_") and periods (".") as delimiters in the topic name - use one or the other.)
+# topicName: openlineage.topic
+# Kafka broker location
+# bootstrapServerUrl: localhost:9092
+# Kafka properties (see: http://kafka.apache.org/0100/documentation.html#producerconfigs)
+# properties:
+# acks: all
+# retries : 1
+# batch.size : 16384
+# linger.ms : 0
+# buffer.memory : 33554432
+# max.request.size : 10485760
+# key.serializer : org.apache.kafka.common.serialization.StringSerializer
+# value.serializer : org.apache.kafka.common.serialization.StringSerializer
+# bring.up.retries : 10
+# bring.up.minSleepTime : 5000
+# Enables proxying OpenLineage events to an HTTP backend
+# apiKey: abcdefghijklmnopqrstuvwxyz (optional)
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/pmd-openlineage.xml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/pmd-openlineage.xml
new file mode 100644
index 0000000..6c8f348
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/pmd-openlineage.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ruleset name="pmd-openlineage"
+         xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 https://pmd.sourceforge.io/ruleset_2_0_0.xsd">
+
+    <description>Rules for Debugging OpenLineage</description>
+
+    <!-- rule references omitted -->
+
+</ruleset>
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy.dev.yml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy.dev.yml
new file mode 100644
index 0000000..b1c7a1f
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy.dev.yml
@@ -0,0 +1,52 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+server:
+ applicationConnectors:
+ - type: http
+ port: ${OPENLINEAGE_PROXY_PORT:-5000}
+ adminConnectors:
+ - type: http
+ port: ${OPENLINEAGE_PROXY_ADMIN_PORT:-5001}
+
+logging:
+ level: ${LOG_LEVEL:-INFO}
+ appenders:
+ - type: console
+
+proxy:
+ source: openLineageProxyBackend
+ streams:
+ - type: Console
+
+##### Please read the message below #####
+# If you are running via the docker-compose file, comment out the configuration above and uncomment the configuration below.
+
+# server:
+# applicationConnectors:
+# - type: http
+# port: ${OPENLINEAGE_PROXY_PORT:-6000}
+# adminConnectors:
+# - type: http
+# port: ${OPENLINEAGE_PROXY_ADMIN_PORT:-6001}
+
+# logging:
+# level: ${LOG_LEVEL:-INFO}
+# loggers:
+# "io.dropwizard": INFO
+# appenders:
+# - type: console
+
+# proxy:
+# streams:
+# - type: Http
+# url: http://marquez-api:5000/api/v1/lineage
\ No newline at end of file
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy.yml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy.yml
new file mode 100644
index 0000000..feb08f2
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy.yml
@@ -0,0 +1,38 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+server:
+ applicationConnectors:
+ - type: http
+ port: 6000 #5000
+ adminConnectors:
+ - type: http
+ port: 6001 #5001
+
+logging:
+ level: ${LOG_LEVEL:-DEBUG}
+ appenders:
+ - type: console
+
+proxy:
+ source: openLineageProxyBackend
+ streams:
+ - type: Kafka
+ topicName: openlineage.events
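+#      messageKey: lineage-key  # optional example value; when unset, events are sent to a random partition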
+# bootstrapServerUrl: broker:29092
+ bootstrapServerUrl: host.docker.internal:9192
+ # Kafka properties (see: http://kafka.apache.org/0100/documentation.html#producerconfigs)
+ properties:
+ acks: all
+ retries: 3
+ key.serializer: org.apache.kafka.common.serialization.StringSerializer
+ value.serializer: org.apache.kafka.common.serialization.StringSerializer
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy2.yml b/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy2.yml
new file mode 100644
index 0000000..b0e6aa1
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/proxy2.yml
@@ -0,0 +1,45 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+server:
+ applicationConnectors:
+ - type: http
+ port: ${OPENLINEAGE_PROXY_PORT:-6000}
+ adminConnectors:
+ - type: http
+ port: ${OPENLINEAGE_PROXY_ADMIN_PORT:-6001}
+# Enables access logs formatted as JSON
+# requestLog:
+# appenders:
+# - type: console
+# layout:
+# type: access-json
+
+### LOGGING CONFIG ###
+
+logging:
+ level: ${LOG_LEVEL:-DEBUG}
+ appenders:
+ - type: console
+
+proxy:
+ source: openLineageProxyBackend
+ streams:
+ - type: Kafka
+ topicName: openlineage.events
+ bootstrapServerUrl: localhost:9092
+ # Kafka properties (see: http://kafka.apache.org/0100/documentation.html#producerconfigs)
+ properties:
+ acks: all
+ retries: 3
+ key.serializer: org.apache.kafka.common.serialization.StringSerializer
+ value.serializer: org.apache.kafka.common.serialization.StringSerializer
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/settings.gradle b/compose-configs/egeria-platform-jupyter-ol-pg-compose/settings.gradle
new file mode 100644
index 0000000..7e1c4f4
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/settings.gradle
@@ -0,0 +1,5 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+rootProject.name = 'openlineage-proxy'
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyApp.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyApp.java
new file mode 100644
index 0000000..d56a6dd
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyApp.java
@@ -0,0 +1,73 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy;
+
+import com.fasterxml.jackson.databind.SerializationFeature;
+import io.dropwizard.Application;
+import io.dropwizard.configuration.EnvironmentVariableSubstitutor;
+import io.dropwizard.configuration.SubstitutingSourceProvider;
+import io.dropwizard.setup.Bootstrap;
+import io.dropwizard.setup.Environment;
+import io.openlineage.proxy.api.ProxyResource;
+import io.openlineage.proxy.service.ProxyService;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+
+/** ProxyApp is the main class of the Proxy Backend. */
+@Slf4j
+public final class ProxyApp extends Application<ProxyConfig> {
+ private static final String APP_NAME = "OpenLineageProxyBackend";
+ private static final boolean ERROR_ON_UNDEFINED = false;
+
+ /**
+ * The main function receives the config file which is used in the initialization of the proxy
+ * backend.
+ *
+ * @param args commandline arguments
+ * @throws Exception issues with initialization
+ */
+ public static void main(final String[] args) throws Exception {
+ new ProxyApp().run(args);
+ }
+
+ /**
+ * Standard Dropwizard function to return the fixed name of the application at the endpoint.
+ *
+ * @return name of this application
+ */
+ @Override
+ public String getName() {
+ return APP_NAME;
+ }
+
+ /**
+ * Initialize the application.
+ *
+ * @param bootstrap combination of the yml file and environment variables
+ */
+ @Override
+ public void initialize(@NonNull Bootstrap<ProxyConfig> bootstrap) {
+ // Enable variable substitution with environment variables.
+ bootstrap.setConfigurationSourceProvider(
+ new SubstitutingSourceProvider(
+ bootstrap.getConfigurationSourceProvider(),
+ new EnvironmentVariableSubstitutor(ERROR_ON_UNDEFINED)));
+
+ bootstrap.getObjectMapper().disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
+ }
+
+ /**
+ * Called from main.
+ *
+ * @param config yml file
+ * @param env runtime platform environment
+ */
+ @Override
+ public void run(@NonNull ProxyConfig config, @NonNull Environment env) {
+ log.debug("Registering resources...");
+ env.jersey().register(new ProxyResource(new ProxyService(config)));
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyAppException.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyAppException.java
new file mode 100644
index 0000000..f012c8b
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyAppException.java
@@ -0,0 +1,26 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy;
+
+import javax.annotation.Nullable;
+import lombok.NoArgsConstructor;
+
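+/** ProxyAppException is the checked exception type raised for proxy backend failures. */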
+@NoArgsConstructor
+public class ProxyAppException extends Exception {
+ private static final long serialVersionUID = 1L;
+
+ public ProxyAppException(@Nullable final String message) {
+ super(message);
+ }
+
+ public ProxyAppException(@Nullable final Throwable cause) {
+ super(cause);
+ }
+
+ public ProxyAppException(@Nullable final String message, @Nullable final Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyConfig.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyConfig.java
new file mode 100644
index 0000000..3aa474b
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyConfig.java
@@ -0,0 +1,19 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import io.dropwizard.Configuration;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+
+/** ProxyConfig defines the structure of the configuration file proxy.yml */
+@NoArgsConstructor
+public final class ProxyConfig extends Configuration {
+ @Getter
+ @JsonProperty("proxy")
+ private final ProxyStreamFactory proxyStreamFactory = new ProxyStreamFactory();
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyStreamConfig.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyStreamConfig.java
new file mode 100644
index 0000000..dcb5863
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyStreamConfig.java
@@ -0,0 +1,20 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy;
+
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import io.openlineage.proxy.api.models.ConsoleConfig;
+import io.openlineage.proxy.api.models.HttpConfig;
+import io.openlineage.proxy.api.models.KafkaConfig;
+
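+/**
+ * ProxyStreamConfig is the polymorphic base type for entries under {@code proxy.streams}; Jackson
+ * selects the concrete config class from the {@code type} property.
+ */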
+@JsonSubTypes({
+ @JsonSubTypes.Type(value = ConsoleConfig.class, name = "Console"),
+ @JsonSubTypes.Type(value = KafkaConfig.class, name = "Kafka"),
+ @JsonSubTypes.Type(value = HttpConfig.class, name = "Http")
+})
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type")
+public interface ProxyStreamConfig {}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyStreamFactory.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyStreamFactory.java
new file mode 100644
index 0000000..b33a623
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/ProxyStreamFactory.java
@@ -0,0 +1,60 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import io.openlineage.proxy.api.models.ConsoleConfig;
+import io.openlineage.proxy.api.models.ConsoleLineageStream;
+import io.openlineage.proxy.api.models.HttpConfig;
+import io.openlineage.proxy.api.models.HttpLineageStream;
+import io.openlineage.proxy.api.models.KafkaConfig;
+import io.openlineage.proxy.api.models.KafkaLineageStream;
+import io.openlineage.proxy.api.models.LineageStream;
+import java.util.List;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * A factory for creating {@link LineageStream} instances. A {@code LineageStream} must define a
+ * {@link ProxyStreamConfig} defining the set of parameters needed to construct a new {@code
+ * LineageStream} instance. For example, {@link KafkaConfig} defines the parameters for constructing
+ * a new {@link KafkaLineageStream} instance when invoking {@link ProxyStreamFactory#build()}.
+ * Below, we define a list of supported {@code LineageStream}s. Note, when defining your own {@code
+ * ProxyStreamConfig}, the {@code type} parameter must be specified.
+ *
+ * <ul>
+ *   <li>A default {@link ConsoleLineageStream} stream
+ *   <li>A {@link KafkaLineageStream} stream
+ * </ul>
+ */
+@Slf4j
+public final class ProxyStreamFactory {
+ private static final String DEFAULT_PROXY_LINEAGE_SOURCE = "openLineageProxyBackend";
+ private static final List<ProxyStreamConfig> DEFAULT_STREAMS =
+ Lists.newArrayList(new ConsoleConfig());
+
+ @Getter @Setter private String source = DEFAULT_PROXY_LINEAGE_SOURCE;
+ @Getter @Setter private List<ProxyStreamConfig> streams = DEFAULT_STREAMS;
+
+ public ImmutableSet<LineageStream> build() {
+ final ImmutableSet.Builder<LineageStream> lineageStreams = ImmutableSet.builder();
+ for (final ProxyStreamConfig config : streams) {
+ if (config instanceof ConsoleConfig) {
+ lineageStreams.add(new ConsoleLineageStream());
+ } else if (config instanceof KafkaConfig) {
+ final KafkaConfig kafkaConfig = (KafkaConfig) config;
+ kafkaConfig.getProperties().put("bootstrap.servers", kafkaConfig.getBootstrapServerUrl());
+ lineageStreams.add(new KafkaLineageStream(kafkaConfig));
+ } else if (config instanceof HttpConfig) {
+ final HttpConfig httpConfig = (HttpConfig) config;
+ lineageStreams.add(new HttpLineageStream(httpConfig));
+ }
+ }
+ return lineageStreams.build();
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/ProxyResource.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/ProxyResource.java
new file mode 100644
index 0000000..6958a5f
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/ProxyResource.java
@@ -0,0 +1,46 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api;
+
+import static javax.ws.rs.core.MediaType.APPLICATION_JSON;
+
+import io.openlineage.proxy.service.ProxyService;
+import javax.validation.Valid;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.container.AsyncResponse;
+import javax.ws.rs.container.Suspended;
+import javax.ws.rs.core.Response;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+
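+/**
+ * ProxyResource exposes the OpenLineage endpoint (POST /api/v1/lineage) and hands each received
+ * event to the {@link ProxyService} for forwarding.
+ */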
+@Slf4j
+@Path("/api/v1/lineage")
+public class ProxyResource {
+ private final ProxyService service;
+
+ public ProxyResource(@NonNull final ProxyService service) {
+ this.service = service;
+ }
+
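+ /**
+ * Receives an OpenLineage event and forwards it asynchronously to every configured stream,
+ * resuming the suspended response with 200 on success or 500 if forwarding fails.
+ *
+ * @param eventAsString the raw OpenLineage event as a JSON string
+ * @param asyncResponse the suspended JAX-RS response resumed once proxying completes
+ */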
+ @POST
+ @Consumes(APPLICATION_JSON)
+ public void proxyEvent(
+ @Valid String eventAsString, @Suspended final AsyncResponse asyncResponse) {
+ service
+ .proxyEventAsync(eventAsString)
+ .whenComplete(
+ (result, err) -> {
+ if (err != null) {
+ log.error("Failed to proxy OpenLineage event!", err);
+ asyncResponse.resume(Response.status(500).build());
+ } else {
+ asyncResponse.resume(Response.status(200).build());
+ }
+ });
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/ConsoleConfig.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/ConsoleConfig.java
new file mode 100644
index 0000000..8e19832
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/ConsoleConfig.java
@@ -0,0 +1,12 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import io.openlineage.proxy.ProxyStreamConfig;
+import lombok.ToString;
+
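+/** ConsoleConfig is the (empty) configuration for the {@code Console} stream type. */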
+@ToString
+public final class ConsoleConfig implements ProxyStreamConfig {}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/ConsoleLineageStream.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/ConsoleLineageStream.java
new file mode 100644
index 0000000..a490c93
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/ConsoleLineageStream.java
@@ -0,0 +1,37 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+
+/** ConsoleLineageStream pushes events to stdout */
+@Slf4j
+public class ConsoleLineageStream extends LineageStream {
+ public ConsoleLineageStream() {
+ super(Type.CONSOLE);
+ }
+
+ @Override
+ public void collect(@NonNull String eventAsString) {
+ eventAsString = eventAsString.trim();
+ if (eventAsString.startsWith("{") && eventAsString.endsWith("}")) {
+ // assume the payload is JSON, and perform JSON formatting.
+ ObjectMapper mapper = new ObjectMapper();
+ try {
+ Object jsonObject = mapper.readValue(eventAsString, Object.class);
+ String prettyJson = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonObject);
+ log.info(prettyJson);
+ } catch (JsonProcessingException jpe) {
+ log.info(eventAsString);
+ }
+ } else {
+ log.info(eventAsString);
+ }
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/HttpConfig.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/HttpConfig.java
new file mode 100644
index 0000000..6b7d60f
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/HttpConfig.java
@@ -0,0 +1,19 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import io.openlineage.proxy.ProxyStreamConfig;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+import lombok.ToString;
+
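+/** HttpConfig carries the {@code url} and optional {@code apiKey} for the {@code Http} stream type. */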
+@NoArgsConstructor
+@ToString
+public final class HttpConfig implements ProxyStreamConfig {
+ @Getter @Setter private String url;
+ @Getter @Setter private String apiKey;
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/HttpLineageStream.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/HttpLineageStream.java
new file mode 100644
index 0000000..8c4d786
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/HttpLineageStream.java
@@ -0,0 +1,51 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.client.Client;
+import javax.ws.rs.client.ClientBuilder;
+import javax.ws.rs.client.Entity;
+import javax.ws.rs.client.Invocation;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+
+/** HttpLineageStream pushes events to an HTTP endpoint */
+@Slf4j
+public class HttpLineageStream extends LineageStream {
+
+ private final String url;
+ private final String apiKey;
+ private Invocation.Builder invocation;
+
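+ /**
+ * Builds a reusable JAX-RS invocation for the configured URL; when an apiKey is present, it is
+ * attached to every forwarded request as a bearer token.
+ *
+ * @param httpConfig the Http stream configuration (url and optional apiKey)
+ */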
+ public HttpLineageStream(@NonNull final HttpConfig httpConfig) {
+ super(Type.HTTP);
+ this.url = httpConfig.getUrl();
+ this.apiKey = httpConfig.getApiKey();
+ ClientBuilder cb = ClientBuilder.newBuilder();
+ Client client = cb.build();
+ this.invocation = client.target(this.url).request(MediaType.APPLICATION_JSON);
+ if (this.apiKey != null && this.apiKey.trim().length() > 0) {
+ this.invocation = this.invocation.header(HttpHeaders.AUTHORIZATION, "Bearer " + this.apiKey);
+ }
+ }
+
+ @Override
+ public void collect(@NonNull String eventAsString) {
+ eventAsString = eventAsString.trim();
+ try {
+ Response response = this.invocation.post(Entity.json(eventAsString));
+ int status = response.getStatus();
+ log.debug("Received lineage event: {} \n response code: {}", eventAsString, status);
+ } catch (WebApplicationException ex) {
+ log.error("Exception occurred during HttpLineageStream's collect() call.");
+ log.error(ex.getMessage());
+ }
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/KafkaConfig.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/KafkaConfig.java
new file mode 100644
index 0000000..5ab3628
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/KafkaConfig.java
@@ -0,0 +1,22 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import io.openlineage.proxy.ProxyStreamConfig;
+import java.util.Properties;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+import lombok.ToString;
+
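+/** KafkaConfig carries the topic, optional message key, broker URL, and producer properties for the {@code Kafka} stream type. */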
+@NoArgsConstructor
+@ToString
+public final class KafkaConfig implements ProxyStreamConfig {
+ @Getter @Setter private String topicName;
+ @Getter @Setter private String messageKey;
+ @Getter @Setter private String bootstrapServerUrl;
+ @Getter @Setter private Properties properties;
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/KafkaLineageStream.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/KafkaLineageStream.java
new file mode 100644
index 0000000..9b36af8
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/KafkaLineageStream.java
@@ -0,0 +1,42 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+
+/**
+ * KafkaLineageStream is responsible for sending OpenLineage events to Kafka. The collect() method
+ * is called each time an OpenLineage event is emitted by the data platform.
+ */
+@Slf4j
+public class KafkaLineageStream extends LineageStream {
+ private final String topicName;
+ private final String messageKey;
+ private final KafkaProducer<String, String> producer;
+
+ public KafkaLineageStream(@NonNull final KafkaConfig kafkaConfig) {
+ super(Type.KAFKA);
+ this.topicName = kafkaConfig.getTopicName();
+ this.messageKey = kafkaConfig.getMessageKey();
+ this.producer = new KafkaProducer<>(kafkaConfig.getProperties());
+ }
+
+ @Override
+ public void collect(@NonNull String eventAsString) {
+ log.debug("Received lineage event: {}", eventAsString);
+ // if messageKey is not set, then the event will be sent to a random partition
+ final ProducerRecord<String, String> record =
+ new ProducerRecord<>(topicName, messageKey, eventAsString);
+ try {
+ producer.send(record);
+ } catch (Exception e) {
+ log.error("Failed to collect lineage event: {}", eventAsString, e);
+ }
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/LineageStream.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/LineageStream.java
new file mode 100644
index 0000000..778cf28
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/api/models/LineageStream.java
@@ -0,0 +1,43 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.api.models;
+
+import lombok.NonNull;
+
+/**
+ * LineageStream provides the generic implementation of the backend destinations supported by the
+ * proxy backend.
+ */
+public abstract class LineageStream {
+ /**
+ * The Type enum (together with the JsonSubTypes declared on ProxyStreamConfig) is extended for
+ * each new type of destination that the proxy backend supports. There is a subtype class for
+ * each of these destination types.
+ */
+ enum Type {
+ CONSOLE,
+ HTTP,
+ KAFKA
+ }
+
+ private final Type type; // NOPMD
+ /**
+ * The constructor records the destination type for logging purposes.
+ *
+ * @param type type of destination implemented by the subtype.
+ */
+ LineageStream(@NonNull final Type type) {
+ this.type = type;
+ }
+
+ /**
+ * This is the method that is called when a new lineage event is emitted from the data platform.
+ * The specific destination class implements this method with the logic to send the event to its
+ * supported destination.
+ *
+ * @param eventAsString the OpenLineage event as a {@code String} value
+ */
+ public abstract void collect(String eventAsString);
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/service/ProxyService.java b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/service/ProxyService.java
new file mode 100644
index 0000000..f352ca6
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/java/io/openlineage/proxy/service/ProxyService.java
@@ -0,0 +1,44 @@
+/*
+/* Copyright 2018-2024 contributors to the OpenLineage project
+/* SPDX-License-Identifier: Apache-2.0
+*/
+
+package io.openlineage.proxy.service;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import io.openlineage.proxy.ProxyConfig;
+import io.openlineage.proxy.api.models.LineageStream;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+
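+/**
+ * ProxyService fans each incoming lineage event out to every configured {@link LineageStream},
+ * running the collect() calls asynchronously.
+ */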
+@Slf4j
+public final class ProxyService {
+ private final ImmutableSet<LineageStream> lineageStreams;
+
+ /**
+ * Constructor reviews the content of the config file and sets up the appropriate lineage streams.
+ *
+ * @param config configuration properties supplied to the application
+ */
+ public ProxyService(@NonNull final ProxyConfig config) {
+ this.lineageStreams = config.getProxyStreamFactory().build();
+ }
+
+ /**
+ * Processes an incoming event by sending it to all configured lineage streams.
+ *
+ * @param eventAsString incoming event
+ * @return completion future
+ */
+ public CompletableFuture<Void> proxyEventAsync(@NonNull String eventAsString) {
+ final List<CompletableFuture<Void>> collectionFutures = Lists.newArrayList();
+ lineageStreams.forEach(
+ lineageStream ->
+ collectionFutures.add(
+ CompletableFuture.runAsync(() -> lineageStream.collect(eventAsString))));
+ return CompletableFuture.allOf(collectionFutures.toArray(CompletableFuture[]::new));
+ }
+}
diff --git a/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/resources/banner.txt b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/resources/banner.txt
new file mode 100644
index 0000000..8da89db
--- /dev/null
+++ b/compose-configs/egeria-platform-jupyter-ol-pg-compose/src/main/resources/banner.txt
@@ -0,0 +1,8 @@
+SPDX-License-Identifier: Apache-2.0
+
+ ____ __ _ ____
+ / __ \____ ___ ____ / / (_)___ ___ ____ _____ ____ / ___ \___ ___ _ _ _ _
+ / / / / __ \/ _ \/ __ \/ / / / __ \/ _ \/ __ `/ __ `/ _ \ / /__/ / __\/ _ \/ |/ | \/ /
+/ /_/ / /_/ / __/ / / / /___/ / / / / __/ /_/ / /_/ / __// .____/ / / // /| / \ /
+\____/ .___/\___/_/ /_/_____/_/_/ /_/\___/\__,_/\__, /\___//_/ /_/ \___//_/\_\ / /
+ /_/ /____/ /_/
diff --git a/compose-configs/egeria-platform-postgres-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql b/compose-configs/egeria-platform-postgres-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql
index 9e01408..07eefc3 100644
--- a/compose-configs/egeria-platform-postgres-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql
+++ b/compose-configs/egeria-platform-postgres-compose/docker-entrypoint-initdb.d/init_egeria_observations.sql
@@ -8,7 +8,9 @@ grant all privileges on database egeria_observations to egeria_admin, egeria_use
\c egeria_observations;
create schema open_metadata;
create schema audit_log;
-grant all on schema open_metadata, audit_log to egeria_admin, egeria_user;
+create schema surveys;
+grant all on schema open_metadata, audit_log, surveys to egeria_admin, egeria_user;
+
DROP TABLE IF EXISTS audit_log.al_api_calls;
CREATE TABLE audit_log.al_api_calls (thread_id BIGINT NOT NULL, server_name TEXT NOT NULL, user_name TEXT NOT NULL, operation_name TEXT NOT NULL, service_name TEXT NOT NULL, call_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL);
@@ -24,7 +26,7 @@ CREATE TABLE audit_log.al_asset_activity (thread_id BIGINT NOT NULL, server_name
COMMENT ON TABLE audit_log.al_asset_activity IS 'User activity around an om_asset';
COMMENT ON COLUMN audit_log.al_asset_activity.thread_id IS 'Thread where the request ran';
COMMENT ON COLUMN audit_log.al_asset_activity.server_name IS 'Name of the called server';
-COMMENT ON COLUMN audit_log.al_asset_activity.call_time IS 'Time thatthe request was made';
+COMMENT ON COLUMN audit_log.al_asset_activity.call_time IS 'Time that the request was made';
COMMENT ON COLUMN audit_log.al_asset_activity.asset_operation IS 'Create, Update, Delete, Attachment, Feedback';
COMMENT ON COLUMN audit_log.al_asset_activity.asset_guid IS 'Unique identifier of the om_asset';
COMMENT ON COLUMN audit_log.al_asset_activity.asset_type IS 'Type of the om_asset';
@@ -123,4 +125,38 @@ DROP TABLE IF EXISTS open_metadata.rd_file_classifiers;
CREATE TABLE open_metadata.rd_file_classifiers (sr_guid TEXT NOT NULL, filename TEXT NOT NULL, file_extension TEXT, pathname TEXT NOT NULL, file_type TEXT, asset_type TEXT, deployed_implementation_type TEXT, encoding TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, CONSTRAINT rd_file_classifiers_pk PRIMARY KEY (pathname, sync_time));
+DROP TABLE IF EXISTS sr_database_measurements;
+CREATE TABLE sr_database_measurements (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, subject_type TEXT);
+COMMENT ON TABLE sr_database_measurements IS 'Information about a database and its use.';
+COMMENT ON COLUMN sr_database_measurements.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS sr_file_measurements;
+CREATE TABLE sr_file_measurements (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, file_system TEXT, filename TEXT NOT NULL, pathname TEXT NOT NULL, file_extension TEXT, file_type TEXT, deployed_implementation_type TEXT, encoding TEXT, asset_type_name TEXT, can_read BOOLEAN, can_write BOOLEAN, can_execute BOOLEAN, is_sym_link BOOLEAN, file_creation_time TIMESTAMP(6) WITHOUT TIME ZONE, last_modified_time TIMESTAMP(6) WITHOUT TIME ZONE, last_accessed_time TIMESTAMP(6) WITHOUT TIME ZONE, file_size NUMERIC, record_count NUMERIC, is_hidden BOOLEAN, subject_type TEXT, PRIMARY KEY (sr_guid, creation_time, annotation_guid, pathname));
+COMMENT ON TABLE sr_file_measurements IS 'Capturing details about a specific file';
+COMMENT ON COLUMN sr_file_measurements.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS sr_folder_measurements;
+CREATE TABLE sr_folder_measurements (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, file_system TEXT, directory_name TEXT NOT NULL, file_count NUMERIC, total_file_size NUMERIC, sub_directory_count NUMERIC, readable_file_count NUMERIC, writeable_file_count NUMERIC, executable_file_count NUMERIC, sym_link_file_count NUMERIC, hidden_file_count NUMERIC, file_name_count NUMERIC, file_extension_count NUMERIC, file_type_count NUMERIC, asset_type_count NUMERIC, deployed_implementation_type_count NUMERIC, unclassified_file_count NUMERIC, inaccessible_file_count NUMERIC, last_file_creation_time TIMESTAMP(6) WITHOUT TIME ZONE, last_file_modification_time TIMESTAMP(6) WITHOUT TIME ZONE, last_file_accessed_time TIMESTAMP(6) WITHOUT TIME ZONE, subject_type TEXT, PRIMARY KEY (sr_guid, annotation_guid, directory_name, creation_time));
+COMMENT ON TABLE sr_folder_measurements IS 'Measurements describing a directory (file folder).';
+COMMENT ON COLUMN sr_folder_measurements.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS sr_missing_file_classifiers;
+CREATE TABLE sr_missing_file_classifiers (sr_guid TEXT NOT NULL, file_system TEXT, filename TEXT NOT NULL, file_extension TEXT, pathname TEXT NOT NULL, file_type TEXT, asset_type TEXT, deployed_implementation_type TEXT, file_encoding TEXT, sync_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, CONSTRAINT sr_missing_file_classifiers_pk PRIMARY KEY (pathname, sync_time));
+COMMENT ON TABLE sr_missing_file_classifiers IS 'Different kinds of classifiers for files';
+DROP TABLE IF EXISTS sr_profile_measures;
+CREATE TABLE sr_profile_measures (sr_guid TEXT NOT NULL, metadata_collection_id TEXT NOT NULL, subject_guid TEXT NOT NULL, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, measurement_name TEXT NOT NULL, measurement_category TEXT NOT NULL, subject_type TEXT, measurement_value NUMERIC, json_properties JSON, PRIMARY KEY (sr_guid, annotation_guid, measurement_category));
+COMMENT ON TABLE sr_profile_measures IS 'Holds statistics that classify or describe elements within the resource.';
+COMMENT ON COLUMN sr_profile_measures.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS sr_report;
+CREATE TABLE sr_report (metadata_collection_id TEXT NOT NULL, sr_guid TEXT NOT NULL, qualified_name TEXT NOT NULL, asset_guid TEXT NOT NULL, asset_type TEXT NOT NULL, end_timestamp TIMESTAMP(6) WITHOUT TIME ZONE, start_timestamp TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, engine_action_guid TEXT NOT NULL, initiator TEXT, governance_engine_name TEXT, display_name TEXT, description TEXT, purpose TEXT, request_type TEXT, engine_host_user_id TEXT, CONSTRAINT sr_report_pk PRIMARY KEY (sr_guid));
+COMMENT ON TABLE sr_report IS 'Core information about a survey report';
+COMMENT ON COLUMN sr_report.sr_guid IS 'Unique identifier of a survey report.';
+COMMENT ON COLUMN sr_report.initiator IS 'We are assuming that this is the user_id of the requestor.';
+DROP TABLE IF EXISTS sr_request_for_action;
+CREATE TABLE sr_request_for_action (metadata_collection_id TEXT NOT NULL, sr_guid TEXT NOT NULL, subject_guid TEXT NOT NULL, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, action_request_name TEXT NOT NULL, action_target_guid TEXT NOT NULL, subject_type TEXT, action_target_type TEXT, CONSTRAINT sr_request_for_action_pk PRIMARY KEY (annotation_guid, sr_guid));
+COMMENT ON TABLE sr_request_for_action IS 'Describes a request for action annotation generated by a survey report. The result of this annotation will link to triage prioritization and further activity.';
+COMMENT ON COLUMN sr_request_for_action.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+DROP TABLE IF EXISTS sr_resource_measurement;
+CREATE TABLE sr_resource_measurement (metadata_collection_id TEXT NOT NULL, sr_guid TEXT NOT NULL, subject_guid TEXT, creation_time TIMESTAMP(6) WITHOUT TIME ZONE NOT NULL, annotation_guid TEXT NOT NULL, measurement_name TEXT NOT NULL, subject_type TEXT, measurement_category TEXT NOT NULL, measurement_value NUMERIC, measurement_display_value TEXT, resource_creation_time TIMESTAMP(6) WITHOUT TIME ZONE, last_modified_time TIMESTAMP(6) WITHOUT TIME ZONE, resource_size NUMERIC, PRIMARY KEY (sr_guid, annotation_guid, measurement_category, metadata_collection_id));
+COMMENT ON TABLE sr_resource_measurement IS 'Holds summary statistics about the whole resource surveyed.';
+COMMENT ON COLUMN sr_resource_measurement.metadata_collection_id IS 'This is the metadata_collection_id of the annotation';
+COMMENT ON COLUMN sr_resource_measurement.measurement_display_value IS 'String version of the display value';
+commit;