From c64f48e03b6351dcb00bb41cd4e410a4db8f0ed3 Mon Sep 17 00:00:00 2001 From: Mrunmay Shelar Date: Thu, 18 Jan 2024 08:42:13 +0530 Subject: [PATCH 1/3] docs: removed UI lite --- docs/getting_started.md | 1 - .../core/monitoring-your-application.md | 106 ------------------ 2 files changed, 107 deletions(-) delete mode 100644 docs/getting_started/core/monitoring-your-application.md diff --git a/docs/getting_started.md b/docs/getting_started.md index c470330..501dfa2 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -9,7 +9,6 @@ Dozer is available in two flavours: an Open Source Core version and a Cloud vers - [Connecting to data sources](getting_started/core/connecting-to-sources) - [Adding transformations](getting_started/core/adding-transformations) - [Querying data](getting_started/core/querying-data) -- [Monitoring your application](getting_started/core/monitoring-your-application) ## Dozer Cloud diff --git a/docs/getting_started/core/monitoring-your-application.md b/docs/getting_started/core/monitoring-your-application.md deleted file mode 100644 index 3654935..0000000 --- a/docs/getting_started/core/monitoring-your-application.md +++ /dev/null @@ -1,106 +0,0 @@ -# Monitoring Applications - -## Dozer UI Lite Setup -To install the Dozer UI Lite and to start monitoring your applications locally, follow these steps. - -### Installation - -Download ***dozer-ui-lite***, by running following commands. -```bash -# MacOS -curl -sLO https://raw.githubusercontent.com/getdozer/dozer-docs/main/static/examples/3_ui_lite/dozer-ui-lite.tar.gz -tar -zxvf dozer-ui-lite.tar.gz -cd dozer-ui-lite -``` -```bash -# Ubuntu -curl -sLO https://raw.githubusercontent.com/getdozer/dozer-docs/main/static/examples/3_ui_lite/dozer-ui-lite-linux.tar.gz -tar -zxvf dozer-ui-lite.tar.gz -cd dozer-ui-lite -``` - -### Docker Compose -Dozer UI Lite works as a Docker Container. You can find more guides in [Docker Overview](https://docs.docker.com/get-started/overview/). 
- -Make sure `dozer-config.yaml` contains following lines to configure telemetry. - -> ```docker -> telemetry: -> metrics: !Prometheus -> ``` - -Run following command to bring up the docker image of Dozer UI Lite. - -```bash -docker-compose up -``` -To check if the ***dozer-ui-lite*** is running properly with your dozer application, Dozer UI Lite should be available at [`localhost:3000`](http://localhost:3000/). - -![Login](./img/login.png) - - - -### Run Dozer -Then let's run your dozer application! - -```bash -dozer -``` - -
- -*Prometheus* is a standalone open-source project that collects and stores its metrics as time series data. You can find an introduction in the [Prometheus Overview](https://prometheus.io/docs/introduction/overview/). - -Also, you can access the raw Prometheus metrics at [`localhost:9090/graph`](http://localhost:9090/graph). - -![Promemtheus](./img/prom.png) - -### Authentication - -By signing up at [`localhost:3000/signup`](http://localhost:3000/signup), you can log in using your credential at [`localhost:3000/login`](http://localhost:3000/login). - -## Dashboard - -### Overview - -This dashboard shows graphical representations of `Sources`, `Stores`, and `Pods`. You can monitor: -* `Sources`: Total Operations, Operations per second, and Pipeline Latency. -* `Store`: Total Operations, Operations per second, Data Latency. -* `Pod`: Instance, Status, Disk Usage, RAM Usage, AVG/R Sec, Restart, and Created. - -![Dashboard](./img/dashboard.png) - -Each of these components of Overview is represented more in detail in a separate tab in the menu. - - -### Source - -The second tab of the menu will show you a dashboard of data sources in your application. For each graph, you can select the table and the operation type you want to monitor. - -![Source](./img/source.png) - - -### Pipeline -The third tab of the menu consists in monitoring the pipelines of your application. You can view detailed Pipeline latency with pipeline execution plan for each operation, for example of JOIN operations performed in the data source. - -![Pipeline1](./img/pipeline1.png) -![Pipeline2](./img/pipeline2.png) - -A third monitor is about the Containers in the pipeline, such as Instance, Status, Disk Usage, and RAM usage. - -### API -The fourth tab consists in monitoring the API endpoints of your application. 
You will get a detailed view of: -* `Cache`: graphical representation of Data Latency and Total Operations -* `Indexing`: graphical representation of Total Indexed Records and Indexing Rate per Second -* `API`: graphical representation of API Latency and API Throughput. - -![](./img/api.png) - -### Errors - -Within any of the part in this dashboard you will have detailed information about all errors which might encounter in the application. - -![](./img/errors.png) - - - From c5191305c0fe22f49b6d4d83a2fa3eaa4b91f84b Mon Sep 17 00:00:00 2001 From: Mrunmay Shelar Date: Fri, 19 Jan 2024 08:19:32 +0530 Subject: [PATCH 2/3] docs: added js lambda and onnx --- docs/{ => udfs}/lambda-functions.md | 0 docs/udfs/onnx.md | 56 +++++++++++++++++++++++++++++ sidebars.js | 26 +++++++------- 3 files changed, 68 insertions(+), 14 deletions(-) rename docs/{ => udfs}/lambda-functions.md (100%) create mode 100644 docs/udfs/onnx.md diff --git a/docs/lambda-functions.md b/docs/udfs/lambda-functions.md similarity index 100% rename from docs/lambda-functions.md rename to docs/udfs/lambda-functions.md diff --git a/docs/udfs/onnx.md b/docs/udfs/onnx.md new file mode 100644 index 0000000..6b5397d --- /dev/null +++ b/docs/udfs/onnx.md @@ -0,0 +1,56 @@ +# ONNX + +ONNX, or Open Neural Network Exchange, is an open-source format designed to represent machine learning models. It provides a standardized way to describe models so that they can be easily exchanged between different deep learning frameworks. ONNX is supported by various frameworks such as PyTorch, TensorFlow, Microsoft Cognitive Toolkit (CNTK), and others, allowing interoperability and flexibility in deploying models across different platforms. + +Dozer supports ONNX models and allows you to deploy them as APIs. This enables you to use your models in production without having to write any additional code. 
For instance, you can use a pre-trained model to predict probabilities of a particular event, such as a customer credit score, or other use cases. + +## Configuration + +Add the following block to your YAML file to register ONNX models. + +```yaml +sql : | + SELECT torch_jit(col1, col2) INTO output FROM input; +``` + +```yaml +udfs: + - name: torch_jit + config: !Onnx + path: ./model.onnx +``` + +`torch_jit` is the function which would run the ONNX model on `col1, col2` as input returning the output in the `output` column. + +### Parameters + +| **Parameter Name** | **Type** | **Description** | +|--------------------|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `path` | String | Path to the ONNX model. | +## Running the ONNX Model + +### Pre-requisites + +- Enabling ONNX feature while building Dozer. + ```bash + cargo install --path dozer-cli --features onnx --locked + ``` +- Installing ONNX runtime. + ```bash + pip install onnxruntime + ``` + +Run App to start ingesting data into Dozer. 
+ +```bash +dozer run app +``` +### To use ONNX on Dozer Cloud + +``` +dozer cloud deploy -c dozer-config.yaml -c model.onnx +``` + +## Trying it out + +To test an ONNX sample, clone the `dozer-samples` GitHub repo and follow the steps described [here](https://github.com/getdozer/dozer-samples/tree/main/usecases/onnx) \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index e30ffb0..7d5849b 100644 --- a/sidebars.js +++ b/sidebars.js @@ -128,8 +128,18 @@ const sidebars = { 'transforming-data/windowing' ] }, - - + { + type: 'category', + label: 'User Defined Functions', + link: { + type: 'generated-index', + title: 'User Defined Functions', + }, + items: [ + 'udfs/lambda-functions', + 'udfs/onnx', + ] + }, { type: 'category', label: 'Accessing Data', @@ -145,18 +155,6 @@ const sidebars = { 'accessing-data/authorization' ] }, - //'lambda-functions', - // { - // type: 'category', - // label: 'Deployment', - // link: { - // type: 'doc', - // id: 'deployment', - // }, - // items: [] - // } - - ], }; From b44a90a3d12687316904e2e7870b370338333239 Mon Sep 17 00:00:00 2001 From: Mrunmay Shelar Date: Mon, 22 Jan 2024 12:09:28 +0530 Subject: [PATCH 3/3] docs: added new endpoints docs --- docs/configuration/api-endpoints.md | 19 +++++++------------ .../cloud/adding-transformations.mdx | 17 +++++++++-------- .../cloud/connecting-to-sources.mdx | 5 ++--- .../core/adding-transformations.mdx | 5 ++--- .../core/connecting-to-sources.mdx | 16 ++++++---------- 5 files changed, 26 insertions(+), 36 deletions(-) diff --git a/docs/configuration/api-endpoints.md b/docs/configuration/api-endpoints.md index cbf8ffe..8553e54 100644 --- a/docs/configuration/api-endpoints.md +++ b/docs/configuration/api-endpoints.md @@ -3,23 +3,18 @@ The endpoint configuration defines how Dozer should expose gRPC/REST endpoints. ```yaml endpoints: - - name: trips_cache - path: /trips - table_name: trips_cache - index: - ... - conflict_resolution: - ... 
+ - table_name: trips_cache + kind: !Api + path: /trips ``` ### Parameters | Name | Type | Description | |-----------------------|--------------|-------------------------------------------------------------------------------------------------------------------------------------| -| `name` | String | The designated name of the endpoint. | -| `path` | String | Determines the route or path for the REST endpoint. | -| `table_name` | String | Identifies the name of the table in the source or in the SQL that this endpoint is set to expose. | -| [`index`](#indexes) | Object | An optional section that describes the index configuration for this endpoint, specifying primary and secondary indexes and whether to skip default configurations. | -| [`conflict_resolution`](#conflicts-resolution) | Object | An optional section that outlines the strategies to handle potential data conflicts for this endpoint. | +| `table_name` | String | Identifies the name of the table in the source or in the SQL that this endpoint is set to expose. | +| `kind` | String | Determines the sink used for the endpoint. For example, `!Api`, `!Dummy`, `!Aerospike`, `!Snowflake`. | +| `path` | String | Determines the route or path for the REST endpoint. | + ## Indexes The `index` section of the endpoint configuration in Dozer determines how indexing is managed for the exposed endpoint. Appropriate indexing ensures quick data retrieval and can greatly improve query performance. 
diff --git a/docs/getting_started/cloud/adding-transformations.mdx b/docs/getting_started/cloud/adding-transformations.mdx index 897f8c9..0d10e31 100644 --- a/docs/getting_started/cloud/adding-transformations.mdx +++ b/docs/getting_started/cloud/adding-transformations.mdx @@ -41,16 +41,17 @@ To expose the result of this query as an API we will also need to add an additio ```yaml endpoints: - - name: ticker_analysis - path: /analysis/ticker - table_name: ticker_analysis + - table_name: ticker_analysis + kind: !Dummy - - name: daily_analysis - path: /analysis/daily - table_name: daily_analysis + - table_name: daily_analysis + kind: !Dummy - - name: highest_daily_close - path: /analysis/highest_daily_close + - table_name: highest_daily_close + kind: !Dummy + + - table_name: lowest_daily_close + kind: !Dummy ``` diff --git a/docs/getting_started/cloud/connecting-to-sources.mdx b/docs/getting_started/cloud/connecting-to-sources.mdx index a0276b5..cb047c9 100644 --- a/docs/getting_started/cloud/connecting-to-sources.mdx +++ b/docs/getting_started/cloud/connecting-to-sources.mdx @@ -40,9 +40,8 @@ connections: extension: .csv name: s3 endpoints: -- name: stocks - table_name: stocks - path: /stocks + - table_name: stocks + kind: !Dummy ``` diff --git a/docs/getting_started/core/adding-transformations.mdx b/docs/getting_started/core/adding-transformations.mdx index de38729..3aa043e 100644 --- a/docs/getting_started/core/adding-transformations.mdx +++ b/docs/getting_started/core/adding-transformations.mdx @@ -26,9 +26,8 @@ To expose the result of this query as an API we will also need to add an additio ```yaml endpoints: - - name: avg_fares - path: /avg_fares - table_name: avg_fares + - table_name: avg_fares + kind: !Dummy ``` diff --git a/docs/getting_started/core/connecting-to-sources.mdx b/docs/getting_started/core/connecting-to-sources.mdx index c3810db..4d490a2 100644 --- a/docs/getting_started/core/connecting-to-sources.mdx +++ 
b/docs/getting_started/core/connecting-to-sources.mdx @@ -47,9 +47,8 @@ sources: connection: local_storage endpoints: - - name: trips - path: /trips - table_name: trips + - table_name: trips + kind: !Dummy ``` Now download some sample trip data and copy it to the `data/trips` directory: @@ -162,14 +161,11 @@ sources: connection: pg endpoints: - - name: trips - path: /trips - table_name: trips - - - name: zones - path: /zoness - table_name: zones + - table_name: trips + kind: !Dummy + - table_name: zones + kind: !Dummy ```