diff --git a/docker/.env.docker b/docker/.env.docker deleted file mode 100644 index 4c1b64550..000000000 --- a/docker/.env.docker +++ /dev/null @@ -1,3 +0,0 @@ -API_URL=http://starlake-api:9000 -SL_PORT=80 -ORCHESTRATOR_URL=http://starlake-airflow:8080 diff --git a/docker/Dockerfile_airflow b/docker/Dockerfile_airflow deleted file mode 100644 index 38936412a..000000000 --- a/docker/Dockerfile_airflow +++ /dev/null @@ -1,31 +0,0 @@ -# Use the official Apache Airflow image as the base -FROM apache/airflow:latest - -# Switch to root user to install additional packages -USER root - -# Install docker client and NFS client utilities -RUN apt-get update \ - && apt-get install -y nfs-common \ - docker.io \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -ADD conf/airflow/webserver_config.py /opt/airflow/webserver_config.py - -# Required to mount NFS volumes -RUN echo "airflow ALL=(ALL:ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow - -# Install SL CLI -RUN mkdir -p /app/starlake -COPY scripts/docker/starlake.sh /app/starlake/starlake -RUN chmod +x /app/starlake/starlake -RUN ln -s /app/starlake/starlake /usr/local/bin/starlake - -# Switch back to the airflow user -USER airflow - -# Install SL Python libraries -RUN pip install --no-cache-dir \ - starlake-orchestration[airflow] --upgrade - diff --git a/docker/Dockerfile_dagster b/docker/Dockerfile_dagster deleted file mode 100644 index e57a9dba3..000000000 --- a/docker/Dockerfile_dagster +++ /dev/null @@ -1,50 +0,0 @@ -FROM debian:11-slim - -# Update package list and install docker client, NFS client utilities and Python 3 -RUN apt-get update \ - && apt-get install -y \ - nfs-common \ - docker.io \ - python3 \ - python3-pip \ - vim \ - cron \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN pip3 install \ - dagster \ - dagster-webserver \ - dagster-postgres \ - dagster_shell \ - dagster-docker - -# Change working directory -ENV DAGSTER_HOME=/opt/dagster/home - -RUN mkdir -p ${DAGSTER_HOME} - -WORKDIR ${DAGSTER_HOME} - -COPY conf/dagster/dagster.yaml . - -# Install SL CLI -RUN mkdir -p /app/starlake -COPY scripts/docker/starlake.sh /app/starlake/starlake -RUN chmod +x /app/starlake/starlake -RUN ln -s /app/starlake/starlake /usr/local/bin/starlake - -RUN mkdir -p /opt/dagster/app/dags - -WORKDIR /opt/dagster/app - -COPY conf/dagster/pyproject.toml . 
-COPY scripts/dagster/code_locations.py ./dagster_code_locations.py -RUN chmod +x dagster_code_locations.py - -# Add the cron job -RUN echo "* * * * * root cd /opt/dagster/app && python3 dagster_code_locations.py >> /tmp/dagster_code_locations.log 2>&1" >> /etc/cron.d/dagster_code_locations - -EXPOSE 3000 - -ENTRYPOINT ["dagster-webserver", "-h", "0.0.0.0", "-p", "3000"] \ No newline at end of file diff --git a/docker/Dockerfile_nas b/docker/Dockerfile_nas deleted file mode 100644 index f44da09c3..000000000 --- a/docker/Dockerfile_nas +++ /dev/null @@ -1,25 +0,0 @@ -# Use a lightweight base image -FROM alpine:latest - -# Install the NFS server -RUN apk add --no-cache nfs-utils - -# Create the directories to share -RUN mkdir -p /projects /projects/dags - -# Grant the required permissions -RUN chmod 777 /projects /projects/dags - -# Configure the NFS exports -RUN echo "/projects *(rw,sync,no_subtree_check,no_root_squash,insecure)" > /etc/exports && \ echo "/projects/dags *(rw,sync,no_subtree_check,no_root_squash,insecure)" >> /etc/exports - -# Startup script for the NFS services -COPY scripts/nfs/start.sh /start-nfs.sh -RUN chmod +x /start-nfs.sh - -# Expose the NFS port -EXPOSE 2049 - -# Command to start the NFS server -CMD ["/start-nfs.sh"] diff --git a/docker/Dockerfile_projects b/docker/Dockerfile_projects deleted file mode 100644 index 65c0b64ed..000000000 --- a/docker/Dockerfile_projects +++ /dev/null @@ -1,14 +0,0 @@ -FROM alpine:latest - -RUN apk add --no-cache --no-progress \ nfs-utils \ postgresql-client \ unzip \ bash - -RUN mkdir -p /projects - -COPY scripts/projects/entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh - -ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file diff --git a/docker/conf/airflow/webserver_config.py b/docker/conf/airflow/webserver_config.py deleted file mode 100644 index 4d1714db7..000000000 --- a/docker/conf/airflow/webserver_config.py +++ /dev/null @@ -1,133 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License.
-"""Default configuration for the Airflow webserver.""" - -from __future__ import annotations - -import os - -from flask_appbuilder.const import AUTH_DB - -# from airflow.www.fab_security.manager import AUTH_LDAP -# from airflow.www.fab_security.manager import AUTH_OAUTH -# from airflow.www.fab_security.manager import AUTH_OID -# from airflow.www.fab_security.manager import AUTH_REMOTE_USER - - -basedir = os.path.abspath(os.path.dirname(__file__)) - -# Flask-WTF flag for CSRF -WTF_CSRF_ENABLED = True -WTF_CSRF_TIME_LIMIT = None - -# ---------------------------------------------------- -# AUTHENTICATION CONFIG -# ---------------------------------------------------- -# For details on how to set up each of the following authentication, see -# http://flask-appbuilder.readthedocs.io/en/latest/security.html# authentication-methods -# for details. - -# The authentication type -# AUTH_OID : Is for OpenID -# AUTH_DB : Is for database -# AUTH_LDAP : Is for LDAP -# AUTH_REMOTE_USER : Is for using REMOTE_USER from web server -# AUTH_OAUTH : Is for OAuth -AUTH_TYPE = AUTH_DB - -# Uncomment to setup Full admin role name -# AUTH_ROLE_ADMIN = 'Admin' - -# Uncomment and set to desired role to enable access without authentication -#AUTH_ROLE_PUBLIC = 'Viewer' -AUTH_ROLE_PUBLIC = 'Admin' - -# Will allow user self registration -# AUTH_USER_REGISTRATION = True - -# The recaptcha it's automatically enabled for user self registration is active and the keys are necessary -# RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY -# RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY - -# Config for Flask-Mail necessary for user self registration -# MAIL_SERVER = 'smtp.gmail.com' -# MAIL_USE_TLS = True -# MAIL_USERNAME = 'yourappemail@gmail.com' -# MAIL_PASSWORD = 'passwordformail' -# MAIL_DEFAULT_SENDER = 'sender@gmail.com' - -# The default user self registration role -# AUTH_USER_REGISTRATION_ROLE = "Public" - -# When using OAuth Auth, uncomment to setup provider(s) info -# Google OAuth example: -# OAUTH_PROVIDERS = [{ -# 'name':'google', -# 'token_key':'access_token', -# 'icon':'fa-google', -# 'remote_app': { -# 'api_base_url':'https://www.googleapis.com/oauth2/v2/', -# 'client_kwargs':{ -# 'scope': 'email profile' -# }, -# 'access_token_url':'https://accounts.google.com/o/oauth2/token', -# 'authorize_url':'https://accounts.google.com/o/oauth2/auth', -# 'request_token_url': None, -# 'client_id': GOOGLE_KEY, -# 'client_secret': GOOGLE_SECRET_KEY, -# } -# }] - -# When using LDAP Auth, setup the ldap server -# AUTH_LDAP_SERVER = "ldap://ldapserver.new" - -# When using OpenID Auth, uncomment to setup OpenID providers. -# example for OpenID authentication -# OPENID_PROVIDERS = [ -# { 'name': 'Yahoo', 'url': 'https://me.yahoo.com' }, -# { 'name': 'AOL', 'url': 'http://openid.aol.com/' }, -# { 'name': 'Flickr', 'url': 'http://www.flickr.com/' }, -# { 'name': 'MyOpenID', 'url': 'https://www.myopenid.com' }] - -# ---------------------------------------------------- -# Theme CONFIG -# ---------------------------------------------------- -# Flask App Builder comes up with a number of predefined themes -# that you can use for Apache Airflow. -# http://flask-appbuilder.readthedocs.io/en/latest/customizing.html#changing-themes -# Please make sure to remove "navbar_color" configuration from airflow.cfg -# in order to fully utilize the theme. 
(or use that property in conjunction with theme) -# APP_THEME = "bootstrap-theme.css" # default bootstrap -# APP_THEME = "amelia.css" -# APP_THEME = "cerulean.css" -# APP_THEME = "cosmo.css" -# APP_THEME = "cyborg.css" -# APP_THEME = "darkly.css" -# APP_THEME = "flatly.css" -# APP_THEME = "journal.css" -# APP_THEME = "lumen.css" -# APP_THEME = "paper.css" -# APP_THEME = "readable.css" -# APP_THEME = "sandstone.css" -# APP_THEME = "simplex.css" -# APP_THEME = "slate.css" -# APP_THEME = "solar.css" -# APP_THEME = "spacelab.css" -# APP_THEME = "superhero.css" -# APP_THEME = "united.css" -# APP_THEME = "yeti.css" diff --git a/docker/conf/dagster/dagster.yaml b/docker/conf/dagster/dagster.yaml deleted file mode 100644 index fdc7565ef..000000000 --- a/docker/conf/dagster/dagster.yaml +++ /dev/null @@ -1,86 +0,0 @@ -run_storage: - module: dagster_postgres.run_storage - class: PostgresRunStorage - config: - postgres_db: - username: - env: DAGSTER_PG_USERNAME - password: - env: DAGSTER_PG_PASSWORD - hostname: - env: DAGSTER_PG_HOST - db_name: - env: DAGSTER_PG_DB - port: 5432 -event_log_storage: - module: dagster_postgres.event_log - class: PostgresEventLogStorage - config: - postgres_db: - username: - env: DAGSTER_PG_USERNAME - password: - env: DAGSTER_PG_PASSWORD - hostname: - env: DAGSTER_PG_HOST - db_name: - env: DAGSTER_PG_DB - port: 5432 -schedule_storage: - module: dagster_postgres.schedule_storage - class: PostgresScheduleStorage - config: - postgres_db: - username: - env: DAGSTER_PG_USERNAME - password: - env: DAGSTER_PG_PASSWORD - hostname: - env: DAGSTER_PG_HOST - db_name: - env: DAGSTER_PG_DB - port: 5432 -compute_logs: - module: dagster.core.storage.local_compute_log_manager - class: LocalComputeLogManager - config: - base_dir: /opt/dagster/app/logs -# run_coordinator: -# module: dagster.core.run_coordinator -# class: QueuedRunCoordinator -# config: -# max_concurrent_runs: 5 -telemetry: - enabled: false -nux: - enabled: false - -# storage: -# postgres: -# postgres_db: -# username: -# env: DAGSTER_PG_USERNAME -# password: -# env: DAGSTER_PG_PASSWORD -# hostname: -# env: DAGSTER_PG_HOST -# db_name: -# env: DAGSTER_PG_DB -# port: 5432 -# run_launcher: -# module: dagster_docker -# class: DockerRunLauncher -# retention: -# schedule: -# purge_after_days: 90 # sets retention policy for schedule ticks of all types -# sensor: -# purge_after_days: -# skipped: 7 -# failure: 30 -# success: -1 # keep success ticks indefinitely -# sensors: -# use_threads: true -# num_workers: 8 -# schedules: -# use_threads: true -# num_workers: 8 \ No newline at end of file diff --git a/docker/conf/dagster/pyproject.toml b/docker/conf/dagster/pyproject.toml deleted file mode 100644 index 2765dd0c4..000000000 --- a/docker/conf/dagster/pyproject.toml +++ /dev/null @@ -1 +0,0 @@ -[tool.dagster] diff --git a/docker/conf/hydra/postgresql.conf b/docker/conf/hydra/postgresql.conf deleted file mode 100644 index 267745c35..000000000 --- a/docker/conf/hydra/postgresql.conf +++ /dev/null @@ -1,815 +0,0 @@ -# ----------------------------- -# PostgreSQL configuration file -# ----------------------------- -# -# This file consists of lines of the form: -# -# name = value -# -# (The "=" is optional.) Whitespace may be used. Comments are introduced with -# "#" anywhere on a line. The complete list of parameter names and allowed -# values can be found in the PostgreSQL documentation. -# -# The commented-out settings shown in this file represent the default values. 
-# Re-commenting a setting is NOT sufficient to revert it to the default value; -# you need to reload the server. -# -# This file is read on server startup and when the server receives a SIGHUP -# signal. If you edit the file on a running system, you have to SIGHUP the -# server for the changes to take effect, run "pg_ctl reload", or execute -# "SELECT pg_reload_conf()". Some parameters, which are marked below, -# require a server shutdown and restart to take effect. -# -# Any parameter can also be given as a command-line option to the server, e.g., -# "postgres -c log_connections=on". Some parameters can be changed at run time -# with the "SET" SQL command. -# -# Memory units: B = bytes Time units: us = microseconds -# kB = kilobytes ms = milliseconds -# MB = megabytes s = seconds -# GB = gigabytes min = minutes -# TB = terabytes h = hours -# d = days - - -#------------------------------------------------------------------------------ -# FILE LOCATIONS -#------------------------------------------------------------------------------ - -# The default values of these variables are driven from the -D command-line -# option or PGDATA environment variable, represented here as ConfigDir. - -#data_directory = 'ConfigDir' # use data in another directory - # (change requires restart) -#hba_file = 'ConfigDir/pg_hba.conf' # host-based authentication file - # (change requires restart) -#ident_file = 'ConfigDir/pg_ident.conf' # ident configuration file - # (change requires restart) - -# If external_pid_file is not explicitly set, no extra PID file is written. -#external_pid_file = '' # write an extra PID file - # (change requires restart) - - -#------------------------------------------------------------------------------ -# CONNECTIONS AND AUTHENTICATION -#------------------------------------------------------------------------------ - -# - Connection Settings - - -listen_addresses = '*' - # comma-separated list of addresses; - # defaults to 'localhost'; use '*' for all - # (change requires restart) -#port = 5432 # (change requires restart) -#max_connections = 100 # (change requires restart) -#superuser_reserved_connections = 3 # (change requires restart) -#unix_socket_directories = '/tmp' # comma-separated list of directories - # (change requires restart) -#unix_socket_group = '' # (change requires restart) -#unix_socket_permissions = 0777 # begin with 0 to use octal notation - # (change requires restart) -#bonjour = off # advertise server via Bonjour - # (change requires restart) -#bonjour_name = '' # defaults to the computer name - # (change requires restart) - -# - TCP settings - -# see "man tcp" for details - -#tcp_keepalives_idle = 0 # TCP_KEEPIDLE, in seconds; - # 0 selects the system default -#tcp_keepalives_interval = 0 # TCP_KEEPINTVL, in seconds; - # 0 selects the system default -#tcp_keepalives_count = 0 # TCP_KEEPCNT; - # 0 selects the system default -#tcp_user_timeout = 0 # TCP_USER_TIMEOUT, in milliseconds; - # 0 selects the system default - -#client_connection_check_interval = 0 # time between checks for client - # disconnection while running queries; - # 0 for never - -# - Authentication - - -#authentication_timeout = 1min # 1s-600s -#password_encryption = scram-sha-256 # scram-sha-256 or md5 -#db_user_namespace = off - -# GSSAPI using Kerberos -#krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab' -#krb_caseins_users = off - -# - SSL - - -#ssl = off -#ssl_ca_file = '' -#ssl_cert_file = 'server.crt' -#ssl_crl_file = '' -#ssl_crl_dir = '' -#ssl_key_file = 'server.key' -#ssl_ciphers = 
'HIGH:MEDIUM:+3DES:!aNULL' # allowed SSL ciphers -#ssl_prefer_server_ciphers = on -#ssl_ecdh_curve = 'prime256v1' -#ssl_min_protocol_version = 'TLSv1.2' -#ssl_max_protocol_version = '' -#ssl_dh_params_file = '' -#ssl_passphrase_command = '' -#ssl_passphrase_command_supports_reload = off - - -#------------------------------------------------------------------------------ -# RESOURCE USAGE (except WAL) -#------------------------------------------------------------------------------ - -# - Memory - - -#shared_buffers = 128MB # min 128kB - # (change requires restart) -#huge_pages = try # on, off, or try - # (change requires restart) -#huge_page_size = 0 # zero for system default - # (change requires restart) -#temp_buffers = 8MB # min 800kB -#max_prepared_transactions = 0 # zero disables the feature - # (change requires restart) -# Caution: it is not advisable to set max_prepared_transactions nonzero unless -# you actively intend to use prepared transactions. -#work_mem = 4MB # min 64kB -#hash_mem_multiplier = 2.0 # 1-1000.0 multiplier on hash table work_mem -#maintenance_work_mem = 64MB # min 1MB -#autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem -#logical_decoding_work_mem = 64MB # min 64kB -#max_stack_depth = 2MB # min 100kB -#shared_memory_type = mmap # the default is the first option - # supported by the operating system: - # mmap - # sysv - # windows - # (change requires restart) -#dynamic_shared_memory_type = posix # the default is usually the first option - # supported by the operating system: - # posix - # sysv - # windows - # mmap - # (change requires restart) -#min_dynamic_shared_memory = 0MB # (change requires restart) - -# - Disk - - -#temp_file_limit = -1 # limits per-process temp file space - # in kilobytes, or -1 for no limit - -# - Kernel Resources - - -#max_files_per_process = 1000 # min 64 - # (change requires restart) - -# - Cost-Based Vacuum Delay - - -#vacuum_cost_delay = 0 # 0-100 milliseconds (0 disables) -#vacuum_cost_page_hit = 1 # 0-10000 credits -#vacuum_cost_page_miss = 2 # 0-10000 credits -#vacuum_cost_page_dirty = 20 # 0-10000 credits -#vacuum_cost_limit = 200 # 1-10000 credits - -# - Background Writer - - -#bgwriter_delay = 200ms # 10-10000ms between rounds -#bgwriter_lru_maxpages = 100 # max buffers written/round, 0 disables -#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round -#bgwriter_flush_after = 0 # measured in pages, 0 disables - -# - Asynchronous Behavior - - -#backend_flush_after = 0 # measured in pages, 0 disables -#effective_io_concurrency = 1 # 1-1000; 0 disables prefetching -#maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching -#max_worker_processes = 8 # (change requires restart) -#max_parallel_workers_per_gather = 2 # taken from max_parallel_workers -#max_parallel_maintenance_workers = 2 # taken from max_parallel_workers -#max_parallel_workers = 8 # maximum number of max_worker_processes that - # can be used in parallel operations -#parallel_leader_participation = on -#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate - # (change requires restart) - - -#------------------------------------------------------------------------------ -# WRITE-AHEAD LOG -#------------------------------------------------------------------------------ - -# - Settings - - -#wal_level = replica # minimal, replica, or logical - # (change requires restart) -#fsync = on # flush data to disk for crash safety - # (turning this off can cause - # unrecoverable data corruption) -#synchronous_commit = on 
# synchronization level; - # off, local, remote_write, remote_apply, or on -#wal_sync_method = fsync # the default is the first option - # supported by the operating system: - # open_datasync - # fdatasync (default on Linux and FreeBSD) - # fsync - # fsync_writethrough - # open_sync -#full_page_writes = on # recover from partial page writes -#wal_log_hints = off # also do full page writes of non-critical updates - # (change requires restart) -#wal_compression = off # enables compression of full-page writes; - # off, pglz, lz4, zstd, or on -#wal_init_zero = on # zero-fill new WAL files -#wal_recycle = on # recycle WAL files -#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers - # (change requires restart) -#wal_writer_delay = 200ms # 1-10000 milliseconds -#wal_writer_flush_after = 1MB # measured in pages, 0 disables -#wal_skip_threshold = 2MB - -#commit_delay = 0 # range 0-100000, in microseconds -#commit_siblings = 5 # range 1-1000 - -# - Checkpoints - - -#checkpoint_timeout = 5min # range 30s-1d -#checkpoint_completion_target = 0.9 # checkpoint target duration, 0.0 - 1.0 -#checkpoint_flush_after = 0 # measured in pages, 0 disables -#checkpoint_warning = 30s # 0 disables -#max_wal_size = 1GB -#min_wal_size = 80MB - -# - Prefetching during recovery - - -#recovery_prefetch = try # prefetch pages referenced in the WAL? -#wal_decode_buffer_size = 512kB # lookahead window used for prefetching - # (change requires restart) - -# - Archiving - - -#archive_mode = off # enables archiving; off, on, or always - # (change requires restart) -#archive_library = '' # library to use to archive a logfile segment - # (empty string indicates archive_command should - # be used) -#archive_command = '' # command to use to archive a logfile segment - # placeholders: %p = path of file to archive - # %f = file name only - # e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f' -#archive_timeout = 0 # force a logfile segment switch after this - # number of seconds; 0 disables - -# - Archive Recovery - - -# These are only used in recovery mode. - -#restore_command = '' # command to use to restore an archived logfile segment - # placeholders: %p = path of file to restore - # %f = file name only - # e.g. 'cp /mnt/server/archivedir/%f %p' -#archive_cleanup_command = '' # command to execute at every restartpoint -#recovery_end_command = '' # command to execute at completion of recovery - -# - Recovery Target - - -# Set these only when performing a targeted recovery. 
- -#recovery_target = '' # 'immediate' to end recovery as soon as a - # consistent state is reached - # (change requires restart) -#recovery_target_name = '' # the named restore point to which recovery will proceed - # (change requires restart) -#recovery_target_time = '' # the time stamp up to which recovery will proceed - # (change requires restart) -#recovery_target_xid = '' # the transaction ID up to which recovery will proceed - # (change requires restart) -#recovery_target_lsn = '' # the WAL LSN up to which recovery will proceed - # (change requires restart) -#recovery_target_inclusive = on # Specifies whether to stop: - # just after the specified recovery target (on) - # just before the recovery target (off) - # (change requires restart) -#recovery_target_timeline = 'latest' # 'current', 'latest', or timeline ID - # (change requires restart) -#recovery_target_action = 'pause' # 'pause', 'promote', 'shutdown' - # (change requires restart) - - -#------------------------------------------------------------------------------ -# REPLICATION -#------------------------------------------------------------------------------ - -# - Sending Servers - - -# Set these on the primary and on any standby that will send replication data. - -#max_wal_senders = 10 # max number of walsender processes - # (change requires restart) -#max_replication_slots = 10 # max number of replication slots - # (change requires restart) -#wal_keep_size = 0 # in megabytes; 0 disables -#max_slot_wal_keep_size = -1 # in megabytes; -1 disables -#wal_sender_timeout = 60s # in milliseconds; 0 disables -#track_commit_timestamp = off # collect timestamp of transaction commit - # (change requires restart) - -# - Primary Server - - -# These settings are ignored on a standby server. - -#synchronous_standby_names = '' # standby servers that provide sync rep - # method to choose sync standbys, number of sync standbys, - # and comma-separated list of application_name - # from standby(s); '*' = all -#vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed - -# - Standby Servers - - -# These settings are ignored on a primary server. - -#primary_conninfo = '' # connection string to sending server -#primary_slot_name = '' # replication slot on sending server -#promote_trigger_file = '' # file name whose presence ends recovery -#hot_standby = on # "off" disallows queries during recovery - # (change requires restart) -#max_standby_archive_delay = 30s # max delay before canceling queries - # when reading WAL from archive; - # -1 allows indefinite delay -#max_standby_streaming_delay = 30s # max delay before canceling queries - # when reading streaming WAL; - # -1 allows indefinite delay -#wal_receiver_create_temp_slot = off # create temp slot if primary_slot_name - # is not set -#wal_receiver_status_interval = 10s # send replies at least this often - # 0 disables -#hot_standby_feedback = off # send info from standby to prevent - # query conflicts -#wal_receiver_timeout = 60s # time that receiver waits for - # communication from primary - # in milliseconds; 0 disables -#wal_retrieve_retry_interval = 5s # time to wait before retrying to - # retrieve WAL after a failed attempt -#recovery_min_apply_delay = 0 # minimum delay for applying changes during recovery - -# - Subscribers - - -# These settings are ignored on a publisher. 
- -#max_logical_replication_workers = 4 # taken from max_worker_processes - # (change requires restart) -#max_sync_workers_per_subscription = 2 # taken from max_logical_replication_workers - - -#------------------------------------------------------------------------------ -# QUERY TUNING -#------------------------------------------------------------------------------ - -# - Planner Method Configuration - - -#enable_async_append = on -#enable_bitmapscan = on -#enable_gathermerge = on -#enable_hashagg = on -#enable_hashjoin = on -#enable_incremental_sort = on -#enable_indexscan = on -#enable_indexonlyscan = on -#enable_material = on -#enable_memoize = on -#enable_mergejoin = on -#enable_nestloop = on -#enable_parallel_append = on -#enable_parallel_hash = on -#enable_partition_pruning = on -#enable_partitionwise_join = off -#enable_partitionwise_aggregate = off -#enable_seqscan = on -#enable_sort = on -#enable_tidscan = on - -# - Planner Cost Constants - - -#seq_page_cost = 1.0 # measured on an arbitrary scale -#random_page_cost = 4.0 # same scale as above -#cpu_tuple_cost = 0.01 # same scale as above -#cpu_index_tuple_cost = 0.005 # same scale as above -#cpu_operator_cost = 0.0025 # same scale as above -#parallel_setup_cost = 1000.0 # same scale as above -#parallel_tuple_cost = 0.1 # same scale as above -#min_parallel_table_scan_size = 8MB -#min_parallel_index_scan_size = 512kB -#effective_cache_size = 4GB - -#jit_above_cost = 100000 # perform JIT compilation if available - # and query more expensive than this; - # -1 disables -#jit_inline_above_cost = 500000 # inline small functions if query is - # more expensive than this; -1 disables -#jit_optimize_above_cost = 500000 # use expensive JIT optimizations if - # query is more expensive than this; - # -1 disables - -# - Genetic Query Optimizer - - -#geqo = on -#geqo_threshold = 12 -#geqo_effort = 5 # range 1-10 -#geqo_pool_size = 0 # selects default based on effort -#geqo_generations = 0 # selects default based on effort -#geqo_selection_bias = 2.0 # range 1.5-2.0 -#geqo_seed = 0.0 # range 0.0-1.0 - -# - Other Planner Options - - -#default_statistics_target = 100 # range 1-10000 -#constraint_exclusion = partition # on, off, or partition -#cursor_tuple_fraction = 0.1 # range 0.0-1.0 -#from_collapse_limit = 8 -#jit = on # allow JIT compilation -#join_collapse_limit = 8 # 1 disables collapsing of explicit - # JOIN clauses -#plan_cache_mode = auto # auto, force_generic_plan or - # force_custom_plan -#recursive_worktable_factor = 10.0 # range 0.001-1000000 - - -#------------------------------------------------------------------------------ -# REPORTING AND LOGGING -#------------------------------------------------------------------------------ - -# - Where to Log - - -#log_destination = 'stderr' # Valid values are combinations of - # stderr, csvlog, jsonlog, syslog, and - # eventlog, depending on platform. - # csvlog and jsonlog require - # logging_collector to be on. - -# This is used when logging to stderr: -#logging_collector = off # Enable capturing of stderr, jsonlog, - # and csvlog into log files. Required - # to be on for csvlogs and jsonlogs. 
- # (change requires restart) - -# These are only used if logging_collector is on: -#log_directory = 'log' # directory where log files are written, - # can be absolute or relative to PGDATA -#log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern, - # can include strftime() escapes -#log_file_mode = 0600 # creation mode for log files, - # begin with 0 to use octal notation -#log_rotation_age = 1d # Automatic rotation of logfiles will - # happen after that time. 0 disables. -#log_rotation_size = 10MB # Automatic rotation of logfiles will - # happen after that much log output. - # 0 disables. -#log_truncate_on_rotation = off # If on, an existing log file with the - # same name as the new log file will be - # truncated rather than appended to. - # But such truncation only occurs on - # time-driven rotation, not on restarts - # or size-driven rotation. Default is - # off, meaning append to existing files - # in all cases. - -# These are relevant when logging to syslog: -#syslog_facility = 'LOCAL0' -#syslog_ident = 'postgres' -#syslog_sequence_numbers = on -#syslog_split_messages = on - -# This is only relevant when logging to eventlog (Windows): -# (change requires restart) -#event_source = 'PostgreSQL' - -# - When to Log - - -#log_min_messages = warning # values in order of decreasing detail: - # debug5 - # debug4 - # debug3 - # debug2 - # debug1 - # info - # notice - # warning - # error - # log - # fatal - # panic - -#log_min_error_statement = error # values in order of decreasing detail: - # debug5 - # debug4 - # debug3 - # debug2 - # debug1 - # info - # notice - # warning - # error - # log - # fatal - # panic (effectively off) - -#log_min_duration_statement = -1 # -1 is disabled, 0 logs all statements - # and their durations, > 0 logs only - # statements running at least this number - # of milliseconds - -#log_min_duration_sample = -1 # -1 is disabled, 0 logs a sample of statements - # and their durations, > 0 logs only a sample of - # statements running at least this number - # of milliseconds; - # sample fraction is determined by log_statement_sample_rate - -#log_statement_sample_rate = 1.0 # fraction of logged statements exceeding - # log_min_duration_sample to be logged; - # 1.0 logs all such statements, 0.0 never logs - - -#log_transaction_sample_rate = 0.0 # fraction of transactions whose statements - # are logged regardless of their duration; 1.0 logs all - # statements from all transactions, 0.0 never logs - -#log_startup_progress_interval = 10s # Time between progress updates for - # long-running startup operations. - # 0 disables the feature, > 0 indicates - # the interval in milliseconds. - -# - What to Log - - -#debug_print_parse = off -#debug_print_rewritten = off -#debug_print_plan = off -#debug_pretty_print = on -#log_autovacuum_min_duration = 10min # log autovacuum activity; - # -1 disables, 0 logs all actions and - # their durations, > 0 logs only - # actions running at least this number - # of milliseconds. 
-#log_checkpoints = on -#log_connections = off -#log_disconnections = off -#log_duration = off -#log_error_verbosity = default # terse, default, or verbose messages -#log_hostname = off -#log_line_prefix = '%m [%p] ' # special values: - # %a = application name - # %u = user name - # %d = database name - # %r = remote host and port - # %h = remote host - # %b = backend type - # %p = process ID - # %P = process ID of parallel group leader - # %t = timestamp without milliseconds - # %m = timestamp with milliseconds - # %n = timestamp with milliseconds (as a Unix epoch) - # %Q = query ID (0 if none or not computed) - # %i = command tag - # %e = SQL state - # %c = session ID - # %l = session line number - # %s = session start timestamp - # %v = virtual transaction ID - # %x = transaction ID (0 if none) - # %q = stop here in non-session - # processes - # %% = '%' - # e.g. '<%u%%%d> ' -#log_lock_waits = off # log lock waits >= deadlock_timeout -#log_recovery_conflict_waits = off # log standby recovery conflict waits - # >= deadlock_timeout -#log_parameter_max_length = -1 # when logging statements, limit logged - # bind-parameter values to N bytes; - # -1 means print in full, 0 disables -#log_parameter_max_length_on_error = 0 # when logging an error, limit logged - # bind-parameter values to N bytes; - # -1 means print in full, 0 disables -#log_statement = 'none' # none, ddl, mod, all -#log_replication_commands = off -#log_temp_files = -1 # log temporary files equal or larger - # than the specified size in kilobytes; - # -1 disables, 0 logs all temp files -#log_timezone = 'GMT' - - -#------------------------------------------------------------------------------ -# PROCESS TITLE -#------------------------------------------------------------------------------ - -#cluster_name = '' # added to process titles if nonempty - # (change requires restart) -#update_process_title = on - - -#------------------------------------------------------------------------------ -# STATISTICS -#------------------------------------------------------------------------------ - -# - Cumulative Query and Index Statistics - - -#track_activities = on -#track_activity_query_size = 1024 # (change requires restart) -#track_counts = on -#track_io_timing = off -#track_wal_io_timing = off -#track_functions = none # none, pl, all -#stats_fetch_consistency = cache - - -# - Monitoring - - -#compute_query_id = auto -#log_statement_stats = off -#log_parser_stats = off -#log_planner_stats = off -#log_executor_stats = off - - -#------------------------------------------------------------------------------ -# AUTOVACUUM -#------------------------------------------------------------------------------ - -#autovacuum = on # Enable autovacuum subprocess? 'on' - # requires track_counts to also be on. 
-#autovacuum_max_workers = 3 # max number of autovacuum subprocesses - # (change requires restart) -#autovacuum_naptime = 1min # time between autovacuum runs -#autovacuum_vacuum_threshold = 50 # min number of row updates before - # vacuum -#autovacuum_vacuum_insert_threshold = 1000 # min number of row inserts - # before vacuum; -1 disables insert - # vacuums -#autovacuum_analyze_threshold = 50 # min number of row updates before - # analyze -autovacuum_vacuum_scale_factor = 0.05 # fraction of table size before vacuum -#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table - # size before insert vacuum -autovacuum_analyze_scale_factor = 0.02 # fraction of table size before analyze -#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum - # (change requires restart) -#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age - # before forced vacuum - # (change requires restart) -#autovacuum_vacuum_cost_delay = 2ms # default vacuum cost delay for - # autovacuum, in milliseconds; - # -1 means use vacuum_cost_delay -#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for - # autovacuum, -1 means use - # vacuum_cost_limit - - -#------------------------------------------------------------------------------ -# CLIENT CONNECTION DEFAULTS -#------------------------------------------------------------------------------ - -# - Statement Behavior - - -#client_min_messages = notice # values in order of decreasing detail: - # debug5 - # debug4 - # debug3 - # debug2 - # debug1 - # log - # notice - # warning - # error -#search_path = '"$user", public' # schema names -#row_security = on -#default_table_access_method = 'heap' -#default_tablespace = '' # a tablespace name, '' uses the default -#default_toast_compression = 'pglz' # 'pglz' or 'lz4' -#temp_tablespaces = '' # a list of tablespace names, '' uses - # only default tablespace -#check_function_bodies = on -#default_transaction_isolation = 'read committed' -#default_transaction_read_only = off -#default_transaction_deferrable = off -#session_replication_role = 'origin' -#statement_timeout = 0 # in milliseconds, 0 is disabled -#lock_timeout = 0 # in milliseconds, 0 is disabled -#idle_in_transaction_session_timeout = 0 # in milliseconds, 0 is disabled -#idle_session_timeout = 0 # in milliseconds, 0 is disabled -#vacuum_freeze_table_age = 150000000 -#vacuum_freeze_min_age = 50000000 -#vacuum_failsafe_age = 1600000000 -#vacuum_multixact_freeze_table_age = 150000000 -#vacuum_multixact_freeze_min_age = 5000000 -#vacuum_multixact_failsafe_age = 1600000000 -#bytea_output = 'hex' # hex, escape -#xmlbinary = 'base64' -#xmloption = 'content' -#gin_pending_list_limit = 4MB - -# - Locale and Formatting - - -#datestyle = 'iso, mdy' -#intervalstyle = 'postgres' -#timezone = 'GMT' -#timezone_abbreviations = 'Default' # Select the set of available time zone - # abbreviations. Currently, there are - # Default - # Australia (historical usage) - # India - # You can create your own file in - # share/timezonesets/. -#extra_float_digits = 1 # min -15, max 3; any value >0 actually - # selects precise output mode -#client_encoding = sql_ascii # actually, defaults to database - # encoding - -# These settings are initialized by initdb, but they can be changed. 
-#lc_messages = 'C' # locale for system error message - # strings -#lc_monetary = 'C' # locale for monetary formatting -#lc_numeric = 'C' # locale for number formatting -#lc_time = 'C' # locale for time formatting - -# default configuration for text search -#default_text_search_config = 'pg_catalog.simple' - -# - Shared Library Preloading - - -#local_preload_libraries = '' -#session_preload_libraries = '' -#shared_preload_libraries = '' # (change requires restart) -#jit_provider = 'llvmjit' # JIT library to use - -# - Other Defaults - - -#dynamic_library_path = '$libdir' -#extension_destdir = '' # prepend path when loading extensions - # and shared objects (added by Debian) -#gin_fuzzy_search_limit = 0 - - -#------------------------------------------------------------------------------ -# LOCK MANAGEMENT -#------------------------------------------------------------------------------ - -#deadlock_timeout = 1s -#max_locks_per_transaction = 64 # min 10 - # (change requires restart) -#max_pred_locks_per_transaction = 64 # min 10 - # (change requires restart) -#max_pred_locks_per_relation = -2 # negative values mean - # (max_pred_locks_per_transaction - # / -max_pred_locks_per_relation) - 1 -#max_pred_locks_per_page = 2 # min 0 - - -#------------------------------------------------------------------------------ -# VERSION AND PLATFORM COMPATIBILITY -#------------------------------------------------------------------------------ - -# - Previous PostgreSQL Versions - - -#array_nulls = on -#backslash_quote = safe_encoding # on, off, or safe_encoding -#escape_string_warning = on -#lo_compat_privileges = off -#quote_all_identifiers = off -#standard_conforming_strings = on -#synchronize_seqscans = on - -# - Other Platforms and Clients - - -#transform_null_equals = off - - -#------------------------------------------------------------------------------ -# ERROR HANDLING -#------------------------------------------------------------------------------ - -#exit_on_error = off # terminate session on any error? -#restart_after_crash = on # reinitialize after backend crash? -#data_sync_retry = off # retry or panic on failure to fsync - # data? - # (change requires restart) -#recovery_init_sync_method = fsync # fsync, syncfs (Linux 5.8+) - - -#------------------------------------------------------------------------------ -# CONFIG FILE INCLUDES -#------------------------------------------------------------------------------ - -# These options allow settings to be loaded from files other than the -# default postgresql.conf. Note that these are directives, not variable -# assignments, so they can usefully be given more than once. - -#include_dir = '...' # include files ending in '.conf' from - # a directory, e.g., 'conf.d' -#include_if_exists = '...' # include file only if it exists -#include = '...' 
# include file - - -#------------------------------------------------------------------------------ -# CUSTOMIZED OPTIONS -#------------------------------------------------------------------------------ - -# Add settings for extensions here \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index 432d5b1dc..000000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,214 +0,0 @@ -name: starlake -version: '3.8' - -services: - starlake-db: - image: ghcr.io/hydradatabase/hydra:latest - restart: on-failure - container_name: starlake-db - ports: - - ${SL_DB_PORT:-5432}:5432 - environment: - POSTGRES_USER: ${SL_POSTGRES_USER:-dbuser} - POSTGRES_PASSWORD: ${SL_POSTGRES_PASSWORD:-dbuser123} - POSTGRES_DB: ${SL_POSTGRES_DB:-starlake} - AIRFLOW_DB: ${AIRFLOW_DB:-airflow} - DAGSTER_DB: ${DAGSTER_DB:-dagster} - command: postgres -c 'config_file=/etc/postgresql/postgresql.conf' - volumes: - - pgdata:/var/lib/postgresql/data - - ./conf/hydra/postgresql.conf:/etc/postgresql/postgresql.conf - - ./scripts/airflow/init-database.sh:/docker-entrypoint-initdb.d/init-airflow-database.sh - - ./scripts/dagster/init-database.sh:/docker-entrypoint-initdb.d/init-dagster-database.sh - - starlake-nas: - image: starlakeai/starlake-nas:latest - build: - context: . # Assuming Dockerfile_nas is in the current directory - dockerfile: Dockerfile_nas - container_name: starlake-nas - restart: on-failure - privileged: true # Required to access /proc/fs/nfsd - volumes: - - projects_data:/projects - - starlake-init-airflow-db: - image: starlakeai/starlake-airflow:latest - restart: on-failure - build: - context: . # Assuming Dockerfile_airflow is in the current directory - dockerfile: Dockerfile_airflow - container_name: starlake-init-airflow-db - depends_on: - - starlake-db - environment: - AIRFLOW__CORE__EXECUTOR: LocalExecutor - AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://${SL_POSTGRES_USER:-dbuser}:${SL_POSTGRES_PASSWORD:-dbuser123}@starlake-db:5432/${AIRFLOW_DB:-airflow} - AIRFLOW__CORE__LOAD_EXAMPLES: 'false' - INSTALL_MYSQL_CLIENT: 'false' - INSTALL_MSSQL_CLIENT: 'false' - entrypoint: > - /bin/bash -c " - sleep 10 && - airflow db upgrade && - airflow users create --username ${AIRFLOW_USERNAME:-admin} --firstname ${AIRFLOW_FIRSTNAME:-Admin} --lastname ${AIRFLOW_LASTNAME:-User} --role Admin --email ${AIRFLOW_EMAIL:-admin@example.com} --password ${AIRFLOW_PASSWORD:-admin}" - - starlake-airflow: - image: starlakeai/starlake-airflow:latest - build: - context: . 
# Assuming Dockerfile_airflow is in the current directory - dockerfile: Dockerfile_airflow - container_name: starlake-airflow - restart: on-failure - depends_on: - - starlake-db - - starlake-nas - - starlake-init-airflow-db - environment: - AIRFLOW__CORE__EXECUTOR: SequentialExecutor # SequentialExecutor is required to load files sequentially; switching to LocalExecutor first requires fixing the concurrent creation of audit tables when several tables are loaded in parallel for the first time - AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://${SL_POSTGRES_USER:-dbuser}:${SL_POSTGRES_PASSWORD:-dbuser123}@starlake-db:5432/${AIRFLOW_DB:-airflow} - AIRFLOW__CORE__LOAD_EXAMPLES: 'false' - INSTALL_MYSQL_CLIENT: 'false' - INSTALL_MSSQL_CLIENT: 'false' - SL_HOME: /app/starlake - AIRFLOW__WEBSERVER__BASE_URL: http://starlake-airflow:8080/airflow - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 30 - entrypoint: > - /bin/bash -c " - sleep 10 && - pip install --no-cache-dir starlake-orchestration[airflow] --upgrade --force-reinstall && - sudo mkdir -p /mnt/filestore/projects && - sudo mount -v -o nolock starlake-nas:/projects /mnt/filestore/projects && - sudo mount -v -o nolock starlake-nas:/projects/dags /opt/airflow/dags && - airflow scheduler & - exec airflow webserver" - volumes: - - /var/run/docker.sock:/var/run/docker.sock # Mount Docker socket to run Docker commands from the container - - airflow_logs:/opt/airflow/logs - privileged: true # Required for mounting NFS - - starlake-dagster: - image: starlakeai/starlake-dagster:latest - build: - context: . # Assuming Dockerfile_dagster is in the current directory - dockerfile: Dockerfile_dagster - container_name: starlake-dagster - restart: on-failure - depends_on: - - starlake-db - - starlake-nas - environment: - DAGSTER_PG_USERNAME: ${SL_POSTGRES_USER:-dbuser} - DAGSTER_PG_PASSWORD: ${SL_POSTGRES_PASSWORD:-dbuser123} - DAGSTER_PG_HOST: starlake-db - DAGSTER_PG_DB: ${DAGSTER_DB:-dagster} - SL_HOME: /app/starlake - entrypoint: > - /bin/bash -c " - sleep 10 && - pip install --no-cache-dir starlake-orchestration[dagster] --upgrade --force-reinstall && - mkdir -p /mnt/filestore/projects && - mount -v -o nolock starlake-nas:/projects /mnt/filestore/projects && - mount -v -o nolock starlake-nas:/projects/dags /opt/dagster/app/dags && - python3 dagster_code_locations.py && - service cron --full-restart & - exec dagster-webserver -h 0.0.0.0 -p 3000 --path-prefix /dagster" - volumes: - - /var/run/docker.sock:/var/run/docker.sock # Mount Docker socket to run Docker commands from the container - - dagster_logs:/opt/dagster/app/logs - - dagster_storage:/opt/dagster/home/storage - ports: - - ${SL_DAGSTER_PORT:-3000}:3000 # Dagster Webserver port - privileged: true # Required for mounting NFS - - starlake-api: - image: starlakeai/starlake-1.3-api:${SL_API_VERSION:-0.1} - pull_policy: always - container_name: starlake-api - restart: on-failure - depends_on: - - starlake-db - - starlake-nas - - starlake-airflow - privileged: true # Required for mount permissions - environment: - - SL_HOME=/app/starlake - - SL_FS=file:// - - SL_ENV= - - SL_ROOT= - - SL_USE_LOCAL_FILE_SYSTEM=false - - SL_API_GIT_COMMAND_ROOT=/git - - SL_API_SECURE=false - - SL_API_SESSION_AS_HEADER=true - - SL_API_HTTP_FRONT_URL=${SL_API_HTTP_FRONT_URL:-http://starlake-ui} - - SL_API_HTTP_INTERFACE=0.0.0.0 - - SL_API_HTTP_PORT=9000 - - SL_LOG_LEVEL=${SL_LOG_LEVEL:-info} - - SL_API_JDBC_DRIVER=org.postgresql.Driver - - SL_API_JDBC_USER=${SL_POSTGRES_USER:-dbuser} - -
SL_API_JDBC_PASSWORD=${SL_POSTGRES_PASSWORD:-dbuser123} - - SL_API_JDBC_URL=jdbc:postgresql://starlake-db:5432/${SL_POSTGRES_DB:-starlake}?user=${SL_POSTGRES_USER:-dbuser}&password=${SL_POSTGRES_PASSWORD:-dbuser123} # JDBC URL to connect to the database - - SL_API_DOMAIN=${SL_API_DOMAIN:-localhost} - - SL_API_PROJECT_ROOT=/mnt/filestore/projects - - SL_API_ORCHESTRATOR_URL=http://localhost:${SL_UI_PORT:-80}/airflow/ - #- SL_API_ORCHESTRATOR_URL=http://localhost:${SL_UI_PORT:-80}/dagster/deployment/locations - - ENVIRONMENT=local # local environment - - FILESTORE_SHARE_NAME=projects # Environment variable to specify the share name of the NAS - - FILESTORE_IP_ADDRESS=starlake-nas # Environment variable to specify the IP address of the NAS - - FILESTORE_MNT_DIR=/mnt/filestore/projects # Environment variable to specify the mount path inside starlake-api container - - POSTGRES_HOST=starlake-db - - POSTGRES_DB=${SL_POSTGRES_DB:-starlake} - - POSTGRES_USER=${SL_POSTGRES_USER:-dbuser} - - POSTGRES_PASSWORD=${SL_POSTGRES_PASSWORD:-dbuser123} - - SL_UI_DEMO=${SL_UI_DEMO:-false} - - SL_API_MAIL_HOST=${SL_API_MAIL_HOST:-smtp.sendgrid.net} - - SL_API_MAIL_PORT=${SL_API_MAIL_PORT:-587} - - SL_API_MAIL_USER=${SL_API_MAIL_USER:-apikey} - - SL_API_MAIL_PASSWORD=${SL_API_MAIL_PASSWORD} - - SL_API_MAIL_FROM=${SL_API_MAIL_FROM:-contact@starlake.ai} - - starlake-ui: - image: starlakeai/starlake-1.3-ui:${SL_UI_VERSION:-0.1} - pull_policy: always - container_name: starlake-ui - restart: on-failure - depends_on: - - starlake-api - privileged: true # Required for mount permissions - ports: - - ${SL_UI_PORT:-80}:80 # starlake-ui default port - environment: - - FILESTORE_SHARE_NAME=projects # Environment variable to specify the share name of the NAS - - FILESTORE_IP_ADDRESS=starlake-nas # Environment variable to specify the IP address of the NAS - - FILESTORE_MNT_DIR=/mnt/filestore/projects # Environment variable to specify the mount path inside the starlake-ui container - volumes: - - .env.docker:/app/.env:ro - - starlake-projects: - image: starlakeai/starlake-projects:latest - build: - context: .
# Assuming Dockerfile_projects is in the current directory - dockerfile: Dockerfile_projects - container_name: starlake-projects - restart: on-failure - depends_on: - - starlake-ui - privileged: true # Required for mount permissions - environment: - POSTGRES_USER: ${SL_POSTGRES_USER:-dbuser} - POSTGRES_PASSWORD: ${SL_POSTGRES_PASSWORD:-dbuser123} - POSTGRES_DB: ${SL_POSTGRES_DB:-starlake} - POSTGRES_HOST: starlake-db - FILESTORE_SHARE_NAME: projects # Environment variable to specify the share name of the NAS - FILESTORE_IP_ADDRESS: starlake-nas # Environment variable to specify the IP address of the NAS - FILESTORE_MNT_DIR: /mnt/filestore/projects # Environment variable to specify the mount path inside the starlake-projects container - volumes: - - ./projects:/projects - -volumes: - projects_data: - pgdata: - airflow_logs: - dagster_logs: - dagster_storage: diff --git a/docker/projects/starbake.zip b/docker/projects/starbake.zip deleted file mode 100644 index 944e1a7c0..000000000 Binary files a/docker/projects/starbake.zip and /dev/null differ diff --git a/docker/scripts/airflow/init-database.sh b/docker/scripts/airflow/init-database.sh deleted file mode 100755 index eda3bd616..000000000 --- a/docker/scripts/airflow/init-database.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "postgres" <<-EOSQL - CREATE DATABASE ${AIRFLOW_DB:-airflow}; - GRANT ALL PRIVILEGES ON DATABASE ${AIRFLOW_DB:-airflow} TO "$POSTGRES_USER"; -EOSQL \ No newline at end of file diff --git a/docker/scripts/dagster/code_locations.py b/docker/scripts/dagster/code_locations.py deleted file mode 100644 index b69ab59d4..000000000 --- a/docker/scripts/dagster/code_locations.py +++ /dev/null @@ -1,55 +0,0 @@ -from dagster import Definitions -import importlib.util -import glob -import os -import sys - -# Define the path and any files to exclude -path = "dags" -excluded_files = ["definitions.py", "__init__.py"] - -# Recursively find all `.py` files and exclude specified files -python_files = [ - file for file in glob.glob(f"{path}/**/*.py", recursive=True) - if os.path.basename(file) not in excluded_files -] - -def load_module_with_defs(file_path): - directory_path = os.path.dirname(file_path) - package_name = directory_path.replace("/", ".") - module_name = os.path.splitext(os.path.basename(file_path))[0] - spec = importlib.util.spec_from_file_location(f"{package_name}.{module_name}", file_path) - module = importlib.util.module_from_spec(spec) - sys.modules[f"{package_name}.{module_name}"] = module - - try: - spec.loader.exec_module(module) - except Exception as e: - print(f"Error loading module {module_name} from {file_path}: {e}") - return None - - # Ensure `defs` exists in the module - defs: Definitions = getattr(module, "defs", None) - if defs is not None: - return f"{package_name}.{module_name}" - else: - return None - -modules = [] -for file in python_files: - module = load_module_with_defs(file) - if module is not None: - modules.append(module) - -print(modules) - -with open("pyproject.toml", "w") as f: - f.write("[tool.dagster]\n") - if modules: - f.write("modules = [" + ",".join([f'{{ type = "module", name = "{module}" }}' for module in modules]) + "]\n") - -with open("workspace.yaml", "w") as f: - f.write("load_from:\n") - for module in modules: - f.write(f" - python_module: {module}\n") diff --git a/docker/scripts/dagster/init-database.sh b/docker/scripts/dagster/init-database.sh deleted file mode
100755 index 925688dbf..000000000 --- a/docker/scripts/dagster/init-database.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "postgres" <<-EOSQL - CREATE DATABASE ${DAGSTER_DB:-dagster}; - GRANT ALL PRIVILEGES ON DATABASE ${DAGSTER_DB:-dagster} TO "$POSTGRES_USER"; -EOSQL \ No newline at end of file diff --git a/docker/scripts/docker/starlake.sh b/docker/scripts/docker/starlake.sh deleted file mode 100644 index d1324fd73..000000000 --- a/docker/scripts/docker/starlake.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Exit immediately if a command exits with a non-zero status -set -eo pipefail - -# Check if at least one argument is passed -if [ "$#" -eq 0 ]; then - echo "No arguments provided. Usage: starlake <command> [-o|--options env1=val1,env2=val2] [args...]" - exit 1 -fi - -options="" -command="$1" -shift - -arguments=() -while [ $# -gt 0 ]; do - case "$1" in - -o | --options) options="$2"; shift; shift;; - *) arguments+=("$1"); shift;; - esac -done - -envs=$(echo $options | tr "," "\n") - -docker_envs=() -for env in $envs; do - docker_envs+=("-e $env") -done - -docker exec ${docker_envs[*]} starlake-api /app/starlake/starlake.sh $command ${arguments[*]} diff --git a/docker/scripts/nfs/start.sh b/docker/scripts/nfs/start.sh deleted file mode 100644 index 5deada8c0..000000000 --- a/docker/scripts/nfs/start.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh -set -e - -# Start rpcbind -rpcbind - -# Start rpc.statd -rpc.statd - -# Apply the export configuration -exportfs -r - -# Start the NFS daemon -rpc.nfsd - -# Keep the container running by leaving rpc.mountd in the foreground -exec /usr/sbin/rpc.mountd -F diff --git a/docker/scripts/projects/entrypoint.sh b/docker/scripts/projects/entrypoint.sh deleted file mode 100755 index bbfde1b7f..000000000 --- a/docker/scripts/projects/entrypoint.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -set -e - -mkdir -p $FILESTORE_MNT_DIR -mount -v -o nolock $FILESTORE_IP_ADDRESS:/$FILESTORE_SHARE_NAME $FILESTORE_MNT_DIR -mount - -export PGPASSWORD="${POSTGRES_PASSWORD}" -member_id=$(psql -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -w -t -A -c "SELECT id FROM public.slk_member WHERE email = 'admin@localhost.local'") -echo "Member ID: $member_id" - -if [[ $member_id =~ ^[0-9]+$ ]]; then - # List all zip files - zips=$(find /projects -type f -regex '.*\.zip') - - for zip in $zips; do - echo "Unzipping $zip" - project_uuid=$(cat /proc/sys/kernel/random/uuid) - mkdir -p /projects/$project_uuid - unzip -o -d /projects/$project_uuid $zip - ids=$(find /projects/$project_uuid -mindepth 1 -maxdepth 1 -type d) - size=$(echo "$ids" | wc -l) - if [ "$size" -eq 1 ]; then - for id in $ids; do - project_id=$(basename "$id") - if [[ $project_id =~ ^[0-9]+$ ]]; then - project_name=$(basename "$zip" | cut -d. -f1) - if [ !
-d $FILESTORE_MNT_DIR/$member_id/$project_id ]; then - echo "Project $project_name will be created with id $project_id and UUID $project_uuid" - psql -v ON_ERROR_STOP=1 -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -w <<-EOSQL -INSERT INTO public.slk_project (id, code, "name", description, repository, active, deleted, created, updated, master, owner, owner_email, access, pat, airflow_role) -OVERRIDING SYSTEM VALUE -VALUES($project_id, '$project_uuid', '$project_name', '$project_name', '', true, false, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, -1, $member_id, 'admin@localhost.local', 'ADMIN', '', 'DEV:OPS,STAGING:OPS,PROD:OPS'); -INSERT INTO public.slk_project_props (id, project, properties, created, updated) -OVERRIDING SYSTEM VALUE -VALUES($project_id, $project_id, '[{"envName":"__sl_ignore__"}]', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP); -EOSQL - mkdir -p $FILESTORE_MNT_DIR/$member_id/$project_id - cp -r $id/* $FILESTORE_MNT_DIR/$member_id/$project_id - rm -rf /projects/$project_uuid - else - echo "Project $project_id is already present in $FILESTORE_MNT_DIR/$member_id/" - rm -rf /projects/$project_uuid - fi - else - echo "Project $project_id should consist of digits only" - rm -rf /projects/$project_uuid - fi - done - else - echo "Unzipped project should be placed in a single directory" - rm -rf /projects/$project_uuid - fi - done -else - echo "Admin member 'admin@localhost.local' not found; skipping project initialization" - exit 1 -fi