diff --git a/.dockerignore b/.dockerignore index 72cc4090..b853319e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -8,3 +8,6 @@ docassemble-os/** target/** bin/** *.hprof +*.local +*.log +*.zip diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml new file mode 100644 index 00000000..22daca90 --- /dev/null +++ b/.github/workflows/fly.yml @@ -0,0 +1,21 @@ +# Example fly.yml +# You can use this as a template for the Fly.io continuous deployment GitHub workflow config file. +# Copy into .github/workflows/fly.yml for GitHub to see this file. +# For more details, check out: +# https://fly.io/docs/app-guides/continuous-deployment-with-github-actions/ +name: Fly Deploy +on: + push: + branches: + - main +jobs: + deploy: + name: Deploy app + runs-on: ubuntu-latest + concurrency: deploy-group # optional: ensure only one action runs at a time + steps: + - uses: actions/checkout@v4 + - uses: superfly/flyctl-actions/setup-flyctl@master + - run: flyctl deploy --remote-only + env: + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index 6baa59aa..3f307a9c 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -7,9 +7,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up JDK 17 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: '17' distribution: 'adopt' diff --git a/.gitignore b/.gitignore index c65d6990..0634d555 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ hs_err_pid* # Contains key info client_sign.properties +client_sign.properties.local *.pfx .env* .da_env diff --git a/Dockerfile b/Dockerfile index b5f96891..03777a08 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY pom.xml LICENSE client_sign.propertie[s] quartz.properties Suffolk.pf[x] ac RUN mvn -f /usr/src/app/pom.xml -DskipTests clean dependency:resolve dependency:go-offline package 
&& mvn -f /usr/src/app/pom.xml test COPY src /usr/src/app/src RUN mvn -f /usr/src/app/pom.xml -DskipTests package dependency:build-classpath -Dmdep.outputFile=cp.txt -PnoDockerTests -COPY docker_run_script.sh docker_integration_test.sh /usr/src/app/ +COPY docker_run_script.sh docker_integration_test.sh fly_startup_script.sh /usr/src/app/ EXPOSE 9000 diff --git a/docker_run_script.sh b/docker_run_script.sh index 3d903c53..8116c748 100755 --- a/docker_run_script.sh +++ b/docker_run_script.sh @@ -1,7 +1,14 @@ #! /bin/sh set -ex + cd /usr/src/app + +# Customize startup if running on Fly.io. +if [ -n "$FLY_MACHINE_ID" ]; then + ./fly_startup_script.sh +fi + # Add this before the `-cp` line if needed to get exact SOAP envelopes being sent # -javaagent:extract-tls-secrets-4.0.0.jar=/tmp/secrets/secrets.log \ # Add this when we figure out ThreadPools and Unmarshalling (see #111) diff --git a/docs/adr/010-use-a-platform-and-database-as-services-to-improve-operations.md b/docs/adr/010-use-a-platform-and-database-as-services-to-improve-operations.md new file mode 100644 index 00000000..399200bd --- /dev/null +++ b/docs/adr/010-use-a-platform-and-database-as-services-to-improve-operations.md @@ -0,0 +1,46 @@ +# Use a Platform and Database as Services to Improve Operations + +## Context + +The existing system is deployed to Lightsail. Both the Java application and Postgres database are Dockerized and run within the Lightsail VPS. Many tasks require connecting to the running server and executing commands in the shell. 
+ +**User Story:** +As a Suffolk LIT Lab operator, I want to improve the ease of operations in these key areas: +* Data: recover from a possible data loss by restoring a previous version of the database or rolling back the database +* App Stability: restart the app in case it crashes or becomes unavailable +* Monitoring / Alerting: monitor the app and be alerted when there are issues with the API being available + +Ideally, this will lower the barrier to entry for others to run their own EfileProxyServer instances. + +## Decision + +Use the combination of a Platform-as-a-Service (PaaS) with a Database-as-a-Service (DBaaS), specifically Fly.io and Supabase. Both offerings have dashboards with good usability. You can view logs and key metrics (memory usage, CPU, error counts, query performance) through the web interface. You can also perform common operational tasks such as restarting a machine and restoring the database. Some key tasks, such as the daily database backup and security updates to the operating system, are handled automatically by the platform, reducing the manual operational load. + +## Alternatives Considered + +1. Scripting out common operations and using cron jobs with the existing Lightsail setup + * Pros + * Builds on existing familiarity with the system + * Smaller delta between the new process and the existing process + * Cons + * Would have to still handle many operational tasks manually + * More development work per operational task +2. Postgres database replication + * Pros + * Adds resiliency with minimal modification + * Cons + * Only addresses the Data bullet point + * Would still require a decision on the replica database +3. Using a different PaaS such as Heroku + * Pros + * Ease-of-use and more features out-of-the-box + * Cons + * Expensive if the application requires more than 2GB of memory + * Less optimized for Docker applications than Fly.io +4. 
Using a different Database-as-a-Service (DBaaS) such as Heroku Postgres, AWS Aurora, or Neon + * Pros + * Similar operational characteristics + * Cons + * Supabase has the best combination of ease of use, pricing, and track record. All the other alternatives are beaten in one or more of those aspects in comparison + * Notes + * This is an easy decision to change our minds on in the future, as any Postgres-compatible database can be swapped in by changing the POSTGRES_* variables in the .env and the data is quite portable with a variety of pg_dump/export/replication options supported by all the DBaaS options. \ No newline at end of file diff --git a/docs/adr/011-use-papertrail-to-increase-access-to-logs.md b/docs/adr/011-use-papertrail-to-increase-access-to-logs.md new file mode 100644 index 00000000..24964c59 --- /dev/null +++ b/docs/adr/011-use-papertrail-to-increase-access-to-logs.md @@ -0,0 +1,33 @@ +# Use Papertrail to Increase Access to Logs + +## Context + +Logs are viewable within Fly.io. Access to those logs is constrained to authorized users. However, this permission is not fine-grained and authorized users also have the power to perform administrative actions. + +**User Story:** +As a Suffolk LIT Lab operator, I want to share access to logs with stakeholders who can benefit from being able to self-diagnose any issues using the EFSP API. + +## Decision + +Integrate Papertrail through the existing slf4j/logback stack as a log appender option, using TLS-encrypted syslog over TCP. If the Papertrail environment variables are set (see env.example for details), the logs will also be forwarded to Papertrail. Papertrail will serve as a central point of access for viewing the logs. The Papertrail UI makes it easy to filter and search through the logs. Being able to control log access independent of access to more admin operations within Fly.io makes for better security. 
Separating out the logging concern allows you to ship logs from other environments besides Fly.io as well. For example, the logs from Lightsail instances can be fed into the same dashboard. The current staging and production Fly.io instances will all forward to a single Papertrail dashboard and identified by the system and configurable application name. + +## Alternatives Considered + +1. Using the Fly.io log shipper + * Pros + * Closer to the platform + * Cons + * Requires having extra machines running the log shipping at all times + * For one-off runs, the usage is clunkier because output has to be redirected to /dev/console for it to be picked up +2. Observability platforms such as Splunk or Datadog + * Pros + * More complete offerings/advanced features + * Cons + * Much more expensive +3. Using a token-based Papertrail appender + * Pros + * Better identification of the senders + * Cons + * Not the recommended option by Papertrail for Java apps + * Less performant + * Likely requires some Java code to implement the token appender \ No newline at end of file diff --git a/docs/operations_guide/db_restore_confirm.png b/docs/operations_guide/db_restore_confirm.png new file mode 100644 index 00000000..451816a6 Binary files /dev/null and b/docs/operations_guide/db_restore_confirm.png differ diff --git a/docs/operations_guide/fly_dashboard_side_menu.png b/docs/operations_guide/fly_dashboard_side_menu.png new file mode 100644 index 00000000..9a34cc57 Binary files /dev/null and b/docs/operations_guide/fly_dashboard_side_menu.png differ diff --git a/docs/operations_guide/fly_grafana_metrics.png b/docs/operations_guide/fly_grafana_metrics.png new file mode 100644 index 00000000..33f93ef8 Binary files /dev/null and b/docs/operations_guide/fly_grafana_metrics.png differ diff --git a/docs/operations_guide/fly_live_logs.png b/docs/operations_guide/fly_live_logs.png new file mode 100644 index 00000000..e2dc7c49 Binary files /dev/null and 
b/docs/operations_guide/fly_live_logs.png differ diff --git a/docs/operations_guide/github_actions_secrets_and_varibles.png b/docs/operations_guide/github_actions_secrets_and_varibles.png new file mode 100644 index 00000000..a88dfbf8 Binary files /dev/null and b/docs/operations_guide/github_actions_secrets_and_varibles.png differ diff --git a/docs/operations_guide/github_side_menu_security_secrets_and_variables.png b/docs/operations_guide/github_side_menu_security_secrets_and_variables.png new file mode 100644 index 00000000..be416de0 Binary files /dev/null and b/docs/operations_guide/github_side_menu_security_secrets_and_variables.png differ diff --git a/docs/operations_guide/index.md b/docs/operations_guide/index.md new file mode 100644 index 00000000..60c4fa81 --- /dev/null +++ b/docs/operations_guide/index.md @@ -0,0 +1,334 @@ +# EFSP Operations Manual + +## Overview + +EFSP (Electronic Filing Service Provider) is a proxy between clients such as DocAssemble and an [ECF 4.0 (LegalXML's Electronic Court Filing 4.0 standard)](https://docs.oasis-open.org/legalxml-courtfiling/specs/ecf/v4.0/ecf-v4.0-spec/ecf-v4.0-spec.html) EFM (Electronic Filing Manager), such as [Tyler Technology's Odyssey](https://www.tylertech.com/products/enterprise-justice/enterprise-case-manager). The EFSP is commonly used to create court filings. This application (EfileProxyServer) fulfills the EFSP role and when EFSP is mentioned in this document, you can assume it is referring to this application. + +## Design Approach + +Try to keep code changes minimal. Keep backwards-compatibility and allow new features to be controlled by additional environment variables. When possible, allow the features to be used beyond just the platform/database stack (Fly.io/Supabase) we're using. + +## Staging/Production + +There are two environments for EFSP. The staging environment is safe to test against and is configured to communicate w/ the staging Tyler EFM. 
Care should be taken as emails & SMS messages are still sent. This should be fine as long as you're testing with your own contact information. The production environment is shared between many DocAssemble servers. In the future, this may be partitioned into jurisdictions for scalability and isolation. + +## Comparison to Previous Setup on AWS Lightsail + +The previous staging & production environments ran in AWS Lightsail. EFSP ran as a dockerized application on a 4GB Ubuntu instance. Docker Compose was used to manage the setup. The Java application and Postgres database server were managed through Docker Compose and both ran within the same instance. + +Most operational tasks were performed by SSH'ing into the machine and executing commands in the CLI (Command-Line Interface). The source code was installed and updated on the machine directly using Git. Logs were stored directly in the local filesystem, with API access through the EFSP application to provide visibility to clients who did not have SSH access. + +Data is stored within two separate databases: user_transactions and tyler_efm_codes. The tyler_efm_codes database can be rebuilt from scratch. The user_transactions database contains critical state and needs to be preserved. + +The Quartz Scheduler is embedded to run Tyler Code updates on a daily schedule at around 2:15 am (server time, default ET for BOS). The scheduler is not clustered and in-memory. However, it is relatively safe for multiple updates to be running in separate processes, as the database will sort out the locking/blocking and the tyler_efm_codes database will be updated correctly regardless. + +SSL/TLS is handled by Let's Encrypt. However, the certificate renewal is currently a manual process that involves a bit of downtime. + +## Fly.io Stack + +The updated stack uses Fly.io, Supabase, Papertrail, and Cloudflare. + +* Fly.io runs the dockerized EFSP application. +* Supabase is the Postgres database. 
+* Papertrail serves as the cloud-hosted log management system, making it easy to aggregate, manage, and share logs. +* Cloudflare provides SSL/TLS encryption and DNS. (TODO: are we planning to use the DDoS protection?) + +The overall goal is to make it easier to run and operate EFSP as a service, with the tradeoff being a small monthly cost. Web interfaces are favored over the CLI (Command-Line Interface), although there is still lots of CLI and all existing commands are supported. Ideally, the learning curve will be shortened and day-to-day operational tasks can be done without needing to use the CLI. + +The current setup keeps a single machine running at all times within the Boston region. Because Fly.io, Supabase, Papertrail, and Cloudflare are all cloud services, scaling up and out is pretty easy. The trade-off is one of increased cost as more scale is requested. Fly.io also supports auto-scaling. Given the current volume, scaling was not explored and we opted for the simplest conceptual model of a single, continuously running instance of the application. + +The EFSP application itself is mostly clusterable, the exceptions being: +* the way the Tyler EFM Code Updates are scheduled right now +* CourtPolicy data, which is currently cached in memory (see l. 21 in /src/main/java/edu/suffolk/litlab/efspserver/ecf4/PolicyCacher.java [https://github.com/SuffolkLITLab/EfileProxyServer/blob/a67c75a6690e9bd54fd4050d0c274feccf13b382/src/main/java/edu/suffolk/litlab/efspserver/ecf4/PolicyCacher.java#L12]). To honor Tyler's restriction to only hit the endpoint once per day, this cache will need to be updated so that it can be shared across all instances. Caching it in the database would be one option to achieve that. + +For a single, always running instance, nothing needs to be done. + +In a scale-out scenario, the simplest approach of disabling the updates is supported. 
All EFSP instances within the same environment use the same shared database, so the single updater will result in every instance seeing the latest EFM codes. Alternatively, the Quartz scheduler can be swapped over to store scheduling data in the database and configured for clustered operation. + +## Secrets/Configuration + +All the supported variables are enumerated in the env.example file. Most of these are the same between the Lightsail and Fly.io setups. There are new variables for Papertrail and to configure the Quartz Scheduler. Details are in the env.example file. + +You should have separate .env files for each environment. For example, staging values would be stored in a .env.staging and production values in .env.production. + +## Secured Files/Certs + +For files that you want to keep out of the Docker image, you should set up a private Fly.io Tigris bucket. This is Fly.io's equivalent of AWS's S3 service. When a Fly.io machine starts up, the fly_startup_script.sh will run. The script will pull down the keystore cert from Tigris if the file is not already in the system. While Fly.io will automatically inject the AWS config values into your application the first time you setup Tigris, you should also store the AWS config values in your .env file in case you need to reference them again or re-create a Fly.io app in the future. + +For more details, check out: +https://fly.io/docs/reference/tigris/ + +## First Time Fly.io Setup + +This section covers the steps for spinning up a brand-new Fly.io app. + +### Pre-requisites + +You should already have the following: +* An .env file with the secrets and environment configuration appropriate for the new app. The docs will refer to this as .env.fly but you can name it whatever makes sense and substitute the name when you see it in the example commands. 
+* A Fly.io account +* [flyctl](https://fly.io/docs/flyctl/), the Fly.io CLI tools, installed + +#### Create the new app + +You will need to create the app before deploying. Since the fly.toml already exists, you will want to create the app without generating a new one. To create the app, you will need an app name and Fly.io organization. The example commands will reference efsp-staging and the suffolk-lit-lab organization, but you should substitute your own values there. You can use the same app name, but the Fly.io organization will likely be different. + +Create the app by running: +```bash +fly app create efsp-staging --org suffolk-lit-lab +``` + +You should see a message saying: +``` +New app created: efsp-staging +``` + +Next, configure the app with your .env values. You should have already copied the env.example file as .env.fly and edited the values in it to match your environment. +```bash +cat .env.fly | fly secrets import --app efsp-staging --stage +``` + +The stage option is used so that the values are set but the application won't redeploy until the next step. + +Now deploy the application by running: +```bash +fly deploy --config fly.toml --app efsp-staging +``` +You can omit the --app parameter if you're using the app name defined within the fly.toml. You can also omit the --config parameter if you're using the default fly.toml file (as opposed to fly.production.toml, for example). The shortened version in that case would be: +```bash +fly deploy +``` + +This step will take some time as Fly.io verifies the configuration and builds the cloud virtual machines using a cloud-based Docker system. After the build is complete, Fly.io will push the image to its registry, whose name should start with registry.fly.io. + +If all goes well, your app will be running. Your app should be reached by https on a Fly.io domain name. This value will look something like https://{your-app-name}.fly.dev/ and should be in the output. 
+ +Finally, you can set the scale to limit the number of machines created to 1. This step is optional. By default, Fly.io will create 2 machines for high availability. Setting it to 1 makes for a simpler and slightly cheaper setup, but with tradeoffs in performance and availability. To see more in-depth discussion on the tradeoffs, go to the "Scaling Up Fly.io" section. +```bash +fly scale --app efsp-staging count app=1 +``` + + +## Deploying Code Updates + +### Manual Deployment + +The process for manual deployment is very similar to the non-Fly.io steps. + +First, update the code on your local to the latest: +```bash +git fetch --all +git pull origin main +``` + +Then run: +```bash +fly deploy --config fly.toml +``` +If you are deploying to a different environment, change fly.toml to match the config for the target environment. If you don't pass a `--config` option, the default fly.toml file will be used. For EFSP, this is the staging environment. The production config file is fly.production.toml. + +To deploy to production: +```bash +fly deploy -c fly.production.toml +``` + +### Automatic Continuous Deployment + +If you have forked the EFSP repo, you can set up auto-deploy. Fly.io can be deployed to from a GitHub workflow. For more information on how you would do this, check out: +https://fly.io/docs/app-guides/continuous-deployment-with-github-actions/ + +You can use the .github/workflows/fly.yml by setting your own FLY_API_TOKEN as a repository secret. Alternatively, you can use it as the starting point for your customizations, or remove it entirely if you do not want GitHub to run the deployment workflow. + +One thing to keep in mind is that the manual deployment builds the Docker image from the contents of your local filesystem. This is important for .pfx certificate files, as they are git-ignored but not docker-ignored. When doing manual builds, the .pfx will be baked into the image. 
Continuous deployment builds the image from the repository in GitHub, which excludes the secured files. You will need to set up the .pfx certificate to be included from secure storage. See the [Secured Files/Certs section](#secured-filescerts) for more information. + +## Viewing Logs + +You can view the logs directly on Fly.io. The application logs are also shipped over to Papertrail, which will be covered in more detail. Papertrail is the recommended interface to view logs because it has search, filtering, and aggregation capabilities. However, checking on Fly.io is helpful if you are troubleshooting, as you will be able to see the platform logs in addition to the application logs. Plus, even if there is an error preventing the application from sending logs to Papertrail, the information will be stored within Fly.io's logs. + +### Viewing Logs with Fly.io + +To view the logs within Fly.io, click on "Live Logs" in the side menu. + +![Fly.io Dashboard Side Menu](./fly_dashboard_side_menu.png) + +You should now see a web view displaying the live logs. + +![Fly.io Live Logs](./fly_live_logs.png) + +For more information on viewing metrics and logs on Fly.io, check out: +https://fly.io/docs/metrics-and-logs/ + + +### Papertrail + +You can read this overview of the log viewer to familiarize yourself with Papertrail: +https://www.papertrail.com/help/event-viewer/ + +### Papertrail Configuration + +The Papertrail system documented here has already been configured. If you do not have access to this Papertrail account, you will need to set up your own account on the service. Please refer to the Papertrail documentation for more information: +https://www.papertrail.com/help/papertrail-documentation/ + +#### Groups + +Groups were set up for each environment (dev, staging, and prod). Dev is used for local machines and other testing. + +#### Log Destinations + +The EFSP app is configured in logback to send logs to Papertrail via the TLS Syslog method. 
This uses TCP(TLS) sent to a specific host and port, which is displayed in the log destination page after you set that up. + +![Papertrail Example Log Destination](./papertrail_log_destination.png) + +For more details, check out: +https://www.papertrail.com/help/log-destinations/ + +#### Dashboard + +After you log into Papertrail, you will see the dashboard page. The list of groups and systems are displayed here. +![Papertrail Dashboard](./papertrail_dashboard.png) + +### Viewing Logs + +Click on "Events" in the top menu to go to the log viewer. You will probably spend most of your time in the log viewer. The middle of the page displays the logs. At the bottom of the page, you can adjust the filtering and search criteria. + +![Papertrail Log Viewer](./papertrail_events.png) + +You can type in words to search for in the search box. For example, here the search term "Started" is used to search for only the log messages associated with the application being started: +![Papertrail Search](./papertrail_search.png) + +You can choose to only see the logs for a specific group (e.g. staging, prod) by adjusting the group selector at the left-hand side. +![Papertrail Group Selector](./papertrail_group_selector.png) + +The search box also supports more advanced syntax. For more details, check out: +https://www.papertrail.com/help/search-syntax/ + +### Alerts + +You can configure Papertrail to send alerts when it sees certain patterns in the logs. For more details, check out: +https://www.papertrail.com/help/alerts/ + +### Scaling Up Fly.io + +You can scale up the # of machines if you reach a point where there is a lot of load, want to improve global response times, or want to take advantage of more advanced deployment strategies. The only caveat is that the Tyler EFM Code Updater, which runs on a schedule by default, can get into a conflict if there are multiple instances doing the update at the same time. 
Since the Quartz Scheduler is running in-memory by default and the schedule is fixed in the code, it is safest to disable the automated updates on any supplemental machines. See env.example for more details on how to do that. + +The default Fly config for EFSP will keep just a single machine running at all times. This is important as the code update will only happen if an instance is active when the schedule hits (2:15 am on the machine's clock). + +You can set the # of instances to scale to with the following command: +```bash +fly scale count app={NUMBER_OF_INSTANCES} +``` +where NUMBER_OF_INSTANCES is 1 or higher. Setting it to 1 makes for the simplest setup. You can also leave it with the default 2 to take advantage of Fly.io's rolling deployment strategy. Auto-stop is enabled, so as long as there isn't traffic hitting the machine during the scheduled code update, you will not need to worry about concurrency issues even with multiple machine instances configured. + +For more details on deployment, check out: +https://fly.io/docs/apps/deploy/ + +Generally speaking, you will want to have pre-created machines that are stopped. This optimizes the cold start time, although it will still be relatively slow since EFSP is a Java application. Expect a pre-created machine's cold start to take about 5 seconds. Creating a new machine will be an order or two of magnitude longer. Pre-created machines that are stopped cost very little as you are only billed for the storage space. The cost is negligible compared to the cost of a running machine. + +Properly scaling EFSP using Fly.io is an advanced topic beyond the scope of this guide. You should have a good understanding of Fly.io's architecture before attempting this. 
Here are some good reference links to start from: +* https://fly.io/docs/apps/scale-machine/#scale-vm-memory-and-cpu-with-flyctl +* https://fly.io/docs/flyctl/scale/ +* https://fly.io/docs/reference/autoscaling/#main-content-start +* https://fly.io/docs/apps/autostart-stop/ + + +### Viewing Metrics for Your Fly.io Application + +Fly.io collects metrics such as memory usage, CPU utilization, etc. To view your metrics, go to the dashboard. Select "Metrics" from the side menu on the left. + +![Fly.io Dashboard Side Menu](./fly_dashboard_side_menu.png) + +You will now see Fly.io's managed Grafana page. This gathers together the metrics collected from your application and provides easy to understand visualizations. + +![Grafana Dashboard](./fly_grafana_metrics.png) + +To learn more about Fly.io metrics, check out: +https://fly.io/docs/metrics-and-logs/metrics/#dashboards + +To learn more about Grafana, check out: +* https://grafana.com/docs/grafana/latest/dashboards/ +* https://grafana.com/docs/grafana/latest/panels-visualizations/visualizations/ + +### Generating a New API Key + +Run the fly_create_api_key.sh script to generate a new API key. The generated key will grant API access to your EFSP instance. The API key is printed to the console output and can be copied from there. Best practice is to give each user of your EFSP instance their own API key. + +To create an API key for the app specified in your fly.toml, run: +```bash +./fly_create_api_key.sh +``` + +If you want to create a production key, run: +```bash +./fly_create_prod_api_key.sh +``` + +Both scripts are the same except for the Fly.io config file passed to the command. You can also create more copies for any of your environments, or alter the script to take in the app or config name. Two different script files are used to make it easier and more obvious for the user running the command. + +The script uses Fly.io commands to spin up an ephemeral machine that will run the Java code to generate the API. 
This method leverages Fly.io as the authorization mechanism and keeps the permission to generate new API keys to the set of authorized users. An ephemeral machine is used so that key generation does not impact the API performance. + +### SSH Console access + +Fly.io offers a Fly command that is akin to connecting to a Lightsail instance using SSH. This is handy if you need to troubleshoot the machine. Unlike SSH, you won't need to set up any keypairs. Instead, your Fly.io login will be all that is needed to authenticate you. To SSH to a Fly.io machine, use the following command: +```bash +fly ssh console +``` + +### Setting Secrets/Configuration in Fly.io + +You can view the names of the variables that are set with the following command: +```bash +fly secrets list +``` + +Note that this will not show the values, but it does display both the digest and creation date for the value. + +The recommended way to set secrets in Fly.io is to edit them in a .env file. The name .env.fly is assumed for the remainder of these instructions. Note that .env files are excluded from Git. This is important as you should never commit secrets into the repository. + +After you edit your .env.fly file and set the appropriate values, you can use the following command to update the Fly.io secrets: +```bash +cat .env.fly | fly secrets import +``` + +This will sync the value of every variable in the .env.fly to Fly.io. Note that this will not unset/touch any secrets whose names are not in the .env.fly file. + +You can manually set a single secret with the following command: +```bash +fly secrets set [name] [value] +``` + +To unset a value, use the following command: +```bash +fly secrets unset [name] +``` + +### Supabase Customizations + +The "Enforce SSL on incoming connections" is set to true. Since Fly.io and Supabase are not within the same location, this setting ensures that traffic between the two is secured. 
For security purposes, the pg_graphql extension is disabled and API access via RLS (Row-Level Security) is not set up for EFSP databases. If possible, you should also restrict network access to an inclusion list. + +### Backing Up the Database + +The Supabase Postgres database is automatically backed up on a daily basis. + +### Restoring a Database Backup +Go to Database -> [Platform] Backups. You will see a list of all the available backups. Click on the "Restore" button. This will bring up the confirmation dialog. Select "Confirm Restore" to restore the backup. + +![Backups](./supabase_db_backups.png) + +### Viewing Data in the Database + +You can view data in the Postgres database through Supabase's web interface. Go to the Table Editor. +![Tables](./supabase_db_tables.png) + +Here you can see all the tables within the "postgres" database. Database in this context refers to the database within Postgres's database/schema/table concept, not as the generic term. The web view can only show tables within the "postgres" database. For the EFSP Fly.io setup, both POSTGRES_CODES_DB and POSTGRES_USER_DB are set to "postgres" so that the data is surfaced within Supabase's interface. Future development work could allow the schema to be configurable, in which case the data could be organized into two different schemas, both of which are accessible in the web interface. In Lightsail, two different databases were used: tyler_efm_codes and user_transactions, respectively. You can also configure your .env.fly to use those databases. This will work fine on the application side, but you will lose visibility in the web interface. + +### Connecting to Supabase with Your Own Database Tools + +You can also use your own database tools, such as [PgAdmin](https://www.pgadmin.org/) or psql, to interact with the data in your Supabase database. To get the information for the connection string, go to "Project Settings", then click on "Database" under the "Configuration" section. 
You should now see a page with the connection string on the right. + +![Supabase PSQL Connection Settings](./supabase_psql_connection.png) + +One example use case is migrating data from your existing Docker setup to Supabase. You can use pg_dump with the data only option to export the data from the user_transactions database in Docker, then use psql to import the data into Supabase's postgres database. + +For more details about connecting to Supabase with your own database tools, check out: +https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connections diff --git a/docs/operations_guide/papertrail_dashboard.png b/docs/operations_guide/papertrail_dashboard.png new file mode 100644 index 00000000..9f19881a Binary files /dev/null and b/docs/operations_guide/papertrail_dashboard.png differ diff --git a/docs/operations_guide/papertrail_events.png b/docs/operations_guide/papertrail_events.png new file mode 100644 index 00000000..95408703 Binary files /dev/null and b/docs/operations_guide/papertrail_events.png differ diff --git a/docs/operations_guide/papertrail_group_selector.png b/docs/operations_guide/papertrail_group_selector.png new file mode 100644 index 00000000..88a03198 Binary files /dev/null and b/docs/operations_guide/papertrail_group_selector.png differ diff --git a/docs/operations_guide/papertrail_log_destination.png b/docs/operations_guide/papertrail_log_destination.png new file mode 100644 index 00000000..cad16e55 Binary files /dev/null and b/docs/operations_guide/papertrail_log_destination.png differ diff --git a/docs/operations_guide/papertrail_search.png b/docs/operations_guide/papertrail_search.png new file mode 100644 index 00000000..664c032c Binary files /dev/null and b/docs/operations_guide/papertrail_search.png differ diff --git a/docs/operations_guide/supabase_cpu_memory.png b/docs/operations_guide/supabase_cpu_memory.png new file mode 100644 index 00000000..e44b1c1e Binary files /dev/null and 
b/docs/operations_guide/supabase_cpu_memory.png differ diff --git a/docs/operations_guide/supabase_db_backups.png b/docs/operations_guide/supabase_db_backups.png new file mode 100644 index 00000000..112ef94e Binary files /dev/null and b/docs/operations_guide/supabase_db_backups.png differ diff --git a/docs/operations_guide/supabase_db_metrics.png b/docs/operations_guide/supabase_db_metrics.png new file mode 100644 index 00000000..0b065a25 Binary files /dev/null and b/docs/operations_guide/supabase_db_metrics.png differ diff --git a/docs/operations_guide/supabase_db_tables.png b/docs/operations_guide/supabase_db_tables.png new file mode 100644 index 00000000..5f17647e Binary files /dev/null and b/docs/operations_guide/supabase_db_tables.png differ diff --git a/docs/operations_guide/supabase_details.png b/docs/operations_guide/supabase_details.png new file mode 100644 index 00000000..87931af7 Binary files /dev/null and b/docs/operations_guide/supabase_details.png differ diff --git a/docs/operations_guide/supabase_disk_io.png b/docs/operations_guide/supabase_disk_io.png new file mode 100644 index 00000000..2bbd2dab Binary files /dev/null and b/docs/operations_guide/supabase_disk_io.png differ diff --git a/docs/operations_guide/supabase_menu.png b/docs/operations_guide/supabase_menu.png new file mode 100644 index 00000000..750eb801 Binary files /dev/null and b/docs/operations_guide/supabase_menu.png differ diff --git a/docs/operations_guide/supabase_psql_connection.png b/docs/operations_guide/supabase_psql_connection.png new file mode 100644 index 00000000..45c7e1a3 Binary files /dev/null and b/docs/operations_guide/supabase_psql_connection.png differ diff --git a/docs/operations_guide/supabase_query.png b/docs/operations_guide/supabase_query.png new file mode 100644 index 00000000..68c30f7b Binary files /dev/null and b/docs/operations_guide/supabase_query.png differ diff --git a/docs/operations_guide/supabase_query_performance.png 
b/docs/operations_guide/supabase_query_performance.png new file mode 100644 index 00000000..05210da8 Binary files /dev/null and b/docs/operations_guide/supabase_query_performance.png differ diff --git a/docs/operations_guide/supabase_tools.png b/docs/operations_guide/supabase_tools.png new file mode 100644 index 00000000..b9b7f617 Binary files /dev/null and b/docs/operations_guide/supabase_tools.png differ diff --git a/docs/operations_guide/tigris_buckets.png b/docs/operations_guide/tigris_buckets.png new file mode 100644 index 00000000..11ab43da Binary files /dev/null and b/docs/operations_guide/tigris_buckets.png differ diff --git a/docs/operations_guide/tigris_efsp_certs_bucket.png b/docs/operations_guide/tigris_efsp_certs_bucket.png new file mode 100644 index 00000000..737f6973 Binary files /dev/null and b/docs/operations_guide/tigris_efsp_certs_bucket.png differ diff --git a/env.example b/env.example index 27b37dda..d6dc3449 100644 --- a/env.example +++ b/env.example @@ -45,7 +45,7 @@ TYLER_USER_EMAIL= TYLER_USER_PASSWORD= # The jurisdictions of this Tyler EFM, separated by spaces TYLER_JURISDICTIONS=illinois -TYLER_ENV=stage +TYLER_ENV=stage # Tyler has a way to pay for your filing with a credit card using their own portal, # this is that URL and the Key that is used TOGA_URL= @@ -63,3 +63,30 @@ JEFFERSON_ENDPOINT=https://example.com # The API Key that is sent with every request to the above URL. # NOTE: this ONLY needs to be in the Docassemble server. JEFFERSON_KEY= + +# For Fly.io, this is the private Tigris (S3-compatible object store) URL to the cert referenced by PATH_TO_KEYSTORE +#S3_TO_KEYSTORE_CERT=s3://my-tigris-url/MyOrg.pfx + +# Fly.io Tigris +#BUCKET_NAME= +#AWS_ENDPOINT_URL_S3=https://fly.storage.tigris.dev +#AWS_ACCESS_KEY_ID=tid_{YOUR_VALUE} +#AWS_SECRET_ACCESS_KEY=tsec_{YOUR_VALUE} + +##### PAPERTRAIL ##### +# Ships logs to the Papertrail service. To disable this, leave the PAPERTRAIL_HOST unset. 
+#PAPERTRAIL_HOST= +#PAPERTRAIL_PORT= +# You should identify the environment in the name to make it easy to filter when viewing the logs +# e.g. efsp-staging, efsp-prod +#PAPERTRAIL_APP_NAME=my-efsp + +##### Code Updater ##### +# Set to true to disable the Quartz Schedule for the Tyler EFM CodeUpdater. Since we don't support a clustered +# environment yet, this is recommended when running multiple instances. +# Leave commented out or set to false if you are only running a single EFSP instance and want to run the CodeUpdater +# automatically within the instance. +#DISABLE_SCHEDULE_FOR_CODE_UPDATE=false +# Leave commented out or set to false to have the CodeUpdater run daily at around 2:15 am. +# Set to true to immediately run the Tyler EFM CodeUpdater upon startup. This is useful for testing. +#SCHEDULE_CODE_UPDATE_IMMEDIATELY=false \ No newline at end of file diff --git a/fly.production.toml b/fly.production.toml new file mode 100644 index 00000000..38c36538 --- /dev/null +++ b/fly.production.toml @@ -0,0 +1,23 @@ +# fly.toml app configuration file generated for efileproxyserver on 2024-04-26T11:13:35-07:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. +# + +app = 'efsp-prod' +primary_region = 'bos' +kill_timeout = 60 + +[build] + +[http_service] + internal_port = 9009 + force_https = false + auto_stop_machines = true + auto_start_machines = true + min_machines_running = 1 + processes = ['app'] + +[[vm]] + memory = '4gb' + cpu_kind = 'shared' + cpus = 2 diff --git a/fly.toml b/fly.toml new file mode 100644 index 00000000..ce6605cf --- /dev/null +++ b/fly.toml @@ -0,0 +1,23 @@ +# fly.toml app configuration file generated for efileproxyserver on 2024-04-26T11:13:35-07:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
+# + +app = 'efsp-staging' +primary_region = 'bos' +kill_timeout = 60 + +[build] + +[http_service] + internal_port = 9009 + force_https = false + auto_stop_machines = true + auto_start_machines = true + min_machines_running = 1 + processes = ['app'] + +[[vm]] + memory = '4gb' + cpu_kind = 'shared' + cpus = 2 diff --git a/fly_create_api_key.sh b/fly_create_api_key.sh new file mode 100755 index 00000000..6db32df9 --- /dev/null +++ b/fly_create_api_key.sh @@ -0,0 +1,3 @@ +echo "Who will be using this API key? This value will be stored in the server_name field (so no spaces)." +read server_name +fly console --debug --verbose --vm-size shared-cpu-2x --vm-memory 4096 --command "mvn -f /usr/src/app/pom.xml exec:java@LoginDatabase -Dexec.args=\"$server_name true true\"" diff --git a/fly_create_prod_api_key.sh b/fly_create_prod_api_key.sh new file mode 100755 index 00000000..34871fa7 --- /dev/null +++ b/fly_create_prod_api_key.sh @@ -0,0 +1,3 @@ +echo "Who will be using this API key? This value will be stored in the server_name field (so no spaces)." +read server_name +fly console --config fly.production.toml --debug --verbose --vm-size shared-cpu-2x --vm-memory 4096 --command "mvn -f /usr/src/app/pom.xml exec:java@LoginDatabase -Dexec.args=\"$server_name true true\"" diff --git a/fly_startup_script.sh b/fly_startup_script.sh new file mode 100755 index 00000000..13fd0dd8 --- /dev/null +++ b/fly_startup_script.sh @@ -0,0 +1,13 @@ +#! /bin/sh + +set -e + +# Download cert from encrypted storage if on fly.io +if test -n "$FLY_MACHINE_ID"; then + echo "Running Fly.io startup checks..." + if ! test -f "$PATH_TO_KEYSTORE"; then + echo "Installing cert" + apk add --no-cache aws-cli + aws s3 cp $S3_TO_KEYSTORE_CERT . 
+ fi +fi diff --git a/pom.xml b/pom.xml index e2f10d21..c8021cd6 100644 --- a/pom.xml +++ b/pom.xml @@ -257,6 +257,16 @@ quartz 2.3.2 + + com.papertrailapp + logback-syslog4j + 1.0.0 + + + org.codehaus.janino + janino + 3.1.9 + org.junit.jupiter junit-jupiter @@ -519,6 +529,8 @@ **/UserDatabaseTest.java **/LoginDatabaseTest.java **/CodeDatabaseTest.java + **/CodesServiceTest.java + **/DatabaseVersionTest.java diff --git a/src/main/java/edu/suffolk/litlab/efspserver/ecf4/TylerModuleSetup.java b/src/main/java/edu/suffolk/litlab/efspserver/ecf4/TylerModuleSetup.java index 0048795c..9a536d6e 100644 --- a/src/main/java/edu/suffolk/litlab/efspserver/ecf4/TylerModuleSetup.java +++ b/src/main/java/edu/suffolk/litlab/efspserver/ecf4/TylerModuleSetup.java @@ -41,6 +41,7 @@ import org.quartz.JobDetail; import org.quartz.Scheduler; import org.quartz.SchedulerException; +import org.quartz.SimpleScheduleBuilder; import org.quartz.Trigger; import org.quartz.TriggerBuilder; import org.quartz.impl.StdSchedulerFactory; @@ -198,23 +199,19 @@ public void preSetup() { log.info("Done checking table if absent"); try { + boolean disableQuartzSchedule = + Boolean.parseBoolean(GetEnv("DISABLE_SCHEDULE_FOR_CODE_UPDATE").orElse("false")); + boolean scheduleImmediately = + Boolean.parseBoolean(GetEnv("SCHEDULE_CODE_UPDATE_IMMEDIATELY").orElse("false")); + + if (disableQuartzSchedule) { + return; + } + Scheduler scheduler = StdSchedulerFactory.getDefaultScheduler(); scheduler.start(); - String jobName = "job-" + this.tylerJurisdiction + "-" + this.tylerEnv; - - JobDetail job = - JobBuilder.newJob(UpdateCodeVersions.class) - .withIdentity(jobName, "codesdb-group") - .usingJobData("TYLER_JURISDICTION", this.tylerJurisdiction) - .usingJobData("TYLER_ENV", this.tylerEnv) - .usingJobData("X509_PASSWORD", this.x509Password) - .usingJobData("POSTGRES_URL", this.pgUrl) - .usingJobData("POSTGRES_DB", this.pgDb) - .usingJobData("POSTGRES_USERNAME", this.pgUser) - .usingJobData("POSTGRES_PASSWORD", 
this.pgPassword) - .build(); - + // Always schedule daily codes update. var r = new Random(); String triggerName = "trigger-" + this.tylerJurisdiction + "-" + this.tylerEnv; Trigger trigger = @@ -222,11 +219,30 @@ public void preSetup() { .withIdentity(triggerName, "codesdb-group") .startNow() .withSchedule(CronScheduleBuilder.dailyAtHourAndMinute(2, 13 + r.nextInt(4))) - // Testable version! Updates the codes 20 seconds after launch - // .withSchedule(SimpleScheduleBuilder.simpleSchedule().withIntervalInSeconds(20)) .build(); - scheduler.scheduleJob(job, trigger); + log.info("Scheduling daily Tyler EFM code update job."); + scheduler.scheduleJob( + buildJob("job-" + this.tylerJurisdiction + "-" + this.tylerEnv), trigger); + + if (scheduleImmediately) { + // Schedule immediate codes update. + // Testable version - updates the codes 20 seconds after launch + // Also useful for immediately running the update on ephemeral machines that are controlled + // by external cron + Trigger immediateTrigger = + TriggerBuilder.newTrigger() + .withIdentity( + "trigger-immediate-" + this.tylerJurisdiction + "-" + this.tylerEnv, + "codesdb-group") + .startNow() + .withSchedule(SimpleScheduleBuilder.simpleSchedule().withIntervalInSeconds(20)) + .build(); + log.info("Scheduling immediate Tyler EFM code update job."); + scheduler.scheduleJob( + buildJob("job-immediate-" + this.tylerJurisdiction + "-" + this.tylerEnv), + immediateTrigger); + } } catch (SchedulerException se) { log.error("Scheduler Exception: " + StdLib.strFromException(se)); throw new RuntimeException(se); @@ -397,4 +413,17 @@ public void shutdown() { public String toString() { return "TylerModuleSetup[jurisdiction=" + tylerJurisdiction + ",env=" + tylerEnv + "]"; } + + private JobDetail buildJob(final String jobName) { + return JobBuilder.newJob(UpdateCodeVersions.class) + .withIdentity(jobName, "codesdb-group") + .usingJobData("TYLER_JURISDICTION", this.tylerJurisdiction) + .usingJobData("TYLER_ENV", this.tylerEnv) 
+ .usingJobData("X509_PASSWORD", this.x509Password) + .usingJobData("POSTGRES_URL", this.pgUrl) + .usingJobData("POSTGRES_DB", this.pgDb) + .usingJobData("POSTGRES_USERNAME", this.pgUser) + .usingJobData("POSTGRES_PASSWORD", this.pgPassword) + .build(); + } } diff --git a/src/main/java/edu/suffolk/litlab/efspserver/ecfcodes/CodeUpdater.java b/src/main/java/edu/suffolk/litlab/efspserver/ecfcodes/CodeUpdater.java index 2dac1f53..2321f686 100644 --- a/src/main/java/edu/suffolk/litlab/efspserver/ecfcodes/CodeUpdater.java +++ b/src/main/java/edu/suffolk/litlab/efspserver/ecfcodes/CodeUpdater.java @@ -381,7 +381,7 @@ private boolean downloadCourtTables( } Optional signedTime = signer.signedCurrentTime(); if (signedTime.isEmpty()) { - log.error("Couldn't get signed time to download codeds, skipping all"); + log.error("Couldn't get signed time to download codes, skipping all"); return false; } Map downloaded = diff --git a/src/main/java/edu/suffolk/litlab/efspserver/services/EfspServer.java b/src/main/java/edu/suffolk/litlab/efspserver/services/EfspServer.java index 7d62f877..9413c2b4 100644 --- a/src/main/java/edu/suffolk/litlab/efspserver/services/EfspServer.java +++ b/src/main/java/edu/suffolk/litlab/efspserver/services/EfspServer.java @@ -218,7 +218,7 @@ public static void main(String[] args) throws Exception { List jurisdictions = List.of(tylerJurisdictions.orElse("").split(" ")); List togaKeys = List.of(togaKeyStr.orElse("").split(" ")); if (jurisdictions.size() > 0 && jurisdictions.size() != togaKeys.size()) { - log.error("TOGA_CLIENT_KEYS list should be same size as TYLER_JURSIDICTIONS list."); + log.error("TOGA_CLIENT_KEYS list should be same size as TYLER_JURISDICTIONS list."); throw new RuntimeException("TOGA_CLIENT_KEYS and TYLER_JURISDICTION mismatch"); } for (int idx = 0; idx < jurisdictions.size(); idx++) { diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 119e8a1f..70ae6871 100644 --- a/src/main/resources/logback.xml +++ 
b/src/main/resources/logback.xml @@ -10,6 +10,31 @@ | %d{yyyy-MM-dd HH:mm:ss.SSS} | %X{serverId} | %X{userId} | %X{operation} | %thread | %-5level | %logger{50} |- %msg |%n + + + + + %-5level %logger{35}: %m%n%xEx + + + + + ${PAPERTRAIL_HOST} + + ${PAPERTRAIL_PORT} + + ${PAPERTRAIL_APP_NAME} + + 128000 + + + + + + + + +