Skip to content

Commit

Permalink
Merge pull request #325 from NEONScience/NSE-10616
Browse files Browse the repository at this point in the history
Nse 10616
  • Loading branch information
covesturtevant authored Aug 13, 2024
2 parents 990bdd9 + 133576a commit 3faa868
Show file tree
Hide file tree
Showing 15 changed files with 535 additions and 129 deletions.
2 changes: 1 addition & 1 deletion pipe/windobserverii/pipe_list_windobserverii.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ windobserverii_location_loader.yaml
windobserverii_calibration_assignment.yaml
windobserverii_location_asset_assignment.yaml
windobserverii_location_active_dates_assignment.yaml
windobserverii_calibration_group_and_validate.yaml
windobserverii_calibration_group_and_convert.yaml
windobserverii_location_group_and_restructure.yaml
windobserverii_fill_date_gaps_and_regularize.yaml
210 changes: 210 additions & 0 deletions pipe/windobserverii/site-list.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
[
{
"site" : "ABBY",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "BARR",
"kafka_start_date" : "2023-06-01"
},
{
"site" : "BART",
"kafka_start_date" : "2023-05-10"
},
{
"site" : "BLAN",
"kafka_start_date" : "2023-03-03"
},
{
"site" : "BONA",
"kafka_start_date" : "2023-06-01"
},
{
"site" : "CLBJ",
"kafka_start_date" : "2023-05-19"
},
{
"site" : "CPER",
"kafka_start_date" : "2022-12-30"
},
{
"site" : "DCFS",
"kafka_start_date" : "2023-05-19"
},
{
"site" : "DEJU",
"kafka_start_date" : "2023-06-01"
},
{
"site" : "DELA",
"kafka_start_date" : "2023-06-16"
},
{
"site" : "DSNY",
"kafka_start_date" : "2023-03-09"
},
{
"site" : "GRSM",
"kafka_start_date" : "2023-05-10"
},
{
"site" : "GUAN",
"kafka_start_date" : "2023-03-09"
},
{
"site" : "HARV",
"kafka_start_date" : "2023-03-03"
},
{
"site" : "HEAL",
"kafka_start_date" : "2023-06-01"
},
{
"site" : "JERC",
"kafka_start_date" : "2023-03-09"
},
{
"site" : "JORN",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "KONA",
"kafka_start_date" : "2023-03-15"
},
{
"site" : "KONZ",
"kafka_start_date" : "2023-05-19"
},
{
"site" : "LAJA",
"kafka_start_date" : "2023-05-10"
},
{
"site" : "LENO",
"kafka_start_date" : "2023-06-15"
},
{
"site" : "MD00",
"start_date" : "2017-07-18",
"end_date" : "2017-08-23"
},
{
"site" : "MD01",
"start_date" : "2021-07-23",
"end_date" : "2021-11-17"
},
{
"site" : "MD02",
"start_date" : "2021-08-23",
"end_date" : "2021-11-30"
},
{
"site" : "MD03",
"start_date" : "2022-04-08",
"end_date" : "2022-05-11"
},
{
"site" : "MLBS",
"kafka_start_date" : "2023-02-03"
},
{
"site" : "MOAB",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "NIWO",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "NOGP",
"kafka_start_date" : "2023-05-19"
},
{
"site" : "OAES",
"kafka_start_date" : "2023-05-19"
},
{
"site" : "ONAQ",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "ORNL",
"kafka_start_date" : "2023-05-10"
},
{
"site" : "OSBS",
"kafka_start_date" : "2023-03-09"
},
{
"site" : "PUUM",
"kafka_start_date" : "2023-02-03"
},
{
"site" : "RMNP",
"kafka_start_date" : "2023-01-19"
},
{
"site" : "SCBI",
"kafka_start_date" : "2023-03-03"
},
{
"site" : "SERC",
"kafka_start_date" : "2023-03-03"
},
{
"site" : "SJER",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "SOAP",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "SRER",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "STEI",
"kafka_start_date" : "2023-03-15"
},
{
"site" : "STER",
"kafka_start_date" : "2023-01-13"
},
{
"site" : "TALL",
"kafka_start_date" : "2023-05-10"
},
{
"site" : "TEAK",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "TOOL",
"kafka_start_date" : "2023-06-01"
},
{
"site" : "TREE",
"kafka_start_date" : "2023-03-15"
},
{
"site" : "UKFS",
"kafka_start_date" : "2023-03-15"
},
{
"site" : "UNDE",
"kafka_start_date" : "2023-03-15"
},
{
"site" : "WOOD",
"kafka_start_date" : "2023-05-24"
},
{
"site" : "WREF",
"kafka_start_date" : "2023-02-03"
},
{
"site" : "YELL",
"kafka_start_date" : "2023-02-03"
}
]
11 changes: 4 additions & 7 deletions pipe/windobserverii/windobserverii_calibration_assignment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,17 @@ input:
repo: windobserverii_cron_daily_and_date_control
glob: /data_year*.txt
parallelism_spec:
constant: 5
constant: 2
autoscaling: true
resource_requests:
memory: 1G
memory: 200M
cpu: 0.8
resource_limits:
memory: 2G
memory: 600M
cpu: 1.5
sidecar_resource_requests:
memory: 5G
memory: 3G
cpu: 1
sidecar_resource_limits:
memory: 8Gi
cpu: 1.2
datum_set_spec:
number: 5
scheduling_spec:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
pipeline:
name: windobserverii_calibration_group_and_validate
name: windobserverii_calibration_group_and_convert
transform:
image_pull_secrets: [battelleecology-quay-read-all-pull-secret]
image: quay.io/battelleecology/neon-is-cal-grp-conv:aa651ed4
Expand Down Expand Up @@ -45,27 +45,28 @@ transform:
python3 -m filter_joiner.filter_joiner_main
fi
# Run calibration conversion module
Rscript ./flow.cal.conv.R \
Rscript ./flow.cal.conv.R \
DirIn=/tmp/pfs/filter_joined \
DirOut=/pfs/out \
DirErr=/pfs/out/errored_datums \
FileSchmQf=$FILE_SCHEMA_FLAGS \
"TermQf=u_axis_wind_speed|v_axis_wind_speed"
EOF
env:
# Environment variables for filter-joiner.
# There are two configs here: CONFIG_KAFKA is used when data comes from kafka, and CONFIG_TRINO is used when data comes from trino.
# The environment variable CONFIG is set to the appropriate one as detected in the bash script above
CONFIG_KAFKA: |
CONFIG_TRINO: |
---
# Configuration for filter-joiner module that will bring together the data and calibrations
# In Pachyderm root will be index 0, 'pfs' index 1, and the repo name index 2.
# Metadata indices will typically begin at index 3.
input_paths:
- path:
name: DATA_PATH_KAFKA
name: DATA_PATH_TRINO
# Filter for data directory
glob_pattern: /tmp/kafka_merged/windobserverii/*/*/*/*/**
glob_pattern: /pfs/DATA_PATH_TRINO/windobserverii/*/*/*/*/**
# Join on named location (already joined below by day)
join_indices: [7]
outer_join: true
Expand All @@ -75,16 +76,16 @@ transform:
glob_pattern: /pfs/CALIBRATION_PATH/windobserverii/*/*/*/*/**
# Join on named location (already joined below by day)
join_indices: [7]
CONFIG_TRINO: |
CONFIG_KAFKA: |
---
# Configuration for filter-joiner module that will bring together the data and calibrations
# In Pachyderm root will be index 0, 'pfs' index 1, and the repo name index 2.
# Metadata indices will typically begin at index 3.
input_paths:
- path:
name: DATA_PATH_TRINO
name: DATA_PATH_KAFKA
# Filter for data directory
glob_pattern: /pfs/DATA_PATH_TRINO/windobserverii/*/*/*/*/**
glob_pattern: /tmp/kafka_merged/windobserverii/*/*/*/*/**
# Join on named location (already joined below by day)
join_indices: [7]
outer_join: true
Expand All @@ -110,7 +111,6 @@ input:
name: FILE_SCHEMA_FLAGS
repo: windobserverii_avro_schemas
glob: /windobserverii/flags_calibration_windobserverii.avsc
# Outer join all repos so that varying sensors between kafka and trino loaders will all get joined with calibrations. Filter-joiner will narrow down.
- join:
- pfs:
name: CALIBRATION_PATH
Expand All @@ -134,11 +134,19 @@ input:
outer_join: true
empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
parallelism_spec:
constant: 2
constant: 5
autoscaling: true
resource_requests:
memory: 1.5G
cpu: 3.3
autoscaling: true
resource_limits:
memory: 3G
cpu: 4.5
sidecar_resource_requests:
memory: 3G
cpu: 0.6
datum_set_spec:
number: 1
scheduling_spec:
node_selector:
cloud.google.com/gke-ephemeral-storage-local-ssd: "true"
Expand All @@ -156,14 +164,3 @@ pod_spec: |-
"operator": "Exists"
}
] }
pod_patch: |-
[
{ "op": "replace",
"path":"/containers/1/resources/requests/memory",
"value":"3.5G"
},
{ "op": "replace",
"path": "/containers/1/resources/requests/cpu",
"value": "0.5"
}
]
21 changes: 8 additions & 13 deletions pipe/windobserverii/windobserverii_calibration_list_files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,16 @@ input:
repo: windobserverii_cron_daily_and_date_control_tick
glob: /*
empty_files: true
autoscaling: true
resource_requests:
memory: 64M
memory: 500M
cpu: 0.4
resource_limits:
memory: 1G
cpu: 1.5
sidecar_resource_requests:
memory: 1G
cpu: 0.5
autoscaling: true
scheduling_spec:
node_selector:
cloud.google.com/gke-ephemeral-storage-local-ssd: "true"
Expand All @@ -37,14 +43,3 @@ pod_spec: |-
"operator": "Exists"
}
] }
pod_patch: |-
[
{ "op": "replace",
"path":"/containers/1/resources/requests/memory",
"value":"64M"
},
{ "op": "replace",
"path": "/containers/1/resources/requests/cpu",
"value": "0.1"
}
]
5 changes: 1 addition & 4 deletions pipe/windobserverii/windobserverii_calibration_loader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,8 @@ resource_limits:
memory: 1G
cpu: 1.5
sidecar_resource_requests:
memory: 600M
cpu: 0.2
sidecar_resource_limits:
memory: 2G
cpu: 1.2
cpu: 0.5
datum_set_spec:
number: 1
scheduling_spec:
Expand Down
Loading

0 comments on commit 3faa868

Please sign in to comment.