Skip to content

Commit

Permalink
Merge pull request #142 from TGSAI/141-aws-example
Browse files Browse the repository at this point in the history
141 aws example
  • Loading branch information
BrianMichell authored Nov 15, 2024
2 parents 1049080 + 0b88f2f commit 838538d
Show file tree
Hide file tree
Showing 7 changed files with 468 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,6 @@ endif()

list(APPEND mdio_COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})


add_subdirectory(mdio)

37 changes: 37 additions & 0 deletions examples/real_data_example/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.24)
project(real_data_example_project VERSION 1.0.0 LANGUAGES CXX)

# Set the C++ standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Include FetchContent module
include(FetchContent)

# Fetch the mdio-cpp library from the specified branch
FetchContent_Declare(
mdio
GIT_REPOSITORY https://github.com/TGSAI/mdio-cpp.git
GIT_TAG main
)
FetchContent_MakeAvailable(mdio)

# Fetch the indicators library
FetchContent_Declare(
indicators
GIT_REPOSITORY https://github.com/p-ranav/indicators.git
GIT_TAG v2.3
)
FetchContent_MakeAvailable(indicators)

# Create an executable target
add_executable(real_data_example src/real_data_example.cc)

# Link the mdio and indicators libraries to the executable
target_link_libraries(real_data_example PRIVATE
mdio
${mdio_INTERNAL_DEPS}
${mdio_INTERNAL_S3_DRIVER_DEPS}
PNG::PNG
indicators
)
47 changes: 47 additions & 0 deletions examples/real_data_example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Seismic Data Extractor

A C++ tool for extracting and processing 3D seismic data slices from MDIO datasets. The tool will extract a slice of a larger MDIO dataset and output the result to a NumPy array called seismic_slice.npy.

This tool demonstrates working with the Poseidon dataset, a public seismic dataset available on AWS Earth, and is provided under a CC BY 4.0 license.

## Overview

This tool allows users to extract specific slices from 3D seismic data stored in MDIO format, with options to specify ranges for inline, crossline, and depth dimensions. The extracted data can be saved in NumPy format for further analysis.

## Prerequisites

- GCC 11 *or better* or Clang14 *or better*
- MDIO library (for reading seismic data)
- indicators library (for progress bars)
- CMake (for building)

## Installation

[Installation instructions would be valuable here]

## Usage

```bash
./real_data_example [flags]
```

### Flags

- `--dataset_path`: Path to the MDIO dataset (default: "s3://tgs-opendata-poseidon/full_stack_agc.mdio")
- `--inline_range`: Inline range in format {inline,start,end,step} (default: "{inline,700,701,1}")
- `--xline_range`: Crossline range in format {crossline,start,end,step} (default: "{crossline,500,700,1}")
- `--depth_range`: Optional depth range in format {depth,start,end,step}
- `--variable_name`: Name of the seismic variable to extract (default: "seismic")
- `--print_dataset`: Print the dataset URL and return without processing

### Example

To print the contents of the file ...
```
./real_data_example --print_dataset --dataset_path="s3://tgs-opendata-poseidon/full_stack_agc.mdio"
```

To slice the data and write to numpy ...
```
./real_data_example --inline_range="{inline,700,701,1}" --xline_range="{crossline,500,1000,1}" --depth_range="{time, 300, 900,1}"
```
44 changes: 44 additions & 0 deletions examples/real_data_example/src/interpolation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2024 TGS

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <mdio/mdio.h>

using Index = mdio::Index;

template <typename T, mdio::DimensionIndex R, mdio::ArrayOriginKind OriginKind>
float bilinear_interpolate(const mdio::SharedArray<T, R, OriginKind>& accessor,
float x, float y, Index slice_index, Index x_min,
Index x_max, Index y_min, Index y_max) {
// Clamp coordinates to valid range
x = std::max(float(x_min), std::min(float(x_max - 1), x));
y = std::max(float(y_min), std::min(float(y_max - 1), y));

Index x0 = std::floor(x);
Index y0 = std::floor(y);
Index x1 = std::min(x0 + 1, x_max - 1);
Index y1 = std::min(y0 + 1, y_max - 1);

float fx = x - x0;
float fy = y - y0;

float c00 = accessor({slice_index, x0, y0});
float c10 = accessor({slice_index, x1, y0});
float c01 = accessor({slice_index, x0, y1});
float c11 = accessor({slice_index, x1, y1});

return (c00 * (1 - fx) * (1 - fy) + c10 * fx * (1 - fy) +
c01 * (1 - fx) * fy + c11 * fx * fy);
}
95 changes: 95 additions & 0 deletions examples/real_data_example/src/progress.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2024 TGS

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <mdio/mdio.h>

#include <chrono>
#include <indicators/cursor_control.hpp>
#include <indicators/indeterminate_progress_bar.hpp>
#include <memory>
#include <sstream>
#include <string>
#include <thread>

namespace mdio {

template <typename T>
class ProgressTracker {
public:
explicit ProgressTracker(const std::string& message = "Loading data...")
: message_(message),
bar_{
indicators::option::BarWidth{40},
indicators::option::Start{"["},
indicators::option::Fill{"·"},
indicators::option::Lead{"<==>"},
indicators::option::End{"]"},
indicators::option::PostfixText{message},
indicators::option::ForegroundColor{indicators::Color::yellow},
indicators::option::FontStyles{std::vector<indicators::FontStyle>{
indicators::FontStyle::bold}},
} {
std::cout << "\033[2K\r" << std::flush;
indicators::show_console_cursor(false);
start_time_ = std::chrono::steady_clock::now();
}

void tick() {
auto current_time = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
current_time - start_time_)
.count();
std::stringstream time_str;
time_str << message_ << " " << elapsed << "s";
bar_.set_option(indicators::option::PostfixText{time_str.str()});
bar_.tick();
}

void complete() {
bar_.mark_as_completed();
bar_.set_option(
indicators::option::ForegroundColor{indicators::Color::green});
bar_.set_option(indicators::option::PostfixText{message_ + " completed"});
indicators::show_console_cursor(true);
}

~ProgressTracker() { indicators::show_console_cursor(true); }

private:
std::string message_;
std::chrono::steady_clock::time_point start_time_;
indicators::IndeterminateProgressBar bar_;
};

template <typename T = void, DimensionIndex R = dynamic_rank,
ArrayOriginKind OriginKind = offset_origin>
Future<VariableData<T, R, OriginKind>> ReadWithProgress(
Variable<T>& variable, const std::string& message = "Loading data...") {
auto tracker = std::make_shared<ProgressTracker<T>>(message);
auto future = variable.Read();

std::thread([tracker, future]() {
while (!future.ready()) {
tracker->tick();
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
tracker->complete();
}).detach();

return future;
}

} // namespace mdio
130 changes: 130 additions & 0 deletions examples/real_data_example/src/real_data_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2024 TGS

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <mdio/mdio.h>

#include <indicators/cursor_control.hpp>
#include <indicators/indeterminate_progress_bar.hpp>
#include <indicators/progress_bar.hpp>

#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "absl/strings/str_split.h"
#include "interpolation.h"
#include "progress.h"
#include "seismic_numpy.h"
#include "seismic_png.h"
#include "tensorstore/tensorstore.h"

#define MDIO_RETURN_IF_ERROR(...) TENSORSTORE_RETURN_IF_ERROR(__VA_ARGS__)

using Index = mdio::Index;

ABSL_FLAG(std::string, inline_range, "{inline,700,701,1}",
"Inline range in format {inline,start,end,step}");
ABSL_FLAG(std::string, xline_range, "{crossline,500,700,1}",
"Crossline range in format {crossline,start,end,step}");
ABSL_FLAG(std::string, depth_range, "",
"Optional depth range in format {depth,start,end,step}");
ABSL_FLAG(std::string, variable_name, "seismic",
"Name of the seismic variable");
ABSL_FLAG(bool, print_dataset, false, "Print the dataset URL and return");
ABSL_FLAG(std::string, dataset_path,
"s3://tgs-opendata-poseidon/full_stack_agc.mdio",
"The path to the dataset");

// Make Run a template function for both the type and descriptors
template <typename T, typename... Descriptors>
absl::Status Run(const Descriptors... descriptors) {
// New feature to print the dataset if the flag is set

MDIO_ASSIGN_OR_RETURN(
auto dataset,
mdio::Dataset::Open(std::string(absl::GetFlag(FLAGS_dataset_path)),
mdio::constants::kOpen)
.result())

if (absl::GetFlag(FLAGS_print_dataset)) {
std::cout << dataset << std::endl;
return absl::OkStatus(); // Return early if just printing the dataset
}

// slice the dataset
MDIO_ASSIGN_OR_RETURN(auto inline_slice, dataset.isel(descriptors...));

// Get variable with template type T using the variable name from the CLI
MDIO_ASSIGN_OR_RETURN(auto variable, inline_slice.variables.template get<T>(
absl::GetFlag(FLAGS_variable_name)));

if (variable.rank() != 3) {
return absl::InvalidArgumentError("Seismic data must be 3D");
}

MDIO_ASSIGN_OR_RETURN(auto seismic_data, ReadWithProgress(variable).result())

auto seismic_accessor = seismic_data.get_data_accessor();

// Write numpy file
MDIO_RETURN_IF_ERROR(WriteNumpy(seismic_accessor, "seismic_slice.npy"));

return absl::OkStatus();
}

mdio::RangeDescriptor<> ParseRange(std::string_view range) {
// Remove leading/trailing whitespace and braces
if (range.empty()) {
// FIXME - we need a better way to handle this
return {"ignore_me", 0, 1, 1};
}

range.remove_prefix(range.find_first_not_of(" {"));
range.remove_suffix(range.length() - range.find_last_not_of("} ") - 1);

// Split by comma into parts
std::vector<std::string_view> parts = absl::StrSplit(range, ',');

if (parts.size() != 4) {
throw std::runtime_error(
"Invalid range format. Expected {label,start,end,step}");
}

// Clean up the label (first part) by removing quotes and spaces
auto label = parts[0];
label.remove_prefix(std::min(label.find_first_not_of(" \""), label.size()));
label.remove_suffix(
label.length() -
std::min(label.find_last_not_of(" \"") + 1, label.size()));

return {
label, // dimension name
std::stoi(std::string(parts[1])), // start
std::stoi(std::string(parts[2])), // end
std::stoi(std::string(parts[3])) // step
};
}

int main(int argc, char* argv[]) {
absl::ParseCommandLine(argc, argv);

// keep me in memory
auto inline_range = absl::GetFlag(FLAGS_inline_range);
auto crossline_range = absl::GetFlag(FLAGS_xline_range);
auto depth_range = absl::GetFlag(FLAGS_depth_range);

auto desc1 = ParseRange(inline_range);
auto desc2 = ParseRange(crossline_range);
auto desc3 = ParseRange(depth_range);

return Run<float>(desc1, desc2, desc3).ok() ? 0 : 1;
}
Loading

0 comments on commit 838538d

Please sign in to comment.