Local execution e2e training #1472

Open

Wants to merge 40 commits into base: master

Commits (40)
6adb290  temporary weight adjust index (reyna-abhyankar, Aug 25, 2024)
61697c2  Loss function (reyna-abhyankar, Aug 27, 2024)
b56c046  Add cuda test for loss function (reyna-abhyankar, Aug 27, 2024)
f75a3d4  Format (reyna-abhyankar, Aug 27, 2024)
f74711f  Refactor and build optimizer kernels, op (reyna-abhyankar, Aug 27, 2024)
40c6252  Finish optimizer local backing (reyna-abhyankar, Aug 27, 2024)
ad9b9ea  Format (reyna-abhyankar, Aug 27, 2024)
1ddfade  E2E update test (reyna-abhyankar, Aug 27, 2024)
dde9496  Format (reyna-abhyankar, Aug 27, 2024)
59635d8  Small fixes (reyna-abhyankar, Sep 11, 2024)
103ef07  Format (reyna-abhyankar, Sep 11, 2024)
f48f9ff  Fix test and small issues (reyna-abhyankar, Sep 18, 2024)
189c9c8  Format (reyna-abhyankar, Sep 18, 2024)
d93f464  Merge branch 'repo-refactor' into local-e2e-training (reyna-abhyankar, Oct 1, 2024)
b5647c8  Pass tests after merge (reyna-abhyankar, Oct 1, 2024)
f5ff91e  Fix input/weight differentiation (reyna-abhyankar, Oct 1, 2024)
7470e71  Fix signature to use unified rep (reyna-abhyankar, Oct 1, 2024)
deece1b  Fix model training instance abstraction (reyna-abhyankar, Oct 1, 2024)
1d3cc94  Change subcase test name (reyna-abhyankar, Oct 1, 2024)
3cf5d08  Quick fixes (reyna-abhyankar, Oct 16, 2024)
79ef4c9  Refactor training backing and instance (reyna-abhyankar, Oct 22, 2024)
a73b1c3  Expose op folders publicly (reyna-abhyankar, Nov 13, 2024)
c6fed29  Add tensor type, operate over reduced tensor (reyna-abhyankar, Nov 13, 2024)
0cdfb1a  Fixes (reyna-abhyankar, Jan 7, 2025)
9d252b3  Remove tensor lower (reyna-abhyankar, Jan 15, 2025)
895c117  Add tensor and task lowering scheme (reyna-abhyankar, Jan 17, 2025)
411017d  Build local exec (reyna-abhyankar, Jan 22, 2025)
0128abb  Disaggregate local backend (reyna-abhyankar, Feb 1, 2025)
277f8c2  Update task binding interface and cost estimator (reyna-abhyankar, Feb 1, 2025)
377c6aa  Merge master into local execution (reyna-abhyankar, Feb 4, 2025)
8efaec7  Build (reyna-abhyankar, Feb 6, 2025)
1dc1398  Format (reyna-abhyankar, Feb 6, 2025)
17ad5c8  Split task spec files (reyna-abhyankar, Feb 6, 2025)
639c2c1  Delete outdated sim environment file (reyna-abhyankar, Feb 6, 2025)
a697044  Finish API (reyna-abhyankar, Feb 13, 2025)
187a8d5  Add tests for allocated and unallocated (reyna-abhyankar, Feb 13, 2025)
a0f8113  Fix nonnegative (reyna-abhyankar, Feb 13, 2025)
b1eab94  Format (reyna-abhyankar, Feb 13, 2025)
b532c50  Pass allocated-unallocated tests (reyna-abhyankar, Feb 13, 2025)
f28e5c2  Update task registry tests (reyna-abhyankar, Feb 13, 2025)
1 change: 1 addition & 0 deletions .proj.toml
@@ -12,6 +12,7 @@ build_targets = [
   "compiler",
   "substitution-generator",
   "local-execution",
+  "task-spec",
   "models",
   "export-model-arch",
   "substitution-to-dot",
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
@@ -4,6 +4,7 @@ add_subdirectory(runtime)
 add_subdirectory(op-attrs)
 add_subdirectory(kernels)
 add_subdirectory(local-execution)
+add_subdirectory(task-spec)
 add_subdirectory(utils)
 add_subdirectory(ffi)
 add_subdirectory(substitutions)
2 changes: 2 additions & 0 deletions lib/kernels/CMakeLists.txt
@@ -8,6 +8,8 @@ file(GLOB_RECURSE SRC
   LIST_DIRECTORIES False
   src/*.cc
   src/cuda/cuda_helper.cu
+  src/cuda/loss_function_kernels.cu
+  src/cuda/optimizer_kernels.cu
   src/cuda/ops/*.cu
 )
22 changes: 16 additions & 6 deletions lib/kernels/include/kernels/array_shape.h
@@ -15,9 +15,10 @@ namespace FlexFlow {
 struct ArrayShape {
 public:
   ArrayShape() = delete;
-  ArrayShape(nonnegative_int *dims, nonnegative_int num_dims);
-  ArrayShape(TensorShape const &shape);
-  ArrayShape(std::vector<nonnegative_int> const &);
+  explicit ArrayShape(nonnegative_int *dims, nonnegative_int num_dims);
+  explicit ArrayShape(TensorShape const &shape);
+  explicit ArrayShape(std::vector<nonnegative_int> const &);
+  explicit ArrayShape(LegionOrdered<nonnegative_int> const &);
 
   /**
    * @brief Alias of ArrayShape::num_elements for compatibility with
@@ -46,9 +47,11 @@ struct ArrayShape {
   std::optional<nonnegative_int> at_maybe(legion_dim_t) const;
   std::optional<nonnegative_int> at_maybe(ff_dim_t) const;
 
-  ArrayShape
-      sub_shape(std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
-                std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const;
+  ArrayShape sub_shape(std::optional<ff_dim_t> start,
+                       std::optional<ff_dim_t> end) const;
+
+  ArrayShape sub_shape(std::optional<legion_dim_t> start,
+                       std::optional<legion_dim_t> end) const;
 
 public:
   LegionOrdered<nonnegative_int> dims;
@@ -66,4 +69,11 @@ std::ostream &operator<<(std::ostream &, ArrayShape const &);
 
 } // namespace FlexFlow
 
+namespace std {
+template <>
+struct hash<::FlexFlow::ArrayShape> {
+  size_t operator()(::FlexFlow::ArrayShape const &) const;
+};
+} // namespace std
+
 #endif
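The two typed sub_shape overloads replace the old variant-based signature, one per dimension-ordering convention. A minimal usage sketch (assumptions: the `_n` nonnegative_int literal and brace construction of `ff_dim_t`/`legion_dim_t`, as used elsewhere in this PR; the shape values are hypothetical):

```cpp
#include "kernels/array_shape.h"
#include <vector>

using namespace FlexFlow;

void sub_shape_example() {
  // Hypothetical 3-d shape; the vector constructor is now explicit.
  ArrayShape shape = ArrayShape{std::vector<nonnegative_int>{2_n, 3_n, 4_n}};

  // Bounds given in ff (outermost-first) coordinates:
  ArrayShape ff_slice = shape.sub_shape(ff_dim_t{0_n}, ff_dim_t{2_n});

  // Bounds given in legion coordinates; per the array_shape.cc diff below,
  // this overload maps its bounds through ff_dim_from_legion_dim and then
  // delegates to the ff-indexed overload:
  ArrayShape legion_slice =
      shape.sub_shape(legion_dim_t{0_n}, legion_dim_t{2_n});
}
```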
2 changes: 2 additions & 0 deletions lib/kernels/include/kernels/legion_dim.h
@@ -10,6 +10,8 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value);
 
 legion_dim_t legion_dim_from_ff_dim(ff_dim_t, nonnegative_int num_dimensions);
 
+ff_dim_t ff_dim_from_legion_dim(legion_dim_t, nonnegative_int num_dimensions);
+
 template <typename T>
 using LegionOrdered = DimOrdered<legion_dim_t, T>;
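`ff_dim_from_legion_dim` is the inverse of the existing `legion_dim_from_ff_dim`. Assuming legion ordering is simply the reverse of ff ordering (index 0 innermost rather than outermost), the two should compose to the identity; a small sketch of that round trip:

```cpp
#include "kernels/legion_dim.h"
#include <cassert>

using namespace FlexFlow;

// Sketch under the stated reversal assumption; 4_n and 1_n use the
// nonnegative_int literal seen elsewhere in this PR.
void check_dim_round_trip() {
  nonnegative_int num_dims = 4_n;
  ff_dim_t d = ff_dim_t{1_n};

  // With reversed ordering, ff dim 1 of 4 would map to legion dim 2.
  legion_dim_t l = legion_dim_from_ff_dim(d, num_dims);

  // Mapping back must recover the original ff dim.
  assert(ff_dim_from_legion_dim(l, num_dims) == d);
}
```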
9 changes: 7 additions & 2 deletions lib/kernels/include/kernels/optimizer_kernels.h
@@ -1,7 +1,8 @@
 #ifndef _FLEXFLOW_KERNELS_INCLUDE_KERNELS_OPTIMIZER_KERNELS_H
 #define _FLEXFLOW_KERNELS_INCLUDE_KERNELS_OPTIMIZER_KERNELS_H
 
-#include "device.h"
+#include "kernels/device.h"
+#include "kernels/ff_handle.h"
 
 namespace FlexFlow {
@@ -20,7 +21,8 @@ void sgd_nccl_update_task_gpu(ffStream_t,
                               float lr,
                               float momentum,
                               bool nesterov,
-                              float weight_decay PerDeviceFFHandle const &,
+                              float weight_decay,
+                              PerDeviceFFHandle const &,
                               float const *weight_grad_ptr,
                               size_t size,
                               float *weight_ptr,
@@ -32,6 +34,8 @@ void adam_ps_update_task_gpu(ffStream_t,
                              float beta2,
                              float weight_decay,
                              float epsilon,
+                             size_t size,
+                             int num_replicas,
                              float const *weight_grad_ptr,
                              float *adam_m_ptr,
                              float *adam_v_ptr,
@@ -43,6 +47,7 @@ void adam_nccl_update_task_gpu(ffStream_t,
                               float beta2,
                               float weight_decay,
                               float epsilon,
+                              size_t size,
                               PerDeviceFFHandle const &,
                               float const *weight_grad_ptr,
                               float *adam_m_ptr,
2 changes: 1 addition & 1 deletion lib/kernels/src/allocation.cc
@@ -15,7 +15,7 @@ GenericTensorAccessorW
     Allocator::allocate_tensor(TensorShape const &tensor_shape) {
   void *ptr =
       this->allocate(get_size_in_bytes(tensor_shape).unwrap_nonnegative());
-  return {tensor_shape.data_type, tensor_shape, ptr};
+  return {tensor_shape.data_type, ArrayShape{tensor_shape}, ptr};
 }
 
 } // namespace FlexFlow
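This call-site change is forced by the array_shape.h diff above: once `ArrayShape(TensorShape const &)` is explicit, copy-list-initialization in the braced return can no longer apply the conversion implicitly. A minimal illustration of the language rule, with hypothetical types rather than this PR's classes:

```cpp
struct From {};

struct To {
  explicit To(From const &) {}
};

To make_to(From const &f) {
  // return {f};    // ill-formed: copy-list-initialization cannot select an
  //                // explicit constructor
  return To{f};     // fine: the conversion is spelled out, as in the diff
}
```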
46 changes: 41 additions & 5 deletions lib/kernels/src/array_shape.cc
@@ -1,6 +1,8 @@
 #include "kernels/array_shape.h"
 #include "op-attrs/dim_ordered/slice.h"
 #include "utils/containers/product.h"
+#include "utils/containers/reversed.h"
+#include "utils/containers/transform.h"
 #include "utils/containers/vector_of.h"
 #include "utils/nonnegative_int/num_elements.h"
@@ -20,6 +22,9 @@
 ArrayShape::ArrayShape(std::vector<nonnegative_int> const &input_dims)
     : dims(input_dims) {}
 
+ArrayShape::ArrayShape(LegionOrdered<nonnegative_int> const &legion_tensor_dims)
+    : dims(legion_tensor_dims) {}
+
 nonnegative_int ArrayShape::get_volume() const {
   return this->num_elements();
 }
@@ -51,6 +56,26 @@
   return dims.at(legion_dim_from_ff_dim(idx, this->num_dims()));
 }
 
+ArrayShape ArrayShape::sub_shape(std::optional<ff_dim_t> start,
+                                 std::optional<ff_dim_t> end) const {
+  return ArrayShape{legion_ordered_from_ff_ordered(
+      slice(ff_ordered_from_legion_ordered(this->dims), start, end))};
+}
+
+ArrayShape ArrayShape::sub_shape(std::optional<legion_dim_t> start,
+                                 std::optional<legion_dim_t> end) const {
+  std::optional<ff_dim_t> legion_start =
+      transform(start, [&](auto const &start_unwrapped) {
+        return ff_dim_from_legion_dim(start_unwrapped, num_dims());
+      });
+
+  std::optional<ff_dim_t> legion_end =
+      transform(end, [&](auto const &end_unwrapped) {
+        return ff_dim_from_legion_dim(end_unwrapped, num_dims());
+      });
+
+  return this->sub_shape(legion_start, legion_end);
+}
+
 bool ArrayShape::operator==(ArrayShape const &other) const {
   return this->tie() == other.tie();
 }
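Note on the legion-indexed overload above: it does not slice directly, it maps each optional bound into ff coordinates and delegates. The `transform` applied to the bounds is an over-`std::optional` map from the newly included `utils/containers/transform.h`; a standalone sketch of that idiom (hypothetical helper, not the PR's implementation):

```cpp
#include <optional>

// Applies f to the contained value, if any; otherwise propagates nullopt.
template <typename T, typename F>
auto transform_opt(std::optional<T> const &o, F &&f)
    -> std::optional<decltype(f(*o))> {
  if (o.has_value()) {
    return f(*o);
  }
  return std::nullopt;
}
```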
@@ -59,11 +84,11 @@
   return this->tie() != other.tie();
 }
 
-ArrayShape ArrayShape::sub_shape(
-    std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
-    std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const {
-  NOT_IMPLEMENTED();
-}
+// ArrayShape ArrayShape::sub_shape(
+//     std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
+//     std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const {
+//   NOT_IMPLEMENTED();
+// }
 
 std::optional<nonnegative_int> ArrayShape::at_maybe(legion_dim_t index) const {
   if (index.value < dims.size()) {
@@ -103,3 +128,14 @@
 }
 
 } // namespace FlexFlow
+
+namespace std {
+size_t hash<FlexFlow::ArrayShape>::operator()(
+    ::FlexFlow::ArrayShape const &x) const {
+  size_t result = 0;
+  result ^=
+      std::hash<::FlexFlow::LegionOrdered<::FlexFlow::nonnegative_int>>{}(
+          x.dims) +
+      0x9e3779b9 + (result << 6) + (result >> 2);
+  return result;
+}
+} // namespace std

(Codecov notes that the added constructor, sub_shape overloads, and std::hash implementation are not covered by tests.)
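The mixing expression above is the boost-style hash_combine recipe; 0x9e3779b9 is the 32-bit golden-ratio constant that spreads bits across the word. With a single field and a zero seed the shift terms vanish, but the pattern generalizes to more members; the generic form it instantiates looks like this (a reference sketch, not part of this PR):

```cpp
#include <cstddef>
#include <functional>

// Mixes v's hash into seed; the shifts and the golden-ratio constant
// decorrelate neighboring values.
template <typename T>
void hash_combine(std::size_t &seed, T const &v) {
  seed ^= std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
```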
4 changes: 2 additions & 2 deletions lib/kernels/src/cuda/cuda_helper.cu
@@ -29,13 +29,13 @@ cudaError_t get_legion_stream(cudaStream_t *stream) {
 #error "Unknown device, please make sure if CUDA is enabled"
 #endif
 
-__global__ void scale_kernel(float *ptr, coord_t size, float a, float b) {
+__global__ void scale_kernel(float *ptr, size_t size, float a, float b) {
   CUDA_KERNEL_LOOP(i, size) {
     ptr[i] = (b - a) * ptr[i] + a;
   }
 }
 
-__global__ void ones_kernel(float *ptr, coord_t size) {
+__global__ void ones_kernel(float *ptr, size_t size) {
   CUDA_KERNEL_LOOP(i, size) {
     ptr[i] = 1.0f;
   }
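`CUDA_KERNEL_LOOP` itself is not shown in this diff. In Caffe-derived codebases such as FlexFlow it is conventionally a grid-stride loop, so each thread strides through the index range by the total number of launched threads and any launch shape covers any `size`; an assumed definition along those lines:

```cuda
// Assumed, conventional definition (not taken from this diff): each thread
// starts at its global index and strides by the total thread count.
#define CUDA_KERNEL_LOOP(i, n)                                                 \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);                 \
       i += blockDim.x * gridDim.x)
```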
5 changes: 4 additions & 1 deletion lib/kernels/src/cuda/ops/concat_kernels.cu
@@ -15,6 +15,8 @@
 
 #include "device.h"
 #include "kernels/concat_kernels.h"
+#include "kernels/legion_dim.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
 #include <cassert>
 
 namespace FlexFlow {
@@ -25,7 +27,8 @@ void calc_blk_size(size_t &num_blocks,
                    size_t &blk_size,
                    ArrayShape const &shape,
                    ff_dim_t axis) {
-  blk_size = shape.sub_shape(legion_dim_t{0_n}, axis)
+  legion_dim_t axis_legion_dim = legion_dim_from_ff_dim(axis, shape.num_dims());
+  blk_size = shape.sub_shape(legion_dim_t{nonnegative_int{0}}, axis_legion_dim)
                  .num_elements()
                  .unwrap_nonnegative();
   num_blocks =
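In legion ordering the lowest dims vary fastest, so slicing from legion dim 0 up to the axis's legion dim collects the contiguous extent of one concatenated block, and the remaining dims count how many such blocks there are. A plain-integer sketch of the computation (hypothetical standalone helper; assumes `num_blocks` is the product of the dims from the axis upward, matching the truncated code above):

```cpp
#include <cstddef>
#include <vector>

// dims are legion-ordered (innermost first); dims below `axis` form one
// contiguous block, the rest count the number of blocks.
void calc_blk_size_sketch(size_t &num_blocks,
                          size_t &blk_size,
                          std::vector<size_t> const &legion_dims,
                          size_t axis) {
  blk_size = 1;
  num_blocks = 1;
  for (size_t i = 0; i < legion_dims.size(); ++i) {
    (i < axis ? blk_size : num_blocks) *= legion_dims[i];
  }
}
```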