Skip to content

Commit

Permalink
Fix cantains contains (PaddlePaddle#61066)
Browse files Browse the repository at this point in the history
  • Loading branch information
co63oc authored and eee4017 committed Jan 30, 2024
1 parent d780c27 commit eae987c
Show file tree
Hide file tree
Showing 10 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace backends {
#define TENSOR_SHAPE_ARGS "tensor_shape_args"

/**
* Split a CINN Module into two separate modules, one cantains the host
* Split a CINN Module into two separate modules, one contains the host
* functions, the other contains the device kernels.
*
* This contains some process:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ inline bool is_horizontal_relation(::pir::Operation* producer,
if (!consumer->ops_set.count(tmp_op)) {
continue;
}
// recored visited op.
// recorded visited op.
if (!visited_set.count(tmp_op)) {
visited_set.insert(tmp_op);
candidates.push(tmp_op);
Expand Down
4 changes: 2 additions & 2 deletions paddle/cinn/hlir/pass/op_fusion_pass_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ CONDITION_FUNC(is_horizontal_relation) {
candidates.pop();
// visit all producer node
for (auto tmp_node : helper->GetProducerNode(candidate)) {
// check depency.
// check dependency.
if (producer == tmp_node) {
return true;
}
// check node is in region.
if (!consumer->nodes_set.count(tmp_node)) {
continue;
}
// recored visited node.
// recorded visited node.
if (!visited_set.count(tmp_node)) {
visited_set.insert(tmp_node);
candidates.push(tmp_node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ void OptimizeReductionTactic::Apply(ir::IRSchedule* sch,
std::vector<ir::Expr> loops = sch->GetLoops(block_id);
int first_reduce_loop_idx = context_->iter_space_info.sp_space.size();
CHECK_LT(first_reduce_loop_idx, loops.size())
<< "first_reduce_loop_idx shoud be less than number of loop.";
<< "first_reduce_loop_idx should be less than number of loop.";
ir::Expr block = sch->GetBlock(block_id);
ir::Tensor reduce_tensor = analyzer::GetStoreTensorOfSBlock(block);
int non_reduce_memory_space_rank =
Expand Down
2 changes: 1 addition & 1 deletion paddle/cinn/lang/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ One can pass a `Shared` object by passing a pointer and the consumer object shou

## Tensor

The input or the temporary ouptut node.
The input or the temporary output node.

Every `Compute` will output a Tensor, the tensor can be sliced.

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/table/depends/dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
namespace paddle {
namespace distributed {

// dense optimzier
// dense optimizer
// TODO(tangwei12) integrate with sparse optimizer later.
class DenseOptimizer {
public:
Expand Down
24 changes: 12 additions & 12 deletions paddle/fluid/framework/fleet/box_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class BasicAucCalculator {
double size() const { return _size; }
double rmse() const { return _rmse; }
std::vector<double>& get_negative() { return _table[0]; }
std::vector<double>& get_postive() { return _table[1]; }
std::vector<double>& get_positive() { return _table[1]; }
double& local_abserr() { return _local_abserr; }
double& local_sqrerr() { return _local_sqrerr; }
double& local_pred() { return _local_pred; }
Expand Down Expand Up @@ -453,7 +453,7 @@ class BoxWrapper {
p_agent_ = boxps::PSAgentBase::GetIns(feedpass_thread_num_);
p_agent_->Init();
for (const auto& slot_name : slot_omit_in_feedpass) {
slot_name_omited_in_feedpass_.insert(slot_name);
slot_name_omitted_in_feedpass_.insert(slot_name);
}
slot_vector_ = slot_vector;
keys_tensor.resize(platform::GetGPUDeviceCount());
Expand Down Expand Up @@ -548,8 +548,8 @@ class BoxWrapper {

bool UseAfsApi() const { return use_afs_api_; }

const std::unordered_set<std::string>& GetOmitedSlot() const {
return slot_name_omited_in_feedpass_;
const std::unordered_set<std::string>& GetOmittedSlot() const {
return slot_name_omitted_in_feedpass_;
}

class MetricMsg {
Expand Down Expand Up @@ -914,7 +914,7 @@ class BoxWrapper {
// TODO(hutuxian): magic number, will add a config to specify
const int feedpass_thread_num_ = 30; // magic number
static std::shared_ptr<BoxWrapper> s_instance_;
std::unordered_set<std::string> slot_name_omited_in_feedpass_;
std::unordered_set<std::string> slot_name_omitted_in_feedpass_;
// EMBEDX_DIM and EXPAND_EMBED_DIM
static int embedx_dim_;
static int expand_embed_dim_;
Expand Down Expand Up @@ -1098,19 +1098,19 @@ class BoxHelper {
const std::deque<Record>& pass_data = input_channel_->GetData();

// get feasigns that FeedPass doesn't need
const std::unordered_set<std::string>& slot_name_omited_in_feedpass_ =
box_ptr->GetOmitedSlot();
std::unordered_set<int> slot_id_omited_in_feedpass_;
const std::unordered_set<std::string>& slot_name_omitted_in_feedpass_ =
box_ptr->GetOmittedSlot();
std::unordered_set<int> slot_id_omitted_in_feedpass_;
const auto& all_readers = dataset_->GetReaders();
PADDLE_ENFORCE_GT(all_readers.size(),
0,
platform::errors::PreconditionNotMet(
"Readers number must be greater than 0."));
const auto& all_slots_name = all_readers[0]->GetAllSlotAlias();
for (size_t i = 0; i < all_slots_name.size(); ++i) {
if (slot_name_omited_in_feedpass_.find(all_slots_name[i]) !=
slot_name_omited_in_feedpass_.end()) {
slot_id_omited_in_feedpass_.insert(i);
if (slot_name_omitted_in_feedpass_.find(all_slots_name[i]) !=
slot_name_omitted_in_feedpass_.end()) {
slot_id_omitted_in_feedpass_.insert(i);
}
}
const size_t tnum = box_ptr->GetFeedpassThreadNum();
Expand All @@ -1130,7 +1130,7 @@ class BoxHelper {
begin,
begin + len_per_thread + (i < remain ? 1 : 0),
p_agent,
std::ref(slot_id_omited_in_feedpass_),
std::ref(slot_id_omitted_in_feedpass_),
i));
begin += len_per_thread + (i < remain ? 1 : 0);
}
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/fleet/ps_gpu_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ class PSGPUWrapper {
#ifdef PADDLE_WITH_CUDA
std::vector<MemoryPool*> mem_pools_;
std::vector<HBMMemoryPoolFix*> hbm_pools_; // in multi mfdim, one table need
// hbm pools of totol dims number
// hbm pools of total dims number
#endif

std::shared_ptr<
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/executor_statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -454,9 +454,9 @@ int StatisticsEngine::Stat(const platform::NodeTrees& trees) {
}
}
auto& python_end = statistics_[name2idx_["PythonEnd"]];
const auto& totol = statistics_[name2idx_["Total"]];
const auto& total = statistics_[name2idx_["Total"]];
const auto& cplusplus_end = statistics_[name2idx_["CplusplusEnd"]];
python_end.total_time = totol.total_time - cplusplus_end.total_time;
python_end.total_time = total.total_time - cplusplus_end.total_time;
python_end.count = cplusplus_end.count + 1;

auto& launch_kernel = statistics_[name2idx_["LaunchKernel"]];
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/ps_gpu_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ void PSGPUWorker::TrainFiles() {
{
std::lock_guard<std::mutex> lock(mutex);
VLOG(0) << "worker " << thread_id_ << ": " << var_name
<< " cantains inf or nan";
<< " contains inf or nan";
auto all_vars = thread_scope->LocalVarNames();
std::stringstream ss;
ss << "====== worker " << thread_id_ << "======\n";
Expand Down Expand Up @@ -556,7 +556,7 @@ void PSGPUWorker::TrainFilesWithProfiler() {
for (size_t i = 0; i < op_name.size(); ++i) {
VLOG(0) << "card:" << thread_id_ << ", op: " << op_name[i]
<< ", mean time: " << op_total_time[i] / total_ins_num
<< "s, totol time:" << op_total_time[i] << "sec";
<< "s, total time:" << op_total_time[i] << "sec";
}
VLOG(0) << "card: " << thread_id_ << " read time: " << read_time
<< ", percent: " << read_time / total_time * 100;
Expand Down

0 comments on commit eae987c

Please sign in to comment.