diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 2f6942fe139..cc5f256ea80 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -464,17 +464,6 @@ std::unique_ptr make_all_nulls_column(schema_element const& schema, */ column_name_info make_column_name_info(schema_element const& schema, std::string const& col_name); -/** - * @brief Get the path data type of a column by path if present in input schema - * - * @param path path of the column - * @param options json reader options which holds schema - * @return data type of the column if present - */ -std::optional get_path_data_type( - host_span const> path, - cudf::io::json_reader_options const& options); - /** * @brief Helper class to get path of a column by column id from reduced column tree * diff --git a/cpp/src/io/json/parser_features.cpp b/cpp/src/io/json/parser_features.cpp index 2da320b2af3..4b4827ca8d9 100644 --- a/cpp/src/io/json/parser_features.cpp +++ b/cpp/src/io/json/parser_features.cpp @@ -68,78 +68,6 @@ void json_reader_options::set_dtypes(schema_element types) } // namespace cudf::io namespace cudf::io::json::detail { -namespace { - -// example schema and its path. -// "a": int {"a", int} -// "a": [ int ] {"a", list}, {"element", int} -// "a": { "b": int} {"a", struct}, {"b", int} -// "a": [ {"b": int }] {"a", list}, {"element", struct}, {"b", int} -// "a": [ null] {"a", list}, {"element", str} -// back() is root. -// front() is leaf. -/** - * @brief Get the path data type of a column by path if present in input schema - * - * @param path path of the json column - * @param root root of input schema element - * @return data type of the column if present, otherwise std::nullopt - */ -std::optional get_path_data_type( - host_span const> path, schema_element const& root) -{ - if (path.empty() || path.size() == 1) { - return root.type; - } else { - if (path.back().second == NC_STRUCT && root.type.id() == type_id::STRUCT) { - auto const child_name = path.first(path.size() - 1).back().first; - auto const child_schema_it = root.child_types.find(child_name); - return (child_schema_it != std::end(root.child_types)) - ? get_path_data_type(path.first(path.size() - 1), child_schema_it->second) - : std::optional{}; - } else if (path.back().second == NC_LIST && root.type.id() == type_id::LIST) { - auto const child_schema_it = root.child_types.find(list_child_name); - return (child_schema_it != std::end(root.child_types)) - ? get_path_data_type(path.first(path.size() - 1), child_schema_it->second) - : std::optional{}; - } - return std::optional{}; - } -} - -std::optional child_schema_element(std::string const& col_name, - cudf::io::json_reader_options const& options) -{ - return std::visit( - cudf::detail::visitor_overload{ - [col_name](std::vector const& user_dtypes) -> std::optional { - auto column_index = atol(col_name.data()); - return (static_cast(column_index) < user_dtypes.size()) - ? std::optional{{user_dtypes[column_index]}} - : std::optional{}; - }, - [col_name]( - std::map const& user_dtypes) -> std::optional { - return (user_dtypes.find(col_name) != std::end(user_dtypes)) - ? std::optional{{user_dtypes.find(col_name)->second}} - : std::optional{}; - }, - [col_name]( - std::map const& user_dtypes) -> std::optional { - return (user_dtypes.find(col_name) != std::end(user_dtypes)) - ? user_dtypes.find(col_name)->second - : std::optional{}; - }, - [col_name](schema_element const& user_dtypes) -> std::optional { - return (user_dtypes.child_types.find(col_name) != std::end(user_dtypes.child_types)) - ? user_dtypes.child_types.find(col_name)->second - : std::optional{}; - }}, - options.get_dtypes()); -} - -} // namespace - /// Created an empty column of the specified schema struct empty_column_functor { rmm::cuda_stream_view stream; @@ -311,48 +239,4 @@ column_name_info make_column_name_info(schema_element const& schema, std::string } return info; } - -std::optional get_path_data_type( - host_span const> path, - cudf::io::json_reader_options const& options) -{ - if (path.empty()) return {}; - std::optional col_schema = child_schema_element(path.back().first, options); - // check if it has value, then do recursive call and return. - if (col_schema.has_value()) { - return get_path_data_type(path, col_schema.value()); - } else { - return {}; - } -} - -// idea: write a memoizer using template and lambda?, then call recursively. -std::vector path_from_tree::get_path(NodeIndexT this_col_id) -{ - std::vector path; - // stops at root. - while (this_col_id != parent_node_sentinel) { - auto type = column_categories[this_col_id]; - std::string name = ""; - // code same as name_and_parent_index lambda. - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { - name = column_names[this_col_id]; - } else { - name = list_child_name; - } - } else if (column_categories[parent_col_id] == NC_FN) { - auto field_name_col_id = parent_col_id; - parent_col_id = column_parent_ids[parent_col_id]; - name = column_names[field_name_col_id]; - } - // "name": type/schema - path.emplace_back(name, type); - this_col_id = parent_col_id; - if (this_col_id == row_array_parent_col_id) return path; - } - return {}; -} - } // namespace cudf::io::json::detail