Skip to content

Commit

Permalink
Add placeholder for the experimental JSON reader (NVIDIA#11334)
Browse files Browse the repository at this point in the history
Adds a new boolean option to libcudf `json_reader_options` - `experimental` (defaults to `false`).
When the experimental reader is selected, a placeholder implementation is called that simply fails for now.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)

URL: rapidsai/cudf#11334
  • Loading branch information
vuule authored Jul 29, 2022
1 parent 9bed351 commit b68ecd1
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 0 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ add_library(
src/io/functions.cpp
src/io/json/json_gpu.cu
src/io/json/reader_impl.cu
src/io/json/experimental/read_json.cpp
src/io/orc/aggregate_orc_metadata.cpp
src/io/orc/dict_enc.cu
src/io/orc/orc.cpp
Expand Down
29 changes: 29 additions & 0 deletions cpp/include/cudf/io/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ class json_reader_options {
// Whether to parse dates as DD/MM versus MM/DD
bool _dayfirst = false;

// Whether to use the experimental reader
bool _experimental = false;

/**
* @brief Constructor from source info.
*
Expand Down Expand Up @@ -193,6 +196,13 @@ class json_reader_options {
*/
[[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; }

/**
 * @brief Whether the experimental reader should be used.
 *
 * Pure accessor with no side effects; it is marked `[[nodiscard]]` because a
 * call whose result is ignored does nothing.
 *
 * @returns true if the experimental reader will be used, false otherwise
 */
[[nodiscard]] bool is_enabled_experimental() const { return _experimental; }

/**
* @brief Set data types for columns to be read.
*
Expand Down Expand Up @@ -241,6 +251,13 @@ class json_reader_options {
* @param val Boolean value to enable/disable day first parsing format
*/
void enable_dayfirst(bool val) { _dayfirst = val; }

/**
 * @brief Set whether to use the experimental reader.
 *
 * Stores the flag that `is_enabled_experimental()` reports. The option is
 * `false` unless explicitly enabled.
 *
 * @param val Boolean value to enable/disable the experimental reader
 */
void enable_experimental(bool val) { _experimental = val; }
};

/**
Expand Down Expand Up @@ -348,6 +365,18 @@ class json_reader_options_builder {
return *this;
}

/**
 * @brief Select or deselect the experimental reader.
 *
 * @param val `true` to request the experimental reader, `false` to keep the
 * regular one
 * @return this for chaining
 */
json_reader_options_builder& experimental(bool val)
{
  // Forward to the options object's own setter; it stores the same flag.
  options.enable_experimental(val);
  return *this;
}

/**
* @brief move json_reader_options member once it's built.
*/
Expand Down
31 changes: 31 additions & 0 deletions cpp/src/io/json/experimental/read_json.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "read_json.hpp"

#include <cudf/utilities/error.hpp>

namespace cudf::io::detail::json::experimental {

/**
 * @brief Placeholder entry point for the experimental JSON reader.
 *
 * Nothing is parsed yet: none of the arguments are used and every call fails
 * through `CUDF_FAIL` with the message "Not implemented".
 *
 * @param sources Input data sources (currently unused)
 * @param reader_opts JSON reader options (currently unused)
 * @param stream CUDA stream (currently unused)
 * @param mr Device memory resource (currently unused)
 * @return Nothing at present; the function never returns normally
 */
table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
json_reader_options const& reader_opts,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Unconditional failure until the experimental reader is implemented.
CUDF_FAIL("Not implemented");
}

} // namespace cudf::io::detail::json::experimental
36 changes: 36 additions & 0 deletions cpp/src/io/json/experimental/read_json.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/io/datasource.hpp>
#include <cudf/io/json.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <memory>

namespace cudf::io::detail::json::experimental {

/**
 * @brief Entry point of the experimental JSON reader.
 *
 * @param sources Span of data sources holding the JSON input
 * @param reader_opts Options describing how the input should be read
 * @param stream CUDA stream; presumably the stream used for device work - TODO confirm
 * @param mr Device memory resource; presumably used to allocate the returned
 * table's device memory - TODO confirm
 * @return The read table together with its metadata
 */
table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
json_reader_options const& reader_opts,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace cudf::io::detail::json::experimental
6 changes: 6 additions & 0 deletions cpp/src/io/json/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#include "json_gpu.hpp"

#include "experimental/read_json.hpp"

#include <hash/concurrent_unordered_map.cuh>

#include <io/comp/io_uncomp.hpp>
Expand Down Expand Up @@ -571,6 +573,10 @@ table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (reader_opts.is_enabled_experimental()) {
return experimental::read_json(sources, reader_opts, stream, mr);
}

CUDF_EXPECTS(not sources.empty(), "No sources were defined");

CUDF_EXPECTS(reader_opts.is_enabled_lines(), "Only JSON Lines format is currently supported.\n");
Expand Down

0 comments on commit b68ecd1

Please sign in to comment.