|
32 | 32 | #include <cudf/table/table.hpp>
|
33 | 33 | #include <cudf/utilities/default_stream.hpp>
|
34 | 34 |
|
| 35 | +#include <thrust/iterator/constant_iterator.h> |
| 36 | + |
35 | 37 | #include <numeric>
|
36 | 38 | #include <stdexcept>
|
37 | 39 | #include <string>
|
@@ -164,37 +166,6 @@ TEST_F(StringColumnTest, ConcatenateColumnView)
|
164 | 166 | CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
|
165 | 167 | }
|
166 | 168 |
|
167 |
| -TEST_F(StringColumnTest, ConcatenateColumnViewLarge) |
168 |
| -{ |
169 |
| - // Test large concatenate, causes out of bound device memory errors if kernel |
170 |
| - // indexing is not int64_t. |
171 |
| - // 1.5GB bytes, 5k columns |
172 |
| - constexpr size_t num_strings = 10000; |
173 |
| - constexpr size_t string_length = 150000; |
174 |
| - constexpr size_t strings_per_column = 2; |
175 |
| - constexpr size_t num_columns = num_strings / strings_per_column; |
176 |
| - |
177 |
| - std::vector<std::string> strings; |
178 |
| - std::vector<char const*> h_strings; |
179 |
| - std::vector<cudf::test::strings_column_wrapper> strings_column_wrappers; |
180 |
| - std::vector<cudf::column_view> strings_columns; |
181 |
| - |
182 |
| - std::string s(string_length, 'a'); |
183 |
| - for (size_t i = 0; i < num_strings; ++i) |
184 |
| - h_strings.push_back(s.data()); |
185 |
| - |
186 |
| - for (size_t i = 0; i < num_columns; ++i) |
187 |
| - strings_column_wrappers.push_back(cudf::test::strings_column_wrapper( |
188 |
| - h_strings.data() + i * strings_per_column, h_strings.data() + (i + 1) * strings_per_column)); |
189 |
| - for (auto& wrapper : strings_column_wrappers) |
190 |
| - strings_columns.push_back(wrapper); |
191 |
| - |
192 |
| - auto results = cudf::concatenate(strings_columns); |
193 |
| - |
194 |
| - cudf::test::strings_column_wrapper expected(h_strings.begin(), h_strings.end()); |
195 |
| - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); |
196 |
| -} |
197 |
| - |
198 | 169 | TEST_F(StringColumnTest, ConcatenateManyColumns)
|
199 | 170 | {
|
200 | 171 | std::vector<char const*> h_strings{
|
@@ -226,6 +197,49 @@ TEST_F(StringColumnTest, ConcatenateTooLarge)
|
226 | 197 | EXPECT_THROW(cudf::concatenate(input_cols), std::overflow_error);
|
227 | 198 | }
|
228 | 199 |
|
| 200 | +TEST_F(StringColumnTest, ConcatenateLargeStrings) |
| 201 | +{ |
| 202 | + CUDF_TEST_ENABLE_LARGE_STRINGS(); |
| 203 | + auto itr = thrust::constant_iterator<std::string_view>( |
| 204 | + "abcdefghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXY"); // every row is this same 50-byte string |
| 205 | + auto input = cudf::test::strings_column_wrapper(itr, itr + 5'000'000); // 5M rows x 50 bytes = 250MB |
| 206 | + auto view = cudf::column_view(input); |
| 207 | + std::vector<cudf::column_view> input_cols; |
| 208 | + std::vector<cudf::size_type> splits; |
| 209 | + int const multiplier = 10; |
| 210 | + for (int i = 0; i < multiplier; ++i) { // 10 x 250MB = 2500MB > 2GB; INT64 offsets are asserted below |
| 211 | + input_cols.push_back(view); |
| 212 | + splits.push_back(view.size() * (i + 1)); |
| 213 | + } |
| 214 | + splits.pop_back(); // remove last entry: it equals the total row count, not an interior split point |
| 215 | + auto result = cudf::concatenate(input_cols); |
| 216 | + auto sv = cudf::strings_column_view(result->view()); |
| 217 | + EXPECT_EQ(sv.size(), view.size() * multiplier); |
| 218 | + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); |
| 219 | + |
| 220 | + // verify results in sections: split at multiples of the input size and compare each slice to the input |
| 221 | + auto sliced = cudf::split(result->view(), splits); |
| 222 | + for (auto c : sliced) { |
| 223 | + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); |
| 224 | + } |
| 225 | + |
| 226 | + // also test with a large strings column (the previous result) as one of the inputs |
| 227 | + { |
| 228 | + input_cols.clear(); |
| 229 | + input_cols.push_back(input); // regular column (the original 250MB input) |
| 230 | + input_cols.push_back(result->view()); // large column (output of the first concatenate) |
| 231 | + result = cudf::concatenate(input_cols); |
| 232 | + sv = cudf::strings_column_view(result->view()); |
| 233 | + EXPECT_EQ(sv.size(), view.size() * (multiplier + 1)); |
| 234 | + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); |
| 235 | + splits.push_back(view.size() * multiplier); |
| 236 | + sliced = cudf::split(result->view(), splits); |
| 237 | + for (auto c : sliced) { |
| 238 | + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); |
| 239 | + } |
| 240 | + } |
| 241 | +} |
| 242 | + |
229 | 243 | struct TableTest : public cudf::test::BaseFixture {};
|
230 | 244 |
|
231 | 245 | TEST_F(TableTest, ConcatenateTables)
|
|
0 commit comments