From 9c1f4a26b3c49b875ffc6ab706416105437cb964 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:42:51 -0800 Subject: [PATCH 1/8] Avoid private cudf DeviceScalar in favor of using pylibcudf & pyarrow --- .../morpheus/morpheus/_lib/cudf_helpers.pyx | 33 ++++++------------- .../morpheus/_lib/cudf_helpers/__init__.pyi | 2 ++ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index fe0e96536..a61d7f1c6 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -35,15 +35,10 @@ from cudf._lib.column cimport Column # isort: off -# imports needed for get_element, which is required by from_column_view_with_fix -cimport pylibcudf.libcudf.copying as cpp_copying -from pylibcudf.libcudf.column.column_view cimport column_view -from libcpp.memory cimport make_unique, unique_ptr -from pylibcudf.libcudf.scalar.scalar cimport scalar -from pylibcudf cimport Table as plc_Table -from cudf._lib.scalar cimport DeviceScalar - # imports needed for from_column_view_with_fix +import pylibcudf as plc +from pylibcudf cimport Column as plc_Column, Table as plc_Table +from pylibcudf.libcudf.column.column_view cimport column_view import rmm from libc.stdint cimport uintptr_t from cudf.core.buffer import ( @@ -64,18 +59,6 @@ from cudf._lib.null_mask import bitmask_allocation_size_bytes # isort: on -cdef get_element(column_view col_view, size_type index): - - cdef unique_ptr[scalar] c_output - with nogil: - c_output = move( - cpp_copying.get_element(col_view, index) - ) - - return DeviceScalar.from_unique_ptr( - move(c_output), dtype=dtype_from_column_view(col_view) - ) - cdef Column from_column_view_with_fix(column_view cv, object owner): """ Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it, @@ -118,9 +101,13 @@ cdef Column from_column_view_with_fix(column_view cv, object owner): if offset_child_column.size() == 0: base_nbytes = 0 else: - chars_size = get_element( - offset_child_column, offset_child_column.size()-1).value - base_nbytes = chars_size + offset_child_column_owner = owner.children[offsets_column_index] + plc_owner = offset_child_column_owner.to_pylibcudf(mode="read") + plc_offsets_col = plc_Column.from_column_view( + offset_child_column, plc_owner + ) + plc_scalar = plc.copying.get_element(plc_offsets_col, offset_child_column.size()-1) + base_nbytes = plc.interop.to_arrow(plc_scalar).as_py() if data_ptr: if data_owner is None: diff --git a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi index bece30f67..166b0e42d 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi +++ b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi @@ -8,6 +8,7 @@ from cudf.core.dtypes import StructDtype import _cython_3_0_11 import cudf import itertools +import pylibcudf import rmm __all__ = [ @@ -19,6 +20,7 @@ __all__ = [ "bitmask_allocation_size_bytes", "cudf", "itertools", + "plc", "rmm" ] From 70ffce74cefa42dd9f8061c86e9a636ddad54d36 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:53:18 -0800 Subject: [PATCH 2/8] Add back get_element --- .../morpheus/morpheus/_lib/cudf_helpers.pyx | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index a61d7f1c6..1611fa222 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -35,10 +35,15 @@ from cudf._lib.column cimport Column # isort: off -# imports needed for from_column_view_with_fix -import pylibcudf as plc -from pylibcudf cimport Column as plc_Column, Table as plc_Table +# imports needed for get_element, which is required by from_column_view_with_fix +cimport pylibcudf.libcudf.copying as cpp_copying from pylibcudf.libcudf.column.column_view cimport column_view +from libcpp.memory cimport make_unique, unique_ptr +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf cimport Table as plc_Table, Scalar as plc_Scalar +import pylibcudf as plc + +# imports needed for from_column_view_with_fix import rmm from libc.stdint cimport uintptr_t from cudf.core.buffer import ( @@ -59,6 +64,18 @@ from cudf._lib.null_mask import bitmask_allocation_size_bytes # isort: on +cdef get_element(column_view col_view, size_type index): + + cdef unique_ptr[scalar] c_output + with nogil: + c_output = move( + cpp_copying.get_element(col_view, index) + ) + + plc_scalar = plc_Scalar.from_libcudf(move(c_output)) + return plc.interop.to_arrow(plc_scalar).to_py() + + cdef Column from_column_view_with_fix(column_view cv, object owner): """ Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it, @@ -101,13 +118,9 @@ cdef Column from_column_view_with_fix(column_view cv, object owner): if offset_child_column.size() == 0: base_nbytes = 0 else: - offset_child_column_owner = owner.children[offsets_column_index] - plc_owner = offset_child_column_owner.to_pylibcudf(mode="read") - plc_offsets_col = plc_Column.from_column_view( - offset_child_column, plc_owner - ) - plc_scalar = plc.copying.get_element(plc_offsets_col, offset_child_column.size()-1) - base_nbytes = plc.interop.to_arrow(plc_scalar).as_py() + chars_size = get_element( + offset_child_column, offset_child_column.size()-1) + base_nbytes = chars_size if data_ptr: if data_owner is None: From 4d102ccbe83734a1225e971505a7bb82631c87a2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:57:39 -0800 Subject: [PATCH 3/8] Update copyright year --- python/morpheus/morpheus/_lib/cudf_helpers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index 1611fa222..84612b961 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); From ea9707e107e1b9020f0a490b7c903e6a3a4347f2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 6 Jan 2025 10:36:04 -0800 Subject: [PATCH 4/8] Simpler repro of the core dump occurring in 'tests/examples/log_parsing/test_log_parsing_pipe.py' this requires exporting CudfHelpers, REVERT THIS COMMIT PRIOR TO MERGING! --- .../include/morpheus/utilities/cudf_util.hpp | 3 ++- .../morpheus/_lib/tests/test_file_in_out.cpp | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp b/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp index 7a87620b9..f2eea4585 100644 --- a/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp +++ b/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp @@ -17,6 +17,7 @@ #pragma once +#include "morpheus/export.h" #include "morpheus/objects/table_info.hpp" #include "morpheus/objects/table_info_data.hpp" @@ -37,7 +38,7 @@ namespace morpheus { * the actual generated cython calls. The cython implementation in 'cudf_helpers_api.h' can only appear in the * translation unit for the pybind module declaration. These functions should be considered de */ -struct CudfHelper +struct MORPHEUS_EXPORT CudfHelper { public: static void load(); diff --git a/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp b/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp index 55b5465ae..dcf28e98e 100644 --- a/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp +++ b/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp @@ -19,7 +19,9 @@ #include "morpheus/io/deserializers.hpp" #include "morpheus/io/serializers.hpp" +#include "morpheus/messages/control.hpp" #include "morpheus/messages/meta.hpp" +#include "morpheus/utilities/cudf_util.hpp" // for CudfHelper #include #include @@ -108,3 +110,23 @@ TEST_F(TestFileInOut, RoundTripJSONLines) EXPECT_EQ(output_data, src_data); } } + +TEST_F(TestFileInOut, CoreDumpOnGetInfo) +{ + auto input_file = test::get_morpheus_root() / "examples/data/log-parsing-validation-data-input.csv"; + auto py_df = read_file_to_df(input_file); + auto meta = MessageMeta::create_from_python(std::move(py_df)); + + auto sliced_meta = SlicedMessageMeta(meta, 0, 1024); + + pybind11::gil_scoped_release no_gil; + auto sliced_info = sliced_meta.get_info(); + + // This unforuntately requires grabbing the GIL and is a work-around for issue #2018 + auto new_meta = MessageMeta::create_from_python(CudfHelper::table_from_table_info(sliced_info)); + + auto cm = std::make_shared(); + cm->payload(new_meta); + + auto table_info = cm->payload()->get_info("raw"); +} From e12895b9c589dfe145cc758fc73094f8f0806bf4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:54:56 -0800 Subject: [PATCH 5/8] Temporarily undo changes to double check failure with CI --- python/morpheus/morpheus/_lib/cudf_helpers.pyx | 14 +++++++------- .../morpheus/_lib/cudf_helpers/__init__.pyi | 2 -- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index 84612b961..fe0e96536 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,8 +40,8 @@ cimport pylibcudf.libcudf.copying as cpp_copying from pylibcudf.libcudf.column.column_view cimport column_view from libcpp.memory cimport make_unique, unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar -from pylibcudf cimport Table as plc_Table, Scalar as plc_Scalar -import pylibcudf as plc +from pylibcudf cimport Table as plc_Table +from cudf._lib.scalar cimport DeviceScalar # imports needed for from_column_view_with_fix import rmm @@ -72,9 +72,9 @@ cdef get_element(column_view col_view, size_type index): cpp_copying.get_element(col_view, index) ) - plc_scalar = plc_Scalar.from_libcudf(move(c_output)) - return plc.interop.to_arrow(plc_scalar).to_py() - + return DeviceScalar.from_unique_ptr( + move(c_output), dtype=dtype_from_column_view(col_view) + ) cdef Column from_column_view_with_fix(column_view cv, object owner): """ @@ -119,7 +119,7 @@ cdef Column from_column_view_with_fix(column_view cv, object owner): base_nbytes = 0 else: chars_size = get_element( - offset_child_column, offset_child_column.size()-1) + offset_child_column, offset_child_column.size()-1).value base_nbytes = chars_size if data_ptr: diff --git a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi index 166b0e42d..bece30f67 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi +++ b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi @@ -8,7 +8,6 @@ from cudf.core.dtypes import StructDtype import _cython_3_0_11 import cudf import itertools -import pylibcudf import rmm __all__ = [ @@ -20,7 +19,6 @@ __all__ = [ "bitmask_allocation_size_bytes", "cudf", "itertools", - "plc", "rmm" ] From 106db149d07115a4f678c34592b111dad211adae Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:00:40 -0800 Subject: [PATCH 6/8] Revert "Temporarily undo changes to double check failure with CI" This reverts commit e12895b9c589dfe145cc758fc73094f8f0806bf4. --- python/morpheus/morpheus/_lib/cudf_helpers.pyx | 14 +++++++------- .../morpheus/_lib/cudf_helpers/__init__.pyi | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index fe0e96536..84612b961 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,8 +40,8 @@ cimport pylibcudf.libcudf.copying as cpp_copying from pylibcudf.libcudf.column.column_view cimport column_view from libcpp.memory cimport make_unique, unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar -from pylibcudf cimport Table as plc_Table -from cudf._lib.scalar cimport DeviceScalar +from pylibcudf cimport Table as plc_Table, Scalar as plc_Scalar +import pylibcudf as plc # imports needed for from_column_view_with_fix import rmm @@ -72,9 +72,9 @@ cdef get_element(column_view col_view, size_type index): cpp_copying.get_element(col_view, index) ) - return DeviceScalar.from_unique_ptr( - move(c_output), dtype=dtype_from_column_view(col_view) - ) + plc_scalar = plc_Scalar.from_libcudf(move(c_output)) + return plc.interop.to_arrow(plc_scalar).to_py() + cdef Column from_column_view_with_fix(column_view cv, object owner): """ @@ -119,7 +119,7 @@ cdef Column from_column_view_with_fix(column_view cv, object owner): base_nbytes = 0 else: chars_size = get_element( - offset_child_column, offset_child_column.size()-1).value + offset_child_column, offset_child_column.size()-1) base_nbytes = chars_size if data_ptr: diff --git a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi index bece30f67..166b0e42d 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi +++ b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi @@ -8,6 +8,7 @@ from cudf.core.dtypes import StructDtype import _cython_3_0_11 import cudf import itertools +import pylibcudf import rmm __all__ = [ @@ -19,6 +20,7 @@ __all__ = [ "bitmask_allocation_size_bytes", "cudf", "itertools", + "plc", "rmm" ] From 9a6c17db06addd19c7ba64eade46d2cedf09246d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:03:27 -0800 Subject: [PATCH 7/8] as_py instead of to_py --- python/morpheus/morpheus/_lib/cudf_helpers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index 84612b961..9aec181e3 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -73,7 +73,7 @@ cdef get_element(column_view col_view, size_type index): ) plc_scalar = plc_Scalar.from_libcudf(move(c_output)) - return plc.interop.to_arrow(plc_scalar).to_py() + return plc.interop.to_arrow(plc_scalar).as_py() cdef Column from_column_view_with_fix(column_view cv, object owner): From 015935ce53adfa0b9108b84522a764be144bff74 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:03:49 -0800 Subject: [PATCH 8/8] Revert "Simpler repro of the core dump occurring in 'tests/examples/log_parsing/test_log_parsing_pipe.py' this requires exporting CudfHelpers, REVERT THIS COMMIT PRIOR TO MERGING!" This reverts commit ea9707e107e1b9020f0a490b7c903e6a3a4347f2. --- .../include/morpheus/utilities/cudf_util.hpp | 3 +-- .../morpheus/_lib/tests/test_file_in_out.cpp | 22 ------------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp b/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp index f2eea4585..7a87620b9 100644 --- a/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp +++ b/python/morpheus/morpheus/_lib/include/morpheus/utilities/cudf_util.hpp @@ -17,7 +17,6 @@ #pragma once -#include "morpheus/export.h" #include "morpheus/objects/table_info.hpp" #include "morpheus/objects/table_info_data.hpp" @@ -38,7 +37,7 @@ namespace morpheus { * the actual generated cython calls. The cython implementation in 'cudf_helpers_api.h' can only appear in the * translation unit for the pybind module declaration. These functions should be considered de */ -struct MORPHEUS_EXPORT CudfHelper +struct CudfHelper { public: static void load(); diff --git a/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp b/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp index dcf28e98e..55b5465ae 100644 --- a/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp +++ b/python/morpheus/morpheus/_lib/tests/test_file_in_out.cpp @@ -19,9 +19,7 @@ #include "morpheus/io/deserializers.hpp" #include "morpheus/io/serializers.hpp" -#include "morpheus/messages/control.hpp" #include "morpheus/messages/meta.hpp" -#include "morpheus/utilities/cudf_util.hpp" // for CudfHelper #include #include @@ -110,23 +108,3 @@ TEST_F(TestFileInOut, RoundTripJSONLines) EXPECT_EQ(output_data, src_data); } } - -TEST_F(TestFileInOut, CoreDumpOnGetInfo) -{ - auto input_file = test::get_morpheus_root() / "examples/data/log-parsing-validation-data-input.csv"; - auto py_df = read_file_to_df(input_file); - auto meta = MessageMeta::create_from_python(std::move(py_df)); - - auto sliced_meta = SlicedMessageMeta(meta, 0, 1024); - - pybind11::gil_scoped_release no_gil; - auto sliced_info = sliced_meta.get_info(); - - // This unforuntately requires grabbing the GIL and is a work-around for issue #2018 - auto new_meta = MessageMeta::create_from_python(CudfHelper::table_from_table_info(sliced_info)); - - auto cm = std::make_shared(); - cm->payload(new_meta); - - auto table_info = cm->payload()->get_info("raw"); -}