From 26eecdae41c41918f28b6dda5fbbc990398f0eee Mon Sep 17 00:00:00 2001
From: Hiroyuki Sato
Date: Thu, 30 Jan 2025 10:07:06 +0900
Subject: [PATCH 1/4] GH-44760: [GLib] Add garrow_record_batch_validate_full()

---
 c_glib/arrow-glib/record-batch.cpp | 18 ++++++++++++++
 c_glib/arrow-glib/record-batch.h   |  4 +++
 c_glib/test/test-record-batch.rb   | 39 ++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/c_glib/arrow-glib/record-batch.cpp b/c_glib/arrow-glib/record-batch.cpp
index 07e83c9f23ad0..6acf217b516aa 100644
--- a/c_glib/arrow-glib/record-batch.cpp
+++ b/c_glib/arrow-glib/record-batch.cpp
@@ -516,6 +516,24 @@ garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error)
   return garrow::check(error, arrow_record_batch->Validate(), "[record-batch][validate]");
 }
 
+/**
+ * garrow_record_batch_validate_full:
+ * @record_batch: A #GArrowRecordBatch
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 20.0.0
+ */
+gboolean
+garrow_record_batch_validate_full(GArrowRecordBatch *record_batch, GError **error)
+{
+  const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  return garrow::check(error,
+                       arrow_record_batch->ValidateFull(),
+                       "[record-batch][validate_full]");
+}
+
 typedef struct GArrowRecordBatchIteratorPrivate_
 {
   arrow::RecordBatchIterator iterator;
diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h
index 8d17a44be5883..5a51ad983bbee 100644
--- a/c_glib/arrow-glib/record-batch.h
+++ b/c_glib/arrow-glib/record-batch.h
@@ -113,6 +113,10 @@ GARROW_AVAILABLE_IN_20_0
 gboolean
 garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error);
 
+GARROW_AVAILABLE_IN_20_0
+gboolean
+garrow_record_batch_validate_full(GArrowRecordBatch *record_batch, GError **error);
+
 #define GARROW_TYPE_RECORD_BATCH_ITERATOR (garrow_record_batch_iterator_get_type())
 GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator,
diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb
index ba4b15a67782a..49989f64c7316 100644
--- a/c_glib/test/test-record-batch.rb
+++ b/c_glib/test/test-record-batch.rb
@@ -221,5 +221,44 @@ def test_invalid
         end
       end
     end
+
+    sub_test_case("#validate_full") do
+      def setup
+        @id_field = Arrow::Field.new("uint8", Arrow::UInt8DataType.new)
+        @name_field = Arrow::Field.new("string", Arrow::StringDataType.new)
+        @schema = Arrow::Schema.new([@id_field, @name_field])
+
+        @uint8_value = build_uint_array([1])
+        @valid_name_value = build_string_array(["abc"])
+        @n_rows = @uint8_value.length
+
+        # U+3042 HIRAGANA LETTER A, U+3044 HIRAGANA LETTER I
+        data = "\u3042\u3044".b[0..-2]
+        value_offsets = Arrow::Buffer.new([0,data.size].pack("l*"))
+        @invalid_name_value = Arrow::StringArray.new(1,
+                                                   value_offsets,
+                                                   Arrow::Buffer.new(data),
+                                                   nil,
+                                                   -1)
+      end
+
+      def test_valid
+        record_batch = Arrow::RecordBatch.new(@schema, @n_rows, [@uint8_value, @valid_name_value])
+
+        assert do
+          record_batch.validate_full
+        end
+      end
+
+      def test_invalid
+        message = "[record-batch][validate_full]: Invalid: " +
+          "In column 1: Invalid: Invalid UTF8 sequence at string index 0"
+        record_batch = Arrow::RecordBatch.new(@schema, @n_rows, [@uint8_value, @invalid_name_value])
+
+        assert_raise(Arrow::Error::Invalid.new(message)) do
+          record_batch.validate_full
+        end
+      end
+    end
   end
 end

From 987d0fbd3fd36614f29c1b189520e7bf5c74ab20 Mon Sep 17 00:00:00 2001
From: Hiroyuki Sato
Date: Thu, 30 Jan 2025 14:09:18 +0900
Subject: [PATCH 2/4] Update c_glib/arrow-glib/record-batch.cpp

Co-authored-by: Sutou Kouhei
---
 c_glib/arrow-glib/record-batch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/c_glib/arrow-glib/record-batch.cpp b/c_glib/arrow-glib/record-batch.cpp
index 6acf217b516aa..2c94919d96609 100644
--- a/c_glib/arrow-glib/record-batch.cpp
+++ b/c_glib/arrow-glib/record-batch.cpp
@@ -531,7 +531,7 @@ garrow_record_batch_validate_full(GArrowRecordBatch *record_batch, GError **erro
   const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
   return garrow::check(error,
                        arrow_record_batch->ValidateFull(),
-                       "[record-batch][validate_full]");
+                       "[record-batch][validate-full]");
 }
 
 typedef struct GArrowRecordBatchIteratorPrivate_

From cf6e44162241349ede6d9942800ca94a916353df Mon Sep 17 00:00:00 2001
From: Hiroyuki Sato
Date: Thu, 30 Jan 2025 14:09:37 +0900
Subject: [PATCH 3/4] Update c_glib/test/test-record-batch.rb

Co-authored-by: Sutou Kouhei
---
 c_glib/test/test-record-batch.rb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb
index 49989f64c7316..0e3be9e99a04f 100644
--- a/c_glib/test/test-record-batch.rb
+++ b/c_glib/test/test-record-batch.rb
@@ -236,10 +236,10 @@ def setup
         data = "\u3042\u3044".b[0..-2]
         value_offsets = Arrow::Buffer.new([0,data.size].pack("l*"))
         @invalid_name_value = Arrow::StringArray.new(1,
-                                                   value_offsets,
-                                                   Arrow::Buffer.new(data),
-                                                   nil,
-                                                   -1)
+                                                     value_offsets,
+                                                     Arrow::Buffer.new(data),
+                                                     nil,
+                                                     -1)
       end
 
       def test_valid

From a876a5d81ebdac6f26220f8439dee4ef749522cc Mon Sep 17 00:00:00 2001
From: Hiroyuki Sato
Date: Thu, 30 Jan 2025 14:12:52 +0900
Subject: [PATCH 4/4] GH-44760: [GLib] Add garrow_record_batch_validate_full()

---
 c_glib/test/test-record-batch.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb
index 0e3be9e99a04f..90f72c3d52206 100644
--- a/c_glib/test/test-record-batch.rb
+++ b/c_glib/test/test-record-batch.rb
@@ -251,7 +251,7 @@ def test_valid
       end
 
       def test_invalid
-        message = "[record-batch][validate_full]: Invalid: " +
+        message = "[record-batch][validate-full]: Invalid: " +
           "In column 1: Invalid: Invalid UTF8 sequence at string index 0"
         record_batch = Arrow::RecordBatch.new(@schema, @n_rows, [@uint8_value, @invalid_name_value])