Skip to content

Commit

Permalink
GH-44759: [GLib] Add garrow_record_batch_validate() (#45353)
Browse files Browse the repository at this point in the history
### Rationale for this change

[RecordBatch::Validate](https://arrow.apache.org/docs/cpp/api/table.html#_CPPv4NK5arrow11RecordBatch8ValidateEv) is available in the C++ API,
but the GLib bindings don't support that method yet.

### What changes are included in this PR?

This PR adds a validation method in the record-batch class.
Before this change, `garrow_record_batch_new()` called `Validate()` implicitly.
This PR removes that implicit call and exposes validation as a separate method, so users now need to call `garrow_record_batch_validate()` explicitly themselves. This is a backward incompatible change.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

**This PR includes breaking changes to public APIs.**

* GitHub Issue: #44759

Authored-by: Hiroyuki Sato <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
hiroyuki-sato authored Jan 27, 2025
1 parent 2c90daf commit be38880
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
23 changes: 17 additions & 6 deletions c_glib/arrow-glib/record-batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,7 @@ garrow_record_batch_new(GArrowSchema *schema,
}

auto arrow_record_batch = arrow::RecordBatch::Make(arrow_schema, n_rows, arrow_columns);
auto status = arrow_record_batch->Validate();
if (garrow_error_check(error, status, tag)) {
return garrow_record_batch_new_raw(&arrow_record_batch);
} else {
return NULL;
}
return garrow_record_batch_new_raw(&arrow_record_batch);
}

/**
Expand Down Expand Up @@ -702,3 +697,19 @@ garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator)
auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(iterator);
return &priv->iterator;
}

/**
 * garrow_record_batch_validate:
 * @record_batch: A #GArrowRecordBatch
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Runs the validation of the wrapped Arrow C++ record batch and
 * reports a failure through @error.
 *
 * Returns: %TRUE on success, %FALSE on error.
 *
 * Since: 20.0.0
 */
gboolean
garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error)
{
  // Ask the underlying C++ object to validate itself, then translate the
  // resulting status into the gboolean/GError convention.
  const auto status = garrow_record_batch_get_raw(record_batch)->Validate();
  return garrow::check(error, status, "[record-batch][validate]");
}
4 changes: 4 additions & 0 deletions c_glib/arrow-glib/record-batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ garrow_record_batch_serialize(GArrowRecordBatch *record_batch,
GArrowWriteOptions *options,
GError **error);

/* Validation entry point for a record batch: the gboolean result signals
 * success or failure, and failure details are reported through @error.
 * Marked as available from 20.0 by the macro below. */
GARROW_AVAILABLE_IN_20_0
gboolean
garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error);

#define GARROW_TYPE_RECORD_BATCH_ITERATOR (garrow_record_batch_iterator_get_type())
GARROW_AVAILABLE_IN_0_17
G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator,
Expand Down
32 changes: 32 additions & 0 deletions c_glib/test/test-record-batch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -189,5 +189,37 @@ def test_serialize
assert_equal(@record_batch,
input_stream.read_record_batch(@record_batch.schema))
end

sub_test_case("#validate") do
  # Prepares a two-field schema ("id", "name") and one single-value
  # array per field; each test picks its own row count.
  def setup
    @id_field = Arrow::Field.new("id", Arrow::UInt8DataType.new)
    @name_field = Arrow::Field.new("name", Arrow::StringDataType.new)
    @schema = Arrow::Schema.new([@id_field, @name_field])

    @id_value = build_uint_array([1])
    @name_value = build_string_array(["abc"])
    @values = [@id_value, @name_value]
  end

  # A batch whose declared row count equals the column length validates.
  def test_valid
    batch = Arrow::RecordBatch.new(@schema, @id_value.length, @values)

    assert do
      batch.validate
    end
  end

  # A batch declared with one row more than its columns hold must raise.
  def test_invalid
    mismatched_n_rows = @id_value.length + 1 # incorrect number of rows
    batch = Arrow::RecordBatch.new(@schema, mismatched_n_rows, @values)

    expected_message = "[record-batch][validate]: Invalid: " +
                       "Number of rows in column 0 did not match batch: 1 vs 2"
    assert_raise(Arrow::Error::Invalid.new(expected_message)) do
      batch.validate
    end
  end
end
end
end

0 comments on commit be38880

Please sign in to comment.