Skip to content

Commit

Permalink
GH-41140: [C#] Account for offset and length in union arrays (#41165)
Browse files Browse the repository at this point in the history
### Rationale for this change

See #41140. This makes a sliced union array behave as expected without having to manually account for the array offset unless accessing the underlying buffers.

### What changes are included in this PR?

Accounts for the offset and length when getting type ids, value offsets and field arrays for sparse and dense union arrays.

### Are these changes tested?

Yes, I've updated the union array tests to cover this.

### Are there any user-facing changes?

Yes, this is a user facing bug fix.
* GitHub Issue: #41140

Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
  • Loading branch information
adamreeve authored and raulcd committed May 8, 2024
1 parent b28633c commit dcfeceb
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 19 deletions.
3 changes: 1 addition & 2 deletions csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class DenseUnionArray : UnionArray
{
public ArrowBuffer ValueOffsetBuffer => Data.Buffers[1];

public ReadOnlySpan<int> ValueOffsets => ValueOffsetBuffer.Span.CastTo<int>();
public ReadOnlySpan<int> ValueOffsets => ValueOffsetBuffer.Span.CastTo<int>().Slice(Offset, Length);

public DenseUnionArray(
IArrowType dataType,
Expand All @@ -38,7 +38,6 @@ public DenseUnionArray(
dataType, length, nullCount, offset, new[] { typeIds, valuesOffsetBuffer },
children.Select(child => child.Data)))
{
_fields = children.ToArray();
ValidateMode(UnionMode.Dense, Type.Mode);
}

Expand Down
1 change: 0 additions & 1 deletion csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ public SparseUnionArray(
dataType, length, nullCount, offset, new[] { typeIds },
children.Select(child => child.Data)))
{
_fields = children.ToArray();
ValidateMode(UnionMode.Sparse, Type.Mode);
}

Expand Down
13 changes: 10 additions & 3 deletions csharp/src/Apache.Arrow/Arrays/UnionArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public abstract class UnionArray : IArrowArray
protected IReadOnlyList<IArrowArray> _fields;

public IReadOnlyList<IArrowArray> Fields =>
LazyInitializer.EnsureInitialized(ref _fields, () => InitializeFields());
LazyInitializer.EnsureInitialized(ref _fields, InitializeFields);

public ArrayData Data { get; }

Expand All @@ -35,7 +35,7 @@ public abstract class UnionArray : IArrowArray

public ArrowBuffer TypeBuffer => Data.Buffers[0];

public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span.Slice(Offset, Length);

public int Length => Data.Length;

Expand Down Expand Up @@ -106,7 +106,14 @@ private IReadOnlyList<IArrowArray> InitializeFields()
IArrowArray[] result = new IArrowArray[Data.Children.Length];
for (int i = 0; i < Data.Children.Length; i++)
{
result[i] = ArrowArrayFactory.BuildArray(Data.Children[i]);
var childData = Data.Children[i];
if (Mode == UnionMode.Sparse && (Data.Offset != 0 || childData.Length != Data.Length))
{
// We only slice the child data for sparse mode,
// so that the sliced value offsets remain valid in dense mode
childData = childData.Slice(Data.Offset, Data.Length);
}
result[i] = ArrowArrayFactory.BuildArray(childData);
}
return result;
}
Expand Down
87 changes: 74 additions & 13 deletions csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Linq;
using Apache.Arrow.Types;
using Xunit;
Expand All @@ -24,7 +25,7 @@ public class UnionArrayTests
[Theory]
[InlineData(UnionMode.Sparse)]
[InlineData(UnionMode.Dense)]
public void UnionArray_IsNull(UnionMode mode)
public void UnionArrayIsNull(UnionMode mode)
{
var (array, expectedNull) = BuildUnionArray(mode, 100);

Expand All @@ -38,40 +39,100 @@ public void UnionArray_IsNull(UnionMode mode)
[Theory]
[InlineData(UnionMode.Sparse)]
[InlineData(UnionMode.Dense)]
public void UnionArray_Slice(UnionMode mode)
public void UnionArraySlice(UnionMode mode)
{
var (array, expectedNull) = BuildUnionArray(mode, 10);

for (var offset = 0; offset < array.Length; ++offset)
{
for (var length = 0; length < array.Length - offset; ++length)
{
var slicedArray = ArrowArrayFactory.Slice(array, offset, length);
var slicedArray = (UnionArray)ArrowArrayFactory.Slice(array, offset, length);

var nullCount = 0;
for (var i = 0; i < slicedArray.Length; ++i)
{
// TODO: Shouldn't need to add offset in IsNull/IsValid calls,
// see https://github.com/apache/arrow/issues/41140
Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(offset + i));
Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(offset + i));
Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(i));
Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(i));
nullCount += expectedNull[offset + i] ? 1 : 0;

CompareValue(array, offset + i, slicedArray, i);
}

Assert.True(nullCount == slicedArray.NullCount, $"offset = {offset}, length = {length}");
Assert.Equal(nullCount, slicedArray.NullCount);
}
}
}

private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode, int length)
[Theory]
[InlineData(UnionMode.Sparse)]
[InlineData(UnionMode.Dense)]
public void UnionArrayConstructedWithOffset(UnionMode mode)
{
const int length = 10;
var (array, expectedNull) = BuildUnionArray(mode, length);

for (var offset = 0; offset < array.Length; ++offset)
{
var (slicedArray, _) = BuildUnionArray(mode, length, offset);

var nullCount = 0;
for (var i = 0; i < slicedArray.Length; ++i)
{
Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(i));
Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(i));
nullCount += expectedNull[offset + i] ? 1 : 0;

CompareValue(array, offset + i, slicedArray, i);
}

Assert.Equal(nullCount, slicedArray.NullCount);
}
}

private static void CompareValue(UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
{
var typeId = originalArray.TypeIds[originalIndex];
var sliceTypeId = slicedArray.TypeIds[sliceIndex];
Assert.Equal(typeId, sliceTypeId);

switch (typeId)
{
case 0:
CompareFieldValue<int, Int32Array>(typeId, originalArray, originalIndex, slicedArray, sliceIndex);
break;
case 1:
CompareFieldValue<float, FloatArray>(typeId, originalArray, originalIndex, slicedArray, sliceIndex);
break;
default:
throw new Exception($"Unexpected type id {typeId}");
}
}

private static void CompareFieldValue<T, TArray>(byte typeId, UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
where T: struct
where TArray : PrimitiveArray<T>
{
if (originalArray is DenseUnionArray denseOriginalArray)
{
Assert.IsType<DenseUnionArray>(slicedArray);

originalIndex = denseOriginalArray.ValueOffsets[originalIndex];
sliceIndex = ((DenseUnionArray)slicedArray).ValueOffsets[sliceIndex];
}
var originalValue = ((TArray)originalArray.Fields[typeId]).GetValue(originalIndex);
var sliceValue = ((TArray)slicedArray.Fields[typeId]).GetValue(sliceIndex);
Assert.Equal(originalValue, sliceValue);
}

private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode, int length, int offset=0)
{
var fields = new Field[]
{
new Field("field0", new Int32Type(), true),
new Field("field1", new FloatType(), true),
};
var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray();
var typeIds = new[] { 0, 1 };
var type = new UnionType(fields, typeIds, mode);

var nullCount = 0;
Expand All @@ -85,7 +146,7 @@ private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode,
{
var isNull = i % 3 == 0;
expectedNull[i] = isNull;
nullCount += isNull ? 1 : 0;
nullCount += (isNull && i >= offset) ? 1 : 0;

if (i % 2 == 0)
{
Expand Down Expand Up @@ -140,8 +201,8 @@ private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode,
};

UnionArray array = mode == UnionMode.Dense
? new DenseUnionArray(type, length, children, typeIdsBuffer, valuesOffsetBuffer, nullCount)
: new SparseUnionArray(type, length, children, typeIdsBuffer, nullCount);
? new DenseUnionArray(type, length - offset, children, typeIdsBuffer, valuesOffsetBuffer, nullCount, offset)
: new SparseUnionArray(type, length - offset, children, typeIdsBuffer, nullCount, offset);

return (array, expectedNull);
}
Expand Down

0 comments on commit dcfeceb

Please sign in to comment.