Skip to content

Commit

Permalink
Fixing operations on Mixed types (#7368)
Browse files Browse the repository at this point in the history
- Fixes #7231
- Cleans up vectorized operations to distinguish unary and binary operations.
- Introduces MixedStorage which may pretend to be a more specialized storage on demand.
- Ensures that operations request a more specialized storage on right-hand side to ensure compatibility with reported inferred storage type.
- Ensures that a dataflow error returned by an Enso callback in Java is propagated as a polyglot exception and can be caught back in Enso
- Tests for comparison of Mixed storages with each other and other types
- Started using `Set` for `Filter_Condition.Is_In` for better performance.
- ~~Migrated `Column.map` and `Column.zip` to use the Java-to-Enso callbacks.~~
- This does not forward warnings. IMO we should not be losing them. We can switch and add a ticket to fix the warnings, but that would be a regression (current implementation handles them correctly). Instead, we should first gain some ability to work with warnings in polyglot. I created a ticket to get this figured out #7371
- ~~Trying to avoid conversions when calling Enso functions from Java.~~
- Needs extra care as dataflow errors may not be handled right then. So only works for simple functions that should not error.
- Not sure how much it really helps. [Benchmarks](#7270 (comment)) suggested it could improve the performance quite significantly, but the practical solution is not exactly the same as the one measured, so we may have to measure and tune it to get the best results.
- Created #7378 to track this.
  • Loading branch information
radeusgd authored Jul 25, 2023
1 parent 1f6fcf1 commit 4b5a2e2
Show file tree
Hide file tree
Showing 55 changed files with 1,398 additions and 687 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import project.Any.Any
import project.Data.Set.Set
import project.Data.Text.Case_Sensitivity.Case_Sensitivity
import project.Data.Text.Regex.Regex
import project.Data.Text.Text
Expand Down Expand Up @@ -192,10 +193,12 @@ type Filter_Condition
Not_Like sql_pattern ->
regex = sql_like_to_regex sql_pattern
elem -> regex.matches elem . not
## TODO once we have proper hashing we could create a hashmap and
answer quicker, currently we need to do a full scan for each element.
Is_In values -> values.contains
Not_In values -> elem -> values.contains elem . not
Is_In values ->
set = Set.from_vector values
set.contains
Not_In values ->
set = Set.from_vector values
elem -> set.contains elem . not

## PRIVATE
Convert to a display representation of this Filter_Condition.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import project.Error.Error
import project.Panic.Panic

polyglot java import org.enso.base.polyglot.WrappedDataflowError

## PRIVATE
handle_polyglot_dataflow_errors ~action =
Panic.catch WrappedDataflowError action caught_panic->
Error.throw caught_panic.payload.getDataflowError
273 changes: 165 additions & 108 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ closest_storage_type value_type = case value_type of
Value_Type.Date_Time _ -> DateTimeType.INSTANCE
Value_Type.Time -> TimeOfDayType.INSTANCE
Value_Type.Mixed -> AnyObjectType.INSTANCE
_ -> Error.throw (Illegal_Argument.Error "Columns of type "+value_type.to_display_text+" are currently not supported in the in-memory backend.")
_ ->
Error.throw (Illegal_Argument.Error "Columns of type "+value_type.to_display_text+" are currently not supported in the in-memory backend.")

## PRIVATE
Converts a value type to an in-memory storage type, possibly approximating it
Expand Down
15 changes: 9 additions & 6 deletions distribution/lib/Standard/Test/0.0.0-dev/src/Test.enso
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,15 @@ type Test
example_fail = Test.fail "Something went wrong."
fail : Text -> Nothing|Text -> Test_Result
fail message details=Nothing =
clue_data = State.get Clue
message_with_clue = case clue_data of
failure = Test_Result.Failure (Test.enrich_message_with_clue message) details
Panic.throw failure

## PRIVATE
enrich_message_with_clue : Text -> Text
enrich_message_with_clue message =
case State.get Clue of
Clue.Value add_clue -> add_clue message
_ -> message
failure = Test_Result.Failure message_with_clue details
Panic.throw failure

## PRIVATE
Reports an unexpected dataflow error has occurred.
Expand Down Expand Up @@ -203,8 +206,8 @@ run_spec ~behavior =
case ex of
Test_Result.Failure _ _ -> ex
Finished_With.Error err stack_trace_text ->
Test_Result.Failure ("An unexpected error was returned: " + err.to_text) details=stack_trace_text
_ -> Test_Result.Failure ("An unexpected panic was thrown: " + ex.to_text) details=maybeExc.get_stack_trace_text
Test_Result.Failure (Test.enrich_message_with_clue ("An unexpected error was returned: " + err.to_text)) details=stack_trace_text
_ -> Test_Result.Failure (Test.enrich_message_with_clue ("An unexpected panic was thrown: " + ex.to_text)) details=maybeExc.get_stack_trace_text
result

## PRIVATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ public static Object convertPolyglotValue(Value item) {
return item.asTime();
}

if (item.isException()) {
throw new WrappedDataflowError(item);
}

return item.as(Object.class);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.enso.base.polyglot;

import org.graalvm.polyglot.Value;

public class WrappedDataflowError extends RuntimeException {
private final Value error;

public WrappedDataflowError(Value error) {
this.error = error;
}

public Value getDataflowError() {
return error;
}

@Override
public String getMessage() {
return "A dataflow error has been returned from an Enso callback called from Java: "
+ error.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public void appendNoGrow(Object o) {
vals.set(size);
}
} else {
throw new UnsupportedOperationException("Cannot coerce " + o + " to a boolean type.");
throw new UnsupportedOperationException("Cannot coerce " + o + " (" + o.getClass().getCanonicalName() + ")" + " to a boolean type.");
}
}
size++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@

/** A builder for creating columns dynamically. */
public abstract class Builder {
/** Constructs a builder accepting values of a specific type.
* <p>
* If {@code type} is {@code null}, it will return an {@link InferredBuilder} that will infer the type from the data.
*/
public static Builder getForType(StorageType type, int size) {
Builder builder = switch (type) {
case AnyObjectType x -> new ObjectBuilder(size);
case AnyObjectType x -> new MixedBuilder(size);
case BooleanType x -> new BoolBuilder(size);
case DateType x -> new DateBuilder(size);
case DateTimeType x -> new DateTimeBuilder(size);
Expand All @@ -35,6 +39,7 @@ public static Builder getForType(StorageType type, int size) {

yield new StringBuilder(size);
}
case null -> new InferredBuilder(size);
};
assert builder.getType().equals(type);
return builder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ private void initBuilderFor(Object o) {
} else if (o instanceof String) {
currentBuilder = new StringBuilder(initialCapacity);
} else {
currentBuilder = new ObjectBuilder(initialCapacity);
currentBuilder = new MixedBuilder(initialCapacity);
}
currentBuilder.appendNulls(currentSize);
}
Expand Down Expand Up @@ -143,12 +143,12 @@ private void retypeAndAppend(Object o) {
}
}

retypeToObject();
retypeToMixed();
currentBuilder.append(o);
}

private void retypeToObject() {
ObjectBuilder objectBuilder = new ObjectBuilder(initialSize);
private void retypeToMixed() {
ObjectBuilder objectBuilder = new MixedBuilder(initialSize);
currentBuilder.writeTo(objectBuilder.getData());
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
currentBuilder = objectBuilder;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.enso.table.data.column.builder;

import org.enso.table.data.column.storage.MixedStorage;
import org.enso.table.data.column.storage.Storage;

/** A builder for Mixed columns. It will create a MixedStorage. */
public class MixedBuilder extends ObjectBuilder {
public MixedBuilder(int size) {
super(size);
}

public MixedBuilder(Object[] data) {
super(data);
}

@Override
public Storage<Object> seal() {
resize(currentSize);
return new MixedStorage(data, currentSize);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

/** A builder for boxed object columns. */
public class ObjectBuilder extends TypedBuilder {
private Object[] data;
private int currentSize = 0;
protected Object[] data;
protected int currentSize = 0;

public ObjectBuilder(int size) {
this.data = new Object[size];
Expand Down Expand Up @@ -124,7 +124,7 @@ private void grow() {
resize(desiredCapacity);
}

private void resize(int desiredCapacity) {
protected void resize(int desiredCapacity) {
this.data = Arrays.copyOf(data, desiredCapacity);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public boolean canRetypeTo(StorageType type) {
public TypedBuilder retypeTo(StorageType type) {
if (Objects.equals(type, AnyObjectType.INSTANCE)) {
Object[] widenedData = Arrays.copyOf(data, data.length, Object[].class);
ObjectBuilder res = new ObjectBuilder(widenedData);
ObjectBuilder res = new MixedBuilder(widenedData);
res.setCurrentSize(currentSize);
return res;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public Storage<String> cast(Storage<?> storage, CastProblemBuilder problemBuilde
} else if (storage.getType() instanceof AnyObjectType) {
return castFromMixed(storage, problemBuilder);
} else {
throw new IllegalStateException("No known strategy for casting storage " + storage + " to Integer.");
throw new IllegalStateException("No known strategy for casting storage " + storage + " to Text.");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,31 @@
*
* @param <I> the supported storage type.
*/
public abstract class MapOperation<T, I extends Storage<? super T>> {
public abstract class BinaryMapOperation<T, I extends Storage<? super T>> {
private final String name;

/**
* Creates a new operation with the given name.
*
* @param name the operation name
*/
public MapOperation(String name) {
public BinaryMapOperation(String name) {
this.name = name;
}

/**
* Run the operation in map mode
* Run the operation in map mode - combining every row of the storage with a scalar argument.
*
* @param storage the storage to run operation on
* @param arg the argument passed to the operation
* @param problemBuilder the builder allowing to report computation problems
* @return the result of running the operation
*/
public abstract Storage<?> runMap(
public abstract Storage<?> runBinaryMap(
I storage, Object arg, MapOperationProblemBuilder problemBuilder);

/**
* Run the operation in zip mode
* Run the operation in zip mode - combining corresponding rows of two storages.
*
* @param storage the storage to run operation on
* @param arg the storage providing second arguments to the operation
Expand All @@ -45,4 +45,16 @@ public abstract Storage<?> runZip(
public String getName() {
return name;
}

/**
* Specifies if the operation relies on specialized storage types.
*
* <p>Some operations, e.g. numeric operations, may only work with specialized numeric storages.
* In this case, the caller will ensure that if a mixed column pretending to be numeric is passed
* to such an operation, it will first be converted to a specialized type. If a given operation
* can handle any storage, this may return false to avoid an unnecessary costly conversion.
*/
public boolean reliesOnSpecializedStorage() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

public abstract class GenericBinaryObjectMapOperation<
InputType, InputStorageType extends Storage<InputType>, OutputType>
extends MapOperation<InputType, InputStorageType> {
extends BinaryMapOperation<InputType, InputStorageType> {

protected GenericBinaryObjectMapOperation(
String name,
Expand All @@ -28,7 +28,7 @@ protected GenericBinaryObjectMapOperation(
protected abstract OutputType run(InputType value, InputType other);

@Override
public Storage<?> runMap(
public Storage<?> runBinaryMap(
InputStorageType storage, Object arg, MapOperationProblemBuilder problemBuilder) {
arg = Polyglot_Utils.convertPolyglotValue(arg);
if (arg == null) {
Expand Down
Loading

0 comments on commit 4b5a2e2

Please sign in to comment.