Skip to content

Commit

Permalink
Transfer unique values from backend in binary
Browse files Browse the repository at this point in the history
  • Loading branch information
ianthomas23 committed Oct 29, 2024
1 parent e87b4e1 commit d2150f4
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 122 deletions.
47 changes: 26 additions & 21 deletions ipydatagrid/datagrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,7 @@ def _apply_frontend_transforms(self, frontend_transforms, dataframe):
elif operator == "notempty":
dataframe = dataframe[dataframe[column].notna()]
elif operator == "in":
value = pd.Series(value, dtype=dataframe[column].dtype)
dataframe = dataframe[dataframe[column].isin(value)]
elif operator == "between":
dataframe = dataframe[
Expand Down Expand Up @@ -1121,22 +1122,7 @@ def _handle_comm_msg(self, _, content, _buffs):

value = value.iloc[r1 : r2 + 1, c1 : c2 + 1]

# Primary key used
index_key = self.get_dataframe_index(value)

serialized = _data_serialization_impl(
self.generate_data_object(value, "ipydguuid", index_key), None
)

# Extract all buffers
buffers = []
for column in serialized["data"].keys():
if (
not isinstance(serialized["data"][column], list)
and not serialized["data"][column]["type"] == "raw"
):
buffers.append(serialized["data"][column]["value"])
serialized["data"][column]["value"] = len(buffers) - 1
serialized, buffers = self._serialize_helper(value)

answer = {
"event_type": "data-reply",
Expand All @@ -1151,14 +1137,15 @@ def _handle_comm_msg(self, _, content, _buffs):

elif event_type == "unique-values-request":
column = content.get("column")
unique = (
self.__dataframe_reference[column].drop_duplicates().to_numpy()
)
original = self.__dataframe_reference[column].drop_duplicates()
serialized, buffers = self._serialize_helper(pd.DataFrame(original))

answer = {
"event_type": "unique-values-reply",
"values": unique,
"column": column,
"value": serialized,
}
self.send(answer)
self.send(answer, buffers)

@observe("_transforms")
def _on_transforms_changed(self, change):
Expand All @@ -1178,3 +1165,21 @@ def _on_transforms_changed(self, change):

# Should only request a tick if the transforms have changed.
self.tick()

def _serialize_helper(self, dataframe):
    """Serialize ``dataframe`` for transfer to the frontend.

    Returns a ``(serialized, buffers)`` pair: ``serialized`` is the data
    structure produced by ``_data_serialization_impl`` with every binary
    column payload replaced by its integer index into ``buffers``, and
    ``buffers`` holds the extracted binary payloads in index order so they
    can be sent as comm message buffers.
    """
    # Primary key used
    index_key = self.get_dataframe_index(dataframe)

    serialized = _data_serialization_impl(
        self.generate_data_object(dataframe, "ipydguuid", index_key), None
    )

    # Pull every non-raw, non-list column payload out into the buffer
    # list, leaving its position index behind in the serialized structure.
    buffers = []
    for entry in serialized["data"].values():
        if isinstance(entry, list) or entry["type"] == "raw":
            continue
        payload = entry["value"]
        entry["value"] = len(buffers)
        buffers.append(payload)

    return serialized, buffers
96 changes: 96 additions & 0 deletions js/core/deserialize.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { Dict } from '@jupyter-widgets/base';
import { array_or_json_serializer } from 'bqplot';
import { DataSource } from '../datasource';

/**
 * Recursively restore values the Python backend had to encode as sentinel
 * strings because JSON cannot represent them (NaN, +/-Infinity, NaT).
 *
 * Arrays and plain objects are walked depth-first; strings matching a
 * sentinel are replaced by the value they stand for; everything else is
 * returned unchanged.
 */
export function unpack_raw_data(
  value: any | Dict<unknown> | string | (Dict<unknown> | string)[],
): any {
  if (Array.isArray(value)) {
    return value.map((item) => unpack_raw_data(item));
  }
  if (value instanceof Object && typeof value !== 'string') {
    const result: { [key: string]: any } = {};
    for (const key of Object.keys(value)) {
      result[key] = unpack_raw_data(value[key]);
    }
    return result;
  }
  switch (value) {
    case '$NaN$':
      return Number.NaN;
    case '$Infinity$':
      return Number.POSITIVE_INFINITY;
    case '$NegInfinity$':
      return Number.NEGATIVE_INFINITY;
    case '$NaT$':
      // An invalid Date is the JS stand-in for pandas' NaT.
      return new Date('INVALID');
    default:
      return value;
  }
}

/**
 * Deserialize a serialized grid payload into a plain mapping of
 * column name -> array of column values.
 *
 * Handles three column encodings: a plain JS array (used as-is), a
 * `{ type: 'raw', value }` wrapper (sentinels restored via
 * `unpack_raw_data`), and a bqplot binary-array payload (decoded with
 * `array_or_json_serializer`; `date`-typed arrays are converted back to
 * ISO-format strings).
 *
 * @param data - Serialized payload with `data`, `fields` and `schema`.
 * @param manager - Widget manager forwarded to the bqplot deserializer.
 * @returns Mapping of column name to array of values.
 */
export function deserialize_data_simple(data: any, manager: any): any {
  const deserialized_data: any = {};

  // Backward compatibility for when data.data was an array of rows
  // (should be removed in ipydatagrid 2.x?)
  if (Array.isArray(data.data)) {
    if (data.data.length === 0) {
      return deserialized_data;
    }

    const unpacked = unpack_raw_data(data.data);
    // Turn array of rows (old approach) into a dictionary of columns as arrays (new approach)
    for (const column of Object.keys(unpacked[0])) {
      const columnData = new Array(unpacked.length);
      let rowIdx = 0;

      for (const row of unpacked) {
        columnData[rowIdx++] = row[column];
      }

      deserialized_data[column] = columnData;
    }

    return deserialized_data;
  }

  for (const column of Object.keys(data.data)) {
    // was: data.data[column] repeated on every access — hoist once per column
    const col = data.data[column];
    deserialized_data[column] = [];

    if (Array.isArray(col)) {
      deserialized_data[column] = col;
      continue;
    }

    if (col.type === 'raw') {
      // was: loose `==` comparison
      deserialized_data[column] = unpack_raw_data(col.value);
    } else if (col.value.length !== 0) {
      let deserialized_array = array_or_json_serializer.deserialize(
        col,
        manager,
      );

      // Turning back float32 dates into isoformat
      if (deserialized_array.type === 'date') {
        const float32Array = deserialized_array;
        deserialized_array = [];

        for (let i = 0; i < float32Array.length; i++) {
          deserialized_array[i] = new Date(float32Array[i]).toISOString();
        }
      }

      deserialized_data[column] = deserialized_array;
    }
  }
  return deserialized_data;
}

/**
 * Deserialize a full grid payload into a `DataSource`, combining the
 * column data decoded by `deserialize_data_simple` with the payload's
 * `fields` and `schema` metadata.
 */
export function deserialize_data(data: any, manager: any): DataSource {
  const columns = deserialize_data_simple(data, manager);
  return new DataSource(columns, data.fields, data.schema, true);
}
34 changes: 21 additions & 13 deletions js/core/streamingviewbasedjsonmodel.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { PromiseDelegate } from '@lumino/coreutils';
import { DataModel } from '@lumino/datagrid';
import { deserialize_data_simple } from './deserialize';
import { StreamingView } from './streamingview';
import { TransformStateManager } from './transformStateManager';
import { ViewBasedJSONModel } from './viewbasedjsonmodel';
Expand Down Expand Up @@ -64,22 +64,30 @@ export class StreamingViewBasedJSONModel extends ViewBasedJSONModel {
return Promise.resolve(this._unique.values);
}

const promiseDelegate = new PromiseDelegate<any[]>();
this._dataModel.on('msg:custom', (content) => {
// when message received, want to drop this handler...
// Or keep it going but need a way of identifying where to put the received data??????
if (content.event_type === 'unique-values-reply') {
this._unique = { region, column, values: content.values };
promiseDelegate.resolve(this._unique.values);
}

// Do I need to cancel this callback?????????
const promise = new Promise<any>(resolve => {
this._dataModel.once('msg:custom', (content, buffers) => {
if (content.event_type === 'unique-values-reply') {
const { value } = content;

// Bring back buffers at their original position in the data structure
for (const col of Object.keys(value.data)) {
if (value.data[col].type !== 'raw') {
value.data[col].value = buffers[value.data[col].value];
}
}

const deserialized = deserialize_data_simple(value, null);
const values = deserialized[content.column];

this._unique = { region, column: content.column, values };
resolve(this._unique.values);
}
});
});

const msg = { type: 'unique-values-request', column: column };
this._dataModel.send(msg);

return promiseDelegate.promise;
return promise;
}

updateDataset(options: StreamingViewBasedJSONModel.IOptions): void {
Expand Down
89 changes: 1 addition & 88 deletions js/datagrid.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {

import { array_or_json_serializer } from 'bqplot';

import { deserialize_data } from './core/deserialize';
import { ViewBasedJSONModel } from './core/viewbasedjsonmodel';
import { StreamingViewBasedJSONModel } from './core/streamingviewbasedjsonmodel';

Expand All @@ -45,34 +46,6 @@ import { DataSource } from './datasource';
// Import CSS
import '../style/jupyter-widget.css';

function unpack_raw_data(
value: any | Dict<unknown> | string | (Dict<unknown> | string)[],
): any {
if (Array.isArray(value)) {
const unpacked: any[] = [];
value.forEach((sub_value, key) => {
unpacked.push(unpack_raw_data(sub_value));
});
return unpacked;
} else if (value instanceof Object && typeof value !== 'string') {
const unpacked: { [key: string]: any } = {};
Object.keys(value).forEach((key) => {
unpacked[key] = unpack_raw_data(value[key]);
});
return unpacked;
} else if (value === '$NaN$') {
return Number.NaN;
} else if (value === '$Infinity$') {
return Number.POSITIVE_INFINITY;
} else if (value === '$NegInfinity$') {
return Number.NEGATIVE_INFINITY;
} else if (value === '$NaT$') {
return new Date('INVALID');
} else {
return value;
}
}

function serialize_data(data: DataSource, manager: any): any {
const serialized_data: any = {};
for (const column of Object.keys(data.data)) {
Expand All @@ -84,66 +57,6 @@ function serialize_data(data: DataSource, manager: any): any {
return { data: serialized_data, fields: data.fields, schema: data.schema };
}

function deserialize_data(data: any, manager: any): DataSource {
const deserialized_data: any = {};

// Backward compatibility for when data.data was an array of rows
// (should be removed in ipydatagrid 2.x?)
if (Array.isArray(data.data)) {
if (data.data.length === 0) {
return new DataSource(deserialized_data, data.fields, data.schema, true);
}

const unpacked = unpack_raw_data(data.data);
// Turn array of rows (old approach) into a dictionary of columns as arrays (new approach)
for (const column of Object.keys(unpacked[0])) {
const columnData = new Array(unpacked.length);
let rowIdx = 0;

for (const row of unpacked) {
columnData[rowIdx++] = row[column];
}

deserialized_data[column] = columnData;
}

return new DataSource(deserialized_data, data.fields, data.schema, true);
}

for (const column of Object.keys(data.data)) {
deserialized_data[column] = [];

if (Array.isArray(data.data[column])) {
deserialized_data[column] = data.data[column];
continue;
}

if (data.data[column].type == 'raw') {
deserialized_data[column] = unpack_raw_data(data.data[column].value);
} else {
if (data.data[column].value.length !== 0) {
let deserialized_array = array_or_json_serializer.deserialize(
data.data[column],
manager,
);

// Turning back float32 dates into isoformat
if (deserialized_array.type === 'date') {
const float32Array = deserialized_array;
deserialized_array = [];

for (let i = 0; i < float32Array.length; i++) {
deserialized_array[i] = new Date(float32Array[i]).toISOString();
}
}

deserialized_data[column] = deserialized_array;
}
}
}
return new DataSource(deserialized_data, data.fields, data.schema, true);
}

export class DataGridModel extends DOMWidgetModel {
defaults() {
return {
Expand Down

0 comments on commit d2150f4

Please sign in to comment.