From d2150f40c2b0d7852c5117b932a3f9907e3d5499 Mon Sep 17 00:00:00 2001
From: Ian Thomas
Date: Mon, 28 Oct 2024 11:47:13 +0000
Subject: [PATCH] Transfer unique values from backend in binary

---
 ipydatagrid/datagrid.py                | 47 +++++++------
 js/core/deserialize.ts                 | 96 ++++++++++++++++++++++++++
 js/core/streamingviewbasedjsonmodel.ts | 34 +++++----
 js/datagrid.ts                         | 89 +-----------------------
 4 files changed, 144 insertions(+), 122 deletions(-)
 create mode 100644 js/core/deserialize.ts

diff --git a/ipydatagrid/datagrid.py b/ipydatagrid/datagrid.py
index ea38a74a..cb9819d8 100644
--- a/ipydatagrid/datagrid.py
+++ b/ipydatagrid/datagrid.py
@@ -1072,6 +1072,7 @@ def _apply_frontend_transforms(self, frontend_transforms, dataframe):
             elif operator == "notempty":
                 dataframe = dataframe[dataframe[column].notna()]
             elif operator == "in":
+                value = pd.Series(value, dtype=dataframe[column].dtype)
                 dataframe = dataframe[dataframe[column].isin(value)]
             elif operator == "between":
                 dataframe = dataframe[
@@ -1121,22 +1122,7 @@ def _handle_comm_msg(self, _, content, _buffs):
 
                 value = value.iloc[r1 : r2 + 1, c1 : c2 + 1]
 
-            # Primary key used
-            index_key = self.get_dataframe_index(value)
-
-            serialized = _data_serialization_impl(
-                self.generate_data_object(value, "ipydguuid", index_key), None
-            )
-
-            # Extract all buffers
-            buffers = []
-            for column in serialized["data"].keys():
-                if (
-                    not isinstance(serialized["data"][column], list)
-                    and not serialized["data"][column]["type"] == "raw"
-                ):
-                    buffers.append(serialized["data"][column]["value"])
-                    serialized["data"][column]["value"] = len(buffers) - 1
+            serialized, buffers = self._serialize_helper(value)
 
             answer = {
                 "event_type": "data-reply",
@@ -1151,14 +1137,15 @@ def _handle_comm_msg(self, _, content, _buffs):
 
         elif event_type == "unique-values-request":
             column = content.get("column")
-            unique = (
-                self.__dataframe_reference[column].drop_duplicates().to_numpy()
-            )
+            original = self.__dataframe_reference[column].drop_duplicates()
+            serialized, buffers = self._serialize_helper(pd.DataFrame(original))
+
             answer = {
                 "event_type": "unique-values-reply",
-                "values": unique,
+                "column": column,
+                "value": serialized,
             }
-            self.send(answer)
+            self.send(answer, buffers)
 
     @observe("_transforms")
     def _on_transforms_changed(self, change):
@@ -1178,3 +1165,21 @@ def _on_transforms_changed(self, change):
 
             # Should only request a tick if the transforms have changed.
             self.tick()
+
+    def _serialize_helper(self, dataframe):
+        # Primary key used
+        index_key = self.get_dataframe_index(dataframe)
+
+        serialized = _data_serialization_impl(
+            self.generate_data_object(dataframe, "ipydguuid", index_key), None
+        )
+
+        # Extract all buffers
+        buffers = []
+        for column in serialized["data"].keys():
+            col = serialized["data"][column]
+            if not isinstance(col, list) and col["type"] != "raw":
+                buffers.append(col["value"])
+                col["value"] = len(buffers) - 1
+
+        return serialized, buffers
diff --git a/js/core/deserialize.ts b/js/core/deserialize.ts
new file mode 100644
index 00000000..79ecfdf9
--- /dev/null
+++ b/js/core/deserialize.ts
@@ -0,0 +1,96 @@
+import { Dict } from '@jupyter-widgets/base';
+import { array_or_json_serializer } from 'bqplot';
+import { DataSource } from '../datasource';
+
+export function unpack_raw_data(
+  value: any | Dict | string | (Dict | string)[],
+): any {
+  if (Array.isArray(value)) {
+    const unpacked: any[] = [];
+    value.forEach((sub_value, key) => {
+      unpacked.push(unpack_raw_data(sub_value));
+    });
+    return unpacked;
+  } else if (value instanceof Object && typeof value !== 'string') {
+    const unpacked: { [key: string]: any } = {};
+    Object.keys(value).forEach((key) => {
+      unpacked[key] = unpack_raw_data(value[key]);
+    });
+    return unpacked;
+  } else if (value === '$NaN$') {
+    return Number.NaN;
+  } else if (value === '$Infinity$') {
+    return Number.POSITIVE_INFINITY;
+  } else if (value === '$NegInfinity$') {
+    return Number.NEGATIVE_INFINITY;
+  } else if (value === '$NaT$') {
+    return new Date('INVALID');
+  } else {
+    return value;
+  }
+}
+
+export function deserialize_data_simple(data: any, manager: any): any {
+  const deserialized_data: any = {};
+
+  // Backward compatibility for when data.data was an array of rows
+  // (should be removed in ipydatagrid 2.x?)
+  if (Array.isArray(data.data)) {
+    if (data.data.length === 0) {
+      return deserialized_data;
+    }
+
+    const unpacked = unpack_raw_data(data.data);
+    // Turn array of rows (old approach) into a dictionary of columns as arrays (new approach)
+    for (const column of Object.keys(unpacked[0])) {
+      const columnData = new Array(unpacked.length);
+      let rowIdx = 0;
+
+      for (const row of unpacked) {
+        columnData[rowIdx++] = row[column];
+      }
+
+      deserialized_data[column] = columnData;
+    }
+
+    return deserialized_data;
+  }
+
+  for (const column of Object.keys(data.data)) {
+    deserialized_data[column] = [];
+
+    if (Array.isArray(data.data[column])) {
+      deserialized_data[column] = data.data[column];
+      continue;
+    }
+
+    if (data.data[column].type == 'raw') {
+      deserialized_data[column] = unpack_raw_data(data.data[column].value);
+    } else {
+      if (data.data[column].value.length !== 0) {
+        let deserialized_array = array_or_json_serializer.deserialize(
+          data.data[column],
+          manager,
+        );
+
+        // Turning back float32 dates into isoformat
+        if (deserialized_array.type === 'date') {
+          const float32Array = deserialized_array;
+          deserialized_array = [];
+
+          for (let i = 0; i < float32Array.length; i++) {
+            deserialized_array[i] = new Date(float32Array[i]).toISOString();
+          }
+        }
+
+        deserialized_data[column] = deserialized_array;
+      }
+    }
+  }
+  return deserialized_data;
+}
+
+export function deserialize_data(data: any, manager: any): DataSource {
+  const deserialized = deserialize_data_simple(data, manager);
+  return new DataSource(deserialized, data.fields, data.schema, true);
+}
diff --git a/js/core/streamingviewbasedjsonmodel.ts b/js/core/streamingviewbasedjsonmodel.ts
index d9a48f3e..31d4018d 100644
--- a/js/core/streamingviewbasedjsonmodel.ts
+++ b/js/core/streamingviewbasedjsonmodel.ts
@@ -1,5 +1,5 @@
-import { PromiseDelegate } from '@lumino/coreutils';
 import { DataModel } from '@lumino/datagrid';
+import { deserialize_data_simple } from './deserialize';
 import { StreamingView } from './streamingview';
 import { TransformStateManager } from './transformStateManager';
 import { ViewBasedJSONModel } from './viewbasedjsonmodel';
@@ -64,22 +64,30 @@ export class StreamingViewBasedJSONModel extends ViewBasedJSONModel {
       return Promise.resolve(this._unique.values);
     }
 
-    const promiseDelegate = new PromiseDelegate();
-    this._dataModel.on('msg:custom', (content) => {
-      // when message received, want to drop this handler...
-      // Or keep it going but need a way of identifying where to put the received data??????
-      if (content.event_type === 'unique-values-reply') {
-        this._unique = { region, column, values: content.values };
-        promiseDelegate.resolve(this._unique.values);
-      }
-
-      // Do I need to cancel this callback?????????
+    const promise = new Promise(resolve => {
+      this._dataModel.once('msg:custom', (content, buffers) => {
+        if (content.event_type === 'unique-values-reply') {
+          const { value } = content;
+
+          // Bring back buffers at their original position in the data structure
+          for (const col of Object.keys(value.data)) {
+            if (value.data[col].type !== 'raw') {
+              value.data[col].value = buffers[value.data[col].value];
+            }
+          }
+
+          const deserialized = deserialize_data_simple(value, null);
+          const values = deserialized[content.column];
+
+          this._unique = { region, column: content.column, values };
+          resolve(this._unique.values);
+        }
+      });
     });
 
     const msg = { type: 'unique-values-request', column: column };
     this._dataModel.send(msg);
-
-    return promiseDelegate.promise;
+    return promise;
   }
 
   updateDataset(options: StreamingViewBasedJSONModel.IOptions): void {
diff --git a/js/datagrid.ts b/js/datagrid.ts
index 26a864b5..0dc55322 100644
--- a/js/datagrid.ts
+++ b/js/datagrid.ts
@@ -31,6 +31,7 @@ import {
 
 import { array_or_json_serializer } from 'bqplot';
 
+import { deserialize_data } from './core/deserialize';
 import { ViewBasedJSONModel } from './core/viewbasedjsonmodel';
 import { StreamingViewBasedJSONModel } from './core/streamingviewbasedjsonmodel';
 
@@ -45,34 +46,6 @@ import { DataSource } from './datasource';
 // Import CSS
 import '../style/jupyter-widget.css';
 
-function unpack_raw_data(
-  value: any | Dict | string | (Dict | string)[],
-): any {
-  if (Array.isArray(value)) {
-    const unpacked: any[] = [];
-    value.forEach((sub_value, key) => {
-      unpacked.push(unpack_raw_data(sub_value));
-    });
-    return unpacked;
-  } else if (value instanceof Object && typeof value !== 'string') {
-    const unpacked: { [key: string]: any } = {};
-    Object.keys(value).forEach((key) => {
-      unpacked[key] = unpack_raw_data(value[key]);
-    });
-    return unpacked;
-  } else if (value === '$NaN$') {
-    return Number.NaN;
-  } else if (value === '$Infinity$') {
-    return Number.POSITIVE_INFINITY;
-  } else if (value === '$NegInfinity$') {
-    return Number.NEGATIVE_INFINITY;
-  } else if (value === '$NaT$') {
-    return new Date('INVALID');
-  } else {
-    return value;
-  }
-}
-
 function serialize_data(data: DataSource, manager: any): any {
   const serialized_data: any = {};
   for (const column of Object.keys(data.data)) {
     serialized_data[column] = array_or_json_serializer.serialize(
       data.data[column],
       manager,
     );
   }
   return { data: serialized_data, fields: data.fields, schema: data.schema };
 }
 
-function deserialize_data(data: any, manager: any): DataSource {
-  const deserialized_data: any = {};
-
-  // Backward compatibility for when data.data was an array of rows
-  // (should be removed in ipydatagrid 2.x?)
-  if (Array.isArray(data.data)) {
-    if (data.data.length === 0) {
-      return new DataSource(deserialized_data, data.fields, data.schema, true);
-    }
-
-    const unpacked = unpack_raw_data(data.data);
-    // Turn array of rows (old approach) into a dictionary of columns as arrays (new approach)
-    for (const column of Object.keys(unpacked[0])) {
-      const columnData = new Array(unpacked.length);
-      let rowIdx = 0;
-
-      for (const row of unpacked) {
-        columnData[rowIdx++] = row[column];
-      }
-
-      deserialized_data[column] = columnData;
-    }
-
-    return new DataSource(deserialized_data, data.fields, data.schema, true);
-  }
-
-  for (const column of Object.keys(data.data)) {
-    deserialized_data[column] = [];
-
-    if (Array.isArray(data.data[column])) {
-      deserialized_data[column] = data.data[column];
-      continue;
-    }
-
-    if (data.data[column].type == 'raw') {
-      deserialized_data[column] = unpack_raw_data(data.data[column].value);
-    } else {
-      if (data.data[column].value.length !== 0) {
-        let deserialized_array = array_or_json_serializer.deserialize(
-          data.data[column],
-          manager,
-        );
-
-        // Turning back float32 dates into isoformat
-        if (deserialized_array.type === 'date') {
-          const float32Array = deserialized_array;
-          deserialized_array = [];
-
-          for (let i = 0; i < float32Array.length; i++) {
-            deserialized_array[i] = new Date(float32Array[i]).toISOString();
-          }
-        }
-
-        deserialized_data[column] = deserialized_array;
-      }
-    }
-  }
-  return new DataSource(deserialized_data, data.fields, data.schema, true);
-}
-
 export class DataGridModel extends DOMWidgetModel {
   defaults() {
     return {