Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a callback to get_parameter_data to follow data loading #4688

Merged
merged 25 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
049cb58
Add a callback to get_parameter_data to follow data loading
edumur Oct 4, 2022
bc19a7c
Make mypy happy
edumur Oct 4, 2022
7962f24
Replace CamelCase by snake case
edumur Oct 26, 2022
a8f6c98
Simplify typing
edumur Oct 26, 2022
f6bc436
Typing more precise
edumur Oct 26, 2022
293c244
Move callback_percent parameter to qcodesrc.json and add the matching…
edumur Oct 26, 2022
c8063a6
Fix some forgotten typing of callback
edumur Oct 27, 2022
2d43676
Put back `many` and `many_many` previous signature
edumur Oct 27, 2022
92de4f2
Add newsfragment file explaining the new callback parameter
edumur Oct 27, 2022
499eb46
Merge branch 'master' into add-feedback
edumur Oct 27, 2022
2cfc7bc
Remove unnecessary loop, fix mypy issue
edumur Nov 14, 2022
09acba1
add callback to make mypy happy
edumur Nov 14, 2022
6243736
Improve description
edumur Nov 15, 2022
ee717e7
Remove useless import
edumur Nov 15, 2022
36e17cb
Delete useless file
edumur Nov 15, 2022
5b7e4fe
Breakdown logics in smaller functions
edumur Nov 15, 2022
044bcc4
Add (incorrect) test of callback
jenshnielsen Nov 24, 2022
52aaea3
Run darker
jenshnielsen Nov 24, 2022
087ac55
Merge branch 'master' into add-feedback
jenshnielsen Nov 24, 2022
c3ba603
fix missing type
jenshnielsen Nov 24, 2022
409ee60
Take into account null column
edumur Nov 24, 2022
de0e21f
Merge branch 'add-feedback' of https://github.com/edumur/Qcodes into …
edumur Nov 24, 2022
84bbf14
extend callback test to cover multiple num params and < 100 rows
jenshnielsen Nov 28, 2022
ede4c4d
Improvement of callback algorithm
edumur Nov 28, 2022
83c828e
Merge branch 'master' into add-feedback
jenshnielsen Nov 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions qcodes/configuration/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ class Config:
defaults_schema: DotDict
"""The default schema"""

callback_percent: int = 5
"""If the user passes a callback function to follow data loading, the callback
is called every callback_percent percent of the loading progress"""
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved

_diff_config: dict[str, Any] = {}
_diff_schema: dict[str, Any] = {}

Expand Down
5 changes: 4 additions & 1 deletion qcodes/dataset/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ def get_parameter_data(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
callback: Callable | None = None,
) -> ParameterData:
"""
Returns the values stored in the :class:`.DataSet` for the specified parameters
Expand Down Expand Up @@ -801,6 +802,8 @@ def get_parameter_data(
if None
end: end value of selection range (by results count); ignored if
None
callback: Function called during data loading, once every
Config.callback_percent percent of the loading progress.

Returns:
Dictionary from requested parameters to Dict of parameter names
Expand All @@ -813,7 +816,7 @@ def get_parameter_data(
else:
valid_param_names = self._validate_parameters(*params)
return get_parameter_data(self.conn, self.table_name,
valid_param_names, start, end)
valid_param_names, start, end, callback)

def to_pandas_dataframe_dict(
self,
Expand Down
3 changes: 2 additions & 1 deletion qcodes/dataset/data_set_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import warnings
from collections.abc import Mapping, Sized
from enum import Enum
from typing import TYPE_CHECKING, Any, Dict, List, Sequence, Tuple, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Sequence, Tuple, Union

import numpy as np
from typing_extensions import Protocol, TypeAlias, runtime_checkable
Expand Down Expand Up @@ -221,6 +221,7 @@ def get_parameter_data(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
callback: Callable | None = None,
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved
) -> ParameterData:
pass

Expand Down
110 changes: 93 additions & 17 deletions qcodes/dataset/sqlite/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
update_where,
)
from qcodes.utils import deprecate, list_of_data_to_maybe_ragged_nd_array
from qcodes.configuration import Config

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -161,7 +162,7 @@ def get_data(
return [[]]
query = _build_data_query(table_name, columns, start, end)
c = atomic_transaction(conn, query)
res = many_many(c, *columns)
res = many_many(c, columns)

return res

Expand All @@ -172,6 +173,7 @@ def get_parameter_data(
columns: Sequence[str] = (),
start: int | None = None,
end: int | None = None,
callback: Callable | None = None,
) -> dict[str, dict[str, np.ndarray]]:
"""
Get data for one or more parameters and its dependencies. The data
Expand All @@ -197,6 +199,8 @@ def get_parameter_data(
are returned.
start: start of range; if None, then starts from the top of the table
end: end of range; if None, then ends at the bottom of the table
callback: Function called during data loading, once every
Config.callback_percent percent of the loading progress.
"""
rundescriber = get_rundescriber_from_result_table_name(conn, table_name)

Expand All @@ -212,7 +216,8 @@ def get_parameter_data(
rundescriber,
output_param,
start,
end)
end,
callback)
return output


Expand All @@ -223,6 +228,7 @@ def get_shaped_parameter_data_for_one_paramtree(
output_param: str,
start: int | None,
end: int | None,
callback: Callable | None,
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved
) -> dict[str, np.ndarray]:
"""
Get the data for a parameter tree and reshape it according to the
Expand All @@ -239,7 +245,8 @@ def get_shaped_parameter_data_for_one_paramtree(
rundescriber,
output_param,
start,
end
end,
callback
)
if rundescriber.shapes is not None:
shape = rundescriber.shapes.get(output_param)
Expand Down Expand Up @@ -285,10 +292,11 @@ def get_parameter_data_for_one_paramtree(
output_param: str,
start: int | None,
end: int | None,
callback: Callable | None,
) -> tuple[dict[str, np.ndarray], int]:
interdeps = rundescriber.interdeps
data, paramspecs, n_rows = _get_data_for_one_param_tree(
conn, table_name, interdeps, output_param, start, end
conn, table_name, interdeps, output_param, start, end, callback
)
if not paramspecs[0].name == output_param:
raise ValueError("output_param should always be the first "
Expand Down Expand Up @@ -384,6 +392,7 @@ def _get_data_for_one_param_tree(
output_param: str,
start: int | None,
end: int | None,
callback: Callable | None,
) -> tuple[list[list[Any]], list[ParamSpecBase], int]:
output_param_spec = interdeps._id_to_paramspec[output_param]
# find all the dependencies of this param
Expand All @@ -396,7 +405,8 @@ def _get_data_for_one_param_tree(
output_param,
*dependency_names,
start=start,
end=end)
end=end,
callback=callback)
n_rows = len(res)
return res, paramspecs, n_rows

Expand Down Expand Up @@ -436,6 +446,7 @@ def get_parameter_tree_values(
*other_param_names: str,
start: int | None = None,
end: int | None = None,
callback: Callable | None = None,
) -> list[list[Any]]:
"""
Get the values of one or more columns from a data table. The rows
Expand All @@ -456,18 +467,62 @@ def get_parameter_tree_values(
end: The (1-indexed) result to include as the last result to be
returned. None is equivalent to "all the rest". If start > end,
nothing is returned.
callback: Function called during data loading, once every
Config.callback_percent percent of the loading progress.

Returns:
A list of list. The outer list index is row number, the inner list
index is parameter value (first toplevel_param, then other_param_names)
"""

cursor = conn.cursor()

offset: int | np.ndarray

offset = max((start - 1), 0) if start is not None else 0
limit = max((end - offset), 0) if end is not None else -1

if start is not None and end is not None and start > end:
limit = 0

# start and end are currently not supported together with callback
if start is None and end is None and callback is not None:

jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved
# Since sqlite3 provides no way to keep track of the data loading
# progress, we compute how many sqlite requests correspond to
# a progress of Config.callback_percent percent

# First, we get the number of dependent parameters
rd = get_rundescriber_from_result_table_name(conn, result_table_name)._to_dict()
# New qcodes
if 'interdependencies_' in rd.keys():
nbParamDependent = len(rd['interdependencies_']['dependencies'])
# Old qcodes
else:
nbParamDependent = len([i for i in rd['interdependencies']['paramspecs'] if i['name']==toplevel_param_name][0]['depends_on'])
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved

# Second, we get the number of points
sql_callback_query = f"""
SELECT MAX(id)
FROM "{result_table_name}"
"""
cursor.execute(sql_callback_query, ())
rows = cursor.fetchall()
max_id = int(rows[0]['max(id)'])
nb_point = int(max_id/nbParamDependent)

# Third, we get the number of rows corresponding to a download of
# Config.callback_percent
if nb_point>=100:
limit = int(max_id/100*Config.callback_percent/2)

offset = np.arange(0, nb_point, limit)
# Ensure that the last call gets all the points
if offset[-1]!=nb_point:
offset = np.append(offset, nb_point)

iteration = 100/(len(offset)-1)

# Note: if we use placeholders for the SELECT part, then we get rows
# back that have "?" as all their keys, making further data extraction
# impossible
Expand All @@ -482,15 +537,31 @@ def get_parameter_tree_values(
FROM "{result_table_name}"
WHERE {toplevel_param_name} IS NOT NULL)
"""
sql = f"""
SELECT {columns_for_select}
FROM {sql_subquery}
LIMIT {limit} OFFSET {offset}
"""

cursor = conn.cursor()
cursor.execute(sql, ())
res = many_many(cursor, *columns)
if isinstance(offset, int) and callback is None:
sql = f"""
SELECT {columns_for_select}
FROM {sql_subquery}
LIMIT {limit} OFFSET {offset}
"""

cursor.execute(sql, ())
res = many_many(cursor, columns)
if isinstance(offset, np.ndarray) and callback is not None:

res = []
progress = 0.
callback(progress)
for i in range(len(offset)-1):
sql = f"""
SELECT {columns_for_select}
FROM {sql_subquery}
LIMIT {limit} OFFSET {offset[i]}
"""
cursor.execute(sql)
res = many_many(cursor, columns, res)
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved
progress += iteration
callback(progress)

return res

Expand Down Expand Up @@ -800,7 +871,7 @@ def _get_dependencies(conn: ConnectionPlus, layout_id: int) -> list[list[int]]:
SELECT independent, axis_num FROM dependencies WHERE dependent=?
"""
c = atomic_transaction(conn, sql, layout_id)
res = many_many(c, 'independent', 'axis_num')
res = many_many(c, ['independent', 'axis_num'])
return res


Expand Down Expand Up @@ -1404,8 +1475,12 @@ def _get_paramspec(conn: ConnectionPlus,
WHERE parameter="{param_name}" and run_id={run_id}
"""
c = conn.execute(sql)
resp = many(c, 'layout_id', 'run_id', 'parameter', 'label', 'unit',
'inferred_from')
resp = many(c, ['layout_id',
'run_id',
'parameter',
'label',
'unit',
'inferred_from'])
(layout_id, _, _, label, unit, inferred_from_string) = resp

if inferred_from_string:
Expand Down Expand Up @@ -2093,7 +2168,8 @@ def load_new_data_for_rundescriber(
rundescriber=rundescriber,
output_param=meas_parameter,
start=start,
end=None
end=None,
callback=None,
)
new_data_dict[meas_parameter] = new_data
updated_read_status[meas_parameter] = start + n_rows_read - 1
Expand Down
21 changes: 16 additions & 5 deletions qcodes/dataset/sqlite/query_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import itertools
import sqlite3
from typing import Any, List, Mapping, Sequence, Union
from typing import Any, List, Mapping, Sequence, Tuple, Union
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved

import numpy as np
from numpy import ndarray
Expand Down Expand Up @@ -44,7 +44,8 @@ def one(curr: sqlite3.Cursor, column: int | str) -> Any:
return res[0][column]


def many(curr: sqlite3.Cursor, *columns: str) -> list[Any]:
def many(curr: sqlite3.Cursor,
columns: str | List[str] | Tuple[str]) -> list[Any]:
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved
"""Get the values of many columns from one row
Args:
curr: cursor to operate on
Expand All @@ -60,19 +61,29 @@ def many(curr: sqlite3.Cursor, *columns: str) -> list[Any]:
return [res[0][c] for c in columns]


def many_many(curr: sqlite3.Cursor, *columns: str) -> list[list[Any]]:
def many_many(curr: sqlite3.Cursor,
columns: str | List[str] | Tuple[str],
jenshnielsen marked this conversation as resolved.
Show resolved Hide resolved
results: list | None = None) -> list[list[Any]]:
"""Get all values of many columns
Args:
curr: cursor to operate on
columns: names of the columns
results: optional existing list of rows to which the fetched rows are
appended; a new list is created if None

Returns:
list of lists of values
"""
res = curr.fetchall()
results = []

if isinstance(columns, str):
columns = [columns]

if results is None:
results = []

for r in res:
results.append([r[c] for c in columns])

return results


Expand Down Expand Up @@ -124,7 +135,7 @@ def select_many_where(
{where_column} = ?
"""
cur = atomic_transaction(conn, query, where_value)
res = many(cur, *columns)
res = many(cur, columns)
return res


Expand Down