Add reading data with snapshot system test
plamut committed Aug 8, 2019
1 parent 6bb61e2 commit ad8bef3
Showing 2 changed files with 73 additions and 0 deletions.
2 changes: 2 additions & 0 deletions bigquery_storage/noxfile.py
@@ -111,6 +111,8 @@ def system(session):
# Use pre-release gRPC for system tests.
session.install("--pre", "grpcio")

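    # The reader system tests import google.protobuf.timestamp_pb2 directly,
    # so install protobuf explicitly as well.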
session.install("protobuf")

# Install all test dependencies, then install this package into the
# virtualenv's dist-packages.
session.install("mock", "pytest")
71 changes: 71 additions & 0 deletions bigquery_storage/tests/system/test_reader.py
@@ -15,9 +15,15 @@
# limitations under the License.
"""System tests for reading rows from tables."""

import datetime as dt
import io
import json

import pytest

from google.cloud import bigquery
from google.cloud import bigquery_storage_v1beta1
from google.protobuf import timestamp_pb2


@pytest.mark.parametrize(
@@ -145,3 +151,68 @@ def test_column_selection_read(client, project_id, table_with_data_ref, data_for

for row in rows:
assert sorted(row.keys()) == ["age", "first_name"]
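
# Note (illustration only, not part of this commit): the two columns asserted
# above are chosen when the read session is created, via the v1beta1 read
# options, along these lines:
#
#     read_options = bigquery_storage_v1beta1.types.TableReadOptions()
#     read_options.selected_fields.append("first_name")
#     read_options.selected_fields.append("age")
#     session = client.create_read_session(
#         table_with_data_ref,
#         "projects/{}".format(project_id),
#         read_options=read_options,
#         requested_streams=1,
#     )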


def test_snapshot(client, project_id, table_with_data_ref):
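    # capture the current time before any new data is loaded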
before_new_data = timestamp_pb2.Timestamp()
before_new_data.GetCurrentTime()

# load additional data into the table
new_data = [
{u"first_name": u"NewGuyFoo", u"last_name": u"Smith", u"age": 46},
{u"first_name": u"NewGuyBar", u"last_name": u"Jones", u"age": 30},
]
_add_rows(table_with_data_ref, new_data)

# read data using the timestamp before the additional data load
session = client.create_read_session(
table_with_data_ref,
"projects/{}".format(project_id),
format_=bigquery_storage_v1beta1.enums.DataFormat.AVRO,
requested_streams=1,
table_modifiers={"snapshot_time": before_new_data},
)
stream_pos = bigquery_storage_v1beta1.types.StreamPosition(
stream=session.streams[0]
)

rows = list(client.read_rows(stream_pos).rows(session))

# verify that only the data before the timestamp was returned
assert len(rows) == 5 # all initial records

for row in rows:
assert "NewGuy" not in row["first_name"] # no new records
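
# Note (illustration only, not part of this commit): the snapshot time need
# not be "now"; the protobuf Timestamp API can also pin a session to a
# specific datetime, for example:
#
#     one_minute_ago = dt.datetime.utcnow() - dt.timedelta(minutes=1)
#     snapshot_time = timestamp_pb2.Timestamp()
#     snapshot_time.FromDatetime(one_minute_ago)
#
# which is then passed as table_modifiers={"snapshot_time": snapshot_time}.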


def _add_rows(table_ref, new_data):
    """Insert additional rows into an existing table.

    Args:
table_ref (bigquery_storage_v1beta1.types.TableReference):
A reference to the target table.
new_data (Iterable[Dict[str, Any]]):
New data to insert with each row represented as a dictionary.
The keys must match the table column names, and the values
must be JSON serializable.
"""
bq_client = bigquery.Client()

job_config = bigquery.LoadJobConfig(
source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
)

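    # serialize the new rows as newline-delimited JSON in an in-memory file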
new_data_str = u"\n".join(json.dumps(item) for item in new_data)
new_data_file = io.BytesIO(new_data_str.encode())

destination_ref = bigquery.table.TableReference.from_api_repr(
{
"projectId": table_ref.project_id,
"datasetId": table_ref.dataset_id,
"tableId": table_ref.table_id,
}
)
job = bq_client.load_table_from_file(
new_data_file, destination=destination_ref, job_config=job_config
)
job.result() # wait for the load to complete
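
For reference, the snapshot pattern this test exercises, condensed into a
standalone sketch. The table coordinates and project ID below are
placeholders, and default application credentials are assumed:

    from google.cloud import bigquery_storage_v1beta1
    from google.protobuf import timestamp_pb2

    client = bigquery_storage_v1beta1.BigQueryStorageClient()

    table_ref = bigquery_storage_v1beta1.types.TableReference(
        project_id="my-project", dataset_id="my_dataset", table_id="my_table"
    )

    # pin the session to the current moment; rows loaded later are invisible to it
    snapshot_time = timestamp_pb2.Timestamp()
    snapshot_time.GetCurrentTime()

    session = client.create_read_session(
        table_ref,
        "projects/my-project",
        format_=bigquery_storage_v1beta1.enums.DataFormat.AVRO,
        requested_streams=1,
        table_modifiers={"snapshot_time": snapshot_time},
    )
    stream_pos = bigquery_storage_v1beta1.types.StreamPosition(
        stream=session.streams[0]
    )

    for row in client.read_rows(stream_pos).rows(session):
        print(row)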
