Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spanner: Add Cloud Spanner Backup samples #3101

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
273 changes: 273 additions & 0 deletions spanner/cloud-client/backup_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# Copyright 2019 Google Inc. All Rights Reserved.
larkee marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates how to create and restore from backups
using Cloud Spanner.

For more information, see the README.rst under /spanner.
"""

import argparse
from datetime import (
datetime,
timedelta
)
import time

from google.cloud import spanner


# [START spanner_create_backup]
def create_backup(instance_id, database_id, backup_id):
    """Creates a backup for a database."""
    # The docstring above is kept to one line because the CLI below reuses
    # it as the sub-command help text.
    #
    # instance_id: ID of the instance that holds the database.
    # database_id: ID of the database to back up.
    # backup_id:   ID to give the new backup.
    client = spanner.Client()
    target_instance = client.instance(instance_id)
    source_database = target_instance.database(database_id)

    # The new backup is set to expire 14 days from now.
    retention = timedelta(days=14)
    new_backup = target_instance.backup(
        backup_id,
        database=source_database,
        expire_time=datetime.utcnow() + retention)

    # Start the backup and wait for the operation to complete.
    create_operation = new_backup.create()
    create_operation.result()

    # Refresh the local copy and verify that the backup is ready.
    new_backup.reload()
    assert new_backup.is_ready() is True

    # Refresh once more, then report the name, create time and backup size.
    new_backup.reload()
    summary = "Backup {} of size {} bytes was created at {}".format(
        new_backup.name, new_backup.size_bytes, new_backup.create_time)
    print(summary)
# [END spanner_create_backup]


# [START spanner_restore_database]
def restore_database(instance_id, new_database_id, backup_id):
    """Restores a database from a backup."""
    # The docstring above is kept to one line because the CLI below reuses
    # it as the sub-command help text.
    #
    # instance_id:     ID of the instance used for both backup and database.
    # new_database_id: ID of the database to restore into.
    # backup_id:       ID of the backup to restore from.
    client = spanner.Client()
    target_instance = client.instance(instance_id)

    # Start restoring the backup into a new database.
    source_backup = target_instance.backup(backup_id)
    restored_db = target_instance.database(new_database_id)
    restore_operation = restored_db.restore(source_backup)

    # Wait for the restore operation to complete.
    restore_operation.result()

    # Reload so the new database's restore information can be read.
    restored_db.reload()
    backup_info = restored_db.restore_info.backup_info
    message = "Database {} restored to {} from backup {}.".format(
        backup_info.source_database, new_database_id, backup_info.backup)
    print(message)
# [END spanner_restore_database]


# [START spanner_cancel_backup]
def cancel_backup(instance_id, database_id, backup_id):
    """Starts creating a backup and then cancels the operation."""
    # The docstring is a single line because the CLI below uses it as the
    # sub-command help text (it was previously missing, so help was None).
    #
    # instance_id: ID of the instance that holds the database.
    # database_id: ID of the database to back up.
    # backup_id:   ID to give the backup whose creation is cancelled.
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)
    database = instance.database(database_id)

    expire_time = datetime.utcnow() + timedelta(days=30)

    # Create a backup.
    backup = instance.backup(
        backup_id, database=database, expire_time=expire_time)
    operation = backup.create()

    # Cancel backup creation.
    operation.cancel()

    # Cancel operations are best effort so either it will complete or
    # be cancelled. Poll until the operation reaches one of those states.
    while not operation.done():
        time.sleep(300)  # 5 mins

    # Deal with resource if the operation succeeded.
    if backup.exists():
        print("Backup was created before the cancel completed.")
        backup.delete()
        print("Backup deleted.")
    else:
        print("Backup creation was successfully cancelled.")
# [END spanner_cancel_backup]


# [START spanner_list_backup_operations]
def list_backup_operations(instance_id, database_id):
    """Lists all create backup operations on a database."""
    # The docstring is a single line because the CLI below uses it as the
    # sub-command help text (it was previously missing, so help was None).
    #
    # instance_id: ID of the instance whose backup operations are listed.
    # database_id: ID used in the `metadata.database` filter.
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)

    # List all create backup operations on this database.
    filter_ = (
        "(metadata.database:{}) AND "
        "(metadata.@type:type.googleapis.com/"
        "google.spanner.admin.database.v1.CreateBackupMetadata)"
    ).format(database_id)
    operations = instance.list_backup_operations(filter_=filter_)
    for op in operations:
        metadata = op.metadata
        # Every operation is reported, finished or not: the other samples
        # wait for their backups to complete, so restricting the output to
        # operations below 100% would normally print nothing.
        print("Backup {} on database {}: {}% complete.".format(
            metadata.name, metadata.database,
            metadata.progress.progress_percent))
# [END spanner_list_backup_operations]


# [START spanner_list_database_operations]
def list_database_operations(instance_id):
    """Lists the optimize restored database operations on an instance."""
    # The docstring is a single line because the CLI below uses it as the
    # sub-command help text (it was previously missing, so help was None).
    #
    # instance_id: ID of the instance whose database operations are listed.
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)

    # List the progress of restore. The filter keeps only operations whose
    # metadata type is OptimizeRestoredDatabaseMetadata.
    filter_ = (
        "(metadata.@type:type.googleapis.com/"
        "google.spanner.admin.database.v1.OptimizeRestoredDatabaseMetadata)"
    )
    operations = instance.list_database_operations(filter_=filter_)
    for op in operations:
        print("Database {} restored from backup is {}% optimized.".format(
            op.metadata.name, op.metadata.progress.progress_percent))
# [END spanner_list_database_operations]


# [START spanner_list_backups]
def list_backups(instance_id, database_id="bank", backup_id="users"):
    """Lists the backups on an instance, unfiltered and with filters."""
    # The docstring is a single line because the CLI below uses it as the
    # sub-command help text (it was previously missing, so help was None).
    #
    # instance_id: ID of the instance whose backups are listed.
    # database_id: text matched against the backup's database name. The
    #              default keeps the value that used to be hard-coded.
    # backup_id:   text matched against the backup name. The default keeps
    #              the value that used to be hard-coded.
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)

    # Timestamps are written in the same "YYYY-MM-DDTHH:MM:SSZ" form as the
    # literals that were previously hard-coded in the filters.
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    now = datetime.utcnow()

    # List all backups.
    print("All backups:")
    for backup in instance.list_backups():
        print(backup.name)

    # List all backups that contain a name.
    print("All backups with backup name containing \"{}\":".format(
        backup_id))
    for backup in instance.list_backups(
            filter_="name:{}".format(backup_id)):
        print(backup.name)

    # List all backups for a database that contains a name.
    print("All backups with database name containing \"{}\":".format(
        database_id))
    for backup in instance.list_backups(
            filter_="database:{}".format(database_id)):
        print(backup.name)

    # List all backups that expire before a timestamp. The timestamp is
    # 30 days from now; a fixed date in the past would never match.
    expire_before = (now + timedelta(days=30)).strftime(timestamp_format)
    print("All backups with expire_time before \"{}\":".format(
        expire_before))
    for backup in instance.list_backups(
            filter_="expire_time < \"{}\"".format(expire_before)):
        print(backup.name)

    # List all backups with a size greater than some bytes.
    print("All backups with backup size more than 1000 bytes:")
    for backup in instance.list_backups(filter_="size_bytes > 1000"):
        print(backup.name)

    # List backups that were created after a timestamp that are also ready.
    # The timestamp is one day ago so the filter stays selective over time.
    created_after = (now - timedelta(days=1)).strftime(timestamp_format)
    print("All backups created after \"{}\" and are READY:".format(
        created_after))
    for backup in instance.list_backups(filter_=(
            "create_time >= \"{}\" AND "
            "state:READY").format(created_after)):
        print(backup.name)
skuruppu marked this conversation as resolved.
Show resolved Hide resolved
# [END spanner_list_backups]


# [START spanner_delete_backup]
def delete_backup(instance_id, backup_id):
    """Deletes a backup once no database references it."""
    # The docstring is a single line because the CLI below uses it as the
    # sub-command help text (it was previously missing, so help was None).
    #
    # instance_id: ID of the instance that holds the backup.
    # backup_id:   ID of the backup to delete.
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)
    backup = instance.backup(backup_id)
    backup.reload()

    # Wait for databases that reference this backup to finish optimizing.
    # The backup is reloaded on every pass so the list is re-read.
    while backup.referencing_databases:
        time.sleep(30)
        backup.reload()

    # Delete the backup.
    backup.delete()

    # Verify that the backup is deleted.
    assert backup.exists() is False
    print("Backup {} has been deleted.".format(backup.name))
# [END spanner_delete_backup]


# [START spanner_update_backup]
def update_backup(instance_id, backup_id):
    """Extends the expire time of a backup by 30 days."""
    # The docstring is a single line because the CLI below uses it as the
    # sub-command help text (it was previously missing, so help was None).
    #
    # instance_id: ID of the instance that holds the backup.
    # backup_id:   ID of the backup whose expire time is updated.
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)
    backup = instance.backup(backup_id)
    # Load the current state so that expire_time is populated.
    backup.reload()

    # Expire time must be within 366 days of the create time of the backup.
    old_expire_time = backup.expire_time
    new_expire_time = old_expire_time + timedelta(days=30)
    backup.update_expire_time(new_expire_time)
    print("Backup {} expire time was updated from {} to {}.".format(
        backup.name, old_expire_time, new_expire_time))
# [END spanner_update_backup]


if __name__ == '__main__':  # noqa: C901
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'instance_id', help='Your Cloud Spanner instance ID.')
    parser.add_argument(
        '--database-id', help='Your Cloud Spanner database ID.',
        default='example_db')
    parser.add_argument(
        '--backup-id', help='Your Cloud Spanner backup ID.',
        default='example_backup')

    # One row per sub-command, in the order they are registered:
    # (sample function, names of the parsed arguments passed to it).
    # The sub-command name is the function's own name and its help text is
    # the function's docstring.
    samples = [
        (create_backup, ('instance_id', 'database_id', 'backup_id')),
        (cancel_backup, ('instance_id', 'database_id', 'backup_id')),
        (update_backup, ('instance_id', 'backup_id')),
        # restore_database receives --database-id as the new database ID.
        (restore_database, ('instance_id', 'database_id', 'backup_id')),
        (list_backups, ('instance_id',)),
        (list_backup_operations, ('instance_id', 'database_id')),
        (list_database_operations, ('instance_id',)),
        (delete_backup, ('instance_id', 'backup_id')),
    ]

    subparsers = parser.add_subparsers(dest='command')
    for sample, _ in samples:
        subparsers.add_parser(sample.__name__, help=sample.__doc__)

    args = parser.parse_args()

    handlers = dict(
        (sample.__name__, (sample, arg_names))
        for sample, arg_names in samples)
    if args.command in handlers:
        sample, arg_names = handlers[args.command]
        sample(*[getattr(args, arg_name) for arg_name in arg_names])
    else:
        print("Command {} did not match expected commands.".format(
            args.command))
107 changes: 107 additions & 0 deletions spanner/cloud-client/backup_sample_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2019 Google Inc. All Rights Reserved.
larkee marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import spanner
import pytest
import random
import string

import backup_sample


def _unique_id(prefix):
    """ Returns `prefix`, a dash and five random [a-z0-9] characters. """
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(random.choice(alphabet) for _ in range(5))
    return '{}-{}'.format(prefix, suffix)


def unique_instance_id():
    """ Creates a unique id for the instance. """
    return _unique_id('test-instance')


def unique_database_id():
    """ Creates a unique id for the database. """
    return _unique_id('test-db')


def unique_backup_id():
    """ Creates a unique id for the backup. """
    return _unique_id('test-backup')


# Randomized resource IDs, generated once at import time and shared by the
# fixtures and tests in this module.
INSTANCE_ID = unique_instance_id()
DATABASE_ID = unique_database_id()
# ID of the database that test_restore_database restores the backup into.
RESTORE_DB_ID = unique_database_id()
BACKUP_ID = unique_backup_id()


@pytest.fixture(scope='module')
def spanner_instance():
    """ Creates a temporary instance that is deleted after testing. """
    client = spanner.Client()
    config_name = '{}/instanceConfigs/{}'.format(
        client.project_name, 'regional-us-central1')
    test_instance = client.instance(INSTANCE_ID, config_name)
    # Block until the create operation completes; 30 is passed to result()
    # as its timeout (presumably seconds).
    test_instance.create().result(30)
    yield test_instance
    # Teardown: runs after the last test of the module that uses it.
    test_instance.delete()


@pytest.fixture(scope='module')
def database(spanner_instance):
    """ Creates a temporary database that is removed after testing. """
    db = spanner_instance.database(DATABASE_ID)
    # create() returns an operation rather than waiting for the database.
    # Block on it so that no test runs against a database that is still
    # being created.
    create_operation = db.create()
    create_operation.result()
    yield db
    # Teardown: drop the database once the module's tests are done.
    db.drop()


def test_create_backup(capsys, database):
    """ create_backup prints the backup name, size and create time. """
    backup_sample.create_backup(INSTANCE_ID, DATABASE_ID, BACKUP_ID)
    stdout = capsys.readouterr()[0]
    expected_fragments = (
        "Backup ",
        BACKUP_ID + " of size 0 bytes was created at ",
    )
    for fragment in expected_fragments:
        assert fragment in stdout


def test_restore_database(capsys):
    """ restore_database prints the source, target and backup names. """
    # Uses BACKUP_ID, which test_create_backup creates earlier in the module.
    backup_sample.restore_database(INSTANCE_ID, RESTORE_DB_ID, BACKUP_ID)
    stdout = capsys.readouterr()[0]
    expected_fragments = (
        "Database ",
        DATABASE_ID + " restored to ",
        RESTORE_DB_ID + " from backup ",
        BACKUP_ID + ".",
    )
    for fragment in expected_fragments:
        assert fragment in stdout


def test_update_backup(capsys):
    """ update_backup prints the old and new expire time of the backup. """
    # Uses BACKUP_ID, which test_create_backup creates earlier in the module.
    backup_sample.update_backup(INSTANCE_ID, BACKUP_ID)
    stdout = capsys.readouterr()[0]
    expected_fragments = (
        "Backup ",
        BACKUP_ID + " expire time was updated from ",
        " to ",
    )
    for fragment in expected_fragments:
        assert fragment in stdout


def test_delete_backup(capsys, spanner_instance):
    """ delete_backup prints a confirmation naming the deleted backup. """
    backup_sample.delete_backup(INSTANCE_ID, BACKUP_ID)
    stdout = capsys.readouterr()[0]
    expected_fragments = (
        "Backup ",
        BACKUP_ID + " has been deleted.",
    )
    for fragment in expected_fragments:
        assert fragment in stdout


def test_cancel_backup(capsys, database):
    """ cancel_backup reports a cancelled or a created-then-deleted backup. """
    # The `database` fixture is requested because the sample creates a
    # backup on DATABASE_ID; without it the test only passes when an
    # earlier test has already set up the instance and database.
    backup_sample.cancel_backup(INSTANCE_ID, DATABASE_ID, BACKUP_ID)
    out, _ = capsys.readouterr()
    # Cancellation is best effort, so both outcomes are acceptable.
    cancel_success = "Backup creation was successfully cancelled." in out
    cancel_failure = (
        ("Backup was created before the cancel completed." in out) and
        ("Backup deleted." in out)
    )
    assert cancel_success or cancel_failure
skuruppu marked this conversation as resolved.
Show resolved Hide resolved