Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QA data loader for alembic testing #3273

Merged
merged 11 commits into from
May 24, 2018
25 changes: 23 additions & 2 deletions docs/development/database_migrations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ command to see an output of the SQL that will be generated.

alembic upgrade head --sql

Testing Migrations
~~~~~~~~~~~~~~~~~~
Unit Testing Migrations
~~~~~~~~~~~~~~~~~~~~~~~

The test suite already comes with a test runner (``test_alembic.py``) that runs a series of checks
to ensure migration's upgrade and downgrade commands are idempotent and don't break the database.
Expand Down Expand Up @@ -157,3 +157,24 @@ accurately test your migration, and it will likely break during future code chan
should use as few dependencies as possible in your test including other ``securedrop`` code as well
as external packages. This may be a rather annoying requirement, but it will make the tests more
robust against future code changes.

Release Testing Migrations
~~~~~~~~~~~~~~~~~~~~~~~~~~

In order to ensure that migrations from the previous to the current version of SecureDrop apply
cleanly in production-like instances, we have a helper script that is designed to load
semi-randomized data into the database. You will need to modify the script ``qa_loader.py`` to
include sample data. This sample data should intentionally include edge cases that might behave
strangely such as data whose nullability is only enforced by the application or missing files.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For release managers and release testers, let's add a bit of docs saying:

  1. Provision staging or prod VMs.
  2. sudo su
  3. cd /var/www/securedrop
  4. ./qa_loader.py


During QA, the release manager should follow these steps to test the migrations.

1. Check out the previous SecureDrop release
2. Build Debian packages locally
3. Provision staging VMs
4. ``vagrant ssh app-staging``
5. ``sudo su``
6. ``cd /var/www/securedrop && ./qa_loader.py``
7. Check out the release candidate
8. Re-provision the staging VMs
9. Check that nothing went horribly wrong
1 change: 1 addition & 0 deletions securedrop/.rsync-filter
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ include journalist_templates/
include journalist_templates/*.html
include management/
include management/*.py
include qa_loader.py
include requirements/
include requirements/**.txt
include source_app/
Expand Down
11 changes: 6 additions & 5 deletions securedrop/crypto_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
# to fix gpg error #78 on production
os.environ['USERNAME'] = 'www-data'

# safe characters for every possible word in the wordlist includes capital
# letters because codename hashes are base32-encoded with capital letters
DICEWARE_SAFE_CHARS = (' !#%$&)(+*-1032547698;:=?@acbedgfihkjmlonqpsrutwvyxzA'
'BCDEFGHIJKLMNOPQRSTUVWXYZ')


class CryptoException(Exception):
    """Error raised by this module, e.g. when input contains a character
    that is neither in ``DICEWARE_SAFE_CHARS`` nor explicitly allowed."""
Expand Down Expand Up @@ -232,12 +237,8 @@ def clean(s, also=''):
>>> clean("Helloworld")
'Helloworld'
"""
# safe characters for every possible word in the wordlist includes capital
# letters because codename hashes are base32-encoded with capital letters
ok = (' !#%$&)(+*-1032547698;:=?@acbedgfihkjmlonqpsrutwvyxzABCDEFGHIJ'
'KLMNOPQRSTUVWXYZ')
for c in s:
if c not in ok and c not in also:
if c not in DICEWARE_SAFE_CHARS and c not in also:
raise CryptoException("invalid input: {0}".format(s))
# scrypt.hash requires input of type str. Since the wordlist is all ASCII
# characters, this conversion is not problematic
Expand Down
239 changes: 239 additions & 0 deletions securedrop/qa_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import math
import os
import random
import string
import sys

from argparse import ArgumentParser
from datetime import datetime
from flask import current_app
from os import path
from sqlalchemy import text

from crypto_util import DICEWARE_SAFE_CHARS
from db import db
from journalist_app import create_app
from models import (Journalist, Source, Submission, SourceStar, Reply,
JournalistLoginAttempt)
from sdconfig import config as sdconfig

# Seed the module-level PRNG with a fixed value so that the generated data
# set is reproducible from one run to the next.
random.seed('~(=^–^)') # mrow?

# Base number of journalists created; load_data() multiplies this by its
# ``multiplier`` argument.
JOURNALIST_COUNT = 10
# Base number of sources created; load_data() multiplies this by its
# ``multiplier`` argument.
SOURCE_COUNT = 50


def random_bool():
    '''Return ``True`` or ``False``, chosen by drawing one random bit from
    the module-level ``random`` generator.'''
    return random.getrandbits(1) == 1


def random_chars(len, nullable, chars=string.printable):
    '''Return a random string of ``len`` characters drawn from ``chars``.

    When ``nullable`` is true, a coin flip decides whether ``None`` is
    returned instead of a string. When it is false no coin is flipped and a
    string is always returned.
    '''
    if not (nullable and random_bool()):
        return ''.join(random.choice(chars) for _ in range(len))
    return None


def bool_or_none():
    '''Return one of ``True``, ``False`` or ``None``, picked at random.'''
    options = (True, False, None)
    return random.choice(options)


def random_datetime(nullable):
    '''Return a random naive ``datetime``, or possibly ``None``.

    When ``nullable`` is true, a coin flip decides whether ``None`` is
    returned instead. The day is limited to 1-28, which is valid for every
    month, and the microsecond component is drawn from 0-1000.
    '''
    if nullable and random_bool():
        return None

    # The components are drawn in this fixed order so that the sequence of
    # values taken from the seeded generator stays the same.
    year = random.randint(1, 9999)
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    hour = random.randint(0, 23)
    minute = random.randint(0, 59)
    second = random.randint(0, 59)
    microsecond = random.randint(0, 1000)
    return datetime(year, month, day, hour, minute, second, microsecond)


def new_journalist():
    '''Build a ``Journalist`` with randomized fields and add it to the
    session. Nothing is committed here.

    Several fields are deliberately set to values such as ``None`` or
    negative counters.
    '''
    # Make a diceware-like password: seven 3-character "words" joined by
    # single spaces.
    words = []
    for _ in range(7):
        words.append(random_chars(3, nullable=False,
                                  chars=DICEWARE_SAFE_CHARS))
    password = ' '.join(words)

    username = random_chars(random.randint(3, 32), nullable=False)
    # NOTE(review): the third positional argument is presumably the admin
    # flag -- confirm against models.Journalist.
    journalist = Journalist(username, password, random_bool())
    journalist.is_admin = bool_or_none()

    journalist.is_totp = bool_or_none()
    if random_bool():
        journalist.hotp_counter = random.randint(-1000, 1000)
    else:
        journalist.hotp_counter = None
    journalist.created_on = random_datetime(nullable=True)
    journalist.last_access = random_datetime(nullable=True)

    db.session.add(journalist)


def new_source():
    '''Build a ``Source`` with randomized fields and add it to the session.
    Nothing is committed here.
    '''
    # Both lengths are drawn before either string is generated.
    fid_len = random.randint(4, 32)
    designation_len = random.randint(4, 32)

    fid = random_chars(fid_len, nullable=False, chars=string.ascii_lowercase)
    designation = random_chars(designation_len, nullable=False)

    source = Source(fid, designation)
    source.flagged = bool_or_none()
    source.last_updated = random_datetime(nullable=True)
    source.pending = False

    db.session.add(source)


def new_submission(config, source_id):
    '''Create a fake submission file for the source with id ``source_id``
    and add a matching ``Submission`` row to the session.

    Does nothing if the source has no filesystem id. Nothing is committed
    here.
    '''
    source = Source.query.get(source_id)
    fid = source.filesystem_id

    # A source may have a null fid according to the DB, but this will
    # break storage.path.
    if fid is None:
        return

    submission = Submission(source, fake_file(config, fid))

    # For issue #1189: randomly detach the submission from its source.
    if random_bool():
        submission.source_id = None

    submission.downloaded = bool_or_none()

    db.session.add(submission)


def fake_file(config, source_fid):
    '''Write a placeholder file for the source ``source_fid`` and return its
    file name (not the full path).

    The source's directory under ``config.STORE_DIR`` is created if it does
    not exist. The file is filled with ``'x'`` characters and its size is
    drawn from an exponential distribution.
    '''
    source_dir = path.join(config.STORE_DIR, source_fid)
    if not path.exists(source_dir):
        os.mkdir(source_dir)

    stem = random_chars(20, nullable=False, chars=string.ascii_lowercase)
    num = random.randint(0, 100)
    if random_bool():
        msg_type = 'msg'
    else:
        msg_type = 'doc.gz'
    filename = '{}-{}-{}.gpg'.format(num, stem, msg_type)

    size = int(math.floor(random.expovariate(100000) * 1024 * 1024 * 500))
    sub_path = current_app.storage.path(source_fid, filename)
    with open(sub_path, 'w') as handle:
        handle.write('x' * size)

    return filename


def new_source_star(source_id):
    '''Add a ``SourceStar`` for the source with id ``source_id`` to the
    session. Nothing is committed here.
    '''
    source = Source.query.get(source_id)
    # NOTE(review): the second argument is presumably the "starred" flag --
    # confirm against models.SourceStar. It may be True, False or None.
    starred = bool_or_none()
    db.session.add(SourceStar(source, starred))


def new_reply(config, journalist_id, source_id):
    '''Create a fake reply file and add a ``Reply`` from the journalist with
    id ``journalist_id`` to the source with id ``source_id``.

    Does nothing if the source has no filesystem id. Nothing is committed
    here.
    '''
    source = Source.query.get(source_id)
    fid = source.filesystem_id

    # A source may have a null fid according to the DB, but this will
    # break storage.path.
    if fid is None:
        return

    journalist = Journalist.query.get(journalist_id)
    db.session.add(Reply(journalist, source, fake_file(config, fid)))


def new_journalist_login_attempt(journalist_id):
    '''Add a ``JournalistLoginAttempt`` for the journalist with id
    ``journalist_id`` to the session. Nothing is committed here.

    The timestamp is overwritten with a random datetime, or ``None``.
    '''
    attempt = JournalistLoginAttempt(Journalist.query.get(journalist_id))
    attempt.timestamp = random_datetime(nullable=True)
    db.session.add(attempt)


def new_abandoned_submission(config, source_id):
    '''For issue #1189: create a submission that points at ``source_id``
    and then delete the source row with that id.

    The submission is built against an arbitrary existing source that has a
    filesystem id, so that a real file can be written. Its ``source_id`` is
    then overwritten with ``source_id``. The submission is committed, but
    the DELETE issued afterwards is left pending in the session; the caller
    is responsible for committing it.

    Raises ``IndexError`` if no source with a filesystem id exists.
    '''
    # Only one donor row is needed, so ask the database for a single row
    # rather than loading every matching source.
    donor = Source.query.filter(Source.filesystem_id.isnot(None)).first()
    if donor is None:
        # Same exception type the previous ``.all()[0]`` lookup raised.
        raise IndexError('no source with a filesystem_id is available')
    filename = fake_file(config, donor.filesystem_id)

    # Use this as hack to create a real submission then swap out the source_id
    submission = Submission(donor, filename)
    submission.source_id = source_id
    db.session.add(submission)
    db.session.commit()
    delete_source(source_id)


def delete_source(source_id):
    '''For issue #1189: delete the ``sources`` row with id ``source_id``
    using a raw SQL statement.

    The statement is executed on the session but not committed here.
    '''
    statement = text('DELETE FROM sources WHERE id = :source_id')
    params = {'source_id': source_id}
    db.session.execute(statement, params)


def positive_int(s):
    '''Convert ``s`` to an ``int`` and require it to be at least 1.

    Used as the ``type`` callable for the ``--multiplier`` option. Raises
    ``ValueError`` if ``s`` is not a valid integer or is smaller than 1.
    '''
    value = int(s)
    if value >= 1:
        return value
    raise ValueError('{} is not >= 1'.format(s))


def load_data(config, multiplier):
    '''Populate the database and file store with semi-random QA data.

    ``config`` is passed to ``create_app`` and to the helpers that write
    files. ``multiplier`` scales the amount of data: ``JOURNALIST_COUNT`` and
    ``SOURCE_COUNT`` are both multiplied by it.

    The ``range`` loops below use integer ids starting at 1, so this
    presumably expects the journalist and source tables to start out empty
    -- confirm before running against a populated database.
    '''
    app = create_app(config)
    journalist_total = JOURNALIST_COUNT * multiplier
    source_total = SOURCE_COUNT * multiplier

    with app.app_context():
        for _ in range(journalist_total):
            new_journalist()
        db.session.commit()

        for _ in range(source_total):
            new_source()
        db.session.commit()

        # Every fifth source gets ``multiplier`` submissions.
        for sid in range(1, source_total, 5):
            for _ in range(multiplier):
                new_submission(config, sid)
        db.session.commit()

        # Every fifth source gets a star record.
        for sid in range(1, source_total, 5):
            new_source_star(sid)
        db.session.commit()

        # Every tenth journalist sends two replies to every tenth source.
        for jid in range(1, journalist_total, 10):
            for sid in range(1, source_total, 10):
                for _ in range(2):
                    new_reply(config, jid, sid)
        db.session.commit()

        for jid in range(1, journalist_total, 10):
            new_journalist_login_attempt(jid)
        db.session.commit()

        for sid in range(source_total, source_total + multiplier):
            new_abandoned_submission(config, sid)
        # new_abandoned_submission() ends by issuing a DELETE that it does
        # not commit. Without this commit the last DELETE would be dropped
        # when the app context exits.
        db.session.commit()


def arg_parser():
parser = ArgumentParser(
path.basename(__file__),
description='Loads data into the database for testing upgrades')
parser.add_argument('-m', '--multiplier', type=positive_int, default=100,
help=('Factor to multiply the loaded data by '
'(default 100)'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sweet!

return parser


def main():
    '''Parse the command-line arguments and load the QA data set using the
    SecureDrop configuration imported as ``sdconfig``.'''
    args = arg_parser().parse_args()
    # Fixed typo in the user-facing message ("make" -> "may").
    print('Loading data. This may take a while.')
    load_data(sdconfig, args.multiplier)


# Run the loader only when this file is executed as a script.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Interrupted by the user (Ctrl-C): exit with a non-zero status
        # instead of printing a traceback.
        print('') # for prompt on a newline
        sys.exit(1)
2 changes: 1 addition & 1 deletion securedrop/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
gnupg_logger.setLevel(logging.ERROR)
valid_levels = {'INFO': logging.INFO, 'DEBUG': logging.DEBUG}
gnupg_logger.setLevel(
valid_levels.get(os.environ.get('GNUPG_LOG_LEVEL', None), logging.ERROR)
valid_levels.get(os.environ.get('GNUPG_LOG_LEVEL', ''), logging.ERROR)
)


Expand Down
8 changes: 8 additions & 0 deletions securedrop/tests/test_qa_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-

from qa_loader import load_data


def test_load_data(journalist_app, config):
    """Smoke test: ``load_data`` runs to completion with the smallest
    multiplier.

    The ``journalist_app`` fixture is requested only so that the database is
    initialised before the loader runs.
    """
    load_data(config, multiplier=1)