Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract and display original document filenames #452

Merged
merged 4 commits into from
Jul 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ mypy: ## Run static type checker
securedrop_client/resources/__init__.py \
securedrop_client/storage.py \
securedrop_client/queue.py \
securedrop_client/api_jobs/__init__.py \
securedrop_client/api_jobs/base.py \
securedrop_client/api_jobs/downloads.py \
securedrop_client/api_jobs/__init__.py \
securedrop_client/api_jobs/base.py \
securedrop_client/api_jobs/downloads.py \
securedrop_client/api_jobs/uploads.py

.PHONY: clean
Expand Down Expand Up @@ -56,7 +56,7 @@ safety: ## Runs `safety check` to check python dependencies for vulnerabilities
.PHONY: bandit
bandit: ## Run bandit with medium level excluding test-related folders
pip install --upgrade pip && \
pip install --upgrade bandit && \
pip install --upgrade bandit && \
bandit -ll --recursive . --exclude ./tests,./.venv

.PHONY: check
Expand Down
31 changes: 31 additions & 0 deletions alembic/versions/bafdcae12f97_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Add files.original_filename

Revision ID: bafdcae12f97
Revises: fecf1191b6f0
Create Date: 2019-06-24 13:45:47.239212

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "bafdcae12f97"
down_revision = "fecf1191b6f0"
branch_labels = None
depends_on = None


def upgrade():
op.add_column(
"files",
sa.Column("original_filename", sa.String(length=255), nullable=False, server_default=""),
)

conn = op.get_bind()
conn.execute("""UPDATE files SET original_filename = replace(filename, '.gz.gpg', '');""")


def downgrade():
with op.batch_alter_table('files', schema=None) as batch_op:
batch_op.drop_column('original_filename')
27 changes: 20 additions & 7 deletions securedrop_client/api_jobs/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ def call_download_api(self, api: API, db_object: Union[File, Message]) -> Tuple[
'''
raise NotImplementedError

def call_decrypt(self, filepath: str, session: Session = None) -> None:
def call_decrypt(self, filepath: str, session: Session = None) -> str:
'''
Method for decrypting the file and storing the plaintext result.

Returns the original filename.

This MUST raise an exception if and only if the decryption fails.
'''
raise NotImplementedError
Expand Down Expand Up @@ -105,8 +107,10 @@ def _decrypt(self,
Decrypt the file located at the given filepath and mark it as decrypted.
'''
try:
self.call_decrypt(filepath, session)
mark_as_decrypted(type(db_object), db_object.uuid, session)
original_filename = self.call_decrypt(filepath, session)
mark_as_decrypted(
type(db_object), db_object.uuid, session, original_filename=original_filename
)
logger.info("File decrypted: {}".format(os.path.basename(filepath)))
except CryptoError as e:
mark_as_decrypted(type(db_object), db_object.uuid, session, is_decrypted=False)
Expand Down Expand Up @@ -166,14 +170,16 @@ def call_download_api(self, api: API, db_object: Reply) -> Tuple[str, str]:
sdk_object.source_uuid = db_object.source.uuid
return api.download_reply(sdk_object)

def call_decrypt(self, filepath: str, session: Session = None) -> None:
def call_decrypt(self, filepath: str, session: Session = None) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be helpful to update the comment for this method in the DownloadJob base class to explain what kind of string it should return since calling decrypt will return both the original filename and filepath strings. It would be helpful to know that we want to get the decrypted original filename so we can store it in the local db.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. Whatever we decide to return from decrypt_submission_or_reply should be explained here.

'''
Override DownloadJob.

Decrypt the file located at the given filepath and store its plaintext content in the local
database.

The file containing the plaintext should be deleted once the content is stored in the db.

The return value is an empty string; replies have no original filename.
'''
with NamedTemporaryFile('w+') as plaintext_file:
self.gpg.decrypt_submission_or_reply(filepath, plaintext_file.name, is_doc=False)
Expand All @@ -182,6 +188,7 @@ def call_decrypt(self, filepath: str, session: Session = None) -> None:
uuid=self.uuid,
session=session,
content=plaintext_file.read())
return ""


class MessageDownloadJob(DownloadJob):
Expand Down Expand Up @@ -209,14 +216,16 @@ def call_download_api(self, api: API, db_object: Message) -> Tuple[str, str]:
sdk_object.filename = db_object.filename
return api.download_submission(sdk_object)

def call_decrypt(self, filepath: str, session: Session = None) -> None:
def call_decrypt(self, filepath: str, session: Session = None) -> str:
'''
Override DownloadJob.

Decrypt the file located at the given filepath and store its plaintext content in the local
database.

The file containing the plaintext should be deleted once the content is stored in the db.

The return value is an empty string; messages have no original filename.
'''
with NamedTemporaryFile('w+') as plaintext_file:
self.gpg.decrypt_submission_or_reply(filepath, plaintext_file.name, is_doc=False)
Expand All @@ -225,6 +234,7 @@ def call_decrypt(self, filepath: str, session: Session = None) -> None:
uuid=self.uuid,
session=session,
content=plaintext_file.read())
return ""


class FileDownloadJob(DownloadJob):
Expand Down Expand Up @@ -252,7 +262,7 @@ def call_download_api(self, api: API, db_object: File) -> Tuple[str, str]:
sdk_object.filename = db_object.filename
return api.download_submission(sdk_object)

def call_decrypt(self, filepath: str, session: Session = None) -> None:
def call_decrypt(self, filepath: str, session: Session = None) -> str:
'''
Override DownloadJob.

Expand All @@ -264,4 +274,7 @@ def call_decrypt(self, filepath: str, session: Session = None) -> None:
'''
fn_no_ext, _ = os.path.splitext(os.path.splitext(os.path.basename(filepath))[0])
plaintext_filepath = os.path.join(self.data_dir, fn_no_ext)
self.gpg.decrypt_submission_or_reply(filepath, plaintext_filepath, is_doc=True)
original_filename = self.gpg.decrypt_submission_or_reply(
filepath, plaintext_filepath, is_doc=True
)
return original_filename
45 changes: 43 additions & 2 deletions securedrop_client/crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
import os
import shutil
import struct
import subprocess
import tempfile

Expand All @@ -37,8 +38,44 @@ class CryptoError(Exception):
pass


class GpgHelper:
GZIP_FILE_IDENTIFICATION = b"\037\213"
GZIP_FLAG_EXTRA_FIELDS = 4 # gzip.FEXTRA
GZIP_FLAG_FILENAME = 8 # gzip.FNAME


def read_gzip_header_filename(filename: str) -> str:
"""
Extract the original filename from the header of a gzipped file.

Adapted from Python's gzip._GzipReader._read_gzip_header.
"""
original_filename = ""
with open(filename, "rb") as f:
gzip_header_identification = f.read(2)
if gzip_header_identification != GZIP_FILE_IDENTIFICATION:
raise OSError("Not a gzipped file (%r)" % gzip_header_identification)

(gzip_header_compression_method, gzip_header_flags, _) = struct.unpack("<BBIxx", f.read(8))
if gzip_header_compression_method != 8:
raise OSError("Unknown compression method")

if gzip_header_flags & GZIP_FLAG_EXTRA_FIELDS:
extra_len, = struct.unpack("<H", f.read(2))
f.read(extra_len)

if gzip_header_flags & GZIP_FLAG_FILENAME:
fb = b""
while True:
s = f.read(1)
if not s or s == b"\000":
break
fb += s
original_filename = str(fb, "utf-8")

return original_filename


class GpgHelper:
def __init__(self, sdc_home: str, session_maker: scoped_session, is_qubes: bool) -> None:
'''
:param sdc_home: Home directory for the SecureDrop client
Expand All @@ -56,6 +93,9 @@ def decrypt_submission_or_reply(self,
filepath: str,
plaintext_filepath: str,
is_doc: bool = False) -> str:

original_filename, _ = os.path.splitext(os.path.splitext(os.path.basename(filepath))[0])

err = tempfile.NamedTemporaryFile(suffix=".message-error", delete=False)
with tempfile.NamedTemporaryFile(suffix=".message") as out:
cmd = self._gpg_cmd_base()
Expand Down Expand Up @@ -84,12 +124,13 @@ def decrypt_submission_or_reply(self,

# Store the plaintext in the file located at the specified plaintext_filepath
if is_doc:
original_filename = read_gzip_header_filename(out.name)
with gzip.open(out.name, 'rb') as infile, open(plaintext_filepath, 'wb') as outfile:
shutil.copyfileobj(infile, outfile)
else:
shutil.copy(out.name, plaintext_filepath)

return plaintext_filepath # This return is just used by tests and should be removed
return original_filename

def _gpg_cmd_base(self) -> list:
if self.is_qubes: # pragma: no cover
Expand Down
10 changes: 10 additions & 0 deletions securedrop_client/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ class File(Base):
id = Column(Integer, primary_key=True)
uuid = Column(String(36), unique=True, nullable=False)
filename = Column(String(255), nullable=False)

# Files from the SecureDrop journalist API are gzipped, then
# encrypted with GPG. The gzip header contains the original
# filename, which makes it easier for the client to open the file
# with the right application. We'll record that filename here
# after we've downloaded, decrypted and extracted the file.
# If the header does not contain the filename for some reason,
# this should be the same as filename.
original_filename = Column(String(255), nullable=False, server_default="")

file_counter = Column(Integer, nullable=False)
size = Column(Integer, nullable=False)
download_url = Column(String(255), nullable=False)
Expand Down
2 changes: 1 addition & 1 deletion securedrop_client/gui/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,7 @@ def update(self) -> None:
icon.setPixmap(load_image('file.png'))

if self.file.is_downloaded:
description = QLabel("Open")
description = QLabel(html.escape(self.file.original_filename or self.file.filename))
else:
human_filesize = humanize_filesize(self.file.size)
description = QLabel("Download ({})".format(human_filesize))
Expand Down
8 changes: 6 additions & 2 deletions securedrop_client/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,19 +528,23 @@ def on_file_open(self, file_uuid: str) -> None:
file = self.get_file(file_uuid)
fn_no_ext, _ = os.path.splitext(os.path.splitext(file.filename)[0])
submission_filepath = os.path.join(self.data_dir, fn_no_ext)
original_filepath = os.path.join(self.data_dir, file.original_filename)

if os.path.exists(original_filepath):
os.remove(original_filepath)
os.link(submission_filepath, original_filepath)
if self.proxy:
# Running on Qubes.
command = "qvm-open-in-vm"
args = ['$dispvm:sd-svs-disp', submission_filepath]
args = ['$dispvm:sd-svs-disp', original_filepath]

# QProcess (Qt) or Python's subprocess? Who cares? They do the
# same thing. :-)
process = QProcess(self)
process.start(command, args)
else: # pragma: no cover
# Non Qubes OS. Just log the event for now.
logger.info('Opening file "{}".'.format(submission_filepath))
logger.info('Opening file "{}".'.format(original_filepath))

def on_submission_download(
self,
Expand Down
34 changes: 25 additions & 9 deletions securedrop_client/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,21 @@ def mark_as_decrypted(
model_type: Union[Type[File], Type[Message], Type[Reply]],
uuid: str,
session: Session,
is_decrypted: bool = True
is_decrypted: bool = True,
original_filename: str = None
) -> None:
"""
Mark object as downloaded in the database.
"""
db_obj = session.query(model_type).filter_by(uuid=uuid).one()
db_obj.is_decrypted = is_decrypted

if model_type == File:
if original_filename:
db_obj.original_filename = original_filename
else:
db_obj.original_filename = db_obj.filename

session.add(db_obj)
session.commit()

Expand All @@ -397,18 +405,26 @@ def delete_single_submission_or_reply_on_disk(obj_db: Union[File, Message, Reply
"""
Delete on disk a single submission or reply.
"""
files_to_delete = []
try:
if obj_db.original_filename:
files_to_delete.append(os.path.join(data_dir, obj_db.original_filename))
except AttributeError:
# only files have it
pass

filename_without_extensions = obj_db.filename.split('.')[0]
files_to_delete = os.path.join(
file_glob_pattern = os.path.join(
data_dir,
'{}*'.format(filename_without_extensions))
logging.info('Deleting all {} files'.format(filename_without_extensions))
'{}*'.format(filename_without_extensions)
)
files_to_delete.extend(glob.glob(file_glob_pattern))

try:
for file_to_delete in glob.glob(files_to_delete):
for file_to_delete in files_to_delete:
try:
os.remove(file_to_delete)
except FileNotFoundError:
logging.info(
'File {} already deleted, skipping'.format(file_to_delete))
except FileNotFoundError:
logging.info('File %s already deleted, skipping', file_to_delete)


def rename_file(data_dir: str, filename: str, new_filename: str) -> None:
Expand Down
Loading