Skip to content

Commit

Permalink
Merge pull request #49 from kchason/pre-commit
Browse files Browse the repository at this point in the history
Pre-Commit
  • Loading branch information
dc3-tsd authored Nov 4, 2024
2 parents 76d3c32 + 3c37742 commit 6c0a742
Show file tree
Hide file tree
Showing 73 changed files with 12,481 additions and 5,463 deletions.
20 changes: 12 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ on:
env:
# The Python version for the build jobs as well as the primary one for the test and artifact generation. This MUST be
# in the python-version matrix in the `test` job.
PYTHON_VERSION: "3.13"
PYTHON_VERSION: "3.13"
jobs:
test:
test:
runs-on: ubuntu-latest
strategy:
matrix:
# This allows the pipeline to be run against multiple Python versions. eg. [3.6, 3.7, 3.8, 3.9, 3.10]. This results
# in linting and unit tests running for all listed versions as well as the creation of packages and wheels on
# creation of a tag in Git.
python-version: [ "3.8", "3.10", "3.12", "3.13" ]
python-version: [ "3.9", "3.11", "3.13" ]

steps:
# Get the code from the repository to be packaged
Expand All @@ -35,7 +35,7 @@ jobs:
sudo apt install python3-setuptools
python -m pip install -q --upgrade pip
pip install .
pip install -q flake8 pytest pytest-cov build twine wheel
pip install -q flake8 pytest pytest-cov build twine wheel pre-commit
# Lint the Python code to check for syntax errors
- name: Lint with Flake8
Expand All @@ -45,6 +45,10 @@ jobs:
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# Run pre-commit on the repository
- name: Pre-Commit
run: pre-commit run -a

# Test the Python unit tests
- name: PyTest
run: |
Expand Down Expand Up @@ -81,7 +85,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}

# Install the packages to build the SQLite Dissect package
- name: Prepare Build Environment
run: |
Expand All @@ -105,8 +109,8 @@ jobs:
path: sqlite-dissect-windows-x64-${{ env.PYTHON_VERSION }}-binary.zip

linux-build:
runs-on: ubuntu-latest
runs-on: ubuntu-latest

steps:
# Get the code from the repository to be packaged
- name: Get Repo
Expand Down Expand Up @@ -146,7 +150,7 @@ jobs:
# Build the Sphinx documentation into a PDF for easier distribution
- name: Build Documentation
run: |
pip install -q sphinx
pip install -q sphinx
pip install -q sphinx-rtd-theme
sphinx-build -b html ./docs/source/ ./docs/build/
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,3 @@
/htmlcov
/docs/build
*.coverage

45 changes: 45 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
exclude_types:
- svg
- id: trailing-whitespace
exclude_types:
- svg
- repo: https://github.com/asottile/pyupgrade
rev: v3.18.0
hooks:
- id: pyupgrade
exclude: migrations/
args:
- --py37-plus
- repo: https://github.com/myint/autoflake
rev: v2.3.1
hooks:
- id: autoflake
exclude: ^migrations/
args:
- --remove-all-unused-imports
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "--filter-files"]
- id: isort
name: isort (cython)
types: [cython]
args: ["--profile", "black", "--filter-files"]
- id: isort
name: isort (pyi)
types: [pyi]
args: ["--profile", "black", "--filter-files"]
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black

exclude: ^.+.(min|pack).(js|css)|vendor/.+$
44 changes: 22 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SQLite Dissect is a SQLite parser with recovery abilities over SQLite databases
and their accompanying journal files. If no options are set other than the file
name, the default behaviour will be to check for any journal files and print to
the console the output of the SQLite files. The directory of the SQLite file
specified will be searched through to find the associated journal files. If
specified will be searched through to find the associated journal files. If
they are not in the same directory as the specified file, they will not be found
and their location will need to be specified in the command. SQLite carving
will not be done by default. Please see the options below to enable carving.
Expand All @@ -23,7 +23,7 @@ will not be done by default. Please see the options below to enable carving.

| Argument | Description | Example Usage |
|-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|
| SQLITE_PATH | The path and filename of the SQLite file or directory to be carved. If a directory is provided, it will recursively search for files with the extensions: `.db`, `.sqlite`, `.sqlite3`. | `sqlite_dissect SQLITE_PATH` |
| SQLITE_PATH | The path and filename of the SQLite file or directory to be carved. If a directory is provided, it will recursively search for files with the extensions: `.db`, `.sqlite`, `.sqlite3`. | `sqlite_dissect SQLITE_PATH` |


#### Optional Arguments:
Expand Down Expand Up @@ -85,18 +85,18 @@ sqlite_dissect [SQLITE_PATH] --signatures -d [OUTPUT_DIRECTORY] -e sqlite --carv
sqlite_dissect [SQLITE_PATH] -d [OUTPUT_DIRECTORY] -e sqlite --carve --carve-freelists -b [TABLES]
```

6. Parse a SQLite database file and print the output to a xlsx workbook along with generating signatures and
carving entries. The schema history (schema updates throughout the WAL are included if a WAL file is detected) and
6. Parse a SQLite database file and print the output to a xlsx workbook along with generating signatures and
carving entries. The schema history (schema updates throughout the WAL are included if a WAL file is detected) and
signatures will be printed to standard output. The log level will be set to debug and all log messages will be
output to the specified log file.

```shell
sqlite_dissect [SQLITE_PATH] -d [OUTPUT_DIRECTORY] -e xlsx --schema-history --carve --signatures --log-level debug -i [LOG_FILE]
```

7. Parse a SQLite database file along with a specified rollback journal file and send the output to CSV files.
7. Parse a SQLite database file along with a specified rollback journal file and send the output to CSV files.
(CSV is the only output option currently implemented for rollback journal files)

```shell
sqlite_dissect [SQLITE_PATH] -d [OUTPUT_DIRECTORY] -e csv --carve -j [ROLLBACK_JOURNAL]
```
Expand Down Expand Up @@ -149,17 +149,17 @@ export SQLD_EXPORT_TYPE="[text, sqlite, case]"
This application focuses on carving by analyzing the allocated content within each of the SQLite
database tables and creating signatures. Where there is no content in the table, the signature
is based off of analyzing the create table statement in the master schema table. The signature
contains the series of possible serial types that can be stored within the file for that table.
contains the series of possible serial types that can be stored within the file for that table.
This signature is then applied to the unallocated content and freeblocks of the table b-tree in
the file. This includes both interior and leaf table b-tree pages for that table. The signatures
the file. This includes both interior and leaf table b-tree pages for that table. The signatures
are only applied to the pages belonging to the particular b-tree page it was generated from due
to initial research showing that the pages when created or pulled from the freelist set are
overwritten with zeros for the unallocated portions. Fragments within the pages can be reported
on but, due to the size (<4 bytes), are not carved. Due to the fact that entries are added into
tables in SQLite from the end of the page and moving toward the beginning, the carving works
in the same manner in order to detect previously partially overwritten entries better. This
in the same manner in order to detect previously partially overwritten entries better. This
carving can also be applied to the set of freelist pages within the SQLite file if specified
but the freelist pages are currently treated as sets of unallocated data with the exception
but the freelist pages are currently treated as sets of unallocated data with the exception
of the freelist page metadata.

The carving process does not currently account for index b-trees as the more pertinent information
Expand All @@ -185,12 +185,12 @@ a full unallocated block and only support export to csv files.
SQLite Dissect can support output to various forms: text, csv, xlsx, and sqlite. Due to certain
constraints on what can be written to some file types, certain modifications need to be made. For
instance, when writing SQLite columns such as row_id that are already going to pre-exist in the table
for export to a SQLite file we need to preface the columns with "sd_" so they will not conflict with
the actual row_id column. This also applies to internal schema objects. If certain SQLite tables are
requested to be written to a SQLite file, then these will be prefaced with "iso_" so they will not
conflict with similar internal schema objects that may already exist in the SQLite file being written
to. In xlsx or csv, due to a "=" symbol indicating a type of equation, these are prefaced with a " "
character to avoid this issue. More details can be found in the code documentation of the export classes
for export to a SQLite file we need to preface the columns with "sd_" so they will not conflict with
the actual row_id column. This also applies to internal schema objects. If certain SQLite tables are
requested to be written to a SQLite file, then these will be prefaced with "iso_" so they will not
conflict with similar internal schema objects that may already exist in the SQLite file being written
to. In xlsx or csv, due to a "=" symbol indicating a type of equation, these are prefaced with a " "
character to avoid this issue. More details can be found in the code documentation of the export classes
themselves.

SQLite Dissect opens the file as read only and acts as a read only interpreter when parsing and carving
Expand All @@ -203,10 +203,10 @@ specified for output.
(WAL or rollback) file. Journal files by themselves are not supported yet.

#### Currently not implemented:
1. Signatures and carving are not implemented for "without rowid" tables or indexes. This will not cause an error
1. Signatures and carving are not implemented for "without rowid" tables or indexes. This will not cause an error
but will skip signature generation and carving processes.
2. Signatures and carving are not implemented for virtual tables. This will not cause an error but will skip
signature generation and carving processes. `Note: Even though virtual tables are skipped, virtual tables may
2. Signatures and carving are not implemented for virtual tables. This will not cause an error but will skip
signature generation and carving processes. `Note: Even though virtual tables are skipped, virtual tables may
create other non-virtual tables which are not skipped. Currently nothing ties these tables back to the virtual
table that created them.`
3. Invalidated frames in WAL files are currently skipped and not parsed. `Note: This applies to previous WAL records
Expand Down Expand Up @@ -286,16 +286,16 @@ TODO:
- [ ] Incorporate signature generation input and output files once implemented.
- [ ] Incorporate "store in memory" arguments (currently set to False, more in depth operations may want it True).
- [ ] Implement multiple passes/depths.
- [ ] Test use cases for exempted tables with rollback journal and when combined with specified tables.
- [ ] Check on name vs table_name properties of the master schema entry.
- [ ] Test use cases for exempted tables with rollback journal and when combined with specified tables.
- [ ] Check on name vs table_name properties of the master schema entry.
- [ ] Test cases where the schema changes throughout the WAL file.
- [ ] Investigate handling of virtual and "without rowid" tables when creating table signatures through the interface.
- [ ] Documentation on "without rowid" tables and indexes in references to carving in help documentation.
- [ ] Make sure to address/print unallocated space (especially uncarved) from updated page numbers in commit records.
- [ ] Research if there can be journal files with a zero length database file or zero-length journal files.
- [ ] Research if there can be combinations of multiple rollback journal and WAL files with the SQLite database.
- [ ] Validate initial research that allocation of freelist pages to a b-tree results in a wipe of the page data.
- [ ] Add additional logging messages to the master schema entries skipped in signature generation.
- [ ] Add additional logging messages to the master schema entries skipped in signature generation.
- [ ] Integrate in the SQLite Forensic Corpus into tests.
- [ ] Look into updating terminology for versioning to timelining.
- [ ] Create PyUnit tests.
Expand Down
45 changes: 33 additions & 12 deletions api_usage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os

import sqlite_dissect.constants as sqlite_constants
import sqlite_dissect.interface as sqlite_interface

Expand All @@ -13,9 +14,11 @@

# Setup logging
logging_level = logging.ERROR
logging_format = '%(levelname)s %(asctime)s [%(pathname)s] %(funcName)s at line %(lineno)d: %(message)s'
logging_date_format = '%d %b %Y %H:%M:%S'
logging.basicConfig(level=logging_level, format=logging_format, datefmt=logging_date_format)
logging_format = "%(levelname)s %(asctime)s [%(pathname)s] %(funcName)s at line %(lineno)d: %(message)s"
logging_date_format = "%d %b %Y %H:%M:%S"
logging.basicConfig(
level=logging_level, format=logging_format, datefmt=logging_date_format
)

# Setup console logging
console_logger = logging.StreamHandler()
Expand Down Expand Up @@ -47,35 +50,53 @@

# Create the write ahead log
wal_file_name = file_name + sqlite_constants.WAL_FILE_POSTFIX
write_ahead_log = sqlite_interface.create_write_ahead_log(wal_file_name) if os.path.exists(wal_file_name) else None
write_ahead_log = (
sqlite_interface.create_write_ahead_log(wal_file_name)
if os.path.exists(wal_file_name)
else None
)

# Create the version history
version_history = sqlite_interface.create_version_history(database, write_ahead_log)

# Create the signature we are interested in carving
table_signature = sqlite_interface.create_table_signature(table_name, database, version_history)
table_signature = sqlite_interface.create_table_signature(
table_name, database, version_history
)

# Account for "without rowid"/virtual table signatures until supported
if not table_signature:
print("Table signature not supported (\"without rowid\" table or virtual table)")
print('Table signature not supported ("without rowid" table or virtual table)')
exit(0)

# Get the column indices of the columns we are interested in
column_name_indices = {}
for column_name in column_names:
column_name_indices[column_name] = sqlite_interface.get_column_index(column_name, table_name, version_history)
column_name_indices[column_name] = sqlite_interface.get_column_index(
column_name, table_name, version_history
)

# Get a version history iterator for the table
carve_freelists = True
table_history_iterator = sqlite_interface.get_version_history_iterator(table_name, version_history,
table_signature, carve_freelists)
table_history_iterator = sqlite_interface.get_version_history_iterator(
table_name, version_history, table_signature, carve_freelists
)
# Iterate through the commits in the history for this table
for commit in table_history_iterator:
# The table was only modified if the commit was updated for this table and make sure there were carved cells
if commit.updated and commit.carved_cells:
carved_cells = commit.carved_cells
for carved_cell in carved_cells.itervalues():
for column_name in column_name_indices.keys():
record_column = carved_cell.payload.record_columns[column_name_indices.get(column_name)]
print("Commit version: %s table record column: %s has serial type: %s with value of: \"%s\"." %\
(commit.version_number, column_name, record_column.serial_type, record_column.value))
record_column = carved_cell.payload.record_columns[
column_name_indices.get(column_name)
]
print(
'Commit version: %s table record column: %s has serial type: %s with value of: "%s".'
% (
commit.version_number,
column_name,
record_column.serial_type,
record_column.value,
)
)
28 changes: 14 additions & 14 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,32 @@
import os
import sys

sys.path.insert(0, os.path.abspath('../..'))
sys.path.insert(0, os.path.abspath("../.."))

# -- Project information -----------------------------------------------------

project = 'DC3 SQLite Dissect'
copyright = '2022, Department of Defense Cyber Crime Center (DC3)'
author = 'Department of Defense Cyber Crime Center (DC3)'
project = "DC3 SQLite Dissect"
copyright = "2022, Department of Defense Cyber Crime Center (DC3)"
author = "Department of Defense Cyber Crime Center (DC3)"

# The full version, including alpha/beta/rc tags
release = '1.0.0'
release = "1.0.0"

# -- General configuration ---------------------------------------------------
master_doc = 'index'
master_doc = "index"

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.coverage',
'sphinx.ext.napoleon'
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.coverage",
"sphinx.ext.napoleon",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
Expand All @@ -50,15 +50,15 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]

# These paths are either relative to html_static_path
# or fully qualified paths (eg. https://...)
html_css_files = [
'docs.css',
"docs.css",
]
Loading

0 comments on commit 6c0a742

Please sign in to comment.