diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml index b32268f..5d73a75 100644 --- a/.github/workflows/unit-testing.yml +++ b/.github/workflows/unit-testing.yml @@ -5,4 +5,5 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 + - run: sudo apt-get update && sudo apt-get install -y python3-mysql.connector python3-requests - run: python3 -m unittest diff --git a/.github/workflows/validate-code-styles.yml b/.github/workflows/validate-code-styles.yml index 60ba8d9..549118b 100644 --- a/.github/workflows/validate-code-styles.yml +++ b/.github/workflows/validate-code-styles.yml @@ -2,9 +2,10 @@ name: "Validate Code Styles" on: [push] jobs: flake8: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v3 - - run: sudo apt-get install -y flake8 python3-pip - - run: pip3 install flake8-bugbear - - run: flake8 --require-plugins pycodestyle,flake8-bugbear + - run: sudo apt-get update && sudo apt-get install -y python3-full + - run: python3 -m venv ./venv + - run: ./venv/bin/pip install flake8 flake8-bugbear + - run: ./venv/bin/python -m flake8 --exclude venv --require-plugins pycodestyle,flake8-bugbear diff --git a/README.md b/README.md index 857bdab..4ce0e53 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ If you'd like to install Wordfence CLI manually or use CLI for development, you - Python packages: - `packaging` >= 21.0 - `requests` >= 2.3 + - `mysql-connector-python` >= 8.0 ### Obtaining a license diff --git a/pyproject.toml b/pyproject.toml index b67f6bc..61f1406 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,8 @@ classifiers = [ ] dependencies = [ "packaging>=21.0", - "requests>=2.3" + "requests>=2.3", + "mysql-connector-python>=8.0" ] dynamic = [ "version" ] diff --git a/requirements.txt b/requirements.txt index a2c0854..86309c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # Runtime dependencies packaging >= 21.0 requests >= 2.3 
+mysql-connector-python >= 8.0 # Build requirements build ~= 0.10 setuptools ~= 68.0 diff --git a/wordfence/api/noc1.py b/wordfence/api/noc1.py index 7b3f755..8b5fb94 100644 --- a/wordfence/api/noc1.py +++ b/wordfence/api/noc1.py @@ -9,6 +9,8 @@ from ..intel.signatures import CommonString, Signature, SignatureSet, \ PrecompiledSignatureSet, deserialize_precompiled_signature_set +from ..intel.database_rules import DatabaseRuleSet, JSON_VALIDATOR as \ + DATABASE_RULES_JSON_VALIDATOR, parse_database_rules from ..util.validation import DictionaryValidator, ListValidator, Validator, \ OptionalValueValidator from ..util.platform import Platform @@ -258,3 +260,11 @@ def get_wp_file_content( body=parameters ) return response + + def get_database_rules(self) -> DatabaseRuleSet: + response = self.request('get_database_rules') + validator = DictionaryValidator({ + 'rules': DATABASE_RULES_JSON_VALIDATOR + }) + self.validate_response(response, validator) + return parse_database_rules(response['rules'], pre_validated=True) diff --git a/wordfence/cli/config/base_config_definitions.py b/wordfence/cli/config/base_config_definitions.py index 7a34ef4..3cbce52 100644 --- a/wordfence/cli/config/base_config_definitions.py +++ b/wordfence/cli/config/base_config_definitions.py @@ -86,9 +86,7 @@ "if supported using the STARTTLS SMTP command.", "context": "ALL", "argument_type": "OPTION", - "meta": { - "valid_options": [mode.value for mode in SmtpTlsMode] - }, + "meta": {"valid_options": [mode.value for mode in SmtpTlsMode]}, "default": SmtpTlsMode.STARTTLS.value, "category": "Email" }, diff --git a/wordfence/cli/countsites/definition.py b/wordfence/cli/countsites/definition.py index 5dde9db..7afa179 100644 --- a/wordfence/cli/countsites/definition.py +++ b/wordfence/cli/countsites/definition.py @@ -20,7 +20,7 @@ "default_type": "base64" }, "require-path": { - "description": "When enabled, invoking the remediate command without " + "description": "When enabled, invoking the count command 
class DbScanSubcommand(Subcommand):
    """Implements the ``db-scan`` subcommand.

    Collects the databases to scan (from command line options, from
    WordPress installations located on disk, or from JSON config files),
    loads the scanning rules and reports every row matched by a rule.
    """

    def _resolve_password(self) -> Optional[str]:
        """Determine the password for the database given on the CLI.

        Precedence: the explicit ``password`` option (a warning is logged
        because it is insecure), then an interactive prompt when
        ``prompt_for_password`` is set, then the environment variable
        named by ``password_env``.

        Returns:
            The password, or None when the environment variable is unset.
        """
        if self.config.password is not None:
            log.warning(
                'Providing passwords via command line parameters is '
                'insecure as they can be exposed to other users'
            )
            return self.config.password
        if self.config.prompt_for_password:
            return getpass()
        return os.environ.get(self.config.password_env)

    def _get_base_database(self) -> Optional[WordpressDatabase]:
        """Build the database described by the command line options.

        Returns:
            The database, or None when no ``database_name`` was given.
        """
        name = self.config.database_name
        if name is None:
            return None
        server = WordpressDatabaseServer(
            host=self.config.host,
            port=self.config.port,
            user=self.config.user,
            password=self._resolve_password()
        )
        return WordpressDatabase(
            name=name,
            server=server,
            # Previously omitted, which made the "prefix" option a no-op
            prefix=self.config.prefix,
            collation=self.config.collation
        )

    def _get_search_paths(
                self,
                io_manager: IoManager,
                include_current: bool = False
            ) -> Generator[bytes, None, None]:
        """Yield the paths supplied as trailing arguments and via stdin.

        When there are no trailing arguments, ``include_current`` is set
        and stdin is not being read, the current working directory is
        yielded instead.
        """
        if self.config.trailing_arguments:
            yield from self.config.trailing_arguments
        elif include_current and not io_manager.should_read_stdin():
            yield os.fsencode(os.getcwd())
        if io_manager.should_read_stdin():
            yield from io_manager.get_input_reader().read_all_entries()

    def _locate_site_databases(
                self,
                io_manager: IoManager
            ) -> Generator[WordpressDatabase, None, None]:
        """Yield the database of every WordPress site under the paths.

        Raises:
            WordpressException: If a site's database details cannot be
                extracted and ``allow_io_errors`` is disabled.
        """
        for path in self._get_search_paths(io_manager, include_current=True):
            locator = WordpressLocator(
                path=path,
                allow_nested=self.config.allow_nested,
                allow_io_errors=self.config.allow_io_errors
            )
            for core_path in locator.locate_core_paths():
                site = WordpressSite(core_path)
                log.debug(
                    'Located WordPress site at ' + os.fsdecode(core_path)
                )
                try:
                    database = site.get_database()
                except WordpressException:
                    if not self.config.allow_io_errors:
                        raise
                    log.warning(
                        'Failed to extract database credentials '
                        'for site at ' + os.fsdecode(core_path)
                    )
                else:
                    yield database

    def _get_json_validator(self) -> ListValidator:
        """Return the validator for JSON database config files."""
        return ListValidator(
            DictionaryValidator({
                'name': str,
                'user': str,
                'password': str,
                'host': str,
                'port': OptionalValueValidator(int),
                'collation': OptionalValueValidator(str)
            }, optional_keys={'port', 'collation'})
        )

    def _parse_configured_databases(
                self,
                io_manager: IoManager
            ) -> Generator[WordpressDatabase, None, None]:
        """Yield the databases listed in each JSON config file path."""
        validator = self._get_json_validator()
        for path in self._get_search_paths(io_manager):
            # Close the file before yielding so it is not held open while
            # the caller scans each database
            with open(path, 'rb') as file:
                config_list = json.load(file)
            validator.validate(config_list)
            for config in config_list:
                # "port" and "collation" may be absent (optional_keys) and,
                # judging by OptionalValueValidator, may also be null; both
                # cases fall back to the defaults
                port = config.get('port')
                if port is None:
                    port = DEFAULT_PORT
                collation = config.get('collation')
                if collation is None:
                    collation = DEFAULT_COLLATION
                yield WordpressDatabase(
                    name=config['name'],
                    server=WordpressDatabaseServer(
                        host=config['host'],
                        port=port,
                        user=config['user'],
                        password=config['password']
                    ),
                    collation=collation
                )

    def _get_databases(
                self,
                io_manager: IoManager
            ) -> List[WordpressDatabase]:
        """Return every database to scan.

        The database from the command line options (if any) comes first,
        followed by those located from sites (``locate_sites``) or parsed
        from JSON config files.
        """
        databases = []
        base = self._get_base_database()
        if base is not None:
            databases.append(base)
        if self.config.locate_sites:
            generator = self._locate_site_databases(io_manager)
        else:
            generator = self._parse_configured_databases(io_manager)
        databases.extend(generator)
        return databases

    def _load_remote_rules(self) -> DatabaseRuleSet:
        """Fetch the rules from the NOC1 API, cached for one day."""

        def fetch_rules() -> DatabaseRuleSet:
            client = self.context.get_noc1_client()
            return client.get_database_rules()

        cacheable = caching.Cacheable(
            'database_rules',
            fetch_rules,
            caching.DURATION_ONE_DAY
        )

        return cacheable.get(self.cache)

    def _filter_rules(self, rule_set: DatabaseRuleSet) -> None:
        """Apply the ``include_rules``/``exclude_rules`` options in place."""
        included = None
        if self.config.include_rules:
            included = set(self.config.include_rules)
        excluded = None
        if self.config.exclude_rules:
            excluded = set(self.config.exclude_rules)
        rule_set.filter_rules(included, excluded)

    def _load_rules(self) -> DatabaseRuleSet:
        """Assemble the rule set from remote rules and local rule files."""
        if self.config.use_remote_rules:
            rule_set = self._load_remote_rules()
        else:
            rule_set = DatabaseRuleSet()
        if self.config.rules_file is not not_set_token:
            for rules_file in self.config.rules_file:
                load_database_rules(rules_file, rule_set)
        self._filter_rules(rule_set)
        return rule_set

    def invoke(self) -> int:
        """Run the scan and write the report.

        Returns:
            0 on success.

        Raises:
            ConfigurationException: If input is required but no database
                was scanned.
        """
        report_manager = DatabaseScanReportManager(self.context)
        io_manager = report_manager.get_io_manager()
        rule_set = self._load_rules()
        scanner = DatabaseScanner(rule_set)
        with report_manager.open_output_file() as output_file:
            report = report_manager.initialize_report(output_file)
            for database in self._get_databases(io_manager):
                for result in scanner.scan(database):
                    report.add_result(result)
            report.database_count = scanner.scan_count
            report.complete()
        if self.context.requires_input(self.config.require_database) \
                and scanner.scan_count == 0:
            raise ConfigurationException(
                'At least one database to scan must be specified'
            )
        elapsed_time = round(scanner.get_elapsed_time())
        log.info(
            f'Found {report.result_count} result(s) after scanning '
            f'{scanner.scan_count} database(s) over {elapsed_time} '
            'second(s)'
        )
        return 0
"description": "The environment variable name to check for a password", + "context": "ALL", + "argument_type": "OPTION", + "default": "WFCLI_DB_PASSWORD", + "category": "Database Connectivity" + }, + "prefix": { + "short_name": "x", + "description": "The WordPress database prefix", + "context": "CLI", + "argument_type": "OPTION", + "default": DEFAULT_PREFIX, + "category": "Database Connectivity" + }, + "database-name": { + "short_name": "D", + "description": "The MySQL database name", + "context": "CLI", + "argument_type": "OPTION", + "default": None, + "category": "Database Connectivity" + }, + "collation": { + "short_name": "C", + "description": "The collation to use when connecting to MySQL", + "context": "CLI", + "argument_type": "OPTION", + "default": DEFAULT_COLLATION + }, + "read-stdin": { + "description": "Read paths from stdin. If not specified, paths will " + "automatically be read from stdin when input is not " + "from a TTY.", + "context": "ALL", + "argument_type": "OPTIONAL_FLAG", + "default": None + }, + "path-separator": { + "short_name": "s", + "description": "Separator used to delimit paths when reading from " + "stdin. Defaults to the null byte.", + "context": "ALL", + "argument_type": "OPTION", + "default": "AA==", + "default_type": "base64" + }, + **DATABASE_SCAN_REPORT_CONFIG_OPTIONS, + "require-database": { + "description": "When enabled, invoking the db-scan command without " + "specifying at least one database will trigger an " + "error. 
This is the default behavior when running in " + "a terminal.", + "context": "CLI", + "argument_type": "OPTIONAL_FLAG", + "default": None + }, + "locate-sites": { + "short_name": "S", + "description": ( + "Automatically locate WordPress config files to extract " + "database connection details" + ), + "context": "CLI", + "argument_type": "FLAG", + "default": None, + "category": "Site Location" + }, + "allow-nested": { + "description": "Allow WordPress installations nested below other " + "installations to be identified as targets for " + "database scanning", + "context": "ALL", + "argument_type": "FLAG", + "default": True, + "category": "Site Location" + }, + "allow-io-errors": { + "description": "Allow scanning to continue even if an IO error occurs" + "while locating WordPress sites. Sites that cannot " + "be identified due to IO errors will be excluded from " + "scanning. This is the default behavior.", + "context": "ALL", + "argument_type": "FLAG", + "default": True, + "category": "Site Location" + }, + "use-remote-rules": { + "description": "If enabled, scanning rules will be pulled from " + "the Wordfence API", + "context": "ALL", + "argument_type": "FLAG", + "default": True + }, + "rules-file": { + "short_name": "R", + "description": "Path to a JSON file containing scanning rules", + "context": "ALL", + "argument_type": "OPTION_REPEATABLE", + "meta": { + "accepts_file": True + } + }, + "exclude-rules": { + "short_name": "e", + "description": "Specify rule IDs to ignore when scanning. May be " + "comma-delimited and/or repeated.", + "context": "ALL", + "argument_type": "OPTION_REPEATABLE", + "default": None, + "meta": { + "separator": ",", + "value_type": int + } + }, + "include-rules": { + "short_name": "i", + "description": "Specify rule IDs to include when scanning. 
# Usage examples shown in the db-scan help output. The flags must match the
# short names declared in config_definitions: "-H" is the host, "-p" is the
# prompt-for-password flag (it takes no value) and "-D" is the database name.
examples = [
    UsageExample(
        'Scan the WordPress database at db.example.com',
        'wordfence db-scan -H db.example.com -p -D wordpress'
    )
]

# Registers the db-scan subcommand. The cacheable types are the classes
# stored in the cache when remote rules are fetched.
definition = SubcommandDefinition(
    name='db-scan',
    usage='[OPTIONS] [DATABASE_CONFIG_PATH or WORDPRESS_INSTALLATION_PATH]...',
    description='Scan for malicious content in WordPress databases',
    config_definitions=config_definitions,
    config_section='DB_SCAN',
    cacheable_types={
        'wordfence.intel.database_rules.DatabaseRuleSet',
        'wordfence.intel.database_rules.DatabaseRule'
    },
    examples=examples,
    accepts_directories=True
)
+ + def format_record(self, record) -> str: + result = record.result + return ( + escape(Color.YELLOW) + + 'Suspicious database record found in table ' + f'"{result.table}" matching rule "{result.rule.description}"' + ': ' + safe_json_encode(record.result.row) + RESET + ) + + +REPORT_FORMAT_HUMAN = ReportFormat( + 'human', + lambda stream, columns: HumanReadableWriter(stream), + allows_headers=False, + allows_column_customization=False + ) + + +class DatabaseScanReportFormat(ReportFormatEnum): + CSV = REPORT_FORMAT_CSV + TSV = REPORT_FORMAT_TSV + NULL_DELIMITED = REPORT_FORMAT_NULL_DELIMITED + LINE_DELIMITED = REPORT_FORMAT_LINE_DELIMITED + HUMAN = REPORT_FORMAT_HUMAN + + +class DatabaseScanReportRecord(ReportRecord): + + def __init__(self, result: DatabaseScanResult): + self.result = result + + +class DatabaseScanReport(Report): + + def __init__( + self, + format: ReportFormat, + columns: List[ReportColumn], + email_addresses: List[str], + mailer: Optional[Mailer], + write_headers: bool = False, + only_unremediated: bool = False + ): + super().__init__( + format, + columns, + email_addresses, + mailer, + write_headers + ) + self.result_count = 0 + self.database_count = 0 + + def add_result(self, result: DatabaseScanResult): + self.result_count += 1 + self.write_record( + DatabaseScanReportRecord(result) + ) + + def generate_email( + self, + recipient: str, + attachments: Dict[str, str], + hostname: str + ) -> ReportEmail: + plain = ( + 'Database Scan Complete\n\n' + f'Scanned Databases: {self.database_count}\n\n' + f'Results Found: {self.result_count}\n\n' + ) + + results = { + 'Scanned Databases': self.database_count, + 'Results Found': self.result_count + } + + table = generate_html_table(results) + + document = generate_report_email_html( + table, + 'Database Scan Results', + hostname + ) + + return ReportEmail( + recipient=recipient, + subject=f'Database Scan Results for {hostname}', + plain_content=plain, + html_content=document.to_html() + ) + + +class 
DatabaseScanReportManager(ReportManager): + + def __init__(self, context: CliContext): + super().__init__( + formats=DatabaseScanReportFormat, + columns=DatabaseScanReportColumn, + context=context, + read_stdin=context.config.read_stdin, + input_delimiter=context.config.path_separator, + binary_input=True + ) + + def _instantiate_report( + self, + format: ReportFormat, + columns: List[ReportColumn], + email_addresses: List[str], + mailer: Optional[Mailer], + write_headers: bool + ) -> Report: + return DatabaseScanReport( + format, + columns, + email_addresses, + mailer, + write_headers + ) + + +DATABASE_SCAN_REPORT_CONFIG_OPTIONS = get_config_options( + DatabaseScanReportFormat, + DatabaseScanReportColumn, + default_format='human' + ) diff --git a/wordfence/cli/reporting.py b/wordfence/cli/reporting.py index 29c94c2..8721a87 100644 --- a/wordfence/cli/reporting.py +++ b/wordfence/cli/reporting.py @@ -455,7 +455,8 @@ def has_writers(self) -> bool: def generate_email( self, recipient: str, - attachments: Dict[str, str] + attachments: Dict[str, str], + hostname: str ) -> ReportEmail: raise NotImplementedError( 'This report does not support email generation' diff --git a/wordfence/cli/subcommands.py b/wordfence/cli/subcommands.py index f127547..89baa45 100644 --- a/wordfence/cli/subcommands.py +++ b/wordfence/cli/subcommands.py @@ -14,6 +14,7 @@ 'vuln-scan', 'remediate', 'count-sites', + 'db-scan', 'help', 'version', 'terms' diff --git a/wordfence/databasescanning/__init__.py b/wordfence/databasescanning/__init__.py new file mode 100644 index 0000000..f7f624b --- /dev/null +++ b/wordfence/databasescanning/__init__.py @@ -0,0 +1,3 @@ +from . 
class DatabaseScanner:
    """Applies a DatabaseRuleSet to one or more WordPress databases."""

    def __init__(
                self,
                rule_set: DatabaseRuleSet
            ):
        self.rule_set = rule_set
        # Number of databases for which scanning has begun
        self.scan_count = 0
        # Accumulates scanning time across databases; it is created
        # stopped and only started once a connection is actually scanned
        self.timer = Timer(start=False)

    def _get_valid_columns(
                self,
                connection: WordpressDatabaseConnection,
                prefixed_table: str
            ) -> List[str]:
        """Return the table's column names, excluding ``rule_id``.

        ``rule_id`` is dropped because the scan query selects a computed
        column under that alias.
        """
        columns = connection.get_column_types(prefixed_table)
        columns.pop('rule_id', None)
        return list(columns)

    def _scan_table(
                self,
                connection: WordpressDatabaseConnection,
                table: str
            ) -> Generator[DatabaseScanResult, None, None]:
        """Yield a result for every row of ``table`` matching a rule.

        A single query is issued per table: the WHERE clause ORs all rule
        conditions and a CASE expression reports the first matching rule.
        """
        rules = self.rule_set.get_rules(table)
        if not rules:
            # Without conditions the query would end in a bare "WHERE"
            return
        prefixed_table = connection.prefix_table(table)
        conditions = [f'({rule.condition})' for rule in rules]
        rule_selects = [
            f'WHEN {rule.condition} THEN {rule.identifier}'
            for rule in rules
        ]
        rule_case = 'CASE\n' + '\n'.join(rule_selects) + '\nEND'
        selected_columns = self._get_valid_columns(connection, prefixed_table)
        selected_columns.append(f'{rule_case} as rule_id')
        query = (
            f'SELECT {", ".join(selected_columns)} FROM '
            f'{prefixed_table} WHERE '
            + ' OR '.join(conditions)
        )
        # Using a dict as the query parameters avoids %s from being
        # interpreted as a placeholder (there is apparently no way
        # to escape "%s" ("%%s" doesn't work)
        for result in connection.query(query, {}):
            rule = self.rule_set.get_rule(result['rule_id'])
            del result['rule_id']
            yield DatabaseScanResult(
                rule=rule,
                table=prefixed_table,
                row=result
            )

    def _scan_connection(
                self,
                connection: WordpressDatabaseConnection
            ) -> Generator[DatabaseScanResult, None, None]:
        """Scan every targeted table over an open connection."""
        self.timer.resume()
        try:
            log.debug(
                f'Scanning database: {connection.database.debug_string}...'
            )
            for table in self.rule_set.get_targeted_tables():
                yield from self._scan_table(connection, table)
            log.debug(
                f'Scan completed for: {connection.database.debug_string}'
            )
        finally:
            # Stop the clock even when the scan fails or is abandoned so
            # the elapsed time does not keep growing afterwards
            self.timer.stop()

    def scan(
                self,
                database: Union[WordpressDatabase, WordpressDatabaseConnection]
            ) -> Generator[DatabaseScanResult, None, None]:
        """Scan a database, connecting first if given a WordpressDatabase.

        Yields:
            One DatabaseScanResult per matching row.
        """
        self.scan_count += 1
        if isinstance(database, WordpressDatabaseConnection):
            yield from self._scan_connection(database)
        else:
            log.debug(f'Connecting to database: {database.debug_string}...')
            with database.connect() as connection:
                log.debug(
                    'Successfully connected to database: '
                    f'{database.debug_string}'
                )
                yield from self._scan_connection(connection)

    def get_elapsed_time(self) -> float:
        """Return the total time spent scanning, in the timer's default unit.

        Returns 0 when no scan ever started: the timer is created with
        ``start=False``, leaving ``start_time`` as None, and asking it for
        the elapsed time in that state fails.
        """
        if self.timer.start_time is None:
            return 0
        return self.timer.get_elapsed()
class DatabaseRuleSet:
    """A collection of database scanning rules.

    Rules are indexed by identifier (``rules``) and by each table they
    target (``table_rules``). Rules whose ``tables`` is None are kept in
    ``global_rules`` and returned for every table.
    """

    def __init__(self):
        self.rules = {}
        self.table_rules = {}
        self.global_rules = set()

    def add_rule(self, rule: 'DatabaseRule') -> None:
        """Add a rule to the set.

        Raises:
            ValueError: If a rule with the same identifier already exists.
        """
        if rule.identifier in self.rules:
            raise ValueError(f'Duplicate rule ID: {rule.identifier}')
        self.rules[rule.identifier] = rule
        if rule.tables is None:
            self.global_rules.add(rule)
            return
        for table in rule.tables:
            self.table_rules.setdefault(table, set()).add(rule)

    def remove_rule(self, rule_id: int) -> None:
        """Remove the rule with the given ID; unknown IDs are ignored."""
        rule = self.rules.pop(rule_id, None)
        if rule is None:
            return
        if rule.tables is None:
            self.global_rules.discard(rule)
            return
        for table in rule.tables:
            table_rules = self.table_rules.get(table)
            if table_rules is None:
                continue
            table_rules.discard(rule)
            if not table_rules:
                # Drop empty entries so the table is no longer targeted
                del self.table_rules[table]

    def get_rules(self, table: str) -> List['DatabaseRule']:
        """Return the rules for a table, followed by the global rules."""
        rules = list(self.table_rules.get(table, ()))
        rules.extend(self.global_rules)
        return rules

    def get_targeted_tables(self) -> List[str]:
        """Return the names of all tables targeted by at least one rule.

        A new list is returned so callers can keep iterating it even if
        rules are added or removed in the meantime.
        """
        return list(self.table_rules)

    def get_rule(self, identifier: int) -> 'DatabaseRule':
        """Return the rule with the given ID.

        Raises:
            KeyError: If no such rule exists.
        """
        return self.rules[identifier]

    def filter_rules(
                self,
                included: Optional[Set[int]] = None,
                excluded: Optional[Set[int]] = None
            ):
        """Restrict the set to ``included`` IDs and drop ``excluded`` IDs.

        None means "no restriction" for either argument; an empty
        ``included`` set removes every rule.
        """
        if included is not None:
            # Copy the keys because remove_rule mutates self.rules
            for rule_id in list(self.rules):
                if rule_id not in included:
                    self.remove_rule(rule_id)
        if excluded is not None:
            for rule_id in excluded:
                self.remove_rule(rule_id)
# Scalar types that JSON can represent directly and that are passed through
# unchanged
UNFILTERED_TYPES = {
        bool,
        int,
        float,
        str
    }


def encode_invalid_data(data) -> Any:
    """Convert ``data`` into a structure that ``json.dumps`` can encode.

    Scalars and None pass through unchanged, dicts, lists and tuples are
    processed recursively (tuples become lists, as they would in JSON),
    and bytes-like values are base64-encoded. Any other value has no JSON
    representation and is replaced with None.
    """
    scalar_types = tuple(UNFILTERED_TYPES)
    if data is None or isinstance(data, scalar_types):
        return data
    if isinstance(data, dict):
        filtered = {}
        for key, value in data.items():
            key = encode_invalid_data(key)
            if not (key is None or isinstance(key, scalar_types)):
                # Containers cannot be used as JSON object keys
                key = None
            filtered[key] = encode_invalid_data(value)
        return filtered
    if isinstance(data, (list, tuple)):
        return [encode_invalid_data(value) for value in data]
    if isinstance(data, (bytes, bytearray)):
        return b64encode(data).decode('utf-8')
    # Every type json.dumps supports has been handled above
    return None


# Encode any data that cannot be represented as valid JSON
# prior to attempting to encode data as JSON
def safe_json_encode(data) -> str:
    """Serialize ``data`` to JSON after filtering unsupported values."""
    return json.dumps(encode_invalid_data(data))
class WordpressDatabaseConnection:
    """An open MySQL connection to a single WordPress database.

    Usable as a context manager; the connection is closed on exit.
    """

    def __init__(self, database):
        """Connect to ``database`` using its server credentials.

        Raises:
            WordpressDatabaseException: If the connection attempt fails.
        """
        self.database = database
        try:
            self.connection = mysql.connector.connect(
                host=database.server.host,
                port=database.server.port,
                user=database.server.user,
                password=database.server.password,
                database=database.name,
                collation=database.collation
            )
        except mysql.connector.Error as error:
            # Chain the driver error so the root cause stays visible
            raise WordpressDatabaseException(
                database,
                f'Failed to connect to database: {database.debug_string}'
            ) from error

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release the server connection; returning None lets any exception
        # raised inside the "with" block propagate
        self.close()

    def close(self) -> None:
        """Close the underlying connection."""
        self.connection.close()

    def prefix_table(self, table: str) -> str:
        """Return ``table`` with the database's table prefix applied."""
        return self.database.prefix_table(table)

    def query(
                self,
                query: str,
                parameters: tuple = ()
            ) -> Generator[dict, None, None]:
        """Execute ``query`` and yield each row as a dictionary.

        ``parameters`` is passed to the cursor unchanged; the scanner
        passes an empty dict rather than a tuple.

        Raises:
            WordpressDatabaseException: If the query fails.
        """
        cursor = None
        try:
            cursor = self.connection.cursor(dictionary=True)
            cursor.execute(query, parameters)
            yield from cursor
        except mysql.connector.Error as error:
            raise WordpressDatabaseException(
                self.database,
                'Failed to execute query'
            ) from error
        finally:
            if cursor is not None:
                self._close_cursor(cursor)

    def _close_cursor(self, cursor) -> None:
        """Close ``cursor``, ignoring driver errors raised while closing."""
        try:
            cursor.close()
        except mysql.connector.Error:
            # Best effort: closing a cursor that was abandoned part-way
            # through its results can fail, and that must not mask the
            # outcome of the query itself
            pass

    def get_column_types(
                self,
                table: str,
                prefix: bool = False
            ) -> Dict[str, str]:
        """Map each lower-cased column name of ``table`` to its type.

        Args:
            table: The table name.
            prefix: Whether to apply the database's table prefix first.
        """
        if prefix:
            table = self.prefix_table(table)
        columns = {}
        for result in self.query(f'SHOW COLUMNS FROM {table}'):
            columns[result['Field'].lower()] = result['Type']
        return columns
WordpressException(Exception): class ExtensionException(WordpressException): pass + + +class WordpressDatabaseException(Exception): + + def __init__(self, database, message): + self.database = database diff --git a/wordfence/wordpress/site.py b/wordfence/wordpress/site.py index e2e8290..9b57252 100644 --- a/wordfence/wordpress/site.py +++ b/wordfence/wordpress/site.py @@ -1,7 +1,7 @@ import os import os.path from dataclasses import dataclass, field -from typing import Optional, List, Generator +from typing import Optional, List, Generator, Dict, Callable, Any from ..php.parsing import parse_php_file, PhpException, PhpState, \ PhpEvaluationOptions @@ -11,6 +11,8 @@ from .exceptions import WordpressException, ExtensionException from .plugin import Plugin, PluginLoader from .theme import Theme, ThemeLoader +from .database import WordpressDatabase, WordpressDatabaseServer, \ + DEFAULT_PORT, DEFAULT_COLLATION WP_BLOG_HEADER_NAME = b'wp-blog-header.php' WP_CONFIG_NAME = b'wp-config.php' @@ -32,11 +34,19 @@ b'../app' ] +DATABASE_CONFIG_CONSTANTS = { + b'DB_NAME': 'name', + b'DB_USER': 'user', + b'DB_PASSWORD': 'password', + b'DB_HOST': 'host', + b'DB_COLLATE': 'collation' + } + @dataclass class WordpressStructureOptions: relative_content_paths: List[str] = field(default_factory=list) - relative_plugins_paths: List[str] = field(default_factory=list) + relative_plugins_paths: List[str] = field(default_factory=list) relative_mu_plugins_paths: List[str] = field(default_factory=list) @@ -307,18 +317,16 @@ def _get_parsed_config_state(self) -> PhpState: def _extract_string_from_config( self, - constant: str, - default: Optional[str] = None - ) -> str: + constant: bytes, + default: Optional[bytes], + extractor: Callable[[PhpState], Any] + ) -> bytes: try: state = self._get_parsed_config_state() if state is not None: - path = state.get_constant_value( - name=constant, - default_to_name=False - ) - if isinstance(path, str): - return path + value = extractor(state) + if 
isinstance(value, bytes): + return value except PhpException as exception: # Just use the default if parsing errors occur log.warning( @@ -327,8 +335,43 @@ def _extract_string_from_config( ) return default + def _extract_string_from_config_constant( + self, + constant: bytes, + default: Optional[bytes] = None + ): + def get_constant_value(state: PhpState): + return state.get_constant_value( + name=constant, + default_to_name=False + ) + return self._extract_string_from_config( + constant, + default, + get_constant_value + ) + + def _extract_string_from_config_variable( + self, + variable: bytes, + default: Optional[bytes] = None + ): + def get_variable_value(state: PhpState): + return state.get_variable_value(variable) + return self._extract_string_from_config( + variable, + default, + get_variable_value + ) + + def get_config_constant(self, constant: bytes) -> bytes: + return self._extract_string_from_config_constant(constant) + + def get_config_variable(self, variable: bytes) -> bytes: + return self._extract_string_from_config_variable(variable) + def _generate_possible_content_paths(self) -> Generator[str, None, None]: - configured = self._extract_string_from_config( + configured = self._extract_string_from_config_constant( 'WP_CONTENT_DIR' ) if configured is not None: @@ -357,7 +400,7 @@ def get_content_directory(self) -> str: return self.content_path def get_configured_plugins_directory(self, mu: bool = False) -> str: - return self._extract_string_from_config( + return self._extract_string_from_config_constant( 'WPMU_PLUGIN_DIR' if mu else 'WP_PLUGIN_DIR', ) @@ -434,3 +477,50 @@ def get_themes(self, allow_io_errors: bool = False) -> List[Theme]: raise loader = ThemeLoader(directory, allow_io_errors) return loader.load_all() + + def _extract_database_config(self) -> Dict[str, str]: + config = {} + + def add_config(key: str, value: Any): + if value is None: + raise WordpressException( + 'Unable to extract database connection details from ' + f'WordPress config 
(Key: {key}, Value: ' + + repr(value) + ')' + ) + config[key] = value.decode('latin1') + + for constant, attribute in DATABASE_CONFIG_CONSTANTS.items(): + add_config( + key=attribute, + value=self.get_config_constant(constant) + ) + add_config( + key='prefix', + value=self.get_config_variable(b'table_prefix') + ) + return config + + def get_database(self) -> WordpressDatabase: + config = self._extract_database_config() + host_components = config['host'].split(':', 1) + host = host_components[0] + try: + port = host_components[1] + except IndexError: + port = DEFAULT_PORT + try: + collation = config['collation'] or DEFAULT_COLLATION + except KeyError: + collation = DEFAULT_COLLATION + server = WordpressDatabaseServer( + host=host, + port=port, + user=config['user'], + password=config['password'] + ) + return WordpressDatabase( + name=config['name'], + server=server, + collation=collation + )