From 53f544fcb914ec5cbf9522ad42a3d83c772c5691 Mon Sep 17 00:00:00 2001 From: Namitha Sudheendra Date: Mon, 18 Oct 2021 14:50:56 -0700 Subject: [PATCH] Issue #89 Initial commit for base64 and hexadecimal entropy score override (#223) Updated readme Co-authored-by: Namitha Sudheendra Co-authored-by: Joey Wilhelm --- README.md | 18 +++++++++++ tartufo/cli.py | 20 ++++++++++++ tartufo/scanner.py | 16 +++++++--- tartufo/types.py | 4 +++ tests/test_base_scanner.py | 61 ++++++++++++++++++++++++++++++------ tests/test_folder_scanner.py | 2 ++ 6 files changed, 107 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ae1b0058..c353c9a8 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,24 @@ Options: Enable or disable timestamps in logging messages. [default: True] + -b64, --b64-entropy-score FLOAT + Modify the base64 entropy score. If a value + greater than the default is specified, + tartufo lists higher entropy base64 strings + (longer or more randomized strings). A lower + value lists lower entropy base64 strings + (shorter or less randomized strings). + [default: 4.5] + + -hex, --hex-entropy-score FLOAT + Modify the hexadecimal entropy score. If a + value greater than the default is specified, + tartufo lists higher entropy hexadecimal + strings (longer or more randomized strings). + A lower value lists lower entropy + hexadecimal strings (shorter or less + randomized strings). [default: 3.0] + -V, --version Show the version and exit. -h, --help Show this message and exit. diff --git a/tartufo/cli.py b/tartufo/cli.py index 03d6f3b7..b99c28ff 100644 --- a/tartufo/cli.py +++ b/tartufo/cli.py @@ -207,6 +207,26 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma show_default=True, help="Enable or disable timestamps in logging messages.", ) +@click.option( + "-b64", + "--b64-entropy-score", + default=4.5, + show_default=True, + help="Modify the base64 entropy score. 
If a value greater than the default is " + "specified, tartufo lists higher entropy base64 strings (longer or more randomized " + "strings). A lower value lists lower entropy base64 strings (shorter or less " + "randomized strings).", +) +@click.option( + "-hex", + "--hex-entropy-score", + default=3.0, + show_default=True, + help="Modify the hexadecimal entropy score. If a value greater than the default is " + "specified, tartufo lists higher entropy hexadecimal strings (longer or more randomized " + "strings). A lower value lists lower entropy hexadecimal strings (shorter or less " + "randomized strings).", +) # The first positional argument here would be a hard-coded version, hence the `None` @click.version_option(None, "-V", "--version") @click.pass_context diff --git a/tartufo/scanner.py b/tartufo/scanner.py index cb805b87..ccc65e62 100755 --- a/tartufo/scanner.py +++ b/tartufo/scanner.py @@ -360,15 +360,23 @@ def scan(self) -> List[Issue]: if self.global_options.regex and self.rules_regexes: issues += self.scan_regex(chunk) if self.global_options.entropy: - issues += self.scan_entropy(chunk) + issues += self.scan_entropy( + chunk, + self.global_options.b64_entropy_score, + self.global_options.hex_entropy_score, + ) self._issues = issues self.logger.info("Found %d issues.", len(self._issues)) return self._issues - def scan_entropy(self, chunk: types.Chunk) -> List[Issue]: + def scan_entropy( + self, chunk: types.Chunk, b64_entropy_score: float, hex_entropy_score: float + ) -> List[Issue]: """Scan a chunk of data for apparent high entropy. 
:param chunk: The chunk of data to be scanned + :param b64_entropy_score: Base64 entropy score + :param hex_entropy_score: Hexadecimal entropy score """ issues: List[Issue] = [] for line in chunk.contents.split("\n"): @@ -378,12 +386,12 @@ def scan_entropy(self, chunk: types.Chunk) -> List[Issue]: for string in b64_strings: issues += self.evaluate_entropy_string( - chunk, line, string, BASE64_CHARS, 4.5 + chunk, line, string, BASE64_CHARS, b64_entropy_score ) for string in hex_strings: issues += self.evaluate_entropy_string( - chunk, line, string, HEX_CHARS, 3 + chunk, line, string, HEX_CHARS, hex_entropy_score ) return issues diff --git a/tartufo/types.py b/tartufo/types.py index 8b5a4b81..9b460159 100644 --- a/tartufo/types.py +++ b/tartufo/types.py @@ -26,6 +26,8 @@ class GlobalOptions: "quiet", "log_timestamps", "compact", + "b64_entropy_score", + "hex_entropy_score", ) json: bool rules: Tuple[TextIO, ...] @@ -46,6 +48,8 @@ class GlobalOptions: quiet: bool log_timestamps: bool compact: bool + b64_entropy_score: float + hex_entropy_score: float @dataclass diff --git a/tests/test_base_scanner.py b/tests/test_base_scanner.py index 0b5791a4..0c2ce8dc 100644 --- a/tests/test_base_scanner.py +++ b/tests/test_base_scanner.py @@ -60,10 +60,17 @@ def test_scan_aborts_due_to_invalid_regex(self, mock_config: mock.MagicMock): def test_scan_iterates_through_all_chunks(self, mock_entropy: mock.MagicMock): # Make sure we do at least one type of scan self.options.entropy = True + self.options.b64_entropy_score = 4.5 + self.options.hex_entropy_score = 3 test_scanner = TestScanner(self.options) test_scanner.scan() mock_entropy.assert_has_calls( - (mock.call("foo"), mock.call("bar"), mock.call("baz")), any_order=True + ( + mock.call("foo", 4.5, 3), + mock.call("bar", 4.5, 3), + mock.call("baz", 4.5, 3), + ), + any_order=True, ) @mock.patch("tartufo.scanner.ScannerBase.scan_entropy") @@ -391,7 +398,9 @@ def test_scan_entropy_find_b64_strings_for_every_word_in_diff( self, 
mock_strings: mock.MagicMock ): mock_strings.return_value = [] - self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score) mock_strings.assert_has_calls( ( mock.call("foo", scanner.BASE64_CHARS), @@ -414,7 +423,11 @@ def test_issues_are_not_created_for_b64_string_excluded_signatures( ): mock_strings.side_effect = (["foo"], [], [], [], [], []) mock_signature.return_value = True - issues = self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) mock_calculate.assert_not_called() self.assertEqual(issues, []) @@ -429,7 +442,11 @@ def test_issues_are_not_created_for_hex_string_excluded_signatures( ): mock_strings.side_effect = ([], ["foo"], [], [], [], []) mock_signature.return_value = True - issues = self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) mock_calculate.assert_not_called() self.assertEqual(issues, []) @@ -445,7 +462,11 @@ def test_issues_are_created_for_high_entropy_b64_strings( mock_strings.side_effect = (["foo"], [], [], [], [], []) mock_signature.return_value = False mock_calculate.return_value = 9.0 - issues = self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) self.assertEqual(len(issues), 1) self.assertEqual(issues[0].issue_type, types.IssueType.Entropy) self.assertEqual(issues[0].matched_string, "foo") @@ -462,7 +483,11 @@ def test_issues_are_created_for_high_entropy_hex_strings( mock_strings.side_effect = ([], ["foo"], [], [], [], []) mock_signature.return_value = False mock_calculate.return_value = 9.0 - issues = self.scanner.scan_entropy(self.chunk) + 
b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) self.assertEqual(len(issues), 1) self.assertEqual(issues[0].issue_type, types.IssueType.Entropy) self.assertEqual(issues[0].matched_string, "foo") @@ -482,7 +507,11 @@ def test_issues_are_not_created_for_high_entropy_hex_strings_given_entropy_is_ex mock_entropy.return_value = True mock_signature.return_value = False mock_calculate.return_value = 9.0 - issues = self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) self.assertEqual(len(issues), 0) @mock.patch("tartufo.scanner.ScannerBase.calculate_entropy") @@ -500,7 +529,11 @@ def test_issues_are_not_created_for_low_entropy_b64_strings_given_entropy_is_exc mock_entropy.return_value = True mock_signature.return_value = False mock_calculate.return_value = 9.0 - issues = self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) self.assertEqual(len(issues), 0) @mock.patch("tartufo.scanner.ScannerBase.calculate_entropy") @@ -515,7 +548,11 @@ def test_issues_are_not_created_for_low_entropy_b64_strings( mock_strings.side_effect = (["foo"], [], [], [], [], []) mock_signature.return_value = False mock_calculate.return_value = 1.0 - issues = self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) self.assertEqual(len(issues), 0) @mock.patch("tartufo.scanner.ScannerBase.calculate_entropy") @@ -530,7 +567,11 @@ def test_issues_are_not_created_for_low_entropy_hex_strings( mock_strings.side_effect = ([], ["foo"], [], [], [], []) mock_signature.return_value = False mock_calculate.return_value = 1.0 - issues = 
self.scanner.scan_entropy(self.chunk) + b64_entropy_score = 4.5 + hex_entropy_score = 3 + issues = self.scanner.scan_entropy( + self.chunk, b64_entropy_score, hex_entropy_score + ) self.assertEqual(len(issues), 0) diff --git a/tests/test_folder_scanner.py b/tests/test_folder_scanner.py index bdc7d6b7..6090f9da 100644 --- a/tests/test_folder_scanner.py +++ b/tests/test_folder_scanner.py @@ -17,6 +17,8 @@ def setUp(self) -> None: def test_scan_should_detect_entropy_and_not_binary(self): folder_path = pathlib.Path(__file__).parent / "data/scan_folder" self.global_options.entropy = True + self.global_options.b64_entropy_score = 4.5 + self.global_options.hex_entropy_score = 3 self.global_options.exclude_signatures = [] self.global_options.exclude_path_patterns = [r"donotscan\.txt"]