Skip to content

Commit

Permalink
Issue #89 Initial commit for base64 and hexadecimal entropy score override (#223)
Browse files Browse the repository at this point in the history

Updated readme

Co-authored-by: Namitha Sudheendra <[email protected]>
Co-authored-by: Joey Wilhelm <[email protected]>
  • Loading branch information
3 people authored Oct 18, 2021
1 parent afe6221 commit 53f544f
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 14 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,24 @@ Options:
Enable or disable timestamps in logging
messages. [default: True]
-b64, --b64-entropy-score FLOAT
Modify the base64 entropy score. If you
specify a value greater than the default,
tartufo lists higher entropy base64 strings
(longer or more randomized strings). A lower
value lists lower entropy base64 strings
(shorter or less randomized strings).
[default: 4.5]
-hex, --hex-entropy-score FLOAT
Modify the hexadecimal entropy score. If you
specify a value greater than the default,
tartufo lists higher entropy hexadecimal
strings (longer or more randomized strings).
A lower value lists lower entropy
hexadecimal strings (shorter or less
randomized strings). [default: 3.0]
-V, --version Show the version and exit.
-h, --help Show this message and exit.
Expand Down
20 changes: 20 additions & 0 deletions tartufo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,26 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
show_default=True,
help="Enable or disable timestamps in logging messages.",
)
@click.option(
"-b64",
"--b64-entropy-score",
default=4.5,
show_default=True,
help="Modify the base64 entropy score. If a value greater than the default is "
"specified, tartufo lists higher entropy base64 strings (longer or more randomized "
"strings). A lower value lists lower entropy base64 strings (shorter or less "
"randomized strings).",
)
@click.option(
"-hex",
"--hex-entropy-score",
default=3.0,
show_default=True,
help="Modify the hexadecimal entropy score. If a value greater than the default is "
"specified, tartufo lists higher entropy hexadecimal strings (longer or more randomized "
"strings). A lower value lists lower entropy hexadecimal strings (shorter or less "
"randomized strings).",
)
# The first positional argument here would be a hard-coded version, hence the `None`
@click.version_option(None, "-V", "--version")
@click.pass_context
Expand Down
16 changes: 12 additions & 4 deletions tartufo/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,15 +360,23 @@ def scan(self) -> List[Issue]:
if self.global_options.regex and self.rules_regexes:
issues += self.scan_regex(chunk)
if self.global_options.entropy:
issues += self.scan_entropy(chunk)
issues += self.scan_entropy(
chunk,
self.global_options.b64_entropy_score,
self.global_options.hex_entropy_score,
)
self._issues = issues
self.logger.info("Found %d issues.", len(self._issues))
return self._issues

def scan_entropy(self, chunk: types.Chunk) -> List[Issue]:
def scan_entropy(
self, chunk: types.Chunk, b64_entropy_score: float, hex_entropy_score: float
) -> List[Issue]:
"""Scan a chunk of data for apparent high entropy.
:param chunk: The chunk of data to be scanned
:param b64_entropy_score: Base64 entropy score
:param hex_entropy_score: Hexadecimal entropy score
"""
issues: List[Issue] = []
for line in chunk.contents.split("\n"):
Expand All @@ -378,12 +386,12 @@ def scan_entropy(self, chunk: types.Chunk) -> List[Issue]:

for string in b64_strings:
issues += self.evaluate_entropy_string(
chunk, line, string, BASE64_CHARS, 4.5
chunk, line, string, BASE64_CHARS, b64_entropy_score
)

for string in hex_strings:
issues += self.evaluate_entropy_string(
chunk, line, string, HEX_CHARS, 3
chunk, line, string, HEX_CHARS, hex_entropy_score
)

return issues
Expand Down
4 changes: 4 additions & 0 deletions tartufo/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class GlobalOptions:
"quiet",
"log_timestamps",
"compact",
"b64_entropy_score",
"hex_entropy_score",
)
json: bool
rules: Tuple[TextIO, ...]
Expand All @@ -46,6 +48,8 @@ class GlobalOptions:
quiet: bool
log_timestamps: bool
compact: bool
b64_entropy_score: float
hex_entropy_score: float


@dataclass
Expand Down
61 changes: 51 additions & 10 deletions tests/test_base_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,17 @@ def test_scan_aborts_due_to_invalid_regex(self, mock_config: mock.MagicMock):
def test_scan_iterates_through_all_chunks(self, mock_entropy: mock.MagicMock):
# Make sure we do at least one type of scan
self.options.entropy = True
self.options.b64_entropy_score = 4.5
self.options.hex_entropy_score = 3
test_scanner = TestScanner(self.options)
test_scanner.scan()
mock_entropy.assert_has_calls(
(mock.call("foo"), mock.call("bar"), mock.call("baz")), any_order=True
(
mock.call("foo", 4.5, 3),
mock.call("bar", 4.5, 3),
mock.call("baz", 4.5, 3),
),
any_order=True,
)

@mock.patch("tartufo.scanner.ScannerBase.scan_entropy")
Expand Down Expand Up @@ -391,7 +398,9 @@ def test_scan_entropy_find_b64_strings_for_every_word_in_diff(
self, mock_strings: mock.MagicMock
):
mock_strings.return_value = []
self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
mock_strings.assert_has_calls(
(
mock.call("foo", scanner.BASE64_CHARS),
Expand All @@ -414,7 +423,11 @@ def test_issues_are_not_created_for_b64_string_excluded_signatures(
):
mock_strings.side_effect = (["foo"], [], [], [], [], [])
mock_signature.return_value = True
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
mock_calculate.assert_not_called()
self.assertEqual(issues, [])

Expand All @@ -429,7 +442,11 @@ def test_issues_are_not_created_for_hex_string_excluded_signatures(
):
mock_strings.side_effect = ([], ["foo"], [], [], [], [])
mock_signature.return_value = True
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
mock_calculate.assert_not_called()
self.assertEqual(issues, [])

Expand All @@ -445,7 +462,11 @@ def test_issues_are_created_for_high_entropy_b64_strings(
mock_strings.side_effect = (["foo"], [], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 9.0
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
self.assertEqual(len(issues), 1)
self.assertEqual(issues[0].issue_type, types.IssueType.Entropy)
self.assertEqual(issues[0].matched_string, "foo")
Expand All @@ -462,7 +483,11 @@ def test_issues_are_created_for_high_entropy_hex_strings(
mock_strings.side_effect = ([], ["foo"], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 9.0
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
self.assertEqual(len(issues), 1)
self.assertEqual(issues[0].issue_type, types.IssueType.Entropy)
self.assertEqual(issues[0].matched_string, "foo")
Expand All @@ -482,7 +507,11 @@ def test_issues_are_not_created_for_high_entropy_hex_strings_given_entropy_is_ex
mock_entropy.return_value = True
mock_signature.return_value = False
mock_calculate.return_value = 9.0
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
self.assertEqual(len(issues), 0)

@mock.patch("tartufo.scanner.ScannerBase.calculate_entropy")
Expand All @@ -500,7 +529,11 @@ def test_issues_are_not_created_for_low_entropy_b64_strings_given_entropy_is_exc
mock_entropy.return_value = True
mock_signature.return_value = False
mock_calculate.return_value = 9.0
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
self.assertEqual(len(issues), 0)

@mock.patch("tartufo.scanner.ScannerBase.calculate_entropy")
Expand All @@ -515,7 +548,11 @@ def test_issues_are_not_created_for_low_entropy_b64_strings(
mock_strings.side_effect = (["foo"], [], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 1.0
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
self.assertEqual(len(issues), 0)

@mock.patch("tartufo.scanner.ScannerBase.calculate_entropy")
Expand All @@ -530,7 +567,11 @@ def test_issues_are_not_created_for_low_entropy_hex_strings(
mock_strings.side_effect = ([], ["foo"], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 1.0
issues = self.scanner.scan_entropy(self.chunk)
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = self.scanner.scan_entropy(
self.chunk, b64_entropy_score, hex_entropy_score
)
self.assertEqual(len(issues), 0)


Expand Down
2 changes: 2 additions & 0 deletions tests/test_folder_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def setUp(self) -> None:
def test_scan_should_detect_entropy_and_not_binary(self):
folder_path = pathlib.Path(__file__).parent / "data/scan_folder"
self.global_options.entropy = True
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3
self.global_options.exclude_signatures = []
self.global_options.exclude_path_patterns = [r"donotscan\.txt"]

Expand Down

0 comments on commit 53f544f

Please sign in to comment.