From 36d5ab91c72a556a6c9be4c5bfdf8871141a1377 Mon Sep 17 00:00:00 2001 From: Serhii Tereshchenko Date: Tue, 29 Oct 2024 23:58:09 +0200 Subject: [PATCH 1/3] feat: Add an option to limit maximum commits used for frecency Refs #711 --- docs/configuration.md | 3 +++ seagoat/repository.py | 3 +++ seagoat/utils/config.py | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 58ce2aad..c3ef3d85 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -25,6 +25,9 @@ This is an example of a configuration file: server: port: 31134 # A port number to run the server on + # Increase number of commits used for computing frecency score + # Default is `1000`, set to `null` to read all history + readMaxCommits: 5000 # globs to ignore in addition to .gitignore ignorePatterns: diff --git a/seagoat/repository.py b/seagoat/repository.py index 848e8cb0..d21662ba 100644 --- a/seagoat/repository.py +++ b/seagoat/repository.py @@ -92,6 +92,9 @@ def analyze_files(self): "--no-merges", ] + if (max_commits := self.config["server"]["readMaxCommits"]) is not None: + cmd.append(f"--max-count={max_commits}") + self.file_changes.clear() files = set( diff --git a/seagoat/utils/config.py b/seagoat/utils/config.py index 6f718144..d27b78f2 100644 --- a/seagoat/utils/config.py +++ b/seagoat/utils/config.py @@ -13,6 +13,7 @@ "server": { "port": None, "ignorePatterns": [], + "readMaxCommits": 1000, "chroma": { "embeddingFunction": { "name": "DefaultEmbeddingFunction", @@ -35,6 +36,11 @@ "additionalProperties": False, "properties": { "port": {"type": "integer", "minimum": 1, "maximum": 65535}, + "readMaxCommits": { + "type": ["integer", "null"], + "minimum": 1, + "maximum": 65535, + }, "ignorePatterns": { "type": "array", "items": {"type": "string"}, From 2b748244b2249bf1c1c99443d68ea858e2f3ddee Mon Sep 17 00:00:00 2001 From: Serhii Tereshchenko Date: Wed, 30 Oct 2024 00:14:45 +0200 Subject: [PATCH 2/3] chore: Raise default readMaxCommits from 1000 to 5000 --- 
seagoat/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seagoat/utils/config.py b/seagoat/utils/config.py index d27b78f2..01a5ef08 100644 --- a/seagoat/utils/config.py +++ b/seagoat/utils/config.py @@ -13,7 +13,7 @@ "server": { "port": None, "ignorePatterns": [], - "readMaxCommits": 1000, + "readMaxCommits": 5_000, "chroma": { "embeddingFunction": { "name": "DefaultEmbeddingFunction", From 43ccb45ec775ee879df11a01d846c316ccf61325 Mon Sep 17 00:00:00 2001 From: Serhii Tereshchenko Date: Mon, 18 Nov 2024 15:07:39 +0200 Subject: [PATCH 3/3] chore: Add tests/refactor --- seagoat/repository.py | 12 +++++++++--- tests/test_repository.py | 28 +++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/seagoat/repository.py b/seagoat/repository.py index d21662ba..a2fa8b04 100644 --- a/seagoat/repository.py +++ b/seagoat/repository.py @@ -90,11 +90,9 @@ def analyze_files(self): "--name-only", "--pretty=format:###%h:::%ai:::%an <%ae>:::%s", "--no-merges", + *self._git_log_extra_options(), ] - if (max_commits := self.config["server"]["readMaxCommits"]) is not None: - cmd.append(f"--max-count={max_commits}") - self.file_changes.clear() files = set( @@ -122,6 +120,14 @@ def analyze_files(self): self._compute_frecency() + def _git_log_extra_options(self): + cmd = [] + + if (max_commits := self.config["server"]["readMaxCommits"]) is not None: + cmd.append(f"--max-count={max_commits}") + + return cmd + def _compute_frecency(self): self.frecency_scores = {} for file, commits in self.file_changes.items(): diff --git a/tests/test_repository.py b/tests/test_repository.py index c404dbb9..a03418b5 100644 --- a/tests/test_repository.py +++ b/tests/test_repository.py @@ -1,7 +1,8 @@ from pathlib import Path +from typing import Any from freezegun import freeze_time - +from pytest_mock import MockerFixture from seagoat.engine import Engine from tests.conftest import MockRepo from tests.test_server import pytest @@ -312,3 
+313,28 @@ async def test_allows_limiting_how_many_files_are_automatically_analized( seagoat.analyze_codebase(minimum_chunks_to_analyze=chunks_to_analyze) assert len(seagoat.cache.data["chunks_already_analyzed"]) == chunks_to_analyze + + +@pytest.mark.parametrize( + "config,expected_extra_args", + [ + pytest.param({}, ["--max-count=5000"], id="default"), + pytest.param( + {"readMaxCommits": 100}, ["--max-count=100"], id="specific-number" + ), + pytest.param({"readMaxCommits": None}, [], id="unlimited"), + ], +) +def test_max_count_is_added_for_read_max_commits_setting( + repo: MockRepo, + mocker: MockerFixture, + config: dict[str, Any], + expected_extra_args: list[str], +): + repository = Engine(repo.working_dir).repository + repository.config["server"].update(config) + mock = mocker.spy(repository, "_git_log_extra_options") + + repository.analyze_files() + + assert mock.spy_return == expected_extra_args