Skip to content

Commit

Permalink
Do not download input_urls in CLI commands #1437 (#1535)
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez authored Jan 14, 2025
1 parent a2da4b1 commit b75e8b3
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 72 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ v34.9.4 (unreleased)
at once from a directory containing input files.
https://github.com/aboutcode-org/scancode.io/issues/1437

- Do not download input_urls in management commands. The fetch/download is delegated to
the pipeline execution.
https://github.com/aboutcode-org/scancode.io/issues/1437

- Add a "TODOS" sheet containing the REQUIRES_REVIEW resources in XLSX.
https://github.com/aboutcode-org/scancode.io/issues/1524

Expand Down
25 changes: 7 additions & 18 deletions scanpipe/management/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
from scanpipe.models import Project
from scanpipe.models import ProjectMessage
from scanpipe.pipes import count_group_by
from scanpipe.pipes.fetch import fetch_urls

scanpipe_app = apps.get_app_config("scanpipe")

Expand Down Expand Up @@ -354,24 +353,14 @@ def handle_input_files(project, input_files_data, command=None):


def handle_input_urls(project, input_urls, command=None):
"""
Fetch provided `input_urls` and stores it in the project's `input`
directory.
"""
downloads, errors = fetch_urls(input_urls)
"""Add provided `input_urls` as input sources of the project."""
for url in input_urls:
project.add_input_source(download_url=url)

if downloads:
project.add_downloads(downloads)
msg = "File(s) downloaded to the project inputs directory:"
if command and command.verbosity > 0:
command.stdout.write(msg, command.style.SUCCESS)
msg = "\n".join(["- " + downloaded.filename for downloaded in downloads])
command.stdout.write(msg)

if errors and command:
msg = "Could not fetch URL(s):\n"
msg += "\n".join(["- " + url for url in errors])
command.stderr.write(msg)
if input_urls and command and command.verbosity > 0:
msg = "URL(s) added as project input sources:"
command.stdout.write(msg, command.style.SUCCESS)
command.stdout.write("\n".join([f"- {url}" for url in input_urls]))


def handle_copy_codebase(project, copy_from, command=None):
Expand Down
7 changes: 7 additions & 0 deletions scanpipe/management/commands/batch-create.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def add_arguments(self, parser):

def handle(self, *args, **options):
self.verbosity = options["verbosity"]
self.created_project_count = 0

input_directory = options["input_directory"]
input_list = options["input_list"]
Expand All @@ -82,6 +83,10 @@ def handle(self, *args, **options):
if input_list:
self.handle_input_list(**options)

if self.verbosity > 0 and self.created_project_count:
msg = f"{self.created_project_count} projects created."
self.stdout.write(msg, self.style.SUCCESS)

def handle_input_directory(self, **options):
timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
project_name_suffix = options.get("project_name_suffix") or timestamp
Expand All @@ -102,6 +107,7 @@ def handle_input_directory(self, **options):
execute=options["execute"],
run_async=options["async"],
)
self.created_project_count += 1

def handle_input_list(self, **options):
input_file = Path(options["input_list"])
Expand All @@ -125,6 +131,7 @@ def handle_input_list(self, **options):
execute=options["execute"],
run_async=options["async"],
)
self.created_project_count += 1


def process_csv(file_path):
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/management/commands/purldb-scan-worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def handle(self, *args, **options):

except Exception:
tb = traceback.format_exc()
error_log = f"Exception occured during scan project:\n\n{tb}"
error_log = f"Exception occurred during scan project:\n\n{tb}"
purldb.update_status(
scannable_uri_uuid,
status="failed",
Expand Down
93 changes: 40 additions & 53 deletions scanpipe/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ def test_scanpipe_management_command_batch_create(self):
call_command("batch-create", *options, stdout=out)
self.assertIn("Project a.txt suffix created", out.getvalue())
self.assertIn("Project b.txt suffix created", out.getvalue())
self.assertIn("2 projects created.", out.getvalue())

self.assertEqual(2, Project.objects.count())
project = Project.objects.get(name="a.txt suffix")
Expand All @@ -227,30 +228,7 @@ def test_scanpipe_management_command_batch_create(self):
self.assertEqual("scan_single_package", project.runs.get().pipeline_name)
self.assertEqual(["a.txt"], project.input_files)

@mock.patch("requests.sessions.Session.get")
def test_scanpipe_management_command_batch_create_input_list_csv(self, mock_get):
mock_responses = [
mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url="https://example.com/source.zip",
),
mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url="https://example.com/binary.bin",
),
mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url="https://example.com/filename.zip",
),
]
mock_get.side_effect = mock_responses

def test_scanpipe_management_command_batch_create_input_list_csv(self):
input_list = self.data / "commands" / "batch-create-list" / "project_list.csv"
options = [
"--input-list",
Expand All @@ -263,19 +241,36 @@ def test_scanpipe_management_command_batch_create_input_list_csv(self, mock_get)
call_command("batch-create", *options, stdout=out)
self.assertIn("Project project-v1", out.getvalue())
self.assertIn("Project project-v2", out.getvalue())
self.assertIn("URL(s) added as project input sources", out.getvalue())
self.assertIn("https://example.com/source.zip#from", out.getvalue())
self.assertIn("https://example.com/binary.bin#to", out.getvalue())
self.assertIn("https://example.com/filename.zip", out.getvalue())
self.assertIn("2 projects created.", out.getvalue())

self.assertEqual(2, Project.objects.count())
project1 = Project.objects.filter(name__contains="project-v1")[0]
self.assertEqual("map_deploy_to_develop", project1.runs.get().pipeline_name)
self.assertEqual(["binary.bin", "source.zip"], sorted(project1.input_files))
input_source = project1.inputsources.get(filename="source.zip")
self.assertEqual("from", input_source.tag)
input_source = project1.inputsources.get(filename="binary.bin")
self.assertEqual("to", input_source.tag)

input_source1 = project1.inputsources.get(
download_url="https://example.com/source.zip#from"
)
self.assertFalse(input_source1.is_uploaded)
self.assertEqual("from", input_source1.tag)
self.assertFalse(input_source1.exists())
input_source2 = project1.inputsources.get(
download_url="https://example.com/binary.bin#to"
)
self.assertFalse(input_source2.is_uploaded)
self.assertEqual("to", input_source2.tag)
self.assertFalse(input_source2.exists())

project2 = Project.objects.filter(name__contains="project-v2")[0]
self.assertEqual("map_deploy_to_develop", project1.runs.get().pipeline_name)
self.assertEqual(["filename.zip"], sorted(project2.input_files))
input_source3 = project2.inputsources.get()
self.assertEqual("https://example.com/filename.zip", input_source3.download_url)
self.assertFalse(input_source3.is_uploaded)
self.assertEqual("", input_source3.tag)
self.assertFalse(input_source3.exists())

def test_scanpipe_management_command_add_input_file(self):
out = StringIO()
Expand Down Expand Up @@ -306,16 +301,7 @@ def test_scanpipe_management_command_add_input_file(self):
with self.assertRaisesMessage(CommandError, expected):
call_command("add-input", *options, stdout=out)

@mock.patch("requests.sessions.Session.get")
def test_scanpipe_management_command_add_input_url(self, mock_get):
mock_get.side_effect = None
mock_get.return_value = mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url="https://example.com/archive.zip",
)

def test_scanpipe_management_command_add_input_url(self):
project = Project.objects.create(name="my_project")
options = [
"--input-url",
Expand All @@ -325,10 +311,15 @@ def test_scanpipe_management_command_add_input_url(self, mock_get):
]
out = StringIO()
call_command("add-input", *options, stdout=out)
expected = "File(s) downloaded to the project inputs directory"
self.assertIn(expected, out.getvalue())
self.assertIn("- archive.zip", out.getvalue())
self.assertEqual(["archive.zip"], project.input_root)
self.assertIn("URL(s) added as project input sources:", out.getvalue())
self.assertIn("- https://example.com/archive.zip", out.getvalue())

input_source = project.inputsources.get()
self.assertEqual("https://example.com/archive.zip", input_source.download_url)
self.assertEqual("", input_source.filename)
self.assertFalse(input_source.is_uploaded)
self.assertEqual("", input_source.tag)
self.assertFalse(input_source.exists())

def test_scanpipe_management_command_add_input_copy_codebase(self):
out = StringIO()
Expand Down Expand Up @@ -898,8 +889,6 @@ def test_scanpipe_management_command_purldb_scan_queue_worker(
self.assertIn(
"Project httpsregistrynpmjsorgasdf-asdf-122tgz-97627c6e created", out_value
)
self.assertIn("File(s) downloaded to the project inputs directory:", out_value)
self.assertIn("asdf-1.2.2.tgz", out_value)
self.assertIn(
"scan_single_package successfully executed on project "
"httpsregistrynpmjsorgasdf-asdf-122tgz-97627c6e",
Expand Down Expand Up @@ -948,7 +937,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_failure(
call_command("purldb-scan-worker", *options, stdout=out, stderr=out)

out_value = out.getvalue()
self.assertIn("Exception occured during scan project:", out_value)
self.assertIn("Exception occurred during scan project:", out_value)
self.assertIn("Error during scan_single_package execution:", out_value)
self.assertIn("Error log", out_value)

Expand All @@ -961,7 +950,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_failure(
self.assertEqual(purldb_update_status_url, mock_request_post_call_kwargs["url"])
self.assertEqual("failed", mock_request_post_call_kwargs["data"]["scan_status"])
self.assertIn(
"Exception occured during scan project:",
"Exception occurred during scan project:",
mock_request_post_call_kwargs["data"]["scan_log"],
)

Expand Down Expand Up @@ -1028,9 +1017,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_continue_after_fai
self.assertIn(
"Project httpsregistrynpmjsorgasdf-asdf-121tgz-0bbdcf88 created", out_value
)
self.assertIn("- asdf-1.2.2.tgz", out_value)
self.assertIn("- asdf-1.2.1.tgz", out_value)
self.assertIn("Exception occured during scan project:", out_value)
self.assertIn("Exception occurred during scan project:", out_value)
self.assertIn("Error during scan_single_package execution:", out_value)
self.assertIn("Error log", out_value)

Expand All @@ -1044,7 +1031,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_continue_after_fai
self.assertEqual(update_status_url1, mock_post_call1.kwargs["url"])
self.assertEqual("failed", mock_post_call1.kwargs["data"]["scan_status"])
self.assertIn(
"Exception occured during scan project:",
"Exception occurred during scan project:",
mock_post_call1.kwargs["data"]["scan_log"],
)

Expand All @@ -1055,7 +1042,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_continue_after_fai
self.assertEqual(update_status_url2, mock_post_call2.kwargs["url"])
self.assertEqual("failed", mock_post_call1.kwargs["data"]["scan_status"])
self.assertIn(
"Exception occured during scan project:",
"Exception occurred during scan project:",
mock_post_call2.kwargs["data"]["scan_log"],
)

Expand Down

0 comments on commit b75e8b3

Please sign in to comment.