Skip to content

Commit

Permalink
Add extra_data_prefix support for XLSX in load_inventory #926
Browse files: browse the repository at this point in the history
Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez committed Jan 6, 2025
1 parent fe87a83 commit 4759c77
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 3 deletions.
6 changes: 5 additions & 1 deletion scanpipe/pipelines/load_inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,21 @@ def build_inventory_from_scans(self):
extra_data_prefix = None if is_single_input else input_path.name

if input_path.suffix.endswith(".xlsx"):
input.load_inventory_from_xlsx(self.project, input_path)
input.load_inventory_from_xlsx(
self.project, input_path, extra_data_prefix
)
continue

scan_data = json.loads(input_path.read_text())
tool_name = input.get_tool_name_from_scan_headers(scan_data)

if tool_name == "scancode-toolkit":
input.load_inventory_from_toolkit_scan(self.project, input_path)

elif tool_name == "scanpipe":
input.load_inventory_from_scanpipe(
self.project, scan_data, extra_data_prefix
)

else:
raise Exception(f"Input not supported: {str(input_path)} ")
10 changes: 8 additions & 2 deletions scanpipe/pipes/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,13 @@ def clean_xlsx_data_to_model_data(model_class, xlsx_data):
return cleaned_data


def load_inventory_from_xlsx(project, input_location):
def load_inventory_from_xlsx(project, input_location, extra_data_prefix=None):
"""
Create packages, dependencies, resources, and relations loaded from XLSX file
located at ``input_location``.
An ``extra_data_prefix`` can be provided in case multiple input files are loaded
into the same project. The prefix is usually the filename of the input.
"""
workbook = openpyxl.load_workbook(input_location, read_only=True, data_only=True)

Expand All @@ -217,4 +220,7 @@ def load_inventory_from_xlsx(project, input_location):

if "LAYERS" in workbook:
layers_data = get_worksheet_data(worksheet=workbook["LAYERS"])
project.update_extra_data({"layers": layers_data})
extra_data = {"layers": layers_data}
if extra_data_prefix:
extra_data = {extra_data_prefix: extra_data}
project.update_extra_data(extra_data)
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"layers": [
{
"layer_tag": "img-12ebda-layer-01-1a058d",
"created_by": "/bin/sh -c #(nop) ADD file:762c899ec0505d1a32930ee804c5b008825f41611161be104076cba33b7e5b2b in / ",
"layer_id": "1a058d5342cc722ad5439cacae4b2b4eedde51d8fe8800fcf28444302355c16d",
"image_id": "12ebda3111cec73a788b0e802a00de04ebf5e9765043925dd396c2d03a7c1e66",
"created": "2021-11-12T17:19:44.795237917Z",
"size": "5886464",
"author": null,
"comment": null,
"archive_location": "ghcr_io_kyverno_sbom.tar-extract/1a058d5342cc722ad5439cacae4b2b4eedde51d8fe8800fcf28444302355c16d.tar",
"xlsx_errors": null
}
]
}
8 changes: 8 additions & 0 deletions scanpipe/tests/pipes/test_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ def test_scanpipe_pipes_input_load_inventory_from_xlsx_layers_sheet(self):
expected = json.loads(expected_location.read_text())
self.assertEqual(expected, project1.extra_data)

project1.extra_data = {}
project1.save()
input.load_inventory_from_xlsx(
project1, input_location, extra_data_prefix="file.ext"
)
project1.refresh_from_db()
self.assertEqual({"file.ext": expected}, project1.extra_data)

def test_scanpipe_pipes_input_load_inventory_from_project_xlsx_output(self):
fixtures = self.data / "asgiref" / "asgiref-3.3.0_fixtures.json"
call_command("loaddata", fixtures, **{"verbosity": 0})
Expand Down

0 comments on commit 4759c77

Please sign in to comment.