This repository has been archived by the owner on Mar 24, 2023. It is now read-only.

[OpenKAT v1.6] Feature/multipart download #61

Merged
merged 20 commits on Jan 24, 2023
Changes from 7 commits

Commits
20 commits
1ca5ac5
rough outline of using multiple normalizers on multiple output streams…
underdarknl Jan 2, 2023
8faf459
Add tests and normalizer for images, fix code with right imports
Donnype Jan 3, 2023
abc5a9e
Test images with content-type as well. Fix some type hints and the "p…
Donnype Jan 3, 2023
7571b28
Only parse text/html for body normalizer that finds image tags and re…
Donnype Jan 10, 2023
095110e
Fix style
Donnype Jan 11, 2023
9329214
Merge branch 'main' into feature/multipart-download
Darwinkel Jan 16, 2023
64c838e
Merge branch 'main' into feature/multipart-download
Darwinkel Jan 16, 2023
ed3da7f
Check if mime types are allowed before adding them
Donnype Jan 17, 2023
94c4260
Merge branch 'feature/multipart-download' of github.com:minvws/nl-kat…
Donnype Jan 17, 2023
5b42396
Some refactoring: do not raise exceptions to the application runner, …
Donnype Jan 24, 2023
80e26e6
Update Octopoes version
Donnype Jan 24, 2023
37ccac4
Update Octopoes version again
Donnype Jan 24, 2023
9bff457
Add release fixes to branch
Donnype Jan 24, 2023
188f752
Merge branch 'main' into feature/multipart-download
Lisser Jan 24, 2023
71a89de
Remove chdir fix for tests
Donnype Jan 24, 2023
012c4da
Merge remote-tracking branch 'origin/feature/multipart-download' into…
Donnype Jan 24, 2023
8ebaea3
Specify pillow as no binary in Debian build
dekkers Jan 24, 2023
a9da87d
Merge branch 'main' into feature/multipart-download
Donnype Jan 24, 2023
da4c748
Specify no-binary for pillow in the correct place
dekkers Jan 24, 2023
1d54434
Use mimetypes package to list allowed mimetypes
Donnype Jan 24, 2023

1 change: 0 additions & 1 deletion boefjes/katalogus/local_repository.py
@@ -151,7 +151,6 @@ def _normalizer_to_plugin(normalizer: NormalizerResource) -> Normalizer:
def_file = normalizer.path / "normalizer.json"
def_obj = json.loads(def_file.read_text())
def_obj["repository_id"] = LocalPluginRepository.RESERVED_ID
def_obj["produces"] = []

return Normalizer.parse_obj(def_obj)

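With the produces override removed, the list declared in each plugin's normalizer.json now survives into the parsed Normalizer model. A minimal sketch with an inline stand-in definition (the values are illustrative, not taken from the repository):

import json

# inline stand-in for a plugin's normalizer.json definition
def_obj = json.loads('{"id": "kat_check_images", "consumes": ["image/png"], "produces": ["ImageMetadata"]}')
def_obj["repository_id"] = "LOCAL"  # illustrative stand-in for LocalPluginRepository.RESERVED_ID
print(def_obj["produces"])  # ['ImageMetadata'] instead of an empty list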
2 changes: 1 addition & 1 deletion boefjes/local.py
@@ -26,7 +26,7 @@ class LocalBoefjeJobRunner(BoefjeJobRunner):
def __init__(self, local_repository: LocalPluginRepository):
self.local_repository = local_repository

-def run(self, boefje_meta: BoefjeMeta, environment: Dict[str, str]) -> Tuple[BoefjeMeta, Union[str, bytes]]:
+def run(self, boefje_meta: BoefjeMeta, environment: Dict[str, str]) -> List[Tuple[set, Union[bytes, str]]]:
logger.info("Running local boefje plugin")

boefjes = self.local_repository.resolve_boefjes()
4 changes: 2 additions & 2 deletions boefjes/plugins/kat_snyk/requirements.txt
@@ -1,2 +1,2 @@
-requests
-beautifulsoup4
+requests==2.28.1
+beautifulsoup4==4.11.1
59 changes: 59 additions & 0 deletions boefjes/plugins/kat_webpage_analysis/check_images/normalize.py
@@ -0,0 +1,59 @@
from typing import Union, Iterator
from octopoes.models import OOI, Reference

from boefjes.job_models import NormalizerMeta
from octopoes.models.ooi.findings import KATFindingType, Finding
from octopoes.models.ooi.web import ImageMetadata

from io import BytesIO

from PIL import Image, UnidentifiedImageError
from PIL.ExifTags import TAGS


def run(normalizer_meta: NormalizerMeta, raw: Union[bytes, str]) -> Iterator[OOI]:
# fetch a reference to the original resource this image was downloaded from
resource = Reference.from_str(normalizer_meta.raw_data.boefje_meta.input_ooi)
image = Image.open(BytesIO(raw))
image.MAX_IMAGE_PIXELS = 7680 * 4320 # 8K pixels for now

try:
image_info = {
"size": image.size,
"height": image.height,
"width": image.width,
"format": image.format,
"mode": image.mode,
"is_animated": getattr(image, "is_animated", False),
"frames": getattr(image, "n_frames", 1),
}
exif_data = image.getexif()

for tag_id in exif_data:
# human readable tag name
tag = TAGS.get(tag_id, tag_id)
tag_data = exif_data.get(tag_id)

if isinstance(tag_data, bytes):
tag_data = tag_data.decode()

image_info[tag] = tag_data

yield ImageMetadata(resource=resource, image_info=image_info)
except UnidentifiedImageError:
kat_number = "BrokenImage"
kat_ooi = KATFindingType(id=kat_number)
yield Finding(
finding_type=kat_ooi.reference,
ooi=resource,
description="Image is not recognized, possibly served with broken mime-type.",
)

except Image.DecompressionBombWarning:
kat_number = "DecompressionBomb"
kat_ooi = KATFindingType(id=kat_number)
yield Finding(
finding_type=kat_ooi.reference,
ooi=resource,
description="Image ended up bigger than %d Pixels, possible decompression Bomb" % image.MAX_IMAGE_PIXELS,
)
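For orientation, the image_info dict built by this normalizer can be reproduced outside the job runner. The sketch below is not part of the PR; it feeds a synthetic in-memory JPEG (a stand-in for a downloaded body) through the same Pillow calls:

from io import BytesIO

from PIL import Image
from PIL.ExifTags import TAGS

# create a tiny test image and round-trip it through bytes, as a boefje would deliver it
buffer = BytesIO()
Image.new("RGB", (4, 4), color="red").save(buffer, format="JPEG")
raw = buffer.getvalue()

image = Image.open(BytesIO(raw))
image_info = {
    "size": image.size,
    "height": image.height,
    "width": image.width,
    "format": image.format,
    "mode": image.mode,
    "is_animated": getattr(image, "is_animated", False),
    "frames": getattr(image, "n_frames", 1),
}
for tag_id, tag_data in image.getexif().items():
    tag = TAGS.get(tag_id, tag_id)  # human-readable tag name where known
    image_info[tag] = tag_data.decode() if isinstance(tag_data, bytes) else tag_data

print(image_info)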
@@ -0,0 +1,9 @@
{
"id": "kat_check_images",
"consumes": [
"image/jpeg", "image/jpg", "image/gif", "image/png", "image/bpm", "image/ico"
],
"produces": [
"ImageMetadata"
]
}
@@ -0,0 +1 @@
Pillow==9.4.0
@@ -0,0 +1,32 @@
from typing import Union, Iterator
from urllib.parse import urljoin

from bs4 import BeautifulSoup

import validators

from octopoes.models import OOI
from octopoes.models.ooi.network import Network
from octopoes.models.ooi.web import URL

from boefjes.job_models import NormalizerMeta


def run(normalizer_meta: NormalizerMeta, raw: Union[bytes, str]) -> Iterator[OOI]:
soup = BeautifulSoup(raw, "html.parser")
images = set([img["src"] for img in soup.find_all("img") if hasattr(img, "src")])

network_name = normalizer_meta.raw_data.boefje_meta.arguments["input"]["website"]["hostname"]["network"]["name"]
host = normalizer_meta.raw_data.boefje_meta.arguments["input"]["website"]["hostname"]["name"]
service = normalizer_meta.raw_data.boefje_meta.arguments["input"]["website"]["ip_service"]["service"]["name"]

url = f"{service}://{host}/"

for img in images:
if not validators.url(img):
img = urljoin(url, img)

yield URL(
network=Network(name=network_name).reference,
raw=img,
)
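The URL-joining step can be checked in isolation. The sketch below is not part of the PR and uses a made-up HTML snippet and base URL; it guards with img.get("src") so tags without a src attribute are skipped, whereas the PR filters on the tag object itself:

from urllib.parse import urljoin

from bs4 import BeautifulSoup
import validators

# made-up page body and base URL, standing in for the boefje output and website arguments
html = b"<html><body><img src='/logo.png'><img src='https://cdn.example.org/a.gif'><img alt='no src'></body></html>"
base_url = "https://mispo.es/"

soup = BeautifulSoup(html, "html.parser")
# img.get("src") skips tags that have no src attribute
sources = {img.get("src") for img in soup.find_all("img") if img.get("src")}

for src in sources:
    absolute = src if validators.url(src) else urljoin(base_url, src)
    print(absolute)  # https://mispo.es/logo.png and https://cdn.example.org/a.gif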
@@ -0,0 +1,9 @@
{
"id": "kat_find_images_in_html",
"consumes": [
"text/html"
],
"produces": [
"HTTPResource"
]
}
@@ -0,0 +1,2 @@
beautifulsoup4==4.11.1
validators==0.20.0
18 changes: 18 additions & 0 deletions boefjes/plugins/kat_webpage_analysis/headers/normalize.py
@@ -0,0 +1,18 @@
import json
from typing import Union, Iterator
from octopoes.models import OOI, Reference
from octopoes.models.ooi.web import HTTPHeader

from boefjes.job_models import NormalizerMeta


def run(normalizer_meta: NormalizerMeta, raw: Union[bytes, str]) -> Iterator[OOI]:
# fetch a reference to the original resource where these headers were downloaded from
resource = Reference.from_str(normalizer_meta.raw_data.boefje_meta.input_ooi)

for key, value in json.loads(raw).items():
yield HTTPHeader(
resource=resource,
key=key,
value=value,
)
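The contract between main.py and this normalizer is a single JSON object mapping header names to values, stored under the openkat-http/headers mime type. A minimal sketch with sample headers:

import json

# sample headers, standing in for json.dumps(dict(response.headers)) as written by main.py
raw = json.dumps({"Content-Type": "text/html; charset=utf-8", "Server": "nginx"})

for key, value in json.loads(raw).items():
    print(key, "->", value)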
@@ -1,7 +1,7 @@
{
"id": "kat_webpage_analysis_headers_normalize",
"consumes": [
"webpage-analysis"
"openkat-http/headers"
],
"produces": [
"HTTPHeader"
75 changes: 47 additions & 28 deletions boefjes/plugins/kat_webpage_analysis/main.py
@@ -2,26 +2,22 @@
from typing import Tuple, Union, List
from boefjes.job_models import BoefjeMeta

from os import getenv
import requests
from requests import Session

from urllib.parse import urlparse, urlunsplit
from forcediphttpsadapter.adapters import ForcedIPHTTPSAdapter


def run(boefje_meta: BoefjeMeta) -> List[Tuple[set, Union[bytes, str]]]:
input_ = boefje_meta.arguments["input"]
useragent = getenv("useragent", default="OpenKAT")

port = f":{input_['web_url']['port']}"
netloc = (
input_["web_url"]["netloc"]["address"]
if "address" in input_["web_url"]["netloc"]
else input_["web_url"]["netloc"]["name"]
)

url = f"{input_['web_url']['scheme']}://{netloc}{port}{input_['web_url']['path']}"
uri = get_uri(input_)
ip = input_["website"]["ip_service"]["ip_port"]["address"]["address"]

# Code from https://github.com/Roadmaster/forcediphttpsadapter/blob/master/example.py
uri = url
url_parts = urlparse(uri)
hostname = url_parts.netloc
session = requests.Session()
@@ -31,8 +27,7 @@ def run(boefje_meta: BoefjeMeta) -> List[Tuple[set, Union[bytes, str]]]:
base_url = urlunsplit((url_parts.scheme, url_parts.netloc, "", "", ""))
session.mount(base_url, ForcedIPHTTPSAdapter(dest_ip=ip))
else:
# Fall back to old hack-ip-into-url behavior, for either
# https with no adapter, or http.
# Fall back to old hack-ip-into-url behavior, for either https with no adapter, or http.
if ip:
url_parts = url_parts._replace(netloc=ip)
uri = urlunsplit(
@@ -45,21 +40,45 @@ def run(boefje_meta: BoefjeMeta) -> List[Tuple[set, Union[bytes, str]]]:
]
)

body_mimetypes = {"openkat-http/body"}
try:
response = session.get(
uri,
headers={"Host": hostname, "Accept": "application/json"},
verify=False,
allow_redirects=False,
)
result = {
# "content": response.content,
"cookies": response.cookies.get_dict(),
"headers": dict(response.headers),
"code": response.status_code,
"text": response.text,
}
except requests.exceptions.ConnectionError:
result = {}

return [(set(), json.dumps(result))]
response = do_request(hostname, session, uri, useragent)
except requests.exceptions.RequestException as request_error:
return [({"openkat-http/error"}, str(request_error))]

if "content-type" in response.headers:
content_type = response.headers.get("content-type")
body_mimetypes.add(content_type)

# Pick up the content type for the body from the server and split away encodings to make normalization easier
content_type = content_type.split(";")
body_mimetypes.add(content_type[0])

return [
({"openkat-http/full"}, f"{response.headers}\n\n{response.content}"),
({"openkat-http/headers"}, json.dumps(dict(response.headers))),
(body_mimetypes, response.content),
]


def do_request(hostname: str, session: Session, uri: str, useragent: str):
response = session.get(
uri,
headers={"Host": hostname, "User-Agent": useragent},
verify=False,
allow_redirects=False,
)

return response


def get_uri(input_: dict) -> str:
port = f":{input_['web_url']['port']}"
netloc = (
input_["web_url"]["netloc"]["address"]
if "address" in input_["web_url"]["netloc"]
else input_["web_url"]["netloc"]["name"]
)
uri = f"{input_['web_url']['scheme']}://{netloc}{port}{input_['web_url']['path']}"

return uri
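For orientation, the multipart shape that run() now returns, a list of (mime-type set, payload) tuples, can be mocked up with sample values; this is not a real request, and the body entry is tagged with both the raw content type and the parameter-stripped one so a normalizer such as kat_find_images_in_html can match on text/html:

import json

# sample response data standing in for a real requests.Response
headers = {"Content-Type": "text/html; charset=utf-8", "Server": "nginx"}
content = b"<html><body><img src='/logo.png'></body></html>"

body_mimetypes = {"openkat-http/body"}
content_type = headers.get("Content-Type")
if content_type:
    body_mimetypes.add(content_type)
    body_mimetypes.add(content_type.split(";")[0])  # strip "; charset=..." so normalizers can match the bare type

outputs = [
    ({"openkat-http/full"}, f"{headers}\n\n{content}"),
    ({"openkat-http/headers"}, json.dumps(headers)),
    (body_mimetypes, content),
]
print(outputs)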
16 changes: 0 additions & 16 deletions boefjes/plugins/kat_webpage_analysis/normalize.py

This file was deleted.

4 changes: 4 additions & 0 deletions tests/__init__.py
@@ -0,0 +1,4 @@
import os
from pathlib import Path

os.chdir(Path(__file__).parent.parent)
65 changes: 65 additions & 0 deletions tests/examples/body-normalize.json
@@ -0,0 +1,65 @@
{
"id": "dns-lookup-job",
"raw_data": {
"id": "webpage-analysis-job",
"boefje_meta": {
"id": "webpage-analysis-job",
"boefje": {
"id": "webpage-analysis"
},
"organization": "_dev",
"input_ooi": "HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/",
"arguments": {
"input": {
"object_type": "HTTPResource",
"scan_profile": "reference=Reference('HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/') level=4 scan_profile_type='inherited'",
"primary_key": "HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/",
"website": {
"ip_service": {
"ip_port": {
"address": {
"network": {
"name": "internet"
},
"address": "134.209.85.72"
},
"protocol": "tcp",
"port": "443"
},
"service": {
"name": "https"
}
},
"hostname": {
"network": {
"name": "internet"
},
"name": "mispo.es"
}
},
"web_url": {
"scheme": "https",
"netloc": {
"network": {
"name": "internet"
},
"name": "mispo.es"
},
"port": "443",
"path": "/"
},
"redirects_to": "None"
}
}
},
"mime_types": [
{
"value": "boefje/dns-records"
}
]
},
"normalizer": {
"id": "kat_find_images_in_html",
"version": null
}
}
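To connect this fixture to the boefje, the web_url block above resolves to the request URL through the same logic as get_uri() in main.py; a small sketch:

input_ = {
    "web_url": {
        "scheme": "https",
        "netloc": {"network": {"name": "internet"}, "name": "mispo.es"},
        "port": "443",
        "path": "/",
    }
}

port = f":{input_['web_url']['port']}"
netloc = (
    input_["web_url"]["netloc"]["address"]
    if "address" in input_["web_url"]["netloc"]
    else input_["web_url"]["netloc"]["name"]
)
uri = f"{input_['web_url']['scheme']}://{netloc}{port}{input_['web_url']['path']}"
print(uri)  # https://mispo.es:443/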