Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SH Runner False Negative #14

Merged
merged 2 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 73 additions & 11 deletions gatox/workflow_parser/components/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,13 @@
from gatox.workflow_parser.components.step import Step
from gatox.workflow_parser.expression_parser import ExpressionParser
from gatox.workflow_parser.expression_evaluator import ExpressionEvaluator
from gatox.configuration.configuration_manager import ConfigurationManager

class Job():
"""Wrapper class for a Github Actions workflow job.
"""
LARGER_RUNNER_REGEX_LIST = re.compile(
r'(windows|ubuntu)-(22.04|20.04|2019-2022)-(4|8|16|32|64)core-(16|32|64|128|256)gb'
)
MATRIX_KEY_EXTRACTION_REGEX = re.compile(
r'{{\s*matrix\.([\w-]+)\s*}}'
)
LARGER_RUNNER_REGEX_LIST = re.compile(r'(windows|ubuntu)-(22.04|20.04|2019-2022)-(4|8|16|32|64)core-(16|32|64|128|256)gb')
MATRIX_KEY_EXTRACTION_REGEX = re.compile(r'{{\s*matrix\.([\w-]+)\s*}}')

EVALUATOR = ExpressionEvaluator()

Expand Down Expand Up @@ -109,16 +106,66 @@ def evaluateIf(self):

return self.if_condition

def __process_runner(self, runs_on):
    """Decide whether a literal ``runs-on`` value points at a self-hosted runner.

    A value counts as self-hosted only when none of its labels is a known
    GitHub-hosted label (``WORKFLOW_PARSING['GITHUB_HOSTED_LABELS']``) and
    none matches ``LARGER_RUNNER_REGEX_LIST``.

    Args:
        runs_on: The job's ``runs-on`` value, either a single label (str)
            or a list of labels.

    Returns:
        bool: True if the job might run on a self-hosted runner, False
        otherwise (including for values that are neither str nor list).
    """
    if isinstance(runs_on, str):
        labels = [runs_on]
    elif isinstance(runs_on, list):
        labels = runs_on
    else:
        # Unsupported shape; report "not self-hosted" explicitly instead
        # of falling off the end of the function and returning None.
        return False

    # Look the label set up once rather than once per label.
    hosted_labels = ConfigurationManager().WORKFLOW_PARSING['GITHUB_HOSTED_LABELS']

    for label in labels:
        # A single GitHub-hosted (or larger-runner) label is enough to
        # rule the job out.
        if label in hosted_labels or self.LARGER_RUNNER_REGEX_LIST.match(label):
            return False
    return True

def __process_matrix(self, runs_on):
    """Process the case where the runner is specified via the job matrix.

    The matrix key is extracted from ``runs_on`` with
    ``MATRIX_KEY_EXTRACTION_REGEX``; its candidate values are then read
    either directly from ``strategy.matrix[<key>]`` or from the entries of
    ``strategy.matrix.include`` that define the key.

    Args:
        runs_on: The job's ``runs-on`` string containing a
            ``{{ matrix.<key> }}`` reference.

    Returns:
        bool: True if at least one matrix value might select a self-hosted
        runner, False otherwise (no matrix reference, no static matrix to
        inspect, or every value is GitHub-hosted).
    """
    matrix_match = self.MATRIX_KEY_EXTRACTION_REGEX.search(runs_on)
    if not matrix_match:
        return False
    matrix_key = matrix_match.group(1)

    strategy = self.job_data['strategy'] if 'strategy' in self.job_data else None
    matrix = strategy['matrix'] if isinstance(strategy, dict) and 'matrix' in strategy else None
    if not isinstance(matrix, dict):
        # Either there is no matrix, or it is not a mapping (for example
        # an expression string); indexing it by key would raise.
        return False

    # Use the extracted key to collect the candidate runner values.
    if matrix_key in matrix:
        os_list = matrix[matrix_key]
    elif 'include' in matrix:
        os_list = [
            inclusion[matrix_key]
            for inclusion in matrix['include']
            if isinstance(inclusion, dict) and matrix_key in inclusion
        ]
    else:
        return False

    if not isinstance(os_list, list):
        # A scalar value is one candidate; do not iterate a string
        # character by character.
        os_list = [os_list]

    hosted_labels = ConfigurationManager().WORKFLOW_PARSING['GITHUB_HOSTED_LABELS']

    # We only need ONE value to be self-hosted; the others can be
    # GitHub-hosted.
    for key in os_list:
        if isinstance(key, list):
            # A list of labels is reported as self-hosted without
            # inspecting the individual labels.
            return True
        if isinstance(key, str) \
                and key not in hosted_labels \
                and not self.LARGER_RUNNER_REGEX_LIST.match(key):
            return True
    return False

def gated(self):
    """Check if the workflow is gated.

    Returns:
        A truthy value when ``has_gate`` is set, or when the evaluated
        ``if`` condition starts with ``"RESTRICTED"``; a falsy value
        otherwise.
    """
    if self.has_gate:
        return self.has_gate

    # Evaluate the condition once and reuse the result for both the
    # emptiness check and the prefix check.
    condition = self.evaluateIf()
    return condition and condition.startswith("RESTRICTED")

# NOTE(review): this zero-argument stub has the same name as the
# ``__process_runner(self, runs_on)`` method shown earlier in this diff.
# It looks like the pre-change placeholder that this change removes --
# confirm it is absent from the merged file, since a later definition with
# the same name would replace the earlier one.
def __process_runner(self):
"""
Placeholder for runner processing; always raises NotImplementedError.
"""
raise NotImplementedError("Not Implemented!")

def getJobDependencies(self):
"""Returns Job objects for jobs that must complete
Expand All @@ -131,3 +178,18 @@ def isCaller(self):
references a reusable workflow that runs on workflow_call)
"""
return self.caller

def isSelfHosted(self):
    """Return whether the job might run on a self-hosted runner.

    Returns:
        bool: True if ``runs-on`` contains ``self-hosted``, or if the
        matrix / label analysis reports a runner that is not
        GitHub-hosted. False otherwise, including when the job has no
        ``runs-on`` value at all.
    """
    if 'runs-on' not in self.job_data:
        return False

    runs_on = self.job_data['runs-on']
    if runs_on is None:
        # An empty ``runs-on:`` key gives nothing to inspect, and the
        # membership tests below would raise on None.
        return False

    # Easy: the label is spelled out (substring of a str, or an element
    # of a label list).
    if 'self-hosted' in runs_on:
        return True
    # Process a matrix job.
    if 'matrix.' in runs_on:
        return bool(self.__process_matrix(runs_on))
    # Process standard label(s).
    return bool(self.__process_runner(runs_on))
68 changes: 8 additions & 60 deletions gatox/workflow_parser/workflow_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ class WorkflowParser():
performing any API queries to augment the analysis.
"""

LARGER_RUNNER_REGEX_LIST = re.compile(r'(windows|ubuntu)-(22.04|20.04|2019-2022)-(4|8|16|32|64)core-(16|32|64|128|256)gb')
MATRIX_KEY_EXTRACTION_REGEX = re.compile(r'{{\s*matrix\.([\w-]+)\s*}}')

def __init__(self, workflow_wrapper: Workflow, non_default=None):
"""Initialize class with workflow file.

Expand All @@ -66,6 +63,7 @@ def __init__(self, workflow_wrapper: Workflow, non_default=None):
self.repo_name = workflow_wrapper.repo_name
self.wf_name = workflow_wrapper.workflow_name
self.callees = []
self.sh_callees = []
self.external_ref = False

if workflow_wrapper.special_path:
Expand Down Expand Up @@ -374,63 +372,13 @@ def self_hosted(self):
"""
sh_jobs = []

# Old Code
if not self.parsed_yml or 'jobs' not in self.parsed_yml or not self.parsed_yml['jobs']:
return sh_jobs

for jobname, job_details in self.parsed_yml['jobs'].items():
if 'runs-on' in job_details:
runs_on = job_details['runs-on']
if 'self-hosted' in runs_on:
# Clear cut
sh_jobs.append((jobname, job_details))
elif 'matrix.' in runs_on:
# We need to check each OS in the matrix strategy.
# Extract the matrix key from the variable
matrix_match = self.MATRIX_KEY_EXTRACTION_REGEX.search(runs_on)

if matrix_match:
matrix_key = matrix_match.group(1)
else:
continue
# Check if strategy exists in the yaml file
if 'strategy' in job_details and 'matrix' in job_details['strategy']:
matrix = job_details['strategy']['matrix']

# Use previously acquired key to retrieve list of OSes
if matrix_key in matrix:
os_list = matrix[matrix_key]
elif 'include' in matrix:
inclusions = matrix['include']
os_list = []
for inclusion in inclusions:
if matrix_key in inclusion:
os_list.append(inclusion[matrix_key])
else:
continue

# We only need ONE to be self hosted, others can be
# GitHub hosted
for key in os_list:
if type(key) == str:
if key not in ConfigurationManager().WORKFLOW_PARSING['GITHUB_HOSTED_LABELS'] \
and not self.LARGER_RUNNER_REGEX_LIST.match(key):
sh_jobs.append((jobname, job_details))
break
for job in self.jobs:
if job.isSelfHosted():
sh_jobs.append((job.job_name,job.job_data))
elif job.isCaller():
if job.external_caller:
self.sh_callees.append(job.uses)
else:
if type(runs_on) == list:
for label in runs_on:
if label in ConfigurationManager().WORKFLOW_PARSING['GITHUB_HOSTED_LABELS']:
break
if self.LARGER_RUNNER_REGEX_LIST.match(label):
break
else:
sh_jobs.append((jobname, job_details))
elif type(runs_on) == str:
if runs_on in ConfigurationManager().WORKFLOW_PARSING['GITHUB_HOSTED_LABELS']:
break
if self.LARGER_RUNNER_REGEX_LIST.match(runs_on):
break
sh_jobs.append((jobname, job_details))
self.sh_callees.append(job.uses.split('/')[-1])

return sh_jobs
45 changes: 44 additions & 1 deletion unit_test/test_workflow_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,42 @@
uses: actions/checkout@v4
"""

TEST_WF7 = """
name: build

on:
push:
branches: [ 'master' ]
pull_request:
branches: [ 'master' ]

concurrency:
group: ${{ github.ref }}-build
cancel-in-progress: true

jobs:
build:
strategy:
matrix:
profile: [ 'jdk17', 'jdk17-aarch64' ]
include:
- jdk_version: '17'
- profile: 'jdk17'
runs_on: ubuntu-latest
- profile: 'jdk17-aarch64'
runs_on: [ linux, ARM64 ]
fail-fast: false

runs-on: ${{ matrix.runs_on }}

steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
driver: docker

"""


def test_parse_workflow():

Expand Down Expand Up @@ -298,4 +334,11 @@ def test_check_pwn_request():
parser = WorkflowParser(workflow)

result = parser.check_pwn_request()
assert result['candidates']
assert result['candidates']

def test_check_sh_runnner():
    """TEST_WF7 takes its runner from a matrix ``include`` entry; the
    parser must report at least one self-hosted job for it.
    """
    wf = Workflow('unit_test', TEST_WF7, 'build.yml')
    sh_jobs = WorkflowParser(wf).self_hosted()

    assert len(sh_jobs) > 0
Loading