Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move from BCC to libbpf with CO-RE #6027

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,20 @@ steps:
path: /tmpfs
- name: dockersock
path: /var/run
- name: Run root-only unit tests
image: docker
commands:
- apk add --no-cache make
- cd /tmpfs/go/src/github.com/gravitational/teleport
- make -C build.assets test-root
environment:
GOCACHE: /tmpfs/go/cache
GOPATH: /tmpfs/go
volumes:
- name: tmpfs
path: /tmpfs
- name: dockersock
path: /var/run
- name: Run root-only integration tests
image: docker
commands:
Expand Down
79 changes: 69 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,31 @@ BPF_MESSAGE := "without BPF support"

# BPF cannot currently be compiled on ARMv7 due to this bug: https://github.com/iovisor/gobpf/issues/272
# ARM64 builds are not affected.
with_bpf := no
ifneq ("$(ARCH)","arm")
ifneq ("$(wildcard /usr/include/bcc/libbpf.h)","")
ifneq ("$(wildcard /usr/include/bpf/libbpf.h)","")
with_bpf := yes
BPF_TAG := bpf
BPF_MESSAGE := "with BPF support"
CLANG ?= $(shell which clang || which clang-10)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CO-RE requires Clang 10+ so that's what buildbox installs. Here we look for regular clang or clang-10. This is not ideal for folks building from source outside of buildbox. However it would require a more elaborate search to look for a working clang.
Same logic is also used below for clang-format and llvm-strip

CLANG_FORMAT ?= $(shell which clang-format || which clang-format-10)
LLVM_STRIP ?= $(shell which llvm-strip || which llvm-strip-10)
KERNEL_ARCH := $(shell uname -m | sed 's/x86_64/x86/')
INCLUDES :=
BPF_BUILDDIR := lib/bpf/bytecode

# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
CLANG_BPF_SYS_INCLUDES = $(shell $(CLANG) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')

CGOFLAG = CGO_ENABLED=1 CGO_LDFLAGS="-Wl,-Bstatic -lbpf -Wl,-Bdynamic"
endif
endif

Expand Down Expand Up @@ -120,13 +141,39 @@ $(BUILDDIR)/tctl:
GOOS=$(OS) GOARCH=$(ARCH) $(CGOFLAG) go build -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" -o $(BUILDDIR)/tctl $(BUILDFLAGS) ./tool/tctl

.PHONY: $(BUILDDIR)/teleport
$(BUILDDIR)/teleport: ensure-webassets
$(BUILDDIR)/teleport: ensure-webassets bpf-bytecode
GOOS=$(OS) GOARCH=$(ARCH) $(CGOFLAG) go build -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG) $(WEBASSETS_TAG)" -o $(BUILDDIR)/teleport $(BUILDFLAGS) ./tool/teleport

.PHONY: $(BUILDDIR)/tsh
$(BUILDDIR)/tsh:
GOOS=$(OS) GOARCH=$(ARCH) $(CGOFLAG) go build -tags "$(PAM_TAG) $(FIPS_TAG)" -o $(BUILDDIR)/tsh $(BUILDFLAGS) ./tool/tsh

#
# BPF support (IF ENABLED)
# Requires a recent version of clang and libbpf installed.
#
ifeq ("$(with_bpf)","yes")
$(BPF_BUILDDIR):
mkdir -p $(BPF_BUILDDIR)

# Build BPF code
$(BPF_BUILDDIR)/%.bpf.o: bpf/%.bpf.c $(wildcard bpf/*.h) | $(BPF_BUILDDIR)
$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(KERNEL_ARCH) $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) -c $(filter %.c,$^) -o $@
$(LLVM_STRIP) -g $@ # strip useless DWARF info

.PHONY: bpf-bytecode
bpf-bytecode: $(BPF_BUILDDIR)/command.bpf.o $(BPF_BUILDDIR)/disk.bpf.o $(BPF_BUILDDIR)/network.bpf.o $(BPF_BUILDDIR)/counter_test.bpf.o

# Generate vmlinux.h based on the installed kernel
.PHONY: update-vmlinux-h
update-vmlinux-h:
bpftool btf dump file /sys/kernel/btf/vmlinux format c >bpf/vmlinux.h

else
.PHONY: bpf-bytecode
bpf-bytecode:
endif

#
# make full - Builds Teleport binaries with the built-in web assets and
# places them into $(BUILDDIR). On Windows, this target is skipped because
Expand Down Expand Up @@ -154,6 +201,7 @@ endif
clean:
@echo "---> Cleaning up OSS build artifacts."
rm -rf $(BUILDDIR)
rm -rf $(BPF_BUILDDIR)
-go clean -cache
rm -rf $(GOPKGDIR)
rm -rf teleport
Expand Down Expand Up @@ -272,13 +320,24 @@ test: test-sh test-api test-go
# Chaos tests have high concurrency, run without race detector and have TestChaos prefix.
#
.PHONY: test-go
test-go: ensure-webassets
test-go: ensure-webassets bpf-bytecode
test-go: FLAGS ?= '-race'
test-go: PACKAGES := $(shell go list ./... | grep -v integration)
test-go: CHAOS_FOLDERS := $(shell find . -type f -name '*chaos*.go' -not -path '*/vendor/*' | xargs dirname | uniq)
test-go: $(VERSRC)
go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS) $(ADDFLAGS)
go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" -test.run=TestChaos $(CHAOS_FOLDERS) -cover
$(CGOFLAG) go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS) $(ADDFLAGS)
$(CGOFLAG) go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" -test.run=TestChaos $(CHAOS_FOLDERS) -cover

#
# Runs all Go tests except integration and chaos, called by CI/CD.
#
UNIT_ROOT_REGEX := ^TestRoot
.PHONY: test-go-root
test-go-root: ensure-webassets bpf-bytecode
test-go-root: FLAGS ?= '-race'
test-go-root: PACKAGES := $(shell go list ./... | grep -v integration)
test-go-root: $(VERSRC)
$(CGOFLAG) go test -run "$(UNIT_ROOT_REGEX)" -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS) $(ADDFLAGS)

# Runs API Go tests. These have to be run separately as the package name is different.
#
Expand All @@ -287,7 +346,7 @@ test-api:
test-api: FLAGS ?= '-race'
test-api: PACKAGES := $(shell cd api && go list ./...)
test-api: $(VERSRC)
GOMODCACHE=/tmp go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS) $(ADDFLAGS)
GOMODCACHE=/tmp $(CGOFLAG) go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS) $(ADDFLAGS)

# Find and run all shell script unit tests (using https://github.com/bats-core/bats-core)
.PHONY: test-sh
Expand All @@ -308,7 +367,7 @@ integration: FLAGS ?= -v -race
integration: PACKAGES := $(shell go list ./... | grep integration)
integration:
@echo KUBECONFIG is: $(KUBECONFIG), TEST_KUBE: $(TEST_KUBE)
go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS)
$(CGOFLAG) go test -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG)" $(PACKAGES) $(FLAGS)

#
# Integration tests which need to be run as root in order to complete successfully
Expand All @@ -319,7 +378,7 @@ INTEGRATION_ROOT_REGEX := ^TestRoot
integration-root: FLAGS ?= -v -race
integration-root: PACKAGES := $(shell go list ./... | grep integration)
integration-root:
go test -run "$(INTEGRATION_ROOT_REGEX)" $(PACKAGES) $(FLAGS)
$(CGOFLAG) go test -run "$(INTEGRATION_ROOT_REGEX)" $(PACKAGES) $(FLAGS)

#
# Lint the Go code.
Expand Down Expand Up @@ -461,8 +520,8 @@ grpc:
buildbox-grpc:
# standard GRPC output
echo $$PROTO_INCLUDE
find lib/ -iname *.proto | xargs clang-format -i -style='{ColumnLimit: 100, IndentWidth: 4, Language: Proto}'
find api/ -iname *.proto | xargs clang-format -i -style='{ColumnLimit: 100, IndentWidth: 4, Language: Proto}'
find lib/ -iname *.proto | xargs $(CLANG_FORMAT) -i -style='{ColumnLimit: 100, IndentWidth: 4, Language: Proto}'
find api/ -iname *.proto | xargs $(CLANG_FORMAT) -i -style='{ColumnLimit: 100, IndentWidth: 4, Language: Proto}'

protoc -I=.:$$PROTO_INCLUDE \
--proto_path=api/types/events \
Expand Down
2 changes: 1 addition & 1 deletion api/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const (

// EnhancedRecordingMinKernel is the minimum kernel version for the enhanced
// recording feature.
EnhancedRecordingMinKernel = "4.18.0"
EnhancedRecordingMinKernel = "5.8.0"

// EnhancedRecordingCommand is a role option that implies command events are
// captured.
Expand Down
156 changes: 156 additions & 0 deletions bpf/command.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
Copyright 2021 Gravitational, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#include "vmlinux.h"
#include <bpf/bpf_helpers.h> /* most used helpers: SEC, __always_inline, etc */
#include <bpf/bpf_core_read.h> /* for BPF CO-RE helpers */
#include <bpf/bpf_tracing.h> /* for getting kprobe arguments */

#include "helpers.h"

#define ARGSIZE 128
#define MAXARGS 20

// Size, in bytes, of the ring buffer used to report
// audit events to userspace. This is the default,
// the userspace can adjust this value based on config.
#define EVENTS_BUF_SIZE (4096*8)

char LICENSE[] SEC("license") = "Dual BSD/GPL";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be Apache License Version 2?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it has the same license requirements as a kernel module: https://elixir.bootlin.com/linux/v5.8/source/include/linux/module.h#L182

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@klizhentas Is Dual BSD/GPL okay?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you tell me more about how we distribute it? Are we using dual licensed components linking from here and deriving license of this file from those? How are we going to distribute the program?


enum event_type {
EVENT_ARG,
EVENT_RET,
};

struct data_t {
// pid as in the userspace term (i.e. task->tgid in kernel).
u64 pid;
// ppid is the userspace term (i.e task->real_parent->tgid in kernel).
u64 ppid;
char comm[TASK_COMM_LEN];
enum event_type type;
char argv[ARGSIZE];
int retval;
u64 cgroup;
};

BPF_RING_BUF(execve_events, EVENTS_BUF_SIZE);

BPF_COUNTER(lost);

static int __submit_arg(void *ptr, struct data_t *data)
{
bpf_probe_read_user(data->argv, sizeof(data->argv), ptr);
if (bpf_ringbuf_output(&execve_events, data, sizeof(struct data_t), 0) != 0)
INCR_COUNTER(lost);
return 1;
}

static int submit_arg(void *ptr, struct data_t *data)
{
const char *argp = 0;
bpf_probe_read_user(&argp, sizeof(argp), ptr);
if (argp) {
return __submit_arg((void *)(argp), data);
}
return 0;
}

static int enter_execve(const char *filename,
const char *const *argv,
const char *const *envp)
{
// create data here and pass to submit_arg to save stack space (#555)
struct data_t data = {};
struct task_struct *task;

data.pid = bpf_get_current_pid_tgid() >> 32;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is about how we report PID and TGID.

I'm looking at execsnoop and others and they now report both the PID and TGID instead of just reporting TGID like we were doing before.

@eyakubovich @klizhentas I'm thinking it's probably better for visibility to have both and PID and TGID be reported and switch the PID value to true PID in our events. What do you two think?

https://github.com/iovisor/bcc/blob/master/libbpf-tools/execsnoop.bpf.c#L46-L57

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should just report TGID (userspace PID). That's what an average user would expect. Unless you're debugging something, which thread performed an action is usually not interesting.

data.cgroup = bpf_get_current_cgroup_id();

task = (struct task_struct *)bpf_get_current_task();
data.ppid = BPF_CORE_READ(task, real_parent, tgid);

bpf_get_current_comm(&data.comm, sizeof(data.comm));
data.type = EVENT_ARG;

__submit_arg((void *)filename, &data);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you do a little refactoring around __submit_arg and submit_arg and some comments around what the difference between is and why they're split. Will help future readers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and elsewhere I wasn't trying to do much refactoring or improving. Mostly just enough to get it ported to libbpf reliably. Technically with a ring buffer, this split is no longer needed. We can use bpf_ring_reserve to allocate the memory in the ring buffer, write out all the args there and then use bpf_ring_submit to send it. The only downside is that it'll require a double scan -- first to compute the strlen of all the args and then to copy them. But that's no worse than it is now -- it first copies it into the data_t and then into the ring buffer.


// skip first arg, as we submitted filename
for (int i = 1; i < MAXARGS; i++) {
if (submit_arg((void *)&argv[i], &data) == 0)
goto out;
}

// handle truncated argument list
char ellipsis[] = "...";
__submit_arg((void *)ellipsis, &data);
out:
return 0;
}

static int exit_execve(int ret)
{
struct data_t data = {};
struct task_struct *task;

data.pid = bpf_get_current_pid_tgid() >> 32;
data.cgroup = bpf_get_current_cgroup_id();

task = (struct task_struct *)bpf_get_current_task();
data.ppid = BPF_CORE_READ(task, real_parent, tgid);

bpf_get_current_comm(&data.comm, sizeof(data.comm));
data.type = EVENT_RET;
data.retval = ret;

if (bpf_ringbuf_output(&execve_events, &data, sizeof(data), 0) != 0)
INCR_COUNTER(lost);

return 0;
}

SEC("tp/syscalls/sys_execve")
int tracepoint__syscalls__sys_enter_execve(struct trace_event_raw_sys_enter *tp)
{
const char *filename = (const char *)tp->args[0];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dumb question, but here (and elsewhere) we're guaranteed to always be able to index into args like this right? No need to do a length check here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe so (unless there's a kernel bug). The length of the args will match the number of args to the syscall.

const char *const *argv = (const char *const *)tp->args[1];
const char *const *envp = (const char *const *)tp->args[2];

return enter_execve(filename, argv, envp);
}

SEC("tp/syscalls/sys_exit_execve")
int tracepoint__syscalls__sys_exit_execve(struct trace_event_raw_sys_exit *tp)
{
return exit_execve(tp->ret);
}

SEC("tp/syscalls/sys_execveat")
int tracepoint__syscalls__sys_enter_execveat(struct trace_event_raw_sys_enter *tp)
{
const char *filename = (const char *)tp->args[1];
const char *const *argv = (const char *const *)tp->args[2];
const char *const *envp = (const char *const *)tp->args[3];

return enter_execve(filename, argv, envp);
}

SEC("tp/syscalls/sys_exit_execveat")
int tracepoint__syscalls__sys_exit_execveat(struct trace_event_raw_sys_exit *tp)
{
return exit_execve(tp->ret);
}
45 changes: 45 additions & 0 deletions bpf/counter_test.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
Copyright 2021 Gravitational, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#include "vmlinux.h"
#include <bpf/bpf_helpers.h> /* most used helpers: SEC, __always_inline, etc */
#include <bpf/bpf_core_read.h> /* for BPF CO-RE helpers */
#include <bpf/bpf_tracing.h> /* for getting kprobe arguments */

#include "helpers.h"

char LICENSE[] SEC("license") = "Dual BSD/GPL";

BPF_COUNTER(test_counter);

SEC("tp/syscalls/sys_close")
int tracepoint__syscalls__sys_enter_close(struct trace_event_raw_sys_enter *tp)
{
int fd = (int)tp->args[0];

// Special bad FD we trigger upon
if (fd == 1234) {
INCR_COUNTER(test_counter);
}

return 0;
}

SEC("tp/syscalls/sys_exit_close")
int tracepoint__syscalls__sys_exit_close(struct trace_event_raw_sys_exit *tp)
{
return 0;
}
Loading