diff --git a/internal/sys/mapflags_string.go b/internal/sys/mapflags_string.go index c80744ae0..d9fe21722 100644 --- a/internal/sys/mapflags_string.go +++ b/internal/sys/mapflags_string.go @@ -21,24 +21,28 @@ func _() { _ = x[BPF_F_MMAPABLE-1024] _ = x[BPF_F_PRESERVE_ELEMS-2048] _ = x[BPF_F_INNER_MAP-4096] + _ = x[BPF_F_LINK-8192] + _ = x[BPF_F_PATH_FD-16384] } -const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAP" +const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAPBPF_F_LINKBPF_F_PATH_FD" var _MapFlags_map = map[MapFlags]string{ - 1: _MapFlags_name[0:17], - 2: _MapFlags_name[17:36], - 4: _MapFlags_name[36:51], - 8: _MapFlags_name[51:63], - 16: _MapFlags_name[63:75], - 32: _MapFlags_name[75:95], - 64: _MapFlags_name[95:110], - 128: _MapFlags_name[110:127], - 256: _MapFlags_name[127:144], - 512: _MapFlags_name[144:155], - 1024: _MapFlags_name[155:169], - 2048: _MapFlags_name[169:189], - 4096: _MapFlags_name[189:204], + 1: _MapFlags_name[0:17], + 2: _MapFlags_name[17:36], + 4: _MapFlags_name[36:51], + 8: _MapFlags_name[51:63], + 16: _MapFlags_name[63:75], + 32: _MapFlags_name[75:95], + 64: _MapFlags_name[95:110], + 128: _MapFlags_name[110:127], + 256: _MapFlags_name[127:144], + 512: _MapFlags_name[144:155], + 1024: _MapFlags_name[155:169], + 2048: _MapFlags_name[169:189], + 4096: _MapFlags_name[189:204], + 8192: _MapFlags_name[204:214], + 16384: _MapFlags_name[214:227], } func (i MapFlags) String() string { diff --git a/internal/sys/syscall.go b/internal/sys/syscall.go index 088e82eea..b1d49b870 100644 --- a/internal/sys/syscall.go +++ b/internal/sys/syscall.go @@ -139,6 +139,17 @@ const ( BPF_F_MMAPABLE 
BPF_F_PRESERVE_ELEMS BPF_F_INNER_MAP + BPF_F_LINK + BPF_F_PATH_FD +) + +// Flags used by bpf_mprog. +const ( + BPF_F_REPLACE = 1 << (iota + 2) + BPF_F_BEFORE + BPF_F_AFTER + BPF_F_ID + BPF_F_LINK_MPROG = 1 << 13 // aka BPF_F_LINK ) // wrappedErrno wraps syscall.Errno to prevent direct comparisons with diff --git a/internal/unix/types_linux.go b/internal/unix/types_linux.go index 51ed7d059..bc6372401 100644 --- a/internal/unix/types_linux.go +++ b/internal/unix/types_linux.go @@ -25,6 +25,7 @@ const ( EACCES = linux.EACCES EILSEQ = linux.EILSEQ EOPNOTSUPP = linux.EOPNOTSUPP + ESTALE = linux.ESTALE ) const ( diff --git a/internal/unix/types_other.go b/internal/unix/types_other.go index 1760e9e79..3a0f79cd3 100644 --- a/internal/unix/types_other.go +++ b/internal/unix/types_other.go @@ -27,6 +27,7 @@ const ( EACCES EILSEQ EOPNOTSUPP + ESTALE ) // Constants are distinct to avoid breaking switch statements. diff --git a/link/anchor.go b/link/anchor.go new file mode 100644 index 000000000..983296d2e --- /dev/null +++ b/link/anchor.go @@ -0,0 +1,135 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +const anchorFlags = sys.BPF_F_REPLACE | + sys.BPF_F_BEFORE | + sys.BPF_F_AFTER | + sys.BPF_F_ID | + sys.BPF_F_LINK_MPROG + +// Anchor is a reference to a link or program. +// +// It is used to describe where an attachment or detachment should take place +// for link types which support multiple attachment. +type Anchor interface { + // anchor returns an fd or ID and a set of flags. + // + // By default fdOrID is taken to reference a program, but BPF_F_LINK_MPROG + // changes this to refer to a link instead. + // + // BPF_F_BEFORE, BPF_F_AFTER, BPF_F_REPLACE modify where a link or program + // is attached. The default behaviour if none of these flags is specified + // matches BPF_F_AFTER. 
+ anchor() (fdOrID, flags uint32, _ error) +} + +type firstAnchor struct{} + +func (firstAnchor) anchor() (fdOrID, flags uint32, _ error) { + return 0, sys.BPF_F_BEFORE, nil +} + +func First() Anchor { + return firstAnchor{} +} + +type lastAnchor struct{} + +func (lastAnchor) anchor() (fdOrID, flags uint32, _ error) { + return 0, sys.BPF_F_AFTER, nil +} + +func Last() Anchor { + return lastAnchor{} +} + +// BeforeLink is the position just in front of target. +func BeforeLink(target Link) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// AfterLink is the position just after target. +func AfterLink(target Link) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// BeforeLinkByID is the position just in front of target. +func BeforeLinkByID(target ID) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// AfterLinkByID is the position just after target. +func AfterLinkByID(target ID) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// BeforeProgram is the position just in front of target. +func BeforeProgram(target *ebpf.Program) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// AfterProgram is the position just after target. +func AfterProgram(target *ebpf.Program) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// ReplaceProgram replaces the target itself. +func ReplaceProgram(target *ebpf.Program) Anchor { + return anchor{target, sys.BPF_F_REPLACE} +} + +// BeforeProgramByID is the position just in front of target. +func BeforeProgramByID(target ebpf.ProgramID) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// AfterProgramByID is the position just after target. +func AfterProgramByID(target ebpf.ProgramID) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// ReplaceProgramByID replaces the target itself.
+func ReplaceProgramByID(target ebpf.ProgramID) Anchor { + return anchor{target, sys.BPF_F_REPLACE} +} + +type anchor struct { + target any + position uint32 +} + +func (ap anchor) anchor() (fdOrID, flags uint32, _ error) { + var typeFlag uint32 + switch target := ap.target.(type) { + case *ebpf.Program: + fd := target.FD() + if fd < 0 { + return 0, 0, sys.ErrClosedFd + } + fdOrID = uint32(fd) + typeFlag = 0 + case ebpf.ProgramID: + fdOrID = uint32(target) + typeFlag = sys.BPF_F_ID + case interface{ FD() int }: + fd := target.FD() + if fd < 0 { + return 0, 0, sys.ErrClosedFd + } + fdOrID = uint32(fd) + typeFlag = sys.BPF_F_LINK_MPROG + case ID: + fdOrID = uint32(target) + typeFlag = sys.BPF_F_LINK_MPROG | sys.BPF_F_ID + default: + return 0, 0, fmt.Errorf("invalid target %T", ap.target) + } + + return fdOrID, ap.position | typeFlag, nil +} diff --git a/link/cgroup.go b/link/cgroup.go index 58e85fe9d..79f3d2b7f 100644 --- a/link/cgroup.go +++ b/link/cgroup.go @@ -143,8 +143,7 @@ func (cg *progAttachCgroup) Update(prog *ebpf.Program) error { // Atomically replacing multiple programs requires at least // 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf // program in MULTI mode") - args.Flags |= uint32(flagReplace) - args.Replace = cg.current + args.Anchor = ReplaceProgram(cg.current) } if err := RawAttachProgram(args); err != nil { diff --git a/link/link.go b/link/link.go index 36acd6ee4..590ea3aec 100644 --- a/link/link.go +++ b/link/link.go @@ -98,6 +98,8 @@ func wrapRawLink(raw *RawLink) (_ Link, err error) { return &kprobeMultiLink{*raw}, nil case PerfEventType: return nil, fmt.Errorf("recovering perf event fd: %w", ErrNotSupported) + case TCXType: + return &tcxLink{*raw}, nil default: return raw, nil } @@ -132,6 +134,7 @@ type TracingInfo sys.TracingLinkInfo type CgroupInfo sys.CgroupLinkInfo type NetNsInfo sys.NetNsLinkInfo type XDPInfo sys.XDPLinkInfo +type TCXInfo sys.TcxLinkInfo // Tracing returns tracing type-specific link info. 
// @@ -315,6 +318,8 @@ func (l *RawLink) Info() (*Info, error) { case RawTracepointType, IterType, PerfEventType, KprobeMultiType: // Extra metadata not supported. + case TCXType: + extra = &TCXInfo{} default: return nil, fmt.Errorf("unknown link info type: %d", info.Type) } diff --git a/link/program.go b/link/program.go index 67d4f537c..d8a2a15f9 100644 --- a/link/program.go +++ b/link/program.go @@ -2,22 +2,27 @@ package link import ( "fmt" + "runtime" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) type RawAttachProgramOptions struct { - // File descriptor to attach to. This differs for each attach type. + // Target to query. This is usually a file descriptor but may refer to + // something else based on the attach type. Target int // Program to attach. Program *ebpf.Program - // Program to replace (cgroups). - Replace *ebpf.Program - // Attach must match the attach type of Program (and Replace). + // Attach must match the attach type of Program. Attach ebpf.AttachType - // Flags control the attach behaviour. This differs for each attach type. + // Attach relative to an anchor. Optional. + Anchor Anchor + // Flags control the attach behaviour. Specify an Anchor instead of + // F_LINK, F_ID, F_BEFORE, F_AFTER and F_REPLACE. Optional. Flags uint32 + // Only attach if the internal revision matches the given value. + ExpectedRevision uint64 } // RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH. @@ -25,45 +30,72 @@ type RawAttachProgramOptions struct { // You should use one of the higher level abstractions available in this // package if possible. 
func RawAttachProgram(opts RawAttachProgramOptions) error { - var replaceFd uint32 - if opts.Replace != nil { - replaceFd = uint32(opts.Replace.FD()) + if opts.Flags&anchorFlags != 0 { + return fmt.Errorf("disallowed flags: use Anchor to specify attach target") } attr := sys.ProgAttachAttr{ TargetFdOrIfindex: uint32(opts.Target), AttachBpfFd: uint32(opts.Program.FD()), - ReplaceBpfFd: replaceFd, AttachType: uint32(opts.Attach), AttachFlags: uint32(opts.Flags), + ExpectedRevision: opts.ExpectedRevision, + } + + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return fmt.Errorf("attach program: %w", err) + } + + if flags == sys.BPF_F_REPLACE { + // Use ReplaceBpfFd so that replacing a program works on old kernels; BPF_F_REPLACE itself is set below. + attr.ReplaceBpfFd = fdOrID + } else { + attr.RelativeFdOrId = fdOrID + } + attr.AttachFlags |= flags } if err := sys.ProgAttach(&attr); err != nil { if haveFeatErr := haveProgAttach(); haveFeatErr != nil { return haveFeatErr } - return fmt.Errorf("can't attach program: %w", err) + return fmt.Errorf("attach program: %w", err) } + runtime.KeepAlive(opts.Program) return nil } -type RawDetachProgramOptions struct { - Target int - Program *ebpf.Program - Attach ebpf.AttachType -} +type RawDetachProgramOptions RawAttachProgramOptions // RawDetachProgram is a low level wrapper around BPF_PROG_DETACH. // // You should use one of the higher level abstractions available in this // package if possible.
func RawDetachProgram(opts RawDetachProgramOptions) error { + if opts.Flags&anchorFlags != 0 { + return fmt.Errorf("disallowed flags: use Anchor to specify attach target") + } + attr := sys.ProgDetachAttr{ TargetFdOrIfindex: uint32(opts.Target), AttachBpfFd: uint32(opts.Program.FD()), AttachType: uint32(opts.Attach), + ExpectedRevision: opts.ExpectedRevision, } + + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return fmt.Errorf("detach program: %w", err) + } + + attr.RelativeFdOrId = fdOrID + attr.AttachFlags |= flags + } + if err := sys.ProgDetach(&attr); err != nil { if haveFeatErr := haveProgAttach(); haveFeatErr != nil { return haveFeatErr diff --git a/link/program_test.go b/link/program_test.go index 0e32f335f..2d56212b4 100644 --- a/link/program_test.go +++ b/link/program_test.go @@ -1,10 +1,14 @@ package link import ( + "fmt" + "net" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" + + qt "github.com/frankban/quicktest" ) func TestProgramAlter(t *testing.T) { @@ -42,3 +46,90 @@ func TestProgramAlter(t *testing.T) { t.Fatal(err) } } + +func TestRawAttachProgramAnchor(t *testing.T) { + testutils.SkipOnOldKernel(t, "6.6", "attach anchor") + + iface, err := net.InterfaceByName("lo") + qt.Assert(t, err, qt.IsNil) + + a := mustLoadProgram(t, ebpf.SchedCLS, 0, "") + info, err := a.Info() + qt.Assert(t, err, qt.IsNil) + aID, _ := info.ID() + + err = RawAttachProgram(RawAttachProgramOptions{ + Target: iface.Index, + Program: a, + Attach: ebpf.AttachTCXIngress, + }) + qt.Assert(t, err, qt.IsNil) + defer RawDetachProgram(RawDetachProgramOptions{ + Target: iface.Index, + Program: a, + Attach: ebpf.AttachTCXIngress, + }) + + link, err := AttachTCX(TCXOptions{ + Interface: iface.Index, + Program: mustLoadProgram(t, ebpf.SchedCLS, 0, ""), + Attach: ebpf.AttachTCXIngress, + }) + qt.Assert(t, err, qt.IsNil) + defer link.Close() + + linkInfo, err := link.Info() + qt.Assert(t, err, qt.IsNil) + + b := 
mustLoadProgram(t, ebpf.SchedCLS, 0, "") + + for _, anchor := range []Anchor{ + First(), + Last(), + AfterProgram(a), + AfterProgramByID(aID), + AfterLink(link), + AfterLinkByID(linkInfo.ID), + } { + t.Run(fmt.Sprintf("%T", anchor), func(t *testing.T) { + err := RawAttachProgram(RawAttachProgramOptions{ + Target: iface.Index, + Program: b, + Attach: ebpf.AttachTCXIngress, + Anchor: anchor, + }) + qt.Assert(t, err, qt.IsNil) + + // Detach doesn't allow first or last anchor. + if _, ok := anchor.(firstAnchor); ok { + anchor = nil + } else if _, ok := anchor.(lastAnchor); ok { + anchor = nil + } + + err = RawDetachProgram(RawDetachProgramOptions{ + Target: iface.Index, + Program: b, + Attach: ebpf.AttachTCXIngress, + Anchor: anchor, + }) + qt.Assert(t, err, qt.IsNil) + }) + } + + // Check that legacy replacement with a program works. + err = RawAttachProgram(RawAttachProgramOptions{ + Target: iface.Index, + Program: b, + Attach: ebpf.AttachTCXIngress, + Anchor: ReplaceProgram(a), + }) + qt.Assert(t, err, qt.IsNil) + + err = RawDetachProgram(RawDetachProgramOptions{ + Target: iface.Index, + Program: b, + Attach: ebpf.AttachTCXIngress, + }) + qt.Assert(t, err, qt.IsNil) +} diff --git a/link/syscalls.go b/link/syscalls.go index 4ffde6461..96d6c7b1a 100644 --- a/link/syscalls.go +++ b/link/syscalls.go @@ -24,6 +24,7 @@ const ( XDPType = sys.BPF_LINK_TYPE_XDP PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT KprobeMultiType = sys.BPF_LINK_TYPE_KPROBE_MULTI + TCXType = sys.BPF_LINK_TYPE_TCX ) var haveProgAttach = internal.NewFeatureTest("BPF_PROG_ATTACH", "4.10", func() error { diff --git a/link/tcx.go b/link/tcx.go new file mode 100644 index 000000000..6989af8c9 --- /dev/null +++ b/link/tcx.go @@ -0,0 +1,68 @@ +package link + +import ( + "fmt" + "runtime" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +type TCXOptions struct { + // Index of the interface to attach to. + Interface int + // Program to attach. 
+ Program *ebpf.Program + // One of the AttachTCX* constants. + Attach ebpf.AttachType + // Attach relative to an anchor. Optional. + Anchor Anchor + // Only attach if the expected revision matches. + ExpectedRevision uint64 + // Flags control the attach behaviour. Specify an Anchor instead of + // F_LINK, F_ID, F_BEFORE, F_AFTER and F_REPLACE. Optional. + Flags uint32 +} + +func AttachTCX(opts TCXOptions) (Link, error) { + if opts.Interface < 0 { + return nil, fmt.Errorf("interface %d is out of bounds", opts.Interface) + } + + if opts.Flags&anchorFlags != 0 { + return nil, fmt.Errorf("disallowed flags: use Anchor to specify attach target") + } + + attr := sys.LinkCreateTcxAttr{ + ProgFd: uint32(opts.Program.FD()), + AttachType: sys.AttachType(opts.Attach), + TargetIfindex: uint32(opts.Interface), + ExpectedRevision: opts.ExpectedRevision, + Flags: opts.Flags, + } + + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return nil, fmt.Errorf("attach tcx link: %w", err) + } + + attr.RelativeFdOrId = fdOrID + attr.Flags |= flags + } + + fd, err := sys.LinkCreateTcx(&attr) + runtime.KeepAlive(opts.Program) + runtime.KeepAlive(opts.Anchor) + if err != nil { + return nil, fmt.Errorf("attach tcx link: %w", err) + } + + return &tcxLink{RawLink{fd, ""}}, nil +} + +type tcxLink struct { + RawLink +} + +var _ Link = (*tcxLink)(nil) diff --git a/link/tcx_test.go b/link/tcx_test.go new file mode 100644 index 000000000..77cde74a2 --- /dev/null +++ b/link/tcx_test.go @@ -0,0 +1,90 @@ +package link + +import ( + "fmt" + "math" + "net" + "testing" + + qt "github.com/frankban/quicktest" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/testutils" + "github.com/cilium/ebpf/internal/unix" +) + +func TestAttachTCX(t *testing.T) { + testutils.SkipOnOldKernel(t, "6.6", "TCX link") + + prog := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, "") + link, _ := mustAttachTCX(t, prog, ebpf.AttachTCXIngress) + + testLink(t, link, prog) +} +
+func TestTCXAnchor(t *testing.T) { + testutils.SkipOnOldKernel(t, "6.6", "TCX link") + + a := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, "") + b := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, "") + + linkA, iface := mustAttachTCX(t, a, ebpf.AttachTCXEgress) + + programInfo, err := a.Info() + qt.Assert(t, err, qt.IsNil) + programID, _ := programInfo.ID() + + linkInfo, err := linkA.Info() + qt.Assert(t, err, qt.IsNil) + linkID := linkInfo.ID + + for _, anchor := range []Anchor{ + First(), + Last(), + BeforeProgram(a), + BeforeProgramByID(programID), + AfterLink(linkA), + AfterLinkByID(linkID), + } { + t.Run(fmt.Sprintf("%T", anchor), func(t *testing.T) { + linkB, err := AttachTCX(TCXOptions{ + Program: b, + Attach: ebpf.AttachTCXEgress, + Interface: iface, + Anchor: anchor, + }) + qt.Assert(t, err, qt.IsNil) + qt.Assert(t, linkB.Close(), qt.IsNil) + }) + } +} + +func TestTCXExpectedRevision(t *testing.T) { + testutils.SkipOnOldKernel(t, "6.6", "TCX link") + + iface, err := net.InterfaceByName("lo") + qt.Assert(t, err, qt.IsNil) + + _, err = AttachTCX(TCXOptions{ + Program: mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, ""), + Attach: ebpf.AttachTCXEgress, + Interface: iface.Index, + ExpectedRevision: math.MaxUint64, + }) + qt.Assert(t, err, qt.ErrorIs, unix.ESTALE) +} + +func mustAttachTCX(tb testing.TB, prog *ebpf.Program, attachType ebpf.AttachType) (Link, int) { + iface, err := net.InterfaceByName("lo") + qt.Assert(tb, err, qt.IsNil) + + link, err := AttachTCX(TCXOptions{ + Program: prog, + Attach: attachType, + Interface: iface.Index, + }) + qt.Assert(tb, err, qt.IsNil) + tb.Cleanup(func() { qt.Assert(tb, link.Close(), qt.IsNil) }) + + return link, iface.Index +}