Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support to maintain udp conn state #493

Merged
merged 7 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: false
RemoveBracesLLVM: true

# Taken from:
# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ tools/ \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/kernel-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
strategy:
fail-fast: false
matrix:
kernel: [ '5.10-20240305.092417', '5.15-20240305.092417', '6.1-20240305.092417', '6.6-20240305.092417' ]
kernel: [ '5.15-20240305.092417', '6.1-20240305.092417', '6.6-20240305.092417' ]
timeout-minutes: 10
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ node_modules/
*.log
.build_tags
.checkpatch-camelcase.git.
venv
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,6 @@ ebpf: submodule clean-ebpf
go generate ./trace/trace.go && echo trace > $(BUILD_TAGS_FILE) || echo > $(BUILD_TAGS_FILE)

ebpf-lint:
./scripts/checkpatch.pl --no-tree --strict --no-summary --show-types --color=always control/kern/tproxy.c --ignore COMMIT_COMMENT_SYMBOL,NOT_UNIFIED_DIFF,COMMIT_LOG_LONG_LINE,LONG_LINE_COMMENT,VOLATILE,ASSIGN_IN_IF,PREFER_DEFINED_ATTRIBUTE_MACRO,CAMELCASE,LEADING_SPACE,OPEN_ENDED_LINE,SPACING
./scripts/checkpatch.pl --no-tree --strict --no-summary --show-types --color=always control/kern/tproxy.c --ignore COMMIT_COMMENT_SYMBOL,NOT_UNIFIED_DIFF,COMMIT_LOG_LONG_LINE,LONG_LINE_COMMENT,VOLATILE,ASSIGN_IN_IF,PREFER_DEFINED_ATTRIBUTE_MACRO,CAMELCASE,LEADING_SPACE,OPEN_ENDED_LINE,SPACING,BLOCK_COMMENT_STYLE

## End Ebpf
1 change: 1 addition & 0 deletions common/consts/ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ var (
ProgTypeSkLookupFeatureVersion = internal.Version{5, 9, 0}
SockmapFeatureVersion = internal.Version{5, 10, 0}
UserspaceBatchUpdateLpmTrieFeatureVersion = internal.Version{5, 13, 0}
BpfTimerFeatureVersion = internal.Version{5, 15, 0}
HelperBpfGetFuncIpVersionFeatureVersion = internal.Version{5, 15, 0}
)

Expand Down
2 changes: 1 addition & 1 deletion control/control_plane.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func NewControlPlane(
kernelVersion.String(),
requirement.String())
}
if requirement := consts.SockmapFeatureVersion; len(global.WanInterface) > 0 && kernelVersion.Less(requirement) {
if requirement := consts.BpfTimerFeatureVersion; len(global.WanInterface) > 0 && kernelVersion.Less(requirement) {
return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again",
kernelVersion.String(),
requirement.String())
Expand Down
30 changes: 30 additions & 0 deletions control/control_plane_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,36 @@ func (c *controlPlaneCore) _bindWan(ifname string) error {
return nil
})

filterIngress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0x2023, 0b010+uint16(c.flip)),
Protocol: unix.ETH_P_ALL,
Priority: 1,
},
Fd: c.bpf.bpfPrograms.TproxyWanIngress.FD(),
Name: consts.AppName + "_wan_ingress",
DirectAction: true,
}
_ = netlink.FilterDel(filterIngress)
// Remove and add.
if !c.isReload {
// Clean up thoroughly.
filterIngressFlipped := deepcopy.Copy(filterIngress).(*netlink.BpfFilter)
filterIngressFlipped.FilterAttrs.Handle ^= 1
_ = netlink.FilterDel(filterIngressFlipped)
}
if err := netlink.FilterAdd(filterIngress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.FilterDel(filterIngress); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterIngress.Name, err)
}
return nil
})

return nil
}

Expand Down
2 changes: 1 addition & 1 deletion control/kern/headers
Submodule headers updated 1 files
+40 −0 bpf_timer.h
205 changes: 159 additions & 46 deletions control/kern/tproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "headers/bpf_core_read.h"
#include "headers/bpf_endian.h"
#include "headers/bpf_helpers.h"
#include "headers/bpf_timer.h"

// #define __DEBUG_ROUTING
// #define __PRINT_ROUTING_RESULT
Expand Down Expand Up @@ -76,6 +77,8 @@

#define ESOCKTNOSUPPORT 94 /* Socket type not supported */

#define TIMEOUT_UDP_CONN_STATE 3e11 /* 300s */

#define NDP_REDIRECT 137

enum { BPF_F_CURRENT_NETNS = -1 };
Expand Down Expand Up @@ -320,7 +323,8 @@ struct port_range {
*
* domain(geosite:cn, suffix: google.com) && l4proto(tcp) -> my_group
*
* pseudocode: domain(geosite:cn || suffix:google.com) && l4proto(tcp) -> my_group
* pseudocode: domain(geosite:cn || suffix:google.com) && l4proto(tcp) ->
* my_group
*
* A match_set can be: IP set geosite:cn, suffix google.com, tcp proto
*/
Expand Down Expand Up @@ -383,6 +387,19 @@ struct {
__uint(pinning, LIBBPF_PIN_BY_NAME);
} cookie_pid_map SEC(".maps");

struct udp_conn_state {
// pass

struct bpf_timer timer;
};

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_DST_MAPPING_NUM);
__type(key, struct tuples_key);
__type(value, struct udp_conn_state);
} udp_conn_state_map SEC(".maps");

// Functions:

static __always_inline __u8 ipv4_get_dscp(const struct iphdr *iph)
Expand Down Expand Up @@ -564,7 +581,8 @@ parse_transport(const struct __sk_buff *skb, __u32 link_h_len,
__builtin_memset(udph, 0, sizeof(struct udphdr));

// bpf_printk("parse_transport: h_proto: %u ? %u %u", ethh->h_proto,
// bpf_htons(ETH_P_IP), bpf_htons(ETH_P_IPV6));
// bpf_htons(ETH_P_IP),
// bpf_htons(ETH_P_IPV6));
if (ethh->h_proto == bpf_htons(ETH_P_IP)) {
ret = bpf_skb_load_bytes(skb, offset, iph,
sizeof(struct iphdr));
Expand Down Expand Up @@ -1024,16 +1042,16 @@ int tproxy_lan_ingress(struct __sk_buff *skb)
get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto);

/*
* ip rule add fwmark 0x8000000/0x8000000 table 2023
* ip route add local default dev lo table 2023
* ip -6 rule add fwmark 0x8000000/0x8000000 table 2023
* ip -6 route add local default dev lo table 2023

* ip rule del fwmark 0x8000000/0x8000000 table 2023
* ip route del local default dev lo table 2023
* ip -6 rule del fwmark 0x8000000/0x8000000 table 2023
* ip -6 route del local default dev lo table 2023
*/
* ip rule add fwmark 0x8000000/0x8000000 table 2023
* ip route add local default dev lo table 2023
* ip -6 rule add fwmark 0x8000000/0x8000000 table 2023
* ip -6 route add local default dev lo table 2023

* ip rule del fwmark 0x8000000/0x8000000 table 2023
* ip route del local default dev lo table 2023
* ip -6 rule del fwmark 0x8000000/0x8000000 table 2023
* ip -6 route del local default dev lo table 2023
*/
// Socket lookup and assign skb to existing socket connection.
struct bpf_sock_tuple tuple = { 0 };
__u32 tuple_size;
Expand Down Expand Up @@ -1119,12 +1137,14 @@ int tproxy_lan_ingress(struct __sk_buff *skb)
/// NOTICE: No pid pname info for LAN packet.
// // Maybe this packet is also in the host (such as docker) ?
// // I tried and it is false.
// __u64 cookie = bpf_get_socket_cookie(skb);
// struct pid_pname *pid_pname = bpf_map_lookup_elem(&cookie_pid_map,
// &cookie); if (pid_pname) {
// __builtin_memcpy(routing_result.pname, pid_pname->pname, TASK_COMM_LEN);
// routing_result.pid = pid_pname->pid;
// }
//__u64 cookie = bpf_get_socket_cookie(skb);
//struct pid_pname *pid_pname =
// bpf_map_lookup_elem(&cookie_pid_map, &cookie);
//if (pid_pname) {
// __builtin_memcpy(routing_result.pname, pid_pname->pname,
// TASK_COMM_LEN);
// routing_result.pid = pid_pname->pid;
//}

// Save routing result.
ret = bpf_map_update_elem(&routing_tuples_map, &tuples.five,
Expand Down Expand Up @@ -1208,28 +1228,113 @@ static __always_inline bool pid_is_control_plane(struct __sk_buff *skb,
if ((skb->mark & 0x100) == 0x100) {
bpf_printk("No pid_pname found. But it should not happen");
/*
* if (l4proto == IPPROTO_TCP) {
*if (tcph.syn && !tcph.ack) {
* bpf_printk("No pid_pname found. But it should not happen: local:%u "
* "(%u)[%llu]",
* bpf_ntohs(sport), l4proto, cookie);
*} else {
* bpf_printk("No pid_pname found. But it should not happen: (Old "
* "Connection): local:%u "
* "(%u)[%llu]",
* bpf_ntohs(sport), l4proto, cookie);
*}
* } else {
*bpf_printk("No pid_pname found. But it should not happen: local:%u "
* "(%u)[%llu]",
* bpf_ntohs(sport), l4proto, cookie);
* }
*/
* if (l4proto == IPPROTO_TCP) {
*if (tcph.syn && !tcph.ack) {
* bpf_printk("No pid_pname found. But it should not happen: local:%u "
* "(%u)[%llu]",
* bpf_ntohs(sport), l4proto, cookie);
*} else {
* bpf_printk("No pid_pname found. But it should not happen: (Old "
* "Connection): local:%u "
* "(%u)[%llu]",
* bpf_ntohs(sport), l4proto, cookie);
*}
* } else {
*bpf_printk("No pid_pname found. But it should not happen: local:%u "
* "(%u)[%llu]",
* bpf_ntohs(sport), l4proto, cookie);
* }
*/
return true;
}
return false;
}

static int refresh_udp_conn_state_timer_cb(void *_udp_conn_state_map,
struct tuples_key *key,
struct udp_conn_state *val)
{
bpf_map_delete_elem(&udp_conn_state_map, key);
return 0;
}

static __always_inline void copy_reversed_tuples(struct tuples_key *key,
struct tuples_key *dst)
{
__builtin_memset(dst, 0, sizeof(*dst));
dst->dip = key->sip;
dst->sip = key->dip;
dst->sport = key->dport;
dst->dport = key->sport;
dst->l4proto = key->l4proto;
}

static __always_inline int refresh_udp_conn_state_timer(struct tuples_key *key)
{
struct udp_conn_state new_output_state = { 0 };
int ret = bpf_map_update_elem(&udp_conn_state_map, key,
&new_output_state, BPF_ANY);
if (unlikely(ret))
return -EINVAL;
struct udp_conn_state *value =
bpf_map_lookup_elem(&udp_conn_state_map, key);
if (unlikely(!value))
return -EFAULT;

ret = bpf_timer_init(&value->timer, &udp_conn_state_map,
CLOCK_MONOTONIC);
if (unlikely(ret))
goto del;

ret = bpf_timer_set_callback(&value->timer,
refresh_udp_conn_state_timer_cb);
if (unlikely(ret))
goto del;

ret = bpf_timer_start(&value->timer, TIMEOUT_UDP_CONN_STATE, 0);
if (unlikely(ret))
goto del;

return 0;
del:
bpf_map_delete_elem(&udp_conn_state_map, key);
return -EFAULT;
}

SEC("tc/wan_ingress")
int tproxy_wan_ingress(struct __sk_buff *skb)
{
struct ethhdr ethh;
struct iphdr iph;
struct ipv6hdr ipv6h;
struct icmp6hdr icmp6h;
struct tcphdr tcph;
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len;

if (get_link_h_len(skb->ifindex, &link_h_len))
return TC_ACT_OK;
int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto);
if (ret)
return TC_ACT_OK;
if (l4proto != IPPROTO_UDP)
return TC_ACT_PIPE;

struct tuples tuples;
struct tuples_key reversed_tuples_key;

get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto);
copy_reversed_tuples(&tuples.five, &reversed_tuples_key);

if (refresh_udp_conn_state_timer(&reversed_tuples_key))
return TC_ACT_SHOT;

return TC_ACT_PIPE;
}

// Routing and redirect the packet back.
// We cannot modify the dest address here. So we cooperate with wan_ingress.
SEC("tc/wan_egress")
Expand All @@ -1239,7 +1344,7 @@ int tproxy_wan_egress(struct __sk_buff *skb)
if (skb->ingress_ifindex != NOWHERE_IFINDEX)
return TC_ACT_OK;
// if ((skb->mark & 0x80) == 0x80) {
// return TC_ACT_OK;
// return TC_ACT_OK;
// }

struct ethhdr ethh;
Expand Down Expand Up @@ -1401,6 +1506,13 @@ int tproxy_wan_egress(struct __sk_buff *skb)
flag[6] = tuples.dscp;
struct pid_pname *pid_pname;

if (bpf_map_lookup_elem(&udp_conn_state_map, &tuples.five)) {
if (refresh_udp_conn_state_timer(&tuples.five))
return TC_ACT_SHOT;

return TC_ACT_OK;
}

if (pid_is_control_plane(skb, &pid_pname)) {
// From control plane. Direct.
return TC_ACT_OK;
Expand Down Expand Up @@ -1491,20 +1603,21 @@ int tproxy_wan_egress(struct __sk_buff *skb)
SEC("tc/dae0peer_ingress")
int tproxy_dae0peer_ingress(struct __sk_buff *skb)
{
/* Only packets redirected from wan_egress or lan_ingress have this cb mark. */
/* Only packets redirected from wan_egress or lan_ingress have this cb mark.
*/
if (skb->cb[0] != TPROXY_MARK)
return TC_ACT_SHOT;

/* ip rule add fwmark 0x8000000/0x8000000 table 2023
* ip route add local default dev lo table 2023
*/
* ip route add local default dev lo table 2023
*/
skb->mark = TPROXY_MARK;
bpf_skb_change_type(skb, PACKET_HOST);

/* l4proto is stored in skb->cb[1] only for UDP and new TCP. As for
* established TCP, kernel can take care of socket lookup, so just
* return them to stack without calling bpf_sk_assign.
*/
* established TCP, kernel can take care of socket lookup, so just
* return them to stack without calling bpf_sk_assign.
*/
__u8 l4proto = skb->cb[1];

if (l4proto != 0)
Expand Down Expand Up @@ -1585,9 +1698,9 @@ static __always_inline int _update_map_elem_by_cookie(const __u64 cookie)
unsigned long arg_end = BPF_CORE_READ(current, mm, arg_end);

/*
* For string like: /usr/lib/sddm/sddm-helper --socket /tmp/sddm-auth1
* We extract "sddm-helper" from it.
*/
* For string like: /usr/lib/sddm/sddm-helper --socket /tmp/sddm-auth1
* We extract "sddm-helper" from it.
*/
unsigned long loc, j, last_slash = -1;
#pragma unroll
for (loc = 0, j = 0; j < MAX_ARG_LEN_TO_PROBE;
Expand All @@ -1609,7 +1722,7 @@ static __always_inline int _update_map_elem_by_cookie(const __u64 cookie)
(const void *)(arg_start + j));
if (ret) {
// bpf_printk("failed to read process name.0: [%ld, %ld]", arg_start,
// arg_end);
// arg_end);
// bpf_printk("_failed to read process name.0: %ld %ld", j, to_read);
return ret;
}
Expand Down
Loading
Loading