[ON-GOING] CVE-2024-36971 N-day, attempt at PoC

2024-10-12

Overall Description

Described as a race condition leading to a use-after-free (UAF) in the Linux kernel's dst cache for UDP sockets.

Good case

According to the documentation, this is the order in which the operations are supposed to happen.

/* Clear the socket's cached destination by passing NULL to sk_dst_set(). */
static inline void sk_dst_reset(struct sock *sk) {
	sk_dst_set(sk, NULL);
}

----> 
/*
 * Install @dst as the socket's cached destination and release the entry
 * that was cached before.
 *
 * @sk:  socket whose sk_dst_cache is replaced
 * @dst: new destination entry; sk_dst_reset() passes NULL to empty the cache
 */
static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) {
	struct dst_entry *old_dst;

	sk_tx_queue_clear(sk); // reset the socket's tx queue state (helper not shown here)
	sk->sk_dst_pending_confirm = 0; // clear the pending-confirm flag
	// Publish the new pointer and fetch the previous one in a single xchg(),
	// so old_dst is no longer reachable through sk->sk_dst_cache afterwards.
	old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
	// Drop the reference on the PREVIOUS entry (not on NULL); dst_release()
	// itself ignores a NULL argument when the cache was already empty.
	dst_release(old_dst);
}

----> 
/*
 * Drop one reference on @dst. A NULL @dst is silently ignored.
 * A negative resulting count is reported as an underflow warning; a count of
 * exactly zero hands the entry to call_rcu() with dst_destroy_rcu as callback.
 */
void dst_release(struct dst_entry *dst) { // dst may be NULL (e.g. the cache was empty)
	if (dst) {
		int newrefcnt;

		newrefcnt = atomic_dec_return(&dst->__refcnt);
		if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
			net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
					     __func__, dst, newrefcnt);
		if (!newrefcnt)
			call_rcu(&dst->rcu_head, dst_destroy_rcu); // last reference dropped: destruction is deferred to an RCU callback, not done inline
	}
}

The Patch Diff.

Here are the vulnerable functions touched by the patch diff.

/*
 * Excerpt of sk_setsockopt(): only the SO_CNX_ADVICE case is reproduced.
 * The surrounding switch and the code that sets `val` are elided ("// ...").
 */
int sk_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) {
	// ...
	case SO_CNX_ADVICE:
		// only the value 1 triggers negative advice on the socket's cached dst
		if (val == 1)
			dst_negative_advice(sk);

	// ...
}

/*
 * Call the negative_advice() hook of the socket's cached destination and,
 * if the hook returns a different entry, store that entry in
 * sk->sk_dst_cache and reset the related per-socket state.
 */
static inline void dst_negative_advice(struct sock *sk) {
	struct dst_entry *ndst, *dst = __sk_dst_get(sk);

	if (dst && dst->ops->negative_advice) {  // requires a cached dst and a hook to call
		ndst = dst->ops->negative_advice(dst);
		// NOTE(review): ipv4_negative_advice() may already have called ip_rt_put()
		// on dst here, while sk->sk_dst_cache still points at it until the
		// assignment below - presumably the race window of this CVE; confirm
		// against the upstream fix.
		if (ndst != dst) {
			rcu_assign_pointer(sk->sk_dst_cache, ndst);
			sk_tx_queue_clear(sk);
			sk->sk_dst_pending_confirm = 0;
		}
	}
}

/*
 * Read sk->sk_dst_cache through rcu_dereference_check(), with
 * lockdep_sock_is_held(sk) as the check condition. No reference count is
 * modified here; the caller gets the raw cached pointer.
 */
static inline struct dst_entry * __sk_dst_get(struct sock *sk) {
	return rcu_dereference_check(sk->sk_dst_cache, lockdep_sock_is_held(sk));
}

/*
 * negative_advice handler for IPv4 routes (judging by the name; the dst_ops
 * wiring is not shown in these notes).
 *
 * Returns @dst unchanged when the route is kept. Returns NULL after calling
 * ip_rt_put() on the route when it is obsolete, was redirected, or has an
 * expiry set. Note that sk->sk_dst_cache is not touched here; the caller
 * updates it only after this function returns.
 */
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) {
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			// obsolete route: put it and tell the caller to forget it
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->dst.expires) {
			// redirected or expiring route: same handling as above
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

The Structures

Some notable structures.

/*
 * struct socket as quoted for these notes.
 * NOTE(review): field list should be verified against the target kernel version.
 */
struct socket {
    socket_state            state;
    short                   type;
    unsigned long           flags;
    struct socket_wq __rcu  *wq;
    struct file             *file;
    struct sock             *sk; // HERE: the struct sock that the functions in these notes operate on
    const struct proto_ops  *ops;
};

/*
 * struct sock as quoted for these notes.
 * NOTE(review): this appears to be an abridged field list; verify members and
 * ordering against the target kernel version before relying on offsets.
 */
struct sock {
    socket_lock_t sk_lock;
    atomic_t sk_drops;
    int sk_rcvlowat;
    struct sk_buff_head sk_error_queue;
    struct sk_buff_head sk_receive_queue;
    struct dst_entry * sk_rx_dst;
    struct dst_entry __rcu * sk_dst_cache; // cached destination: swapped by sk_dst_set(), read by __sk_dst_get()
    atomic_t sk_omem_alloc;
    int sk_sndbuf;
    int sk_wmem_queued;
    refcount_t sk_wmem_alloc;
    unsigned long sk_tsq_flags;
    struct sk_buff * sk_send_head;
    struct sk_buff_head sk_write_queue;
    __s32 sk_peek_off;
    int sk_write_pending;
    __u32 sk_dst_pending_confirm; // zeroed by sk_dst_set() and dst_negative_advice()
    u32 sk_pacing_status;
    long sk_sndtimeo;
    struct timer_list sk_timer;
    __u32 sk_priority;
    __u32 sk_mark;
    u32 sk_pacing_rate;
    u32 sk_max_pacing_rate;
    struct page_frag sk_frag;
    netdev_features_t sk_route_caps;
    netdev_features_t sk_route_nocaps;
    int sk_gso_type;
    unsigned int sk_gso_max_size;
    gfp_t sk_allocation;
    __u32 sk_txhash;
    unsigned int __sk_flags_offset;
    unsigned int sk_padding:1;
    unsigned int sk_kern_sock:1;
    unsigned int sk_no_check_tx:1;
    unsigned int sk_no_check_rx:1;
    unsigned int sk_userlocks:4;
    unsigned int sk_protocol:8;
    unsigned int sk_type:16;
};

/*
 * struct dst_entry: the destination cache entry pointed to by
 * sk->sk_dst_cache.
 *
 * The quoted listing had the preprocessor guards stripped, which left
 * __refcnt declared twice and both xfrm and __pad1 present at once. The
 * guards are restored so that exactly one member of each pair exists per
 * configuration and the "offset 64" remarks can hold.
 * NOTE(review): layout matches kernels that still use atomic_t __refcnt;
 * verify against the exact kernel version under test.
 */
struct dst_entry {
	struct net_device *dev;
	struct dst_ops *ops;		/* holds the negative_advice() hook */
	unsigned long _metrics;
	unsigned long expires;		/* tested by ipv4_negative_advice() */
#ifdef CONFIG_XFRM
	struct xfrm_state *xfrm;
#else
	void *__pad1;
#endif
	int (*input)(struct sk_buff *);
	int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
	unsigned short flags;
	short obsolete;			/* > 0 makes ipv4_negative_advice() drop the route */
	unsigned short header_len;
	unsigned short trailer_len;
#ifdef CONFIG_64BIT
	atomic_t __refcnt;	/* 64-bit offset 64 */
#endif
	int __use;
	unsigned long lastuse;
	struct lwtunnel_state *lwtstate;
	struct rcu_head rcu_head;	/* passed to call_rcu() by dst_release() */
	short error;
	short __pad;
	__u32 tclassid;
#ifndef CONFIG_64BIT
	atomic_t __refcnt;	/* 32-bit offset 64 */
#endif
	netdevice_tracker	dev_tracker;
};

/*
 * struct dst_ops: per-protocol operations table reached through
 * dst_entry->ops.
 * NOTE(review): field list should be verified against the target kernel version.
 */
struct dst_ops {
    unsigned short family;
    __be16 protocol;
    unsigned int gc_thresh;
    int (*gc)(struct dst_ops *ops);
    struct dst_entry *  (*check)(struct dst_entry *, __u32 cookie);
    unsigned int (*default_advmss)(const struct dst_entry *);
    unsigned int (*mtu)(const struct dst_entry *);
    u32 *(*cow_metrics)(struct dst_entry *, unsigned long);
    void (*destroy)(struct dst_entry *);
    void (*ifdown)(struct dst_entry *,struct net_device *dev, int how);
    // hook invoked by dst_negative_advice() as dst->ops->negative_advice(dst)
    struct dst_entry *  (*negative_advice)(struct dst_entry *);
    void (*link_failure)(struct sk_buff *);
    void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,struct sk_buff *skb, u32 mtu);
    void (*redirect)(struct dst_entry *dst, struct sock *sk,struct sk_buff *skb);
    int (*local_out)(struct sk_buff *skb);
    struct neighbour *(*neigh_lookup)(const struct dst_entry *dst,struct sk_buff *skb, const void *daddr);
    struct kmem_cache *kmem_cachep;
    struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp;
};
static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
		      struct msghdr *msg, size_t size) {
	struct sock *sk = sock->sk;
	struct dn_scp *scp = DN_SK(sk);
	size_t mss;
	struct sk_buff_head *queue = &scp->data_xmit_queue;
	int flags = msg->msg_flags;
	int err = 0;
	size_t sent = 0;
	int addr_len = msg->msg_namelen;
	DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name);
	struct sk_buff *skb = NULL;
	struct dn_skb_cb *cb;
	size_t len;
	unsigned char fctype;
	long timeo;

	if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
		return -EOPNOTSUPP;

	if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
		return -EINVAL;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	/*
	 * The only difference between stream sockets and sequenced packet
	 * sockets is that the stream sockets always behave as if MSG_EOR
	 * has been set.
	 */
	if (sock->type == SOCK_STREAM) {
		if (flags & MSG_EOR) {
			err = -EINVAL;
			goto out;
		}
		flags |= MSG_EOR;
	}


	err = dn_check_state(sk, addr, addr_len, &timeo, flags);
	if (err)
		goto out_err;

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		if (!(flags & MSG_NOSIGNAL))
			send_sig(SIGPIPE, current, 0);
		goto out_err;
	}

	if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
		dst_negative_advice(sk);

	mss = scp->segsize_rem;
	fctype = scp->services_rem & NSP_FC_MASK;

	mss = dn_current_mss(sk, flags);

	if (flags & MSG_OOB) {
		queue = &scp->other_xmit_queue;
		if (size > mss) {
			err = -EMSGSIZE;
			goto out;
		}
	}