通过 netlink 从内核向用户空间单播数据失败

Failure while unicast data from kernel to user space via netlink

我是内核开发的新手,在通过 netlink 套接字将数据从内核空间单播到用户空间时遇到了问题。内核模块会调用 send_nat() 函数,把用户定义的 struct nat_mntr 写入 netlink 套接字。但是 nlmsg_unicast() 一直失败,即使我在 cfg 中尝试了不同的配置。请帮助我找出代码中的错误。

userdefined.c

    /* Non-zero while no user-space request has been seen. rr() clears it when a
     * request arrives; send_nat() returns without sending while it is set. */
    int no_data_request = 1;
    EXPORT_SYMBOL(no_data_request);
    /* Written by rr() from the request header's nlmsg_pid and used by send_nat()
     * as the unicast destination. */
    int request_pid = 0; // PID of requesting process
    EXPORT_SYMBOL(request_pid);

/* Netlink input callback, defined further down in this file. Declared here
 * because send_nat() installs it when it creates the kernel socket. */
void rr(struct sk_buff *skb);

/**
 * send_nat - unicast one struct nat_mntr to the user-space requester.
 * @nl_sk:       kernel netlink socket to send on; may be NULL.
 * @nat_data:    record to send; the call is a no-op when NULL.
 * @pid:         currently unused (the destination is the global request_pid).
 * @group:       currently unused.
 * @flags:       currently unused.
 * @sock_closed: in/out state flag. 1 means "no socket yet": the socket is
 *               created here and the flag is set to 0. 0 means "socket already
 *               created". Any other value aborts the call.
 *
 * Nothing is sent until rr() has recorded a requester (no_data_request == 0).
 * All outcomes are reported through KERN_DEBUG messages only; there is no
 * return value.
 */
void send_nat(struct sock *nl_sk, struct nat_mntr *nat_data, int pid, int group, gfp_t flags, int *sock_closed){

        /*
         * nl_sk is passed by value, so a socket created here never reaches the
         * caller's variable. Remember it so that later calls (which arrive with
         * sock_closed == 0 and possibly a NULL nl_sk) can still send.
         * NOTE(review): the socket is never released in this function; whoever
         * owns module teardown must release it, otherwise the protocol number
         * stays taken.
         */
        static struct sock *created_sk;
        struct netlink_kernel_cfg cfg  = {
                .groups         = 1,
                .input          = rr,
        };
        struct nlmsghdr *nlh;
        struct sk_buff *skb_out;
        int msg_size;
        int res;
        #define MYPROTO 31

        printk(KERN_DEBUG "%s: Entered \n", __FUNCTION__);
        if ( nat_data == NULL ){
                printk(KERN_DEBUG "%s: nat_data is NULL: Leaving \n", __FUNCTION__);
                return ;
        }else {
                printk(KERN_DEBUG "%s: nat_data is filled \n", __FUNCTION__);
        }

        if (nl_sk == NULL) {
                printk(KERN_DEBUG "%s: nl_sk is NULL  \n", __FUNCTION__);
        }else {
                printk(KERN_DEBUG "%s: nl_sock is not null \n", __FUNCTION__);
        }

        if (*sock_closed == 1) {
                printk(KERN_DEBUG "%s: sock_closed == 1, creating socket \n", __FUNCTION__);
                nl_sk  = netlink_kernel_create(&init_net, MYPROTO, &cfg);

                if (!nl_sk) {
                        printk(KERN_DEBUG "%s: Error creating socket: sock_closed = %d:  Leaving  \n",  __FUNCTION__ , *sock_closed);
                        return ;
                }
                created_sk = nl_sk;
                *sock_closed = 0;
                printk(KERN_DEBUG "%s: Socket created successfully: sock_closed = %d  \n",  __FUNCTION__ , *sock_closed );
        }
        else if (*sock_closed == 0 ){
                printk(KERN_DEBUG "%s:Already created socket.  sock_closed = 0 \n",  __FUNCTION__);
                /* Fall back to the socket created by an earlier call. */
                if (nl_sk == NULL)
                        nl_sk = created_sk;
        }
        else {
                printk(KERN_DEBUG "%s: sock_closed status is unknown: sock_closed = %d Leaving  \n",  __FUNCTION__ , *sock_closed);
                return ;
        }

        /* Never hand a NULL socket to nlmsg_unicast(). */
        if (nl_sk == NULL) {
                printk(KERN_DEBUG "%s: No usable netlink socket: Leaving \n", __FUNCTION__);
                return ;
        }

        if( no_data_request){
                printk(KERN_DEBUG "%s: No one has requested data: Leaving  \n",  __FUNCTION__);
                return ;
        }else{
                printk(KERN_DEBUG "%s: Process %d requested the data\n",  __FUNCTION__, request_pid );
        }

        /* Payload is the whole structure, not the size of the pointer to it. */
        msg_size = sizeof(*nat_data);
        skb_out = nlmsg_new(msg_size, 0);

        if ( !skb_out ) {
                printk(KERN_DEBUG "%s: Failed to skb_out = nlmsg_new(msg_size, 0): Leaving \n",  __FUNCTION__);
                return;
        }

        nlh = nlmsg_put(skb_out, 0, 0, NLMSG_DONE, msg_size, 0); /* NLMSG_DONE */
        if (!nlh) {
                printk(KERN_DEBUG "%s: Failed nlh = nlmsg_put(skb_out, 0, 1,  NLMSG_DONE, msg_size, 0): Leaving  \n",  __FUNCTION__);
                /* The skb is still ours on this path; free it to avoid a leak. */
                nlmsg_free(skb_out);
                return ;
        }
        else {
                printk(KERN_DEBUG "%s: Successfull nlh = nlmsg_put(skb_out, 0, 0,  NLMSG_DONE, msg_size, 0)  \n",  __FUNCTION__);
        }
        NETLINK_CB(skb_out).dst_group = 0; /* not in mcast group */

        memcpy(nlmsg_data(nlh), nat_data, msg_size);

        /*
         * nlmsg_unicast() takes ownership of skb_out whether it succeeds or
         * fails, so it must not be freed here.
         */
        res =  nlmsg_unicast(nl_sk, skb_out, request_pid);
        if (res < 0 ){
                printk(KERN_DEBUG "%s: Failed to  nlmsg_unicast(nl_sk, skb_out, request_pid): Leaving \n",  __FUNCTION__);
                return ;
        }
        printk(KERN_DEBUG "%s: Data sent successfully : Leaving \n",  __FUNCTION__);
}


// Callback of kernel socket. 
void rr(struct sk_buff *skb){
        printk(KERN_DEBUG "%s: Entered \n", __FUNCTION__);
        struct nlmsghdr *nlh;
        nlh = (struct nlmsghdr *)skb->data;
        printk(KERN_DEBUG "Request received \n");
        request_pid = nlh->nlmsg_pid; /* pid of sending process */
        no_data_request = 0; // Someone is out there
        printk(KERN_DEBUG "%s: Leaving:\n", __FUNCTION__);
}

kernel_module.c

/* Passed as the `group` argument of send_nat(); the send_nat() shown above does
 * not read that argument. */
#define NAT_GROUP 21
/* Kernel netlink socket handle handed to send_nat(). It is passed by value, so
 * send_nat() cannot update this variable. */
struct sock *nl_sk_ud = NULL;
EXPORT_SYMBOL(nl_sk_ud);
/* 1 = socket not created yet; send_nat() sets it to 0 once it has created one. */
int sock_closed = 1;
EXPORT_SYMBOL(sock_closed);
/* Most recent record returned by get_info(), forwarded to send_nat(). */
struct nat_mntr *data = NULL;
EXPORT_SYMBOL(data);

/* Illustrative call site (body elided with "...."): fetch a record and push it
 * to user space. */
any_kernel_function(){

....

data = get_info(skb, 0, l3proto, l4proto, &target, mtype); // Returns pointer to struct nat_mntr
/* pid = 0, group = NAT_GROUP, flags = 0; sock_closed is passed by address so
 * send_nat() can update it, while nl_sk_ud is passed by value. */
send_nat(nl_sk_ud, data,  0, NAT_GROUP, 0, &sock_closed);

....

}

如果您的内核模块发送应答,那么用户空间的请求将收到两个响应:一个 ACK(由内核自动生成)和实际的应答。

我认为人们不会注意到这一点,因为通常内核模块会在 ACK 之前快速响应。因此,用户空间客户端收到答案并忽略接下来的任何内容(包括 ACK),直到下一个请求。

在您的代码中,内核模块没有立即响应。它会等到数据可用,然后再获取数据。这可能会发生:

  1. 用户空间客户端发送请求。
  2. 内核模块存储 pid。
  3. Linux 向客户端回复 ACK。
  4. 客户端收到应答,错误地将其解析为伪造的 nat_mntr 而不是 ACK,并关闭套接字。
  5. 内核模块一旦有了数据就发送应答。nlmsg_unicast() 返回错误代码 -111(ECONNREFUSED),因为客户端已不再监听。

解决此问题的一种方法是让客户端期待两个数据包 - 并忽略第一个数据包,即 ACK。


顺便说一句:这不是您的代码的唯一问题。

  • 当你这样做时

    nl_sk = netlink_kernel_create(&init_net, MYPROTO, &cfg);

您正在将套接字赋值给一个局部变量(按值传入的参数)。如果再次调用该函数,即使 sock_closed 为 0,nl_sk 也不会被初始化。

  • 您永远不会释放套接字。因为您不能在内核空间中以相同的协议打开两个套接字,所以后续的套接字创建将无法挽回地失败,直到您重新启动。 (例如,如果您需要重新编译,这将影响您。)
  • 代码存在竞态条件。至少,no_data_request 和 request_pid 应该是原子整数。
  • 不要这样做:

    nlh = (struct nlmsghdr *)skb->data;

这个比较好:

nlh = nlmsg_hdr(skb);

这甚至更好(因为它为您做了一些验证和 Netlink 文书工作):

netlink_rcv_skb(skb, &rr2);