"criu dump重要步骤的深入研究四"

  "criu checkpoint七"

Posted by Xu on June 13, 2017

CRIU,DUMP重要步骤的解析

经过前三个部分的研究,我们掌握了dump过程中信息收集阶段的前三个主要步骤:进程树的收集,进程树的id信息收集,网络锁操作,今天进入信息收集的最后一个步骤解析:命名空间的信息收集

collect_namespace

collect_namespaces():三个步骤:收集用户命名空间,mnt命名空间和网络命名空间

int collect_namespaces(bool for_dump)
{
	int ret;

	ret = collect_user_namespaces(for_dump);
	if (ret < 0)
		return ret;

	ret = collect_mnt_namespaces(for_dump);
	if (ret < 0)
		return ret;

	ret = collect_net_namespaces(for_dump);
	if (ret < 0)
		return ret;

	return 0;
}

第一个步骤:收集user_namespaces,collect_user_namespaces(for_dump)

int collect_user_namespaces(bool for_dump)
{
	if (!for_dump)
		return 0;

	if (!(root_ns_mask & CLONE_NEWUSER))
		return 0;

	return walk_namespaces(&user_ns_desc, collect_user_ns, NULL);
}

int walk_namespaces(struct ns_desc *nd, int (*cb)(struct ns_id *, void *), void *oarg)
{
	int ret = 0;
	struct ns_id *ns;

	for (ns = ns_ids; ns != NULL; ns = ns->next) {
	//同样是以ns_ids为入口,遍历所有命名空间,匹配到user_ns_desc后执行cb(ns,NULL),这里的cb函数其实就是collect_user_ns函数
		if (ns->nd != nd)
			continue;

		if (ns->type == NS_CRIU) {
			if (root_ns_mask & nd->cflag)
				continue;

			ret = cb(ns, oarg);//执行collect_user_ns函数
			break;
		}

		ret = cb(ns, oarg);//执行collect_user_ns函数
		if (ret)
			break;
	}

	return ret;
}

int collect_user_ns(struct ns_id *ns, void *oarg)
//一个函数调用

{
	/*
	 * User namespace is dumped before files to get uid and gid
	 * mappings, which are used for convirting local id-s to
	 * userns id-s (userns_uid(), userns_gid())
	 */
	if (dump_user_ns(root_item->pid->real, root_item->ids->user_ns_id))
		return -1;

	return 0;
}

---
int dump_user_ns(pid_t pid, int ns_id)
{
	int ret, exit_code = -1;
	UsernsEntry *e = &userns_entry;
	struct cr_img *img;

	ret = parse_id_map(pid, "uid_map", &e->uid_map);//解析uid映射关系
	if (ret < 0)
		goto err;
	e->n_uid_map = ret;

	ret = parse_id_map(pid, "gid_map", &e->gid_map);//解析gid映射关系
	if (ret < 0)
		goto err;
	e->n_gid_map = ret;

	if (check_user_ns(pid))//fork()一个子进程设置gid和uid,prctl(PR_SET_KEEPCAPS, 1)表示当gid,uid修改时,进程依然具有响应能力,最后调用switch_ns来切换所有命名空间,父进程等待子进程返回状态。
		return -1;

	img = open_image(CR_FD_USERNS, O_DUMP, ns_id);//打开镜像
	if (!img)
		goto err;
	ret = pb_write_one(img, e, PB_USERNS);//写入镜像
	close_image(img);
	if (ret < 0)
		goto err;

	return 0;
err:
	if (e->uid_map) {
		xfree(e->uid_map[0]);
		xfree(e->uid_map);
	}
	if (e->gid_map) {
		xfree(e->gid_map[0]);
		xfree(e->gid_map);
	}
	return exit_code;
}

第二个步骤:mnt_namespace信息的收集

int collect_mnt_namespaces(bool for_dump)
{
	struct collect_mntns_arg arg;
	int ret;

	arg.for_dump = for_dump;
	arg.need_to_validate = false;

	ret = walk_namespaces(&mnt_ns_desc, collect_mntns, &arg);//同用户命名空间信息的收集,主要是调用collect_mntns,返回挂载信息mount_info pm
	if (ret)
		goto err;

 #ifdef CONFIG_BINFMT_MISC_VIRTUALIZED
	if (for_dump && !opts.has_binfmt_misc) {
	 //binfmt_misc:是指特定的文件类型用特定的应用程序打开的功能
		unsigned int s_dev = 0;
		struct ns_id *ns;

		for (ns = ns_ids; ns != NULL; ns = ns->next) {
			if (ns->type == NS_ROOT && ns->nd == &mnt_ns_desc)
				break;//通过ns_ids找到指定的mnt命名空间
		}

		if (ns) {
			ret = mount_cr_time_mount(ns, &s_dev, "binfmt_misc", BINFMT_MISC_HOME,
						  "binfmt_misc");//首先切换到mnt命名空间,将binfmt_misc目录挂载到"/proc/sys/fs/binfmt_misc"
			if (ret == -EPERM)
				pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
			else if (ret < 0 && ret != -EBUSY && ret != -ENODEV && ret != -ENOENT) {
				pr_err("Can't mount binfmt_misc: %d %s\n", ret, strerror(-ret));
				goto err;
			} else if (ret == 0) {
				ret = -1;
				goto err;
			} else if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc",
								BINFMT_MISC_HOME, s_dev) < 0) {
				ret = -1;
				goto err;
			}
		}
	}
 #endif

	ret = resolve_external_mounts(mntinfo);
	if (ret)
		goto err;

	if (arg.need_to_validate) {
		ret = -1;

		if (resolve_shared_mounts(mntinfo, arg.root_master_id))
			goto err;
		if (validate_mounts(mntinfo, true))
			goto err;
	}

	ret = 0;
err:
	return ret;
}

挂载信息的收集主要在于collect_mntns函数,该函数通过一系列挂载信息解析函数的调用从/proc/mount_info中获得解析数据并填充到挂载信息结构体,再将填充好的结构体追加到挂载信息全局链表mntinfo

static int collect_mntns(struct ns_id *ns, void *__arg)
{
	struct collect_mntns_arg *arg = __arg;
	struct mount_info *pms;

	pms = collect_mntinfo(ns, arg->for_dump);//收集挂载信息
	if (!pms)
		return -1;

	if (arg->for_dump && ns->type != NS_CRIU)
		arg->need_to_validate = true;

	mntinfo_add_list(pms);//添加到全局挂载信息链表结构体mntinfo中去

	if (arg->need_to_validate && ns->id == root_item->ids->mnt_ns_id)
		arg->root_master_id = ns->mnt.mntinfo_tree->master_id;

	return 0;
}

struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
{
	struct mount_info *pm;

	pm = parse_mountinfo(ns->ns_pid, ns, for_dump);//解析挂载信息到结构体mount_info pm
	if (!pm) {
		pr_err("Can't parse %d's mountinfo\n", ns->ns_pid);
		return NULL;
	}

	ns->mnt.mntinfo_tree = mnt_build_tree(pm, NULL);
	if (ns->mnt.mntinfo_tree == NULL)
		goto err;

	ns->mnt.mntinfo_list = pm;
	return pm;
err:
	free_mntinfo(pm);
	return NULL;
}
//解析挂载信息,将解析的挂载信息填充到挂载信息结构体mount_info中去并返回
struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump)
{
	struct mount_info *list = NULL;
	FILE *f;

	f = fopen_proc(pid, "mountinfo");//打开指定pid的挂载信息文件
	if (!f)
		return NULL;

	while (fgets(buf, BUF_SIZE, f)) {
		struct mount_info *new;
		int ret = -1;
		char *fsname = NULL;

		new = mnt_entry_alloc();//新建一个挂载信息结构体
		if (!new)
			goto end;

		new->nsid = nsid;指定该挂载信息结构体的nsidmnt_ns_desc

		ret = parse_mountinfo_ent(buf, new, &fsname);//根据读取到的挂载信息buf解析得到挂载信息结构体new
		if (ret < 0) {
			pr_err("Bad format in %d mountinfo: '%s'\n", pid, buf);
			goto end;
		}

		/*
		 * Drop this mountpoint early, so that lookup_mnt_id/etc will
		 * fail loudly at "dump" stage if an opened file or another mnt
		 * depends on this one.
		 */
		if (for_dump && should_skip_mount(new->mountpoint + 1)) {
			pr_info("\tskip %s @ %s\n", fsname,  new->mountpoint);
			mnt_entry_free(new);
			new = NULL;
			goto end;
		}

		pr_info("\ttype %s source %s mnt_id %d s_dev %#x %s @ %s flags %#x options %s\n",
				fsname, new->source,
				new->mnt_id, new->s_dev, new->root, new->mountpoint,
				new->flags, new->options);

		if (new->fstype->parse) {
			ret = new->fstype->parse(new);
			if (ret < 0) {
				pr_err("Failed to parse FS specific data on %s\n",
						new->mountpoint);
				mnt_entry_free(new);
				new = NULL;
				goto end;
			}

			if (ret > 0) {
				pr_info("\tskipping fs mounted at %s\n", new->mountpoint + 1);
				mnt_entry_free(new);
				new = NULL;
				ret = 0;
				goto end;

			}
		}
end:
		if (fsname)
			free(fsname);

		if (new) {
			new->next = list;
			list = new;
		}

		if (ret)
			goto err;
	}
out:
	fclose(f);
	return list;

err:
	while (list) {
		struct mount_info *next = list->next;
		mnt_entry_free(list);
		list = next;
	}
	goto out;
}
//解析读取到的mount_info信息str,将对应信息填充到mount_info结构体new中去
static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
{
	struct fd_link root_link;
	unsigned int kmaj, kmin;
	int ret, n;
	char *sub, *opt = NULL;

	new->mountpoint = xmalloc(PATH_MAX);//为挂载信息结构体的挂载点分配内存
	if (new->mountpoint == NULL)
		goto err;

	new->mountpoint[0] = '.';
	ret = sscanf(str, "%i %i %u:%u %ms %s %ms %n",
			&new->mnt_id, &new->parent_mnt_id,
			&kmaj, &kmin, &new->root, new->mountpoint + 1,
			&opt, &n);//将在/proc/mountinfo下读取到的挂载信息buf(str)输入到new各个对象中去。
	if (ret != 7)
		goto err;

	cure_path(new->mountpoint);//修复被损坏的路径,用符号替换回8进制码
	cure_path(new->root);

	root_link.len = strlen(new->root);
	strcpy(root_link.name, new->root);
	if (strip_deleted(&root_link)) {
		strcpy(new->root, root_link.name);
		new->deleted = true;
	}

	new->mountpoint = xrealloc(new->mountpoint, strlen(new->mountpoint) + 1);
	if (!new->mountpoint)
		goto err;
	new->ns_mountpoint = new->mountpoint;
	new->is_ns_root = is_root(new->ns_mountpoint + 1);

	new->s_dev = new->s_dev_rt = MKKDEV(kmaj, kmin);
	new->flags = 0;
	if (parse_mnt_flags(opt, &new->flags))//解析mnt参数,填充mount_info->flags  new->flags
		goto err;

	free(opt); /* we are going to reallocate/reuse this buffer */
	opt = NULL;

	str += n;
	if (parse_mnt_opt(str, new, &n))//解析mnt选项,填充mount_info new

		goto err;

	str += n;
	ret = sscanf(str, "%ms %ms %ms", fsname, &new->source, &opt);
	if (ret == 2) {
		/* src may be empty */
		opt = new->source;
		new->source = xstrdup("");
		if (new->source == NULL)
			goto err;
	} else if (ret != 3)
		goto err;

	cure_path(new->source);

	new->fsname = xstrdup(*fsname);
	if (!new->fsname)
		goto err;

	/*
	 * The kernel reports "subtypes" sometimes and the valid
	 * type-vs-subtype delimiter is the dot symbol. We disregard
	 * any subtypes for the purpose of finding the fstype.
	 */
	sub = strchr(*fsname, '.');
	if (sub)
		*sub = 0;

	new->fstype = find_fstype_by_name(*fsname);

	new->options = xmalloc(strlen(opt) + 1);
	if (!new->options)
		goto err;

	if (parse_sb_opt(opt, &new->sb_flags, new->options))
		goto err;

	ret = 0;
ret:
	xfree(opt);
	return ret;
err:
	ret = -1;
	goto ret;
}

第三个步骤:网络命名空间的信息收集

int collect_net_namespaces(bool for_dump)
{
	return walk_namespaces(&net_ns_desc, collect_net_ns,
			(void *)(for_dump ? 1UL : 0));//匹配到指定的net_ns_desc命名空间后调用collect_net_ns来收集网络命名空间信息。
}

static int collect_net_ns(struct ns_id *ns, void *oarg)
{
	bool for_dump = (oarg == (void *)1);
	int ret;

	pr_info("Collecting netns %d/%d\n", ns->id, ns->ns_pid);
	ret = prep_ns_sockets(ns, for_dump);
	if (ret)
		return ret;

	if (!for_dump)
		return 0;

	return collect_sockets(ns);
}

switch_ns会进一步调用setns(),setns()系统调用非常缓慢耗时,为了防止多次调用switch_ns,我们需要提前创建好其它命名空间的sockets.
static int prep_ns_sockets(struct ns_id *ns, bool for_dump)
{
	int nsret = -1, ret;

	if (ns->type != NS_CRIU) {
		pr_info("Switching to %d's net for collecting sockets\n", ns->ns_pid);
		if (switch_ns(ns->ns_pid, &net_ns_desc, &nsret))//切换命名空间
			return -1;
	}

	if (for_dump) {
		ret = ns->net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
		//int socket(int domain, int type, int protocol);domain用于设置网络通信的域,函数socket()根据这个参数选择通信协议的族,type用于设置套接字通信的类型,protocol用于制定某个协议的特定类型。SOCK_RAW类型,提供原始网络协议访问,SOCK_SEQPACKET序列化包,提供一个序列化的、可靠的、双向的基本连接的数据传输通道,数据长度定常。每次调用读系统调用时数据需要将全部数据读出。详细内容解读参考:http://blog.csdn.net/xc_tsao/article/details/44123331
		if (ret < 0) {
			pr_perror("Can't create sock diag socket");
			goto err_nl;
		}
	} else
		ns->net.nlsk = -1;

	ret = ns->net.seqsk = socket(PF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK, 0);
	if (ret < 0) {
		pr_perror("Can't create seqsk for parasite");
		goto err_sq;
	}

	ret = 0;
out:
	if (nsret >= 0 && restore_ns(nsret, &net_ns_desc) < 0) {
		nsret = -1;
		if (ret == 0)
			goto err_ret;
	}

	return ret;

err_ret:
	close(ns->net.seqsk);
err_sq:
	if (ns->net.nlsk >= 0)
		close(ns->net.nlsk);
err_nl:
	goto out;
}

//收集sockets信息,通过设置消息请求,调用do_collect_req将设置好的消息发送给指定套接字nl,接受套接字消息并成功回调指定函数,分别收集UNIX sockets,IPv4 TCP sockets,IPv4 UDP sockets,IPv4 UDP-lite sockets,IPv6 TCP sockets,IPv6 UDP sockets,IPv6 UDP-lite sockets等网络信息

int collect_sockets(struct ns_id *ns)
{
	int err = 0, tmp;
	int nl = ns->net.nlsk;
	struct sock_diag_req req;

	memset(&req, 0, sizeof(req));//用于初始化内存,将内存地址&req后sizeof(req)大小的内容全部替换成0,相当于对req的初始化操作
	req.hdr.nlmsg_len	= sizeof(req);
	req.hdr.nlmsg_type	= SOCK_DIAG_BY_FAMILY;
	req.hdr.nlmsg_flags	= NLM_F_DUMP | NLM_F_REQUEST;
	req.hdr.nlmsg_seq	= CR_NLMSG_SEQ;

	/* Collect UNIX sockets */
	//用于收集Unix Socker信息
	req.r.u.sdiag_family	= AF_UNIX;
	req.r.u.udiag_states	= -1; /* All */
	req.r.u.udiag_show	= UDIAG_SHOW_NAME | UDIAG_SHOW_VFS |
				  UDIAG_SHOW_PEER | UDIAG_SHOW_ICONS |
				  UDIAG_SHOW_RQLEN;
	tmp = do_collect_req(nl, &req, sizeof(req), unix_receive_one, NULL);
	if (tmp)
		err = tmp;

	/* Collect IPv4 TCP sockets */
	//用于收集IPv4 TCP信息
	req.r.i.sdiag_family	= AF_INET;
	req.r.i.sdiag_protocol	= IPPROTO_TCP;
	req.r.i.idiag_ext	= 0;
	/* Only listening and established sockets supported yet */
	req.r.i.idiag_states	= (1 << TCP_LISTEN) | (1 << TCP_ESTABLISHED) |
					(1 << TCP_FIN_WAIT1) | (1 << TCP_FIN_WAIT2) |
					(1 << TCP_CLOSE_WAIT) | (1 << TCP_LAST_ACK) |
					(1 << TCP_CLOSING) | (1 << TCP_SYN_SENT);
	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
	if (tmp)
		err = tmp;

	/* Collect IPv4 UDP sockets */
	//用于收集IPv4 UDP sockets信息
	req.r.i.sdiag_family	= AF_INET;
	req.r.i.sdiag_protocol	= IPPROTO_UDP;
	req.r.i.idiag_ext	= 0;
	req.r.i.idiag_states	= -1; /* All */
	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
	if (tmp)
		err = tmp;

	/* Collect IPv4 UDP-lite sockets */
	//用于收集IPv4 UDP-lite sockets信息
	req.r.i.sdiag_family	= AF_INET;
	req.r.i.sdiag_protocol	= IPPROTO_UDPLITE;
	req.r.i.idiag_ext	= 0;
	req.r.i.idiag_states	= -1; /* All */
	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
	if (tmp)
		err = tmp;

	/* Collect IPv6 TCP sockets */
	//用于收集Collect IPv6 TCP sockets信息
	req.r.i.sdiag_family	= AF_INET6;
	req.r.i.sdiag_protocol	= IPPROTO_TCP;
	req.r.i.idiag_ext	= 0;
	/* Only listening sockets supported yet */
	req.r.i.idiag_states	= (1 << TCP_LISTEN) | (1 << TCP_ESTABLISHED) |
					(1 << TCP_FIN_WAIT1) | (1 << TCP_FIN_WAIT2) |
					(1 << TCP_CLOSE_WAIT) | (1 << TCP_LAST_ACK) |
					(1 << TCP_CLOSING) | (1 << TCP_SYN_SENT);
	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
	if (tmp)
		err = tmp;

	/* Collect IPv6 UDP sockets */
	//用于收集IPv6 UDP sockets信息
	req.r.i.sdiag_family	= AF_INET6;
	req.r.i.sdiag_protocol	= IPPROTO_UDP;
	req.r.i.idiag_ext	= 0;
	req.r.i.idiag_states	= -1; /* All */
	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
	if (tmp)
		err = tmp;

	/* Collect IPv6 UDP-lite sockets */
	//用于收集IPv6 UDP-lite sockets信息
	req.r.i.sdiag_family	= AF_INET6;
	req.r.i.sdiag_protocol	= IPPROTO_UDPLITE;
	req.r.i.idiag_ext	= 0;
	req.r.i.idiag_states	= -1; /* All */
	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
	if (tmp)
		err = tmp;

	req.r.p.sdiag_family	= AF_PACKET;
	req.r.p.sdiag_protocol	= 0;
	req.r.p.pdiag_show	= PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
					PACKET_SHOW_FANOUT | PACKET_SHOW_RING_CFG;
	tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL);
	if (tmp) {
		pr_warn("The current kernel doesn't support packet_diag\n");
		if (ns->ns_pid == 0 || tmp != -ENOENT) /* Fedora 19 */
			err = tmp;
	}

	req.r.n.sdiag_family	= AF_NETLINK;
	req.r.n.sdiag_protocol	= NDIAG_PROTO_ALL;
	req.r.n.ndiag_show	= NDIAG_SHOW_GROUPS;
	tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL);
	if (tmp) {
		pr_warn("The current kernel doesn't support netlink_diag\n");
		if (ns->ns_pid == 0 || tmp != -ENOENT) /* Fedora 19 */
			err = tmp;
	}

	/* don't need anymore */
	close(nl);//关闭套接字
	ns->net.nlsk = -1;

	if (err && (ns->type == NS_CRIU)) {
		/*
		 * If netns isn't dumped, criu will fail only
		 * if an unsupported socket will be really dumped.
		 */
		pr_info("Uncollected sockets! Will probably fail later.\n");
		err = 0;
	}

	return err;
}


static int do_collect_req(int nl, struct sock_diag_req *req, int size,
		int (*receive_callback)(struct nlmsghdr *h, void *), void *arg)
{
	int tmp;

	tmp = do_rtnl_req(nl, req, size, receive_callback, NULL, arg);

	if (tmp == 0)
		set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);//请求发送并响应回调成功,设置收集记录字节对应bit。

	return tmp;
}

//该函数向指定的套接字端口发送请求数据msg,发送后监听该套接字返回的消息,根据返回的消息回调指定的函数,成功则回调receive_callback,失败回调error_callback
int do_rtnl_req(int nl, void *req, int size,
		int (*receive_callback)(struct nlmsghdr *h, void *),
		int (*error_callback)(int err, void *), void *arg)
{
	struct msghdr msg;
	struct sockaddr_nl nladdr;
	struct iovec iov;//io容器
	static char buf[16384];
	int err;

	if (!error_callback)
		error_callback = rtnl_return_err;

	memset(&msg, 0, sizeof(msg));//初始化msg
	msg.msg_name	= &nladdr;
	msg.msg_namelen	= sizeof(nladdr);
	msg.msg_iov	= &iov;
	msg.msg_iovlen	= 1;

	memset(&nladdr, 0, sizeof(nladdr));//初始化nladdr
	nladdr.nl_family = AF_NETLINK;

	iov.iov_base	= req;
	iov.iov_len	= size;

	if (sendmsg(nl, &msg, 0) < 0) {
	//向套接字nl发送消息请求msg
	/*int sendmsg(int s, const struct msghdr *msg, unsigned int           
	 *flags);函数参数描述如下:
     *要在其上发送消息的套接口s
     *信息头结构指针msg,这会控制函数调用的功能
     *可选的标记位参数flags。这与send或是sendto函数调用的标记参数相同。
     *函数的返回值为实际发送的字节数。否则,返回-1表明发生了错误,而errno表明错      
     *误原因。
     */
		err = -errno;
		pr_perror("Can't send request message");
		goto err;
	}

	iov.iov_base	= buf;
	iov.iov_len	= sizeof(buf);

	while (1) {

		memset(&msg, 0, sizeof(msg));
		msg.msg_name	= &nladdr;
		msg.msg_namelen	= sizeof(nladdr);
		msg.msg_iov	= &iov;
		msg.msg_iovlen	= 1;

		err = recvmsg(nl, &msg, 0);//该循环就是为了等待套接字nl返回的消息
		/*
		*int recvmsg(int s, struct msghdr *msg, unsigned int flags);
        *函数参数如下:
        *要在其上接收信息的套接口s
        *信息头结构指针msg,这会控制函数调用的操作。
        *可选标记位参数flags。这与recv或是recvfrom函数调用的标记参数相同。
        *这个函数的返回值为实际接收的字节数。否则,返回-1表明发生了错误,而errno表明错误原因。
        */
		if (err < 0) {
			if (errno == EINTR)
				continue;
			else {
				err = -errno;
				pr_perror("Error receiving nl report");
				goto err;
			}
		}
		if (err == 0)
			break;

		if (msg.msg_flags & MSG_TRUNC) {
			pr_err("Message truncated\n");
			err = -EMSGSIZE;
			goto err;
		}

		err = nlmsg_receive(buf, err, receive_callback, error_callback, arg);//解析nl接收到的消息buf,根据返回的消息来进行回调,若成功回调receive_callback,否则回调error_callback。
		if (err < 0)
			goto err;
		if (err == 0)
			break;
	}

	return 0;

err:
	return err;
}