"内核OVERLAY--目录搜索之打开文件"

  "第一章"

Posted by Xu on January 3, 2018

内核OVERLAY–目录搜索之打开文件

我们执行strace ls merged可以查看该命令执行哪些系统调用:

strace_ls

这是几个关键的系统调用:

  • openat()
  • getdents()

这一章节主要介绍文件的打开过程: openfile

打开目录do_sys_open

openat()会进一步调用do_sys_open():

  • 首先检查并包装传递进来的标志位
  • 随后将用户空间的路径名复制到内核空间
  • 在顺利取得空闲的文件表述符的情况下调用 do_filp_open 完成对路径的搜寻和文件的打开
  • 使用 fsnotify 机制来唤醒文件系统中的监控进程
  • 如果一切顺利,就为这个文件描述符安装文件,然后大功告成并将文件描述符返回用户空间。
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	int fd = build_open_flags(flags, mode, &op);//根据flags,mode构建open_flags对象
	struct filename *tmp;//文件名结构体,包含文件名各种相关信息

	if (fd)
		return fd;

	tmp = getname(filename);//根据文件名string返回文件名相关信息结构体struct
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	fd = get_unused_fd_flags(flags);//分配一个未使用的文件描述符
	if (fd >= 0) {
		struct file *f = do_filp_open(dfd, tmp, &op);//已指定的文件描述符及文件名结构体,打开标志打开文件,获取文件对象,见下详解
		if (IS_ERR(f)) {
			put_unused_fd(fd);
			fd = PTR_ERR(f);
		} else {
			fsnotify_open(f);// 
			fd_install(fd, f);
		}
	}
	putname(tmp);
	return fd;
}

do_filp_open:

利用缓存打开文件,返回打开的文件对象

struct file *do_filp_open(int dfd, struct filename *pathname,
		const struct open_flags *op)
{
	struct nameidata nd;//路径搜索的信息存放结构体
	int flags = op->lookup_flags;
	struct file *filp;

	set_nameidata(&nd, dfd, pathname);//根据相关信息设置nd,然后更新进程nameidata
	filp = path_openat(&nd, op, flags | LOOKUP_RCU);//get_empty_filp()获取空的file对象,path_init初始化nameidata,并设置nd的索引节点nd->inode,最后link_path_walk()执行标准路径名查找,见P498,并将查找结果添加到nd中去,该函数主要是根据路径分量循环寻找路径名中最后一个分量对应文件的父目录的dentry和mnt,并将结果填充到表示merged目录的文件对象缓存
	if (unlikely(filp == ERR_PTR(-ECHILD)))
		filp = path_openat(&nd, op, flags);
	if (unlikely(filp == ERR_PTR(-ESTALE)))
		filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
	restore_nameidata();
	return filp;
}

path_openat:

在这里 Kernel 向我们展示了“路径行走(path walk)”的两种策略:rcu-walk和 ref-walk。在 rcu-walk 期间将会禁止抢占,也决不能出现进程阻塞,所以其效率很高;ref-walk 会在 rcu-walk 失败、进程需要睡眠或者需要取得某结构的引用计数(reference count)的情况下切换进来,很明显它的效率大大低于 rcu-walk。最后 REVAL其实也是 ref-walk,在以后我们会看到,该模式是在已经完成了路径查找,打开具体文件时,如果该文件已经过期(stale)才启动的,所以 REVAL 是给具体文件系统自己去解释的。

path_openat 主要作用是首先为 struct file 申请内存空间,设置遍历路径的初始状态,然后遍历路径并找到最终目标的父节点,最后根据目标的类型和标志位完成 open 操作,最终返回一个新的 file 结构

static struct file *path_openat(struct nameidata *nd,
			const struct open_flags *op, unsigned flags)
{
	const char *s;
	struct file *file;
	int opened = 0;
	int error;

	file = get_empty_filp();//分配一个新的文件对象,调用kmem_cache_alloc()从filp高速缓存中获取一个空闲的文件对象
	if (IS_ERR(file))
		return file;

	file->f_flags = op->open_flag;

	if (unlikely(file->f_flags & __O_TMPFILE)) {//临时文件暂时不考虑
		error = do_tmpfile(nd, flags, op, file, &opened);
		goto out2;
	}

	s = path_init(nd, flags);//根据nd中的路径名称name来解析路径相关信息设置nd->root和nd->path,其中需要分析到路径是否以‘/’开头,若是则为绝对路径,否则为相对路径,并检查dfd参数,如果该值为AT_FDCWD则为当前工作目录,若不为AT_FDCWD,则会调用fget_light()函数来获得dfd所对应的file结构(struct file *),判断该文件对象是否为目录,是则成功,并最后检查权限
	if (IS_ERR(s)) {
		put_filp(file);
		return ERR_CAST(s);
	}
	//刚刚初始化的nd中path指向根目录‘/’,由此开始准备路径遍历
	while (!(error = link_path_walk(s, nd)) &&
		(error = do_last(nd, file, op, &opened)) > 0) {
		// total_link_count 是用来记录符号链接的深度,每穿越一次符号链接这个值就加一,最大允许 40 层符号链接。接下来 link_path_walk 会带领我们走向目标,并在到达最终目标所在目录的时候停下来(最终目标需要交给另一个函数 do_last 单独处理)
		//首先调用link_path_walk()来进行标准的路径名查找,解析路径得到路径最后一个分量的父目录项信息并存放到nd->path.dentry,该函数主要是根据路径分量循环寻找路径名中最后一个分量对应文件的父目录的dentry和mnt,并将结果填充到表示merged目录的文件对象缓存
		//当遍历到最后一个分量后,nd中的path指向最后一个路径分量,随后调用do_last()来处理路径最后一个分量:文件名的解析并执行打开文件的最实质性的工作
		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
		s = trailing_symlink(nd);
		if (IS_ERR(s)) {
			error = PTR_ERR(s);
			break;
		}
	}
	terminate_walk(nd);
out2:
	if (!(opened & FILE_OPENED)) {
		BUG_ON(!error);
		put_filp(file);
	}
	if (unlikely(error)) {
		if (error == -EOPENSTALE) {
			if (flags & LOOKUP_RCU)
				error = -ECHILD;
			else
				error = -ESTALE;
		}
		file = ERR_PTR(error);
	}
	return file;
}

这里要先介绍一个重要的结构体对象,作为路径查找的结果临时变量,存放路径查找过程中的相关信息

struct nameidata {
    struct path    path;//保存当前搜索到的路径;
    struct qstr    last;//保存当前子路径名及其散列值;
    struct path    root;//用来保存根目录的信息;
    struct inode    *inode; /* path.dentry.d_inode *///指向当前找到的目录项的 inode 结构;
    unsigned int    flags;//是一些和查找(lookup)相关的标志位;
    unsigned    seq, m_seq;//seq 是相关目录项的顺序锁序号;是相关文件系统(其实是 mount)的顺序锁序号;
    int        last_type;//表示当前节点类型;
    unsigned    depth;//用来记录在解析符号链接过程中的递归深度;
    char *saved_names[MAX_NESTED_LINKS + 1];//用来记录相应递归深度的符号链接的路径。
};

do_last:

处理路径最后一步分量

  • 首先判断最后一个分量的类型,一共有五种类型
  • 然后lookup_fast()使用rcu-walk模式__d_lookup_rcu查询缓存中的dentry
  • 若读取失败采用unlazy_walk()(ref-walk)查询目录项
  • 若还是失败则采用非rcu模式retry_lookup读取目录项
  • 查询结束后更新nd为最后一个分量查询结果
  • 并尝试调用vfs_open真正打开该路径所指示的文件
/*
 * Handle the last step of open()
 */
static int do_last(struct nameidata *nd,
		   struct file *file, const struct open_flags *op,
		   int *opened)
{
	struct dentry *dir = nd->path.dentry;//获取通过link_path_walk得到的路径最后一个分量的父目录目录项
	int open_flag = op->open_flag;//获取打开标志位
	bool will_truncate = (open_flag & O_TRUNC) != 0;//O_TRUNC判断是否需要将文件截断为0后写入
	bool got_write = false;
	int acc_mode = op->acc_mode;//访问模式
	unsigned seq;//顺序锁序号
	struct inode *inode;
	struct path save_parent = { .dentry = NULL, .mnt = NULL };
	struct path path;
	bool retried = false;
	int error;

	nd->flags &= ~LOOKUP_PARENT;
	nd->flags |= op->intent;

	if (nd->last_type != LAST_NORM) {//判断最后一个分量是否为正常字符串,否则需要处理.或..等相对路径关系
	/*last_type代表路径最后一个分量的类型,有五种类型:
	*LAST_NORM 就是普通的路径名
	*LAST_ROOT 是 “/”
	*LAST_DOT 和 LAST_DOTDOT 分别代表了 “.” 和 “..”
	*LAST_BIND 就是符号链接
	*/
		error = handle_dots(nd, nd->last_type);//处理.和..
		if (unlikely(error))
			return error;
		goto finish_open;
	}

	if (!(open_flag & O_CREAT)) {//检查O_CREAT标志位
		if (nd->last.name[nd->last.len])
			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
		/* we _can_ be in RCU mode here */
		error = lookup_fast(nd, &path, &inode, &seq);//查询dentry中的缓存,看一下是否命中。使用rcu模式进行查询,利用版本号防止数据读取错误,可以不加锁的读取目录项dentry信息,若读取失败则再采取非rcu模式,通过对父目录进行层层加锁来读取指定文件的目录项dentry
		if (likely(!error))
			goto finish_lookup;

		if (error < 0)
			return error;

		BUG_ON(nd->inode != dir->d_inode);
	} else {
		/* create side of things */
		/*
		 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
		 * has been cleared when we got to the last component we are
		 * about to look up
		 */
		error = complete_walk(nd);//告别rcu访问dentry的模式
		if (error)
			return error;

		audit_inode(nd->name, dir, LOOKUP_PARENT);//store the inode and device from a lookup
		/* trailing slashes? */
		if (unlikely(nd->last.name[nd->last.len]))
			return -EISDIR;
	}

retry_lookup:
     //这里采用非rcu模式来查询目录项dentry对象,可以使用各种有可能引起进程阻塞的锁来占有相应的资源了
	if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
		error = mnt_want_write(nd->path.mnt);
		if (!error)
			got_write = true;
		/*
		 * do _not_ fail yet - we might not need that or fail with
		 * a different error; let lookup_open() decide; we'll be
		 * dropping this one anyway.
		 */
	}
	mutex_lock(&dir->d_inode->i_mutex);
	error = lookup_open(nd, &path, file, op, got_write, opened);// lookup_open,说是新朋友其实是新瓶装旧酒,因为它和 lookup_slow 很像,都是先使用 lookup_dcache 在内存中找,如果不行就启动 lookup_real 在具体文件系统里面去找,当它成功返回时会将 path 指向找到的目标。
	mutex_unlock(&dir->d_inode->i_mutex);

	if (error <= 0) {
		if (error)
			goto out;

		if ((*opened & FILE_CREATED) ||
		    !S_ISREG(file_inode(file)->i_mode))
			will_truncate = false;

		audit_inode(nd->name, file->f_path.dentry, 0);
		goto opened;
	}

	if (*opened & FILE_CREATED) {
		/* Don't check for write permission, don't truncate */
		open_flag &= ~O_TRUNC;
		will_truncate = false;
		acc_mode = MAY_OPEN;
		path_to_nameidata(&path, nd);
		goto finish_open_created;
	}

	/*
	 * create/update audit record if it already exists.
	 */
	if (d_is_positive(path.dentry))
		audit_inode(nd->name, path.dentry, 0);

	/*
	 * If atomic_open() acquired write access it is dropped now due to
	 * possible mount and symlink following (this might be optimized away if
	 * necessary...)
	 */
	if (got_write) {
		mnt_drop_write(nd->path.mnt);
		got_write = false;
	}

	if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
		path_to_nameidata(&path, nd);
		return -EEXIST;
	}

	error = follow_managed(&path, nd);//检查当前目录是否为挂载点
	if (unlikely(error < 0))
		return error;

	BUG_ON(nd->flags & LOOKUP_RCU);
	seq = 0;	/* out of RCU mode, so the value doesn't matter */
	if (unlikely(d_is_negative(path.dentry))) {
		path_to_nameidata(&path, nd);//清除nd->mnt和nd->path.dentry 原有结构,设置与path一致
		return -ENOENT;
	}
	inode = d_backing_inode(path.dentry);
finish_lookup:
//结束查询
	if (nd->depth)
		put_link(nd);
	error = should_follow_link(nd, &path, nd->flags & LOOKUP_FOLLOW,
				   inode, seq);
	if (unlikely(error))
		return error;

	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
		path_to_nameidata(&path, nd);
	} else {
		save_parent.dentry = nd->path.dentry;
		save_parent.mnt = mntget(path.mnt);
		nd->path.dentry = path.dentry;

	}
	nd->inode = inode;
	nd->seq = seq;
	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:
//告别rcu模式
	error = complete_walk(nd);
	if (error) {
		path_put(&save_parent);
		return error;
	}
	audit_inode(nd->name, nd->path.dentry, 0);
	if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
		error = -ELOOP;
		goto out;
	}
	error = -EISDIR;
	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
		goto out;
	error = -ENOTDIR;
	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
		goto out;
	if (!d_is_reg(nd->path.dentry))
		will_truncate = false;

	if (will_truncate) {
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto out;
		got_write = true;
	}
finish_open_created:
	error = may_open(&nd->path, acc_mode, open_flag);//权限和标志位的检查
	if (error)
		goto out;

	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
	error = vfs_open(&nd->path, file, current_cred());//真正打开相关文件
	if (!error) {
		*opened |= FILE_OPENED;
	} else {
		if (error == -EOPENSTALE)
			goto stale_open;
		goto out;
	}
opened:
	error = open_check_o_direct(file);
	if (error)
		goto exit_fput;
	error = ima_file_check(file, op->acc_mode, *opened);
	if (error)
		goto exit_fput;

	if (will_truncate) {
		error = handle_truncate(file);
		if (error)
			goto exit_fput;
	}
out:
	if (unlikely(error > 0)) {
		WARN_ON(1);
		error = -EINVAL;
	}
	if (got_write)
		mnt_drop_write(nd->path.mnt);
	path_put(&save_parent);
	return error;

exit_fput:
	fput(file);
	goto out;

stale_open:
	/* If no saved parent or already retried then can't retry */
	if (!save_parent.dentry || retried)
		goto out;

	BUG_ON(save_parent.dentry != dir);
	path_put(&nd->path);
	nd->path = save_parent;
	nd->inode = dir->d_inode;
	save_parent.mnt = NULL;
	save_parent.dentry = NULL;
	if (got_write) {
		mnt_drop_write(nd->path.mnt);
		got_write = false;
	}
	retried = true;
	goto retry_lookup;
}

lookup_open():

在调用该函数之前,首先要调用lookup_fast(),进行快速查找,快速查找分为两种模式,rcu_walk和ref_walk模式,一个不用加锁,一个需要加锁访问,都会访问缓存查找,rcu模式如果都没查找到,调用unlazy_walk()切换到ref_walk模式,切换后调用__d_lookup再次进行ref模式内存查找,如果还没有查找到,则调用该函数lookup_open()进行磁盘查找:

search&build_ovl_entry

static int lookup_open(struct nameidata *nd, struct path *path,
			struct file *file,
			const struct open_flags *op,
			bool got_write, int *opened)
{
	struct dentry *dir = nd->path.dentry;//获取最后一个目录分量的父目录目录项
	struct inode *dir_inode = dir->d_inode;//父目录索引
	struct dentry *dentry;
	int error;
	bool need_lookup;

	*opened &= ~FILE_CREATED;
	dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);//首先在目录项缓存结构中查询,先调用d_lookup再缓存中查找,查找后并验证改目录项的合法性,如果在缓存中查不到,设置need_lookup需要进一步查找。
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Cached positive dentry: will open in f_op->open */
	if (!need_lookup && dentry->d_inode)
		goto out_no_open;

	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
		return atomic_open(nd, dentry, path, file, op, got_write,
				   need_lookup, opened);//原子打开操作
	}

	if (need_lookup) {
		BUG_ON(dentry->d_inode);

		dentry = lookup_real(dir_inode, dentry, nd->flags);//到具体的文件系统中去查找,也就是调用dentry->i_op->lookup操作,即调用ovl_lookup操作函数
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}

	/* Negative dentry, just create the file */
	//当目录项是"负状态"时,则重新创建该文件
	if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
		umode_t mode = op->mode;
		if (!IS_POSIXACL(dir->d_inode))
			mode &= ~current_umask();
		/*
		 * This write is needed to ensure that a
		 * rw->ro transition does not occur between
		 * the time when the file is created and when
		 * a permanent write count is taken through
		 * the 'struct file' in finish_open().
		 */
		if (!got_write) {
			error = -EROFS;
			goto out_dput;
		}
		*opened |= FILE_CREATED;
		error = security_path_mknod(&nd->path, dentry, mode, 0);
		if (error)
			goto out_dput;
		error = vfs_create(dir->d_inode, dentry, mode,
				   nd->flags & LOOKUP_EXCL);
		if (error)
			goto out_dput;
	}
out_no_open:
	path->dentry = dentry;
	path->mnt = nd->path.mnt;
	return 1;

out_dput:
	dput(dentry);
	return error;
}

lookup_real-》ovl_lookup:

到真实的文件系统中搜索目录项:

参数分别为父目录的索引节点、在lookup_dcache中分配内存的目录项结构,搜索标志

struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct ovl_entry *oe;
	struct ovl_entry *poe = dentry->d_parent->d_fsdata;//获取父目录项的层次信息
	struct path *stack = NULL;
	struct dentry *upperdir, *upperdentry = NULL;
	unsigned int ctr = 0;
	struct inode *inode = NULL;
	bool upperopaque = false;
	struct dentry *this, *prev = NULL;
	unsigned int i;
	int err;

	upperdir = ovl_upperdentry_dereference(poe);//获取父目录在upper层中的目录项
	if (upperdir) {
		this = ovl_lookup_real(upperdir, &dentry->d_name);//查询真实文件的目录项
		err = PTR_ERR(this);
		if (IS_ERR(this))
			goto out;

		if (this) {
			if (unlikely(ovl_dentry_remote(this))) {//验证目录项的状态
				dput(this);
				err = -EREMOTE;
				goto out;
			}
			if (ovl_is_whiteout(this)) {//验证该目录项是否为whiteout文件
				dput(this);
				this = NULL;
				upperopaque = true;//是,则设置该覆盖字段
			} else if (poe->numlower && ovl_is_opaquedir(this)) {
				upperopaque = true;
			}
		}
		upperdentry = prev = this;
	}

	if (!upperopaque && poe->numlower) {
		err = -ENOMEM;
		stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL);//分配底层栈内存空间
		if (!stack)
			goto out_put_upper;
	}

	for (i = 0; !upperopaque && i < poe->numlower; i++) {
		bool opaque = false;
		struct path lowerpath = poe->lowerstack[i];//遍历父目录的底层目录栈

		this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);//获取父目录底层目录下的真实文件的目录项
		err = PTR_ERR(this);
		if (IS_ERR(this)) {
			/*
			 * If it's positive, then treat ENAMETOOLONG as ENOENT.
			 */
			if (err == -ENAMETOOLONG && (upperdentry || ctr))
				continue;
			goto out_put;
		}
		if (!this)
			continue;
		if (ovl_is_whiteout(this)) {
			dput(this);
			break;
		}
		/*
		 * Only makes sense to check opaque dir if this is not the
		 * lowermost layer.
		 */
		if (i < poe->numlower - 1 && ovl_is_opaquedir(this))
			opaque = true;

		if (prev && (!S_ISDIR(prev->d_inode->i_mode) ||
			     !S_ISDIR(this->d_inode->i_mode))) {
			/*
			 * FIXME: check for upper-opaqueness maybe better done
			 * in remove code.
			 */
			if (prev == upperdentry)
				upperopaque = true;
			dput(this);
			break;
		}
		/*
		 * If this is a non-directory then stop here.
		 */
		if (!S_ISDIR(this->d_inode->i_mode))
			opaque = true;

		stack[ctr].dentry = this;//然后设置该搜索文件的在底层目录的目录项信息栈
		stack[ctr].mnt = lowerpath.mnt;//然后设置该搜索文件的在底层目录的挂载信息
		ctr++;
		prev = this;
		if (opaque)
			break;
	}

	oe = ovl_alloc_entry(ctr);//分配一个层次信息结构内存
	err = -ENOMEM;
	if (!oe)
		goto out_put;

	if (upperdentry || ctr) {
		struct dentry *realdentry;

		realdentry = upperdentry ? upperdentry : stack[0].dentry;//设置真实目录项结构为upper层中的目录项,或lower层中顶层的目录项(没有cow到upper层时)

		err = -ENOMEM;
		inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
				      oe);//为该文件创建索引节点
		if (!inode)
			goto out_free_oe;
		ovl_copyattr(realdentry->d_inode, inode);//将索引节点设置为真实目录项的索引节点
	}

	oe->opaque = upperopaque;//设置该文件在ovelray文件系统中的层次信息结构oe
	oe->__upperdentry = upperdentry;
	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);//将栈信息拷贝到层次信息中的底层目录栈中去
	kfree(stack);
	dentry->d_fsdata = oe;//添加该层次信息到该文件在overlay中的目录项结构
	d_add(dentry, inode);

	return NULL;

out_free_oe:
	kfree(oe);
out_put:
	for (i = 0; i < ctr; i++)
		dput(stack[i].dentry);
	kfree(stack);
out_put_upper:
	dput(upperdentry);
out:
	return ERR_PTR(err);
}

//参数是父目录在upper层或lower层中的目录项,文件名称相关信息。
static inline struct dentry *ovl_lookup_real(struct dentry *dir,
					     struct qstr *name)
{
	struct dentry *dentry;

	mutex_lock(&dir->d_inode->i_mutex);
	dentry = lookup_one_len(name->name, dir, name->len);//查询单个路径名分量的目录
	mutex_unlock(&dir->d_inode->i_mutex);

	if (IS_ERR(dentry)) {
		if (PTR_ERR(dentry) == -ENOENT)
			dentry = NULL;
	} else if (!dentry->d_inode) {
		dput(dentry);
		dentry = NULL;
	} else if (ovl_dentry_weird(dentry)) {
		dput(dentry);
		/* Don't support traversing automounts and other weirdness */
		dentry = ERR_PTR(-EREMOTE);
	}
	return dentry;
}


struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
	struct qstr this;
	unsigned int c;
	int err;

	WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));

	this.name = name;
	this.len = len;
	this.hash = full_name_hash(name, len);//对文件名进行hash计算
	if (!len)
		return ERR_PTR(-EACCES);

	if (unlikely(name[0] == '.')) {
		if (len < 2 || (len == 2 && name[1] == '.'))
			return ERR_PTR(-EACCES);
	}

	while (len--) {//检测文件名中不包含'/',只包含一个分量
		c = *(const unsigned char *)name++;
		if (c == '/' || c == '\0')
			return ERR_PTR(-EACCES);
	}
	/*
	 * See if the low-level filesystem might want
	 * to use its own hash..
	 */
	if (base->d_flags & DCACHE_OP_HASH) {
		int err = base->d_op->d_hash(base, &this);//进行在目录项缓存hash搜索
		if (err < 0)
			return ERR_PTR(err);
	}

	err = inode_permission(base->d_inode, MAY_EXEC);//验证索引节点权限
	if (err)
		return ERR_PTR(err);

	return __lookup_hash(&this, base, 0);//hash搜索
}


static struct dentry *__lookup_hash(struct qstr *name,
		struct dentry *base, unsigned int flags)
{
	bool need_lookup;
	struct dentry *dentry;

	dentry = lookup_dcache(name, base, flags, &need_lookup);//首先在缓存中查找目录项
	if (!need_lookup)
		return dentry;

	return lookup_real(base->d_inode, dentry, flags);//缓存中没有,则继续进入到真实的文件系统中进行查找,也就是ext4文件系统
}

vfs_open:

到最后文件打开的最实质性操作在vfs_open函数内实现: vfs_open()->do_dentry_open():这两部调用实际上就是调用文件对象中的open操作,而文件对象中的open操作函数file->f_op由文件的索引节点inode->i_fop赋予。而索引节点的创建由ovl_new_inode()来创建:

struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
			    struct ovl_entry *oe)
{
	struct inode *inode;

	inode = new_inode(sb);//在超级块中分配一个索引节点对象
	if (!inode)
		return NULL;

	inode->i_ino = get_next_ino();//分配索引号
	inode->i_mode = mode;
	inode->i_flags |= S_NOATIME | S_NOCMTIME;

	mode &= S_IFMT;
	switch (mode) {
	case S_IFDIR:
		inode->i_private = oe;
		inode->i_op = &ovl_dir_inode_operations;
		inode->i_fop = &ovl_dir_operations;//目录下一般文件的操作函数
		break;

	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		break;

	case S_IFREG:
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		inode->i_op = &ovl_file_inode_operations;
		break;

	default:
		WARN(1, "illegal file type: %i\n", mode);
		iput(inode);
		inode = NULL;
	}

	return inode;
}

由ovl_new_inode函数可知,文件对象中的操作函数位于&ovl_dir_operations:

const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,//打开文件函数
	.iterate	= ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
};

其中打开文件函数为ovl_dir_open():

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);//分配一个ovl_dir_file,保存该文件在overlay中特有的文件信息
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);//获取真实路径,根据路径的类型获取上层upper还是下层lower目录的路径
	realfile = ovl_path_open(&realpath, file->f_flags);//根据文件的真实路径获取真实路径文件系统下的文件对象结构,再依据文件对象中的文件打开操作函数打开文件。
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = !OVL_TYPE_MERGE(type);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;//使用文件对象中的private_data字段保存该文件在overlay文件系统中所特有的文件信息结构ovl_dir_file od

	return 0;
}
//文件在overlay文件系统中所特有的文件信息结构ovl_dir_file
struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;
	struct file *realfile;
	struct file *upperfile;
};

总结

  • ls命令第一步通过路径查找,获取merged目录(OverlayFS中指定路径)的file文件对象。
  • 再根据文件对象结构中f_op操作函数来调用真实路径所在文件系统(ext4或其它)下的文件对象打开操作函数。
  • 最后得到文件在overlay文件系统特有的文件信息结构体ovl_dir_file并保存到overlay文件对象结构的private_data字段中。