linux readdir,d_name implementation problem

Problem description

When the struct dirent returned by readdir or scandir is used carelessly, writing to d_name causes errors: the file names printed afterwards look very strange, are incomplete, or other errors occur. An example of such erroneous code follows.

#define _DEFAULT_SOURCE
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Demonstration program for the problem discussed in this article: it prints
   the sizes of struct dirent and related types, then lists the current
   directory with scandir() while deliberately writing to d_name at a fixed
   index.  */
int
main(void)
{
	/* sizeof yields a size_t, which needs %zu; _POSIX_VERSION is a long
	   constant, which needs %ld.  Passing either to %d is undefined
	   behaviour.  */
	printf("size:%zu inot_t:%zu offt:%zu _POSIX_VERSION:%ld namemax:%d \n",
		sizeof(struct dirent), sizeof(ino_t), sizeof(off_t),
		(long)_POSIX_VERSION, NAME_MAX);

	struct dirent **namelist;
	int n;

	n = scandir(".", &namelist, NULL, alphasort);
	if (n == -1) {
		perror("scandir");
		exit(EXIT_FAILURE);
	}

	/* Walk the list from the last entry to the first, releasing each
	   entry after it has been printed.  */
	while (n--) {
		/* Intentional bug kept for demonstration: an entry only
		   occupies d_reclen bytes, so index 100 may lie outside the
		   memory that really belongs to this record.  */
		namelist[n]->d_name[100] = 0; /* here is wrong*/
		printf("%s %d\n", namelist[n]->d_name, namelist[n]->d_reclen);
		free(namelist[n]);
	}
	free(namelist);

	exit(EXIT_SUCCESS);
}

Main function declaration

#include <sys/types.h>
#include <dirent.h>

DIR *opendir(const char *name);
DIR *fdopendir(int fd);
struct dirent *readdir(DIR *dirp);
int closedir(DIR *dirp);
int scandir(const char *dirp, struct dirent ***namelist,
              int (*filter)(const struct dirent *),
              int (*compar)(const struct dirent **, const struct dirent **));

Main structure

/* Directory stream state behind the opaque DIR handle.  The structure is
   allocated in one piece together with its trailing buffer
   (sizeof (DIR) + allocation bytes).  `data' receives the raw directory
   records read from the kernel, and readdir hands them out one at a time
   by advancing `offset'.  */
typedef struct __dirstream DIR;
struct __dirstream
  {
    int fd;			/* File descriptor.  */

    __libc_lock_define (, lock) /* Mutex lock for this structure.  */

    size_t allocation;		/* Space allocated for the block.  */
    size_t size;		/* Total valid data in the block.  */
    size_t offset;		/* Current offset into the block.  */

    off_t filepos;		/* Position of next entry to read.  */

    int errcode;		/* Delayed error code.  */

    /* Directory block.  We must make sure that this block starts
       at an address that is aligned adequately enough to store
       dirent entries.  Using the alignment of "void *" is not
       sufficient because dirents on 32-bit platforms can require
       64-bit alignment.  We use "long double" here to be consistent
       with what malloc uses.  */
    char data[0] __attribute__ ((aligned (__alignof__ (long double))));
  };

struct dirent is not declared directly in /usr/include/dirent.h; it is declared in /usr/include/bits/dirent.h.

  /* Directory entry as shown in the manual page (taken from the glibc
     headers).  The declaration gives d_name a fixed size, but the records
     handed back by the kernel are variable-length: only the d_reclen bytes
     of a record are backed by real data.  */
  struct dirent {
               ino_t          d_ino;       /* Inode number */
               off_t          d_off;       /* Not an offset; see below */
               unsigned short d_reclen;    /* Length of this record */
               unsigned char  d_type;      /* Type of file; not supported
                                              by all filesystem types */
               char           d_name[256]; /* Null-terminated filename */
           };

Here we should focus on two members d_reclen and d_name.

d_name

According to the manual, the definition above is taken from the glibc header files: d_name is a fixed-length array terminated by a null byte. But is this really the case?
The warning quoted below explains that POSIX defines d_name as a flexible array of unspecified length that holds at most NAME_MAX characters. The field must not be used as an lvalue, and applying sizeof to it is wrong; use strlen to obtain the length instead. Here is the puzzle: the structure definition above clearly shows a fixed-length array, so why does the manual warn that it cannot be used that way? (Set aside the question of finding the length through the terminating \0 for now; the root cause is that accessing the array can go wrong.)

   The d_name field
       The dirent structure definition shown above is taken from the glibc headers, and shows the d_name  field  with  a  fixed
       size.

       Warning: applications should avoid any dependence on the size of the d_name field.  POSIX defines it as char d_name[], a
       character array of unspecified size, with at most NAME_MAX characters preceding the terminating null byte ('\0').

       POSIX.1 explicitly notes that this field should not be used as an lvalue.  The standard  also  notes  that  the  use  of
       sizeof(d_name)  is  incorrect;  use strlen(d_name) instead.  (On some systems, this field is defined as char d_name[1]!)
       By implication, the use sizeof(struct dirent) to capture the size of the record including the size of d_name is also in‐
       correct.

       Note that while the call

           fpathconf(fd, _PC_NAME_MAX)

       returns  the  value 255 for most filesystems, on some filesystems (e.g., CIFS, Windows SMB servers), the null-terminated
       filename that is (correctly) returned in d_name can actually exceed this size.  In such cases, the d_reclen  field  will
       contain a value that exceeds the size of the glibc dirent structure shown above.

Here is a quick test program.

#define _DEFAULT_SOURCE
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Print the size of struct dirent and of the types it is built from, plus
   the POSIX version and NAME_MAX seen at compile time.  */
int
main(void)
{
	/* sizeof yields a size_t, which needs %zu; _POSIX_VERSION is a long
	   constant, which needs %ld.  Passing either to %d is undefined
	   behaviour.  */
	printf("size:%zu inot_t:%zu offt:%zu _POSIX_VERSION:%ld namemax:%d \n",
		sizeof(struct dirent), sizeof(ino_t), sizeof(off_t),
		(long)_POSIX_VERSION, NAME_MAX);

	return 0;
}

Output results

size:280 inot_t:8 offt:8 _POSIX_VERSION:200809 namemax:255 

This is confusing. If d_name were a flexible array, it would not be included in the size of the structure. Yet here it is clearly a fixed-length array of 256 bytes (8 + 8 + 2 + 1 + 256 = 275), and the whole structure is padded to 8-byte alignment, giving 280.

glibc

Looking at the glibc source code, the main call flow is as follows.
opendir
__open_nocancel /* open the directory and get a file descriptor */
opendir_tail
__fxstat64 /* get file status, similar to running stat in the shell */
__alloc_dir /* allocate a DIR and return it */

opendir

/* Copyright (C) 1991-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dirent.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>	/* For BUFSIZ.  */
#include <sys/param.h>	/* For MIN and MAX.  */

#include <not-cancel.h>

/* The st_blksize value of the directory is used as a hint for the
   size of the buffer which receives struct dirent values from the
   kernel.  st_blksize is limited to MAX_DIR_BUFFER_SIZE, in case the
   file system provides a bogus value.  */
#define MAX_DIR_BUFFER_SIZE 1048576U

/* Open flags shared by every open call made in this file (__opendir and
   __opendirat).  Going by the flag names: read-only, non-blocking,
   directory-only, large-file aware and close-on-exec.  */
enum {
  opendir_oflags = O_RDONLY|O_NDELAY|O_DIRECTORY|O_LARGEFILE|O_CLOEXEC
};

/* Return true, with errno set to ENOENT, when NAME is the empty string;
   return false and leave errno alone otherwise.  */
static bool
invalid_name (const char *name)
{
  const bool is_empty = (name[0] == '\0');

  /* POSIX.1-1990 says an empty name gets ENOENT;
     but `open' might like it fine.  */
  if (__glibc_unlikely (is_empty))
    __set_errno (ENOENT);

  return is_empty;
}

static DIR *
opendir_tail (int fd)
{
  if (__glibc_unlikely (fd < 0))
    return NULL;

  /* Now make sure this really is a directory and nothing changed since the
     `stat' call.  The S_ISDIR check is superfluous if O_DIRECTORY works,
     but it's cheap and we need the stat call for st_blksize anyway.  */
  struct stat64 statbuf;
  if (__glibc_unlikely (__fxstat64 (_STAT_VER, fd, &statbuf) < 0))
    goto lose;
  if (__glibc_unlikely (! S_ISDIR (statbuf.st_mode)))
    {
      __set_errno (ENOTDIR);
    lose:
      __close_nocancel_nostatus (fd);
      return NULL;
    }

  return __alloc_dir (fd, true, 0, &statbuf);
}


#if IS_IN (libc)
/* Open a directory stream on NAME, resolved relative to the directory
   descriptor DFD.  Returns NULL if NAME is rejected by invalid_name or if
   opening fails.  */
DIR *
__opendirat (int dfd, const char *name)
{
  if (__glibc_unlikely (invalid_name (name)))
    return NULL;

  int fd = __openat_nocancel (dfd, name, opendir_oflags);

  return opendir_tail (fd);
}
#endif


/* Open a directory stream on NAME.  Returns NULL if NAME is rejected by
   invalid_name or if opening fails.  */
DIR *
__opendir (const char *name)
{
  if (__glibc_unlikely (invalid_name (name)))
    return NULL;

  int fd = __open_nocancel (name, opendir_oflags);

  return opendir_tail (fd);
}
weak_alias (__opendir, opendir)

/* Allocate and initialize a directory stream for descriptor FD.

   CLOSE_FD tells whether FD is closed when this function fails; when it
   is false, FD_CLOEXEC is set on FD first.  FLAGS is not used by the code
   below.  STATP, if not NULL, supplies st_blksize as a hint for the size
   of the entry buffer.

   Returns the new stream, or NULL if setting the flag or allocating
   memory fails.  */
DIR *
__alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp)
{
  /* We have to set the close-on-exec flag if the user provided the
     file descriptor.  */
  if (!close_fd
      && __glibc_unlikely (__fcntl64_nocancel (fd, F_SETFD, FD_CLOEXEC) < 0))
	goto lose;

  /* Both candidate sizes are at least sizeof (struct dirent64), so the
     buffer can always hold one full-sized entry.  */
  const size_t default_allocation = (4 * BUFSIZ < sizeof (struct dirent64)
				     ? sizeof (struct dirent64) : 4 * BUFSIZ);
  const size_t small_allocation = (BUFSIZ < sizeof (struct dirent64)
				   ? sizeof (struct dirent64) : BUFSIZ);
  size_t allocation = default_allocation;
#ifdef _STATBUF_ST_BLKSIZE
  /* Increase allocation if requested, but not if the value appears to
     be bogus.  */
  if (statp != NULL)
    allocation = MIN (MAX ((size_t) statp->st_blksize, default_allocation),
		      MAX_DIR_BUFFER_SIZE);
#endif

  /* The DIR header and its entry buffer live in a single allocation.  */
  DIR *dirp = (DIR *) malloc (sizeof (DIR) + allocation);
  if (dirp == NULL)
    {
      /* Retry once with the smaller buffer before giving up.  */
      allocation = small_allocation;
      dirp = (DIR *) malloc (sizeof (DIR) + allocation);

      if (dirp == NULL)
      lose:
	{
	  /* Failure path, also reached by the goto above.  Close FD if
	     asked to, keeping the errno value of the original failure.  */
	  if (close_fd)
	    {
	      int save_errno = errno;
	      __close_nocancel_nostatus (fd);
	      __set_errno (save_errno);
	    }
	  return NULL;
	}
    }

  dirp->fd = fd;
#if IS_IN (libc)
  __libc_lock_init (dirp->lock);
#endif
  dirp->allocation = allocation;
  dirp->size = 0;
  dirp->offset = 0;
  dirp->filepos = 0;
  dirp->errcode = 0;

  return dirp;
}

Let's focus on __alloc_dir here.

__alloc_dir

typedef struct __dirstream DIR; judging from the declaration, DIR is a structure that ends with a flexible array. On entry, the function first calculates the size to allocate.
BUFSIZ is defined as 8K, so default_allocation starts out at 32K. The size is then recomputed from statp->st_blksize, taken from the earlier stat information; this field gives the "preferred" block size for efficient file system I/O.
It is the preferred I/O size of the file system. It is presumably related to the Linux page size (PAGE_SIZE), because the page is the smallest unit of memory management in Linux and access on 4K-aligned boundaries is more efficient. The final size is the smaller of this value and MAX_DIR_BUFFER_SIZE. Note that this only caps the size of the read buffer; it is not a limit on how many entries a directory can hold, because readdir refills the buffer whenever it has been consumed.

	#define MAX_DIR_BUFFER_SIZE 1048576U
	
  const size_t default_allocation = (4 * BUFSIZ < sizeof (struct dirent64)
				     ? sizeof (struct dirent64) : 4 * BUFSIZ);
  const size_t small_allocation = (BUFSIZ < sizeof (struct dirent64)
				   ? sizeof (struct dirent64) : BUFSIZ);
  size_t allocation = default_allocation;
#ifdef _STATBUF_ST_BLKSIZE
  /* Increase allocation if requested, but not if the value appears to
     be bogus.  */
  if (statp != NULL)
    allocation = MIN (MAX ((size_t) statp->st_blksize, default_allocation),
		      MAX_DIR_BUFFER_SIZE);
#endif

After that, the buffer size is allocated according to this size, and then initialized, mainly assigning fd and buffer size.

DIR *dirp = (DIR *) malloc (sizeof (DIR) + allocation);
  dirp->fd = fd;
#if IS_IN (libc)
  __libc_lock_init (dirp->lock);
#endif
  dirp->allocation = allocation;
  dirp->size = 0;
  dirp->offset = 0;
  dirp->filepos = 0;
  dirp->errcode = 0;

The whole opendir process is to open the folder, get the handle, calculate the buffer size, allocate DIR, and then return.

readdir

The function is defined in sysdeps/posix/readdir.c. The overall process is relatively simple: it keeps returning the next DIRENT_TYPE * according to the offset.

/* Read a directory entry from DIRP.

   Returns a pointer into DIRP's internal buffer for the next entry whose
   d_ino is nonzero, or NULL at end of directory or on error.  The buffer
   is refilled with __GETDENTS whenever every record in it has been
   consumed.  */
DIRENT_TYPE *
__READDIR (DIR *dirp)
{
  DIRENT_TYPE *dp;
  int saved_errno = errno;

#if IS_IN (libc)
  __libc_lock_lock (dirp->lock);
#endif

  do
    {
      size_t reclen;

      if (dirp->offset >= dirp->size)
	{
	  /* We've emptied out our buffer.  Refill it.  */

	  size_t maxread;
	  ssize_t bytes;

#ifndef _DIRENT_HAVE_D_RECLEN
	  /* Fixed-size struct; must read one at a time (see below).  */
	  maxread = sizeof *dp;
#else
	  maxread = dirp->allocation;
#endif

	  bytes = __GETDENTS (dirp->fd, dirp->data, maxread);
	  if (bytes <= 0)
	    {
	      /* On some systems getdents fails with ENOENT when the
		 open directory has been rmdir'd already.  POSIX.1
		 requires that we treat this condition like normal EOF.  */
	      if (bytes < 0 && errno == ENOENT)
		bytes = 0;

	      /* Don't modify errno when reaching EOF.  */
	      if (bytes == 0)
		__set_errno (saved_errno);
	      dp = NULL;
	      break;
	    }
	  dirp->size = (size_t) bytes;

	  /* Reset the offset into the buffer.  */
	  dirp->offset = 0;
	}

      /* The next record starts at the current offset in the buffer; the
	 returned pointer aliases that buffer, nothing is copied.  */
      dp = (DIRENT_TYPE *) &dirp->data[dirp->offset];

#ifdef _DIRENT_HAVE_D_RECLEN
      /* Records are variable-length: the entry itself says how far to
	 advance to reach the next one.  */
      reclen = dp->d_reclen;
#else
      /* The only version of `struct dirent*' that lacks `d_reclen'
	 is fixed-size.  */
      assert (sizeof dp->d_name > 1);
      reclen = sizeof *dp;
      /* The name is not terminated if it is the largest possible size.
	 Clobber the following byte to ensure proper null termination.  We
	 read just one entry at a time above so we know that byte will not
	 be used later.  */
      dp->d_name[sizeof dp->d_name] = '\0';
#endif

      dirp->offset += reclen;

#ifdef _DIRENT_HAVE_D_OFF
      dirp->filepos = dp->d_off;
#else
      dirp->filepos += reclen;
#endif

      /* Skip deleted files.  */
    } while (dp->d_ino == 0);

#if IS_IN (libc)
  __libc_lock_unlock (dirp->lock);
#endif

  return dp;
}

Here we focus on two things: 1. __GETDENTS and 2. the offset calculation. Let's look at the offset calculation first.

Offset calculation

Everything is driven by offset. On the first call, offset and size are both 0, so dirp->offset >= dirp->size holds. On the current system the macro **_DIRENT_HAVE_D_RECLEN** is defined; it corresponds to the **unsigned short d_reclen** member, which POSIX.1 does not mandate, so different systems implement it differently. bytes = __GETDENTS (dirp->fd, dirp->data, maxread); reads data into the buffer (what exactly is read is described later); size is then set to the number of bytes read and offset is reset to 0. Later calls do not read again until the buffer has been consumed.
dp = (DIRENT_TYPE *) &dirp->data[dirp->offset]; casts the memory inside DIR directly to a struct dirent, and then reclen = dp->d_reclen;
dirp->offset += reclen; shows that the information for all entries in the directory is stored in the data member of DIR, one struct dirent after another. From this we can conclude that the entries are not stored at a fixed stride of 280 bytes, otherwise d_reclen would not be needed to advance the offset; d_reclen must be the actual size of each struct dirent record.

 dp = (DIRENT_TYPE *) &dirp->data[dirp->offset];

#ifdef _DIRENT_HAVE_D_RECLEN
      reclen = dp->d_reclen;
#else
      /* The only version of `struct dirent*' that lacks `d_reclen'
	 is fixed-size.  */
      assert (sizeof dp->d_name > 1);
      reclen = sizeof *dp;
      /* The name is not terminated if it is the largest possible size.
	 Clobber the following byte to ensure proper null termination.  We
	 read jst one entry at a time above so we know that byte will not
	 be used later.  */
      dp->d_name[sizeof dp->d_name] = '\0';
#endif

      dirp->offset += reclen;

#ifdef _DIRENT_HAVE_D_OFF
      dirp->filepos = dp->d_off;
#else
      dirp->filepos += reclen;
#endif

__GETDENTS

The prototype here is (on a 64-bit system)

ssize_t
__getdents64 (int fd, void *buf, size_t nbytes)

This enters the kernel through a system call; the details of the call path are not analyzed here.

/*
 * getdents64 system call: fill the user buffer `dirent' (`count' bytes)
 * with directory records for the open directory `fd'.
 *
 * Returns the number of bytes stored (count - buf.count) once at least one
 * record has been written, otherwise the result of iterate_dir() or the
 * error recorded in buf.error; -EFAULT or -EBADF on a bad buffer or
 * descriptor.
 */
SYSCALL_DEFINE3(getdents64, unsigned int, fd,
		struct linux_dirent64 __user *, dirent, unsigned int, count)
{
	struct fd f;
	struct linux_dirent64 __user * lastdirent;
	/* filldir64 is invoked through ctx.actor for every entry and
	   appends one record at current_dir, consuming `count'. */
	struct getdents_callback64 buf = {
		.ctx.actor = filldir64,
		.count = count,
		.current_dir = dirent
	};
	int error;

	if (!access_ok(VERIFY_WRITE, dirent, count))
		return -EFAULT;

	f = fdget_pos(fd);
	if (!f.file)
		return -EBADF;

	error = iterate_dir(f.file, &buf.ctx);
	if (error >= 0)
		error = buf.error;
	lastdirent = buf.previous;
	if (lastdirent) {
		/* filldir64 leaves d_off of the newest record at 0; store
		   the final directory position there. */
		typeof(lastdirent->d_off) d_off = buf.ctx.pos;
		if (__put_user(d_off, &lastdirent->d_off))
			error = -EFAULT;
		else
			/* Bytes actually written into the user buffer. */
			error = count - buf.count;
	}
	fdput_pos(f);
	return error;
}

Here is a brief analysis:
1. Get the struct file from fd.
2. iterate_dir(f.file, &buf.ctx); iterates over the contents of the directory.
The key point is filldir64.
The calculation there is straightforward. Besides the fixed-size part of the structure, the variable part is namlen, the actual length of the entry name, plus 1 byte reserved for the terminating \0.
The content is then copied to user space. struct linux_dirent64 clearly ends with a flexible array; apart from the name field, its members correspond one-to-one to those of the user-space struct dirent.
This indirectly confirms what was described above: POSIX defines d_name in struct dirent as a flexible array.
The struct linux_dirent64 records are stored one after another. The length of each is the fixed length + the name length (including the terminating \0), rounded up for alignment.

/*
 * Layout of one directory record as written to user space by filldir64.
 * d_name is a zero-length trailing array: the name bytes follow the fixed
 * header directly, and d_reclen gives the total size of the record.
 */
struct linux_dirent64 {
	u64		d_ino;
	s64		d_off;
	unsigned short	d_reclen;
	unsigned char	d_type;
	char		d_name[0];
};
/*
 * Append one directory record to the user buffer tracked by the
 * getdents_callback64 that embeds `ctx'.
 *
 * Returns 0 on success, -EINVAL if the record does not fit in the
 * remaining space, -EINTR if a signal is pending (checked only once a
 * previous record exists), or -EFAULT if a user-space write fails.
 */
static int filldir64(struct dir_context *ctx, const char *name, int namlen,
		     loff_t offset, u64 ino, unsigned int d_type)
{
	struct linux_dirent64 __user *dirent;
	struct getdents_callback64 *buf =
		container_of(ctx, struct getdents_callback64, ctx);
	/* Fixed header + name + terminating NUL, rounded up to a multiple
	   of sizeof(u64). */
	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
		sizeof(u64));

	buf->error = -EINVAL;	/* only used if we fail.. */
	if (reclen > buf->count)
		return -EINVAL;
	dirent = buf->previous;
	if (dirent) {
		if (signal_pending(current))
			return -EINTR;
		/* Patch the previous record's d_off now that the offset of
		   this entry is known. */
		if (__put_user(offset, &dirent->d_off))
			goto efault;
	}
	dirent = buf->current_dir;
	if (__put_user(ino, &dirent->d_ino))
		goto efault;
	/* Written as 0 here; filled in by the next call or by the
	   getdents64 syscall for the last record. */
	if (__put_user(0, &dirent->d_off))
		goto efault;
	if (__put_user(reclen, &dirent->d_reclen))
		goto efault;
	if (__put_user(d_type, &dirent->d_type))
		goto efault;
	/* Copy exactly namlen bytes of the name, then add the NUL. */
	if (copy_to_user(dirent->d_name, name, namlen))
		goto efault;
	if (__put_user(0, dirent->d_name + namlen))
		goto efault;
	/* Advance the write cursor by the padded record length. */
	buf->previous = dirent;
	dirent = (void __user *)dirent + reclen;
	buf->current_dir = dirent;
	buf->count -= reclen;
	return 0;
efault:
	buf->error = -EFAULT;
	return -EFAULT;
}

conclusion

  1. First of all, the struct dirent seen in user space is indeed declared with a fixed length.
  2. d_reclen is the space actually occupied by this entry: fixed structure length + name length + alignment padding.
  3. The POSIX description of d_name in the man page concerns how the field may be used, not how it is defined. d_name is declared as an array, but the structure is simply overlaid on a region of memory; the declared length does not mean that all of it may be accessed.
/* Illustration: a structure whose declared array is larger than the memory
   it is overlaid on.  (The original `typedef' had no alias name, so it was
   dropped; the code only ever refers to `struct test_val'.)  */
struct test_val {
	int a;
	int b;
	unsigned char array[256];
};
unsigned char buffer[128];

/* Only sizeof buffer - offsetof(struct test_val, array) bytes of
   test->array are backed by real storage.  The cast is for illustration
   only: buffer is not guaranteed to be suitably aligned for the struct.  */
struct test_val *test = (struct test_val *)buffer;

Similarly, although the array is declared with 256 elements, only 120 bytes of it are usable after this cast (128 bytes of buffer minus the two leading ints). Accessing the array at index 120 or beyond is out of bounds.

Keywords: C Linux

Added by Blob on Thu, 30 Dec 2021 13:37:59 +0200