Backported memfd_create() system call
This commit is contained in:
parent
3d1c55ae1e
commit
eee847d17d
@ -411,6 +411,7 @@
|
||||
#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
|
||||
#define __NR_seccomp (__NR_SYSCALL_BASE+383)
|
||||
#define __NR_getrandom (__NR_SYSCALL_BASE+384)
|
||||
#define __NR_memfd_create (__NR_SYSCALL_BASE+385)
|
||||
|
||||
/*
|
||||
* This may need to be greater than __NR_last_syscall+1 in order to
|
||||
|
@ -394,6 +394,7 @@
|
||||
CALL(sys_ni_syscall) /* CALL(sys_renameat2) */
|
||||
CALL(sys_seccomp)
|
||||
CALL(sys_getrandom)
|
||||
/* 385 */ CALL(sys_memfd_create)
|
||||
#ifndef syscalls_counted
|
||||
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
|
||||
#define syscalls_counted
|
||||
|
@ -362,3 +362,4 @@
|
||||
353 i386 renameat2 sys_ni_syscall
|
||||
354 i386 seccomp sys_seccomp
|
||||
355 i386 getrandom sys_getrandom
|
||||
356 i386 memfd_create sys_memfd_create
|
||||
|
@ -325,6 +325,7 @@
|
||||
316 common renameat2 sys_ni_syscall
|
||||
317 common seccomp sys_seccomp
|
||||
318 common getrandom sys_getrandom
|
||||
319 common memfd_create sys_memfd_create
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <linux/user_namespace.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
|
||||
#include <asm/poll.h>
|
||||
#include <asm/siginfo.h>
|
||||
@ -327,6 +328,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
|
||||
case F_GETPIPE_SZ:
|
||||
err = pipe_fcntl(filp, cmd, arg);
|
||||
break;
|
||||
case F_ADD_SEALS:
|
||||
case F_GET_SEALS:
|
||||
err = shmem_fcntl(filp, cmd, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef __SHMEM_FS_H
|
||||
#define __SHMEM_FS_H
|
||||
|
||||
#include <linux/file.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/pagemap.h>
|
||||
@ -11,6 +12,7 @@
|
||||
|
||||
struct shmem_inode_info {
|
||||
spinlock_t lock;
|
||||
unsigned int seals; /* shmem seals */
|
||||
unsigned long flags;
|
||||
unsigned long alloced; /* data pages alloced to file */
|
||||
union {
|
||||
@ -62,4 +64,19 @@ static inline struct page *shmem_read_mapping_page(
|
||||
mapping_gfp_mask(mapping));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TMPFS
|
||||
|
||||
extern int shmem_add_seals(struct file *file, unsigned int seals);
|
||||
extern int shmem_get_seals(struct file *file);
|
||||
extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Fallback used when the kernel is built without CONFIG_TMPFS.
 *
 * None of the three arguments is examined; every sealing fcntl() command
 * routed here is rejected.
 *
 * Returns -EINVAL unconditionally.
 */
static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
{
	return -EINVAL;
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -782,6 +782,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
|
||||
asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
|
||||
asmlinkage long sys_eventfd(unsigned int count);
|
||||
asmlinkage long sys_eventfd2(unsigned int count, int flags);
|
||||
asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
|
||||
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
|
||||
asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
|
||||
asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
|
||||
|
@ -704,9 +704,11 @@ __SYSCALL(__NR_renameat2, sys_renameat2)
|
||||
__SYSCALL(__NR_seccomp, sys_seccomp)
|
||||
#define __NR_getrandom 278
|
||||
__SYSCALL(__NR_getrandom, sys_getrandom)
|
||||
#define __NR_memfd_create 279
|
||||
__SYSCALL(__NR_memfd_create, sys_memfd_create)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 279
|
||||
#define __NR_syscalls 280
|
||||
|
||||
/*
|
||||
* All syscalls below here should go away really,
|
||||
|
@ -27,6 +27,21 @@
|
||||
#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
|
||||
#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
|
||||
|
||||
/*
|
||||
* Set/Get seals
|
||||
*/
|
||||
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
|
||||
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
|
||||
|
||||
/*
|
||||
* Types of seals
|
||||
*/
|
||||
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
||||
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
||||
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
||||
#define F_SEAL_WRITE 0x0008 /* prevent writes */
|
||||
/* (1U << 31) is reserved for signed error codes */
|
||||
|
||||
/*
|
||||
* Types of directory notifications that may be requested.
|
||||
*/
|
||||
|
8
include/uapi/linux/memfd.h
Normal file
8
include/uapi/linux/memfd.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef _UAPI_LINUX_MEMFD_H
#define _UAPI_LINUX_MEMFD_H

/*
 * Flag bits for the "flags" argument of memfd_create(2).
 * The argument is an unsigned int, hence the U suffix on each value.
 */
#define MFD_CLOEXEC		0x0001U	/* request O_CLOEXEC on the new descriptor */
#define MFD_ALLOW_SEALING	0x0002U	/* leave the file open to F_ADD_SEALS */

#endif /* _UAPI_LINUX_MEMFD_H */
|
@ -193,6 +193,7 @@ cond_syscall(compat_sys_timerfd_settime);
|
||||
cond_syscall(compat_sys_timerfd_gettime);
|
||||
cond_syscall(sys_eventfd);
|
||||
cond_syscall(sys_eventfd2);
|
||||
cond_syscall(sys_memfd_create);
|
||||
|
||||
/* performance counters: */
|
||||
cond_syscall(sys_perf_event_open);
|
||||
|
204
mm/shmem.c
204
mm/shmem.c
@ -66,6 +66,9 @@ static struct vfsmount *shm_mnt;
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/fcntl.h>
|
||||
#include <uapi/linux/memfd.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
@ -603,6 +606,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
|
||||
static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct shmem_inode_info *info = SHMEM_I(inode);
|
||||
int error;
|
||||
|
||||
error = inode_change_ok(inode, attr);
|
||||
@ -613,6 +617,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
loff_t oldsize = inode->i_size;
|
||||
loff_t newsize = attr->ia_size;
|
||||
|
||||
/* protected by i_mutex */
|
||||
if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
|
||||
(newsize > oldsize && (info->seals & F_SEAL_GROW)))
|
||||
return -EPERM;
|
||||
|
||||
if (newsize != oldsize) {
|
||||
i_size_write(inode, newsize);
|
||||
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
|
||||
@ -1448,6 +1457,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
|
||||
info = SHMEM_I(inode);
|
||||
memset(info, 0, (char *)inode - (char *)info);
|
||||
spin_lock_init(&info->lock);
|
||||
info->seals = F_SEAL_SEAL;
|
||||
info->flags = flags & VM_NORESERVE;
|
||||
INIT_LIST_HEAD(&info->swaplist);
|
||||
simple_xattrs_init(&info->xattrs);
|
||||
@ -1501,7 +1511,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
|
||||
struct page **pagep, void **fsdata)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct shmem_inode_info *info = SHMEM_I(inode);
|
||||
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
|
||||
|
||||
/* i_mutex is held by caller */
|
||||
if (unlikely(info->seals)) {
|
||||
if (info->seals & F_SEAL_WRITE)
|
||||
return -EPERM;
|
||||
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
|
||||
}
|
||||
|
||||
@ -1872,11 +1892,113 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
|
||||
return offset;
|
||||
}
|
||||
|
||||
/*
 * Currently a no-op: the mapping is not inspected and success (0) is
 * reported unconditionally.
 *
 * NOTE(review): no caller of this helper is visible in the surrounding
 * diff - confirm whether it is meant to be wired into the sealing path.
 */
static int shmem_wait_for_pins(struct address_space *mapping)
{
	return 0;
}
|
||||
|
||||
#define F_ALL_SEALS (F_SEAL_SEAL | \
|
||||
F_SEAL_SHRINK | \
|
||||
F_SEAL_GROW | \
|
||||
F_SEAL_WRITE)
|
||||
|
||||
int shmem_add_seals(struct file *file, unsigned int seals)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct shmem_inode_info *info = SHMEM_I(inode);
|
||||
int error;
|
||||
|
||||
/*
|
||||
* SEALING
|
||||
* Sealing allows multiple parties to share a shmem-file but restrict
|
||||
* access to a specific subset of file operations. Seals can only be
|
||||
* added, but never removed. This way, mutually untrusted parties can
|
||||
* share common memory regions with a well-defined policy. A malicious
|
||||
* peer can thus never perform unwanted operations on a shared object.
|
||||
*
|
||||
* Seals are only supported on special shmem-files and always affect
|
||||
* the whole underlying inode. Once a seal is set, it may prevent some
|
||||
* kinds of access to the file. Currently, the following seals are
|
||||
* defined:
|
||||
* SEAL_SEAL: Prevent further seals from being set on this file
|
||||
* SEAL_SHRINK: Prevent the file from shrinking
|
||||
* SEAL_GROW: Prevent the file from growing
|
||||
* SEAL_WRITE: Prevent write access to the file
|
||||
*
|
||||
* As we don't require any trust relationship between two parties, we
|
||||
* must prevent seals from being removed. Therefore, sealing a file
|
||||
* only adds a given set of seals to the file, it never touches
|
||||
* existing seals. Furthermore, the "setting seals"-operation can be
|
||||
* sealed itself, which basically prevents any further seal from being
|
||||
* added.
|
||||
*
|
||||
* Semantics of sealing are only defined on volatile files. Only
|
||||
* anonymous shmem files support sealing. More importantly, seals are
|
||||
* never written to disk. Therefore, there's no plan to support it on
|
||||
* other file types.
|
||||
*/
|
||||
|
||||
if (file->f_op != &shmem_file_operations)
|
||||
return -EINVAL;
|
||||
if (!(file->f_mode & FMODE_WRITE))
|
||||
return -EPERM;
|
||||
if (seals & ~(unsigned int)F_ALL_SEALS)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
if (info->seals & F_SEAL_SEAL) {
|
||||
error = -EPERM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
info->seals |= seals;
|
||||
error = 0;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(shmem_add_seals);
|
||||
|
||||
int shmem_get_seals(struct file *file)
|
||||
{
|
||||
if (file->f_op != &shmem_file_operations)
|
||||
return -EINVAL;
|
||||
|
||||
return SHMEM_I(file_inode(file))->seals;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(shmem_get_seals);
|
||||
|
||||
long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
long error;
|
||||
|
||||
switch (cmd) {
|
||||
case F_ADD_SEALS:
|
||||
/* disallow upper 32bit */
|
||||
if (arg > UINT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
error = shmem_add_seals(file, arg);
|
||||
break;
|
||||
case F_GET_SEALS:
|
||||
error = shmem_get_seals(file);
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
|
||||
loff_t len)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
|
||||
struct shmem_inode_info *info = SHMEM_I(inode);
|
||||
struct shmem_falloc shmem_falloc;
|
||||
pgoff_t start, index, end;
|
||||
int error;
|
||||
@ -1889,6 +2011,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
|
||||
loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
|
||||
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
|
||||
|
||||
/* protected by i_mutex */
|
||||
if (info->seals & F_SEAL_WRITE) {
|
||||
error = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
shmem_falloc.waitq = &shmem_falloc_waitq;
|
||||
shmem_falloc.start = unmap_start >> PAGE_SHIFT;
|
||||
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
|
||||
@ -1915,6 +2043,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
|
||||
error = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
start = offset >> PAGE_CACHE_SHIFT;
|
||||
end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
/* Try to avoid a swapstorm if len is impossible to satisfy */
|
||||
@ -2631,6 +2764,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
|
||||
shmem_show_mpol(seq, sbinfo->mpol);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MFD_NAME_PREFIX "memfd:"
|
||||
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
|
||||
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
|
||||
|
||||
#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
|
||||
|
||||
SYSCALL_DEFINE2(memfd_create,
|
||||
const char __user *, uname,
|
||||
unsigned int, flags)
|
||||
{
|
||||
struct shmem_inode_info *info;
|
||||
struct file *file;
|
||||
int fd, error;
|
||||
char *name;
|
||||
long len;
|
||||
|
||||
if (flags & ~(unsigned int)MFD_ALL_FLAGS)
|
||||
return -EINVAL;
|
||||
|
||||
/* length includes terminating zero */
|
||||
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
|
||||
if (len <= 0)
|
||||
return -EFAULT;
|
||||
if (len > MFD_NAME_MAX_LEN + 1)
|
||||
return -EINVAL;
|
||||
|
||||
name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
|
||||
strcpy(name, MFD_NAME_PREFIX);
|
||||
if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
|
||||
error = -EFAULT;
|
||||
goto err_name;
|
||||
}
|
||||
|
||||
/* terminating-zero may have changed after strnlen_user() returned */
|
||||
if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
|
||||
error = -EFAULT;
|
||||
goto err_name;
|
||||
}
|
||||
|
||||
fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
|
||||
if (fd < 0) {
|
||||
error = fd;
|
||||
goto err_name;
|
||||
}
|
||||
|
||||
file = shmem_file_setup(name, 0, VM_NORESERVE);
|
||||
if (IS_ERR(file)) {
|
||||
error = PTR_ERR(file);
|
||||
goto err_fd;
|
||||
}
|
||||
info = SHMEM_I(file_inode(file));
|
||||
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
|
||||
file->f_flags |= O_RDWR | O_LARGEFILE;
|
||||
if (flags & MFD_ALLOW_SEALING)
|
||||
info->seals &= ~F_SEAL_SEAL;
|
||||
|
||||
fd_install(fd, file);
|
||||
kfree(name);
|
||||
return fd;
|
||||
|
||||
err_fd:
|
||||
put_unused_fd(fd);
|
||||
err_name:
|
||||
kfree(name);
|
||||
return error;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_TMPFS */
|
||||
|
||||
static void shmem_put_super(struct super_block *sb)
|
||||
|
Loading…
x
Reference in New Issue
Block a user