389 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			389 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 | 
						|
/*
 * Create a process without mappings by unmapping everything at once and
 * holding it with ptrace(2). See what happens to
 *
 *	/proc/${pid}/maps
 *	/proc/${pid}/numa_maps
 *	/proc/${pid}/smaps
 *	/proc/${pid}/smaps_rollup
 */
 | 
						|
#undef NDEBUG
 | 
						|
#include <assert.h>
 | 
						|
#include <errno.h>
 | 
						|
#include <stdint.h>
 | 
						|
#include <stdio.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <string.h>
 | 
						|
#include <fcntl.h>
 | 
						|
#include <sys/mman.h>
 | 
						|
#include <sys/ptrace.h>
 | 
						|
#include <sys/resource.h>
 | 
						|
#include <sys/types.h>
 | 
						|
#include <sys/wait.h>
 | 
						|
#include <unistd.h>
 | 
						|
 | 
						|
/*
 * Detected vsyscall page mode, filled in by vsyscall():
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 *
 * The probing child stores the candidate value before each probe and the
 * SIGSEGV handler exits with it, hence volatile.
 */
static volatile int g_vsyscall;

/* Expected vsyscall line of /proc/${pid}/maps for the detected mode, set in main(). */
static const char *g_proc_pid_maps_vsyscall;

/* Expected vsyscall entry of /proc/${pid}/smaps for the detected mode, set in main(). */
static const char *g_proc_pid_smaps_vsyscall;
 | 
						|
 | 
						|
/*
 * Expected /proc/${pid}/maps contents of a process whose only remaining
 * mapping is the vsyscall page. The numeric suffix is the vsyscall mode:
 * 0 = no vsyscall VMA, 1 = execute-only (--xp), 2 = readable (r-xp).
 */
static const char proc_pid_maps_vsyscall_0[] = "";
static const char proc_pid_maps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char proc_pid_maps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
 | 
						|
 | 
						|
/*
 * Expected /proc/${pid}/smaps contents of a process whose only remaining
 * mapping is the vsyscall page. The numeric suffix is the vsyscall mode,
 * matching the proc_pid_maps_vsyscall_* strings:
 * 0 = no vsyscall VMA, 1 = execute-only (--xp), 2 = readable (r-xp).
 *
 * "ProtectionKey:" field is conditional and is deliberately not listed.
 */
#define PROC_PID_SMAPS_VSYSCALL_FIELDS	\
"Size:                  4 kB\n"	\
"KernelPageSize:        4 kB\n"	\
"MMUPageSize:           4 kB\n"	\
"Rss:                   0 kB\n"	\
"Pss:                   0 kB\n"	\
"Pss_Dirty:             0 kB\n"	\
"Shared_Clean:          0 kB\n"	\
"Shared_Dirty:          0 kB\n"	\
"Private_Clean:         0 kB\n"	\
"Private_Dirty:         0 kB\n"	\
"Referenced:            0 kB\n"	\
"Anonymous:             0 kB\n"	\
"LazyFree:              0 kB\n"	\
"AnonHugePages:         0 kB\n"	\
"ShmemPmdMapped:        0 kB\n"	\
"FilePmdMapped:         0 kB\n"	\
"Shared_Hugetlb:        0 kB\n"	\
"Private_Hugetlb:       0 kB\n"	\
"Swap:                  0 kB\n"	\
"SwapPss:               0 kB\n"	\
"Locked:                0 kB\n"	\
"THPeligible:    0\n"

static const char proc_pid_smaps_vsyscall_0[] = "";

/* vsyscall=xonly: the VMA is execute-only. */
static const char proc_pid_smaps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
PROC_PID_SMAPS_VSYSCALL_FIELDS
;

/* vsyscall=emulate: the VMA is readable and executable. */
static const char proc_pid_smaps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
PROC_PID_SMAPS_VSYSCALL_FIELDS
;

#undef PROC_PID_SMAPS_VSYSCALL_FIELDS
 | 
						|
 | 
						|
/*
 * SA_SIGINFO-style SIGSEGV handler: terminate the process immediately with
 * EXIT_FAILURE. None of the arguments are used.
 */
static void sigaction_SIGSEGV(int sig, siginfo_t *info, void *ucontext)
{
	(void)sig;
	(void)info;
	(void)ucontext;

	_exit(EXIT_FAILURE);
}
 | 
						|
 | 
						|
static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
 | 
						|
{
 | 
						|
	_exit(g_vsyscall);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * vsyscall page can't be unmapped, probe it directly.
 | 
						|
 */
 | 
						|
static void vsyscall(void)
 | 
						|
{
 | 
						|
	pid_t pid;
 | 
						|
	int wstatus;
 | 
						|
 | 
						|
	pid = fork();
 | 
						|
	if (pid < 0) {
 | 
						|
		fprintf(stderr, "fork, errno %d\n", errno);
 | 
						|
		exit(1);
 | 
						|
	}
 | 
						|
	if (pid == 0) {
 | 
						|
		setrlimit(RLIMIT_CORE, &(struct rlimit){});
 | 
						|
 | 
						|
		/* Hide "segfault at ffffffffff600000" messages. */
 | 
						|
		struct sigaction act = {};
 | 
						|
		act.sa_flags = SA_SIGINFO;
 | 
						|
		act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
 | 
						|
		sigaction(SIGSEGV, &act, NULL);
 | 
						|
 | 
						|
		g_vsyscall = 0;
 | 
						|
		/* gettimeofday(NULL, NULL); */
 | 
						|
		uint64_t rax = 0xffffffffff600000;
 | 
						|
		asm volatile (
 | 
						|
			"call *%[rax]"
 | 
						|
			: [rax] "+a" (rax)
 | 
						|
			: "D" (NULL), "S" (NULL)
 | 
						|
			: "rcx", "r11"
 | 
						|
		);
 | 
						|
 | 
						|
		g_vsyscall = 1;
 | 
						|
		*(volatile int *)0xffffffffff600000UL;
 | 
						|
 | 
						|
		g_vsyscall = 2;
 | 
						|
		exit(g_vsyscall);
 | 
						|
	}
 | 
						|
	waitpid(pid, &wstatus, 0);
 | 
						|
	if (WIFEXITED(wstatus)) {
 | 
						|
		g_vsyscall = WEXITSTATUS(wstatus);
 | 
						|
	} else {
 | 
						|
		fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
 | 
						|
		exit(1);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static int test_proc_pid_maps(pid_t pid)
 | 
						|
{
 | 
						|
	char buf[4096];
 | 
						|
	snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
 | 
						|
	int fd = open(buf, O_RDONLY);
 | 
						|
	if (fd == -1) {
 | 
						|
		perror("open /proc/${pid}/maps");
 | 
						|
		return EXIT_FAILURE;
 | 
						|
	} else {
 | 
						|
		ssize_t rv = read(fd, buf, sizeof(buf));
 | 
						|
		close(fd);
 | 
						|
		if (g_vsyscall == 0) {
 | 
						|
			assert(rv == 0);
 | 
						|
		} else {
 | 
						|
			size_t len = strlen(g_proc_pid_maps_vsyscall);
 | 
						|
			assert(rv == len);
 | 
						|
			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
 | 
						|
		}
 | 
						|
		return EXIT_SUCCESS;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Check that /proc/${pid}/numa_maps of the target process reads as empty.
 *
 * Returns EXIT_SUCCESS if the file is empty or does not exist, EXIT_FAILURE
 * if it can't be opened for any other reason. Non-empty contents or a
 * failed read trip assert().
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char buf[4096];
	snprintf(buf, sizeof(buf), "/proc/%d/numa_maps", (int)pid);
	int fd = open(buf, O_RDONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			/*
			 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
			 * it doesn't necessarily exist.
			 */
			return EXIT_SUCCESS;
		}
		perror("open /proc/${pid}/numa_maps");
		return EXIT_FAILURE;
	}

	ssize_t rv = read(fd, buf, sizeof(buf));
	close(fd);
	assert(rv == 0);
	return EXIT_SUCCESS;
}
 | 
						|
 | 
						|
static int test_proc_pid_smaps(pid_t pid)
 | 
						|
{
 | 
						|
	char buf[4096];
 | 
						|
	snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
 | 
						|
	int fd = open(buf, O_RDONLY);
 | 
						|
	if (fd == -1) {
 | 
						|
		if (errno == ENOENT) {
 | 
						|
			/*
 | 
						|
			 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
 | 
						|
			 * it doesn't necessarily exist.
 | 
						|
			 */
 | 
						|
			return EXIT_SUCCESS;
 | 
						|
		}
 | 
						|
		perror("open /proc/${pid}/smaps");
 | 
						|
		return EXIT_FAILURE;
 | 
						|
	} else {
 | 
						|
		ssize_t rv = read(fd, buf, sizeof(buf));
 | 
						|
		close(fd);
 | 
						|
		if (g_vsyscall == 0) {
 | 
						|
			assert(rv == 0);
 | 
						|
		} else {
 | 
						|
			size_t len = strlen(g_proc_pid_maps_vsyscall);
 | 
						|
			/* TODO "ProtectionKey:" */
 | 
						|
			assert(rv > len);
 | 
						|
			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
 | 
						|
		}
 | 
						|
		return EXIT_SUCCESS;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Expected /proc/${pid}/smaps_rollup contents: an all-zero "[rollup]" header
 * line followed by zeroed counters.
 */
static const char g_smaps_rollup[] =
"00000000-00000000 ---p 00000000 00:00 0                                  [rollup]\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Pss_Anon:              0 kB\n"
"Pss_File:              0 kB\n"
"Pss_Shmem:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
;
 | 
						|
 | 
						|
static int test_proc_pid_smaps_rollup(pid_t pid)
 | 
						|
{
 | 
						|
	char buf[4096];
 | 
						|
	snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
 | 
						|
	int fd = open(buf, O_RDONLY);
 | 
						|
	if (fd == -1) {
 | 
						|
		if (errno == ENOENT) {
 | 
						|
			/*
 | 
						|
			 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
 | 
						|
			 * it doesn't necessarily exist.
 | 
						|
			 */
 | 
						|
			return EXIT_SUCCESS;
 | 
						|
		}
 | 
						|
		perror("open /proc/${pid}/smaps_rollup");
 | 
						|
		return EXIT_FAILURE;
 | 
						|
	} else {
 | 
						|
		ssize_t rv = read(fd, buf, sizeof(buf));
 | 
						|
		close(fd);
 | 
						|
		assert(rv == sizeof(g_smaps_rollup) - 1);
 | 
						|
		assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0);
 | 
						|
		return EXIT_SUCCESS;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
int main(void)
 | 
						|
{
 | 
						|
	int rv = EXIT_SUCCESS;
 | 
						|
 | 
						|
	vsyscall();
 | 
						|
 | 
						|
	switch (g_vsyscall) {
 | 
						|
	case 0:
 | 
						|
		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_0;
 | 
						|
		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
 | 
						|
		break;
 | 
						|
	case 1:
 | 
						|
		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_1;
 | 
						|
		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
 | 
						|
		break;
 | 
						|
	case 2:
 | 
						|
		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_2;
 | 
						|
		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		abort();
 | 
						|
	}
 | 
						|
 | 
						|
	pid_t pid = fork();
 | 
						|
	if (pid == -1) {
 | 
						|
		perror("fork");
 | 
						|
		return EXIT_FAILURE;
 | 
						|
	} else if (pid == 0) {
 | 
						|
		rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
 | 
						|
		if (rv != 0) {
 | 
						|
			if (errno == EPERM) {
 | 
						|
				fprintf(stderr,
 | 
						|
"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
 | 
						|
				);
 | 
						|
				kill(getppid(), SIGTERM);
 | 
						|
				return EXIT_FAILURE;
 | 
						|
			}
 | 
						|
			perror("ptrace PTRACE_TRACEME");
 | 
						|
			return EXIT_FAILURE;
 | 
						|
		}
 | 
						|
 | 
						|
		/*
 | 
						|
		 * Hide "segfault at ..." messages. Signal handler won't run.
 | 
						|
		 */
 | 
						|
		struct sigaction act = {};
 | 
						|
		act.sa_flags = SA_SIGINFO;
 | 
						|
		act.sa_sigaction = sigaction_SIGSEGV;
 | 
						|
		sigaction(SIGSEGV, &act, NULL);
 | 
						|
 | 
						|
#ifdef __amd64__
 | 
						|
		munmap(NULL, ((size_t)1 << 47) - 4096);
 | 
						|
#else
 | 
						|
#error "implement 'unmap everything'"
 | 
						|
#endif
 | 
						|
		return EXIT_FAILURE;
 | 
						|
	} else {
 | 
						|
		/*
 | 
						|
		 * TODO find reliable way to signal parent that munmap(2) completed.
 | 
						|
		 * Child can't do it directly because it effectively doesn't exist
 | 
						|
		 * anymore. Looking at child's VM files isn't 100% reliable either:
 | 
						|
		 * due to a bug they may not become empty or empty-like.
 | 
						|
		 */
 | 
						|
		sleep(1);
 | 
						|
 | 
						|
		if (rv == EXIT_SUCCESS) {
 | 
						|
			rv = test_proc_pid_maps(pid);
 | 
						|
		}
 | 
						|
		if (rv == EXIT_SUCCESS) {
 | 
						|
			rv = test_proc_pid_numa_maps(pid);
 | 
						|
		}
 | 
						|
		if (rv == EXIT_SUCCESS) {
 | 
						|
			rv = test_proc_pid_smaps(pid);
 | 
						|
		}
 | 
						|
		if (rv == EXIT_SUCCESS) {
 | 
						|
			rv = test_proc_pid_smaps_rollup(pid);
 | 
						|
		}
 | 
						|
		/*
 | 
						|
		 * TODO test /proc/${pid}/statm, task_statm()
 | 
						|
		 * ->start_code, ->end_code aren't updated by munmap().
 | 
						|
		 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
 | 
						|
		 */
 | 
						|
 | 
						|
		/* Cut the rope. */
 | 
						|
		int wstatus;
 | 
						|
		waitpid(pid, &wstatus, 0);
 | 
						|
		assert(WIFSTOPPED(wstatus));
 | 
						|
		assert(WSTOPSIG(wstatus) == SIGSEGV);
 | 
						|
	}
 | 
						|
 | 
						|
	return rv;
 | 
						|
}
 |