/*
 * Copyright 2012 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/*
 * The boot cache device mapper reads a set of contiguously stored sectors.
 * These sectors are copies of the sectors read during an earlier boot. Only
 * small reads (less than some number of sectors) are selected for the cache,
 * since this results in the highest benefit.
 *
 * The data for the boot cache consists of three sections:
 * a header, the sector trace and the cache sectors.
 * These are stored after the file system in the same partition.
 *
 * The boot cache is built by a separate user-space process that reads
 * the sector trace; the trace is only collected when the existing boot
 * cache is invalid.
 */

#include <linux/async.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/device-mapper.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>

#include "dm.h"
#include "dm-bootcache.h"

#define DM_MSG_PREFIX "bootcache"

#define DEFAULT_MAX_PAGES	50000
#define DEFAULT_SIZE_LIMIT	128
#define DEFAULT_MAX_TRACE	(1 << 13)
#define MAX_TRACE		(1 << 20)
#define DEV_MODE		FMODE_READ
#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE	(PAGE_SIZE / SECTOR_SIZE)
#define MAX_DEVICE_NAME		(1 << 8)
#define FRACTION_OF_TOTAL_PAGES	10

enum bc_state {
	BC_INIT = 1,
	BC_TRACING,
	BC_FILLING,
	BC_FILLED,
	BC_BYPASS
};

struct bootcache_waiter {
	struct completion completion;
	int error;
};
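/*
 * On-disk layout, as implied by the offsets used in read_trace() and
 * bootcache_read_sectors() below (a sketch, assuming the header occupies
 * exactly one page at args.cache_start):
 *
 *	cache_start (in sectors)
 *	+-----------------+--------------------+-------------------+
 *	| header (1 page) | trace/meta sectors | cached data pages |
 *	+-----------------+--------------------+-------------------+
 *	                  ^ hdr.sector +       ^ cache_start +
 *	                    SECTORS_PER_PAGE     hdr.sectors_meta +
 *	                                         SECTORS_PER_PAGE
 */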
struct bootcache_args {
	/* Device being cached. The boot cache also stores its cache here. */
	char device[MAX_DEVICE_NAME];
	/* Identifies the data on the device, e.g. root hex digest from verity */
	char signature[MAX_SIGNATURE];
	/* Sector start of cache on device */
	u64 cache_start;
	/* Max num of pages to cache */
	u64 max_pages;
	/* Reads this size or larger will not be cached */
	u64 size_limit;
	/* Maximum number of trace records to collect */
	u64 max_trace;
};

struct bootcache_stats {
	unsigned num_requests;	/* Read requests */
	unsigned num_hits;	/* Number of hits */
	unsigned overlapped;	/* Blocks used while reading rest */
};

struct bootcache_page {
	struct bootcache_page *next;
	struct page *page;
	u64 sector;	/* first sector in set of sectors in this page */
	bool is_filled;
};

struct bootcache_sector_map {
	u32 num_buckets;		/* Number of buckets for hash */
	u32 num_pages;			/* Number of pages of sectors */
	struct bootcache_page *pages;	/* Cache of pages of sectors */
	struct bootcache_page *nextpage;/* Next page entry to add */
	struct bootcache_page **bucket;	/* Hash buckets */
};

struct bootcache {
	const char *name;	/* Taken from device being cached */
	struct bootcache_stats stats;
	struct bootcache_args args;
	sector_t begin;		/* Beginning sector of underlying device */
	sector_t len;		/* Length in sectors of underlying device */
	atomic_t state;		/* Cache state - needs atomic read */
	spinlock_t trace_lock;	/* Spin lock for trace table */
	struct bootcache_trace *trace;	/* Trace of blocks read during boot */
	u32 trace_next;		/* Next element to fill for tracing */
	u32 max_io;		/* Max pages we can read/write */
	bool is_valid;		/* The cache is valid */
	bool is_free;		/* The cache data has been freed */
	struct kref kref;	/* Protects in-flight operations */
	struct dm_target *ti;	/* Device in device mapper */
	struct bio_set *bio_set;/* Set of bios for reading blocks */
	struct dm_dev *dev;	/* Device for both cache and data */
	struct delayed_work work;	/* Work that needs a thread */
	struct mutex cache_lock;	/* Locks everything in cache struct */
	struct completion init_complete;	/* Wait for initialization */
	struct bootcache_sector_map sectors;	/* Table of pages of sectors */
	/* Sysfs files for managing the block cache */
	struct bin_attribute valid;	/* 1 -> valid 0 -> build cache */
	struct bin_attribute free;	/* Write '1' to free cache */
	struct bin_attribute header;	/* Content for bootcache header */
	struct bin_attribute blocktrace;/* Trace of blocks accessed */
	/* Computed hdr to be compared with on-disk header */
	struct bootcache_hdr hdr;
};
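/*
 * Cache state transitions, summarizing the logic in bootcache_start(),
 * bootcache_read() and free_write() below:
 *
 *	BC_INIT -- valid on-disk header ----> BC_FILLING --> BC_FILLED
 *	BC_INIT -- missing/invalid header --> BC_TRACING
 *	any state -- write '1' to "free", or a start-up error --> BC_BYPASS
 *
 * Readers that arrive in BC_INIT block on init_complete until one of the
 * other states has been reached.
 */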
static inline u64 bytes_to_pages(u64 bytes)
{
	return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

static inline u64 sectors_to_pages(u64 sectors)
{
	return sectors >> (PAGE_SHIFT - SECTOR_SHIFT);
}

static inline u64 pages_to_sectors(u64 pages)
{
	return pages << (PAGE_SHIFT - SECTOR_SHIFT);
}

static inline struct bootcache_page **bootcache_hash(
	struct bootcache_sector_map *map, u64 sector)
{
	return &map->bucket[(u32)sector % map->num_buckets];
}

static struct bootcache_page *bootcache_get_chunk(
	struct bootcache_sector_map *map, u64 sector)
{
	struct bootcache_page *next;

	next = *bootcache_hash(map, sector);
	while (next) {
		if (sector == next->sector) {
			if (next->is_filled)
				return next;
			else
				return NULL;
		}
		next = next->next;
	}
	return next;
}

struct bootcache_page *bootcache_new_chunk(struct bootcache_sector_map *map,
					u64 sector)
{
	struct bootcache_page **bucket = bootcache_hash(map, sector);
	struct bootcache_page *p;

	if (map->nextpage == &map->pages[map->num_pages]) {
		DMWARN("block cache full");
		return NULL;
	}
	p = map->nextpage;
	p->page = alloc_page(GFP_KERNEL);
	if (!p->page) {
		DMWARN("bootcache_new_chunk alloc_page failed");
		return NULL;
	}
	map->nextpage++;
	p->sector = sector;
	p->next = *bucket;
	*bucket = p;
	return p;
}

static int build_sector_map(struct bootcache_sector_map *map, u32 num_pages)
{
	map->num_pages = num_pages;
	map->num_buckets = num_pages * 3 / 2;
	map->bucket = kzalloc(map->num_buckets * sizeof(*map->bucket),
				GFP_KERNEL);
	if (!map->bucket) {
		DMERR("build_sector_map kzalloc buckets");
		return -ENOMEM;
	}
	map->pages = kzalloc(num_pages * sizeof(*map->pages), GFP_KERNEL);
	if (!map->pages) {
		kfree(map->bucket);
		DMERR("build_sector_map kzalloc pages");
		return -ENOMEM;
	}
	map->nextpage = map->pages;
	return 0;
}

static void bootcache_free_sector_map(struct bootcache_sector_map *map)
{
	struct bootcache_page *p;

	for (p = map->pages; p < map->nextpage; p++)
		if (p->page)
			__free_pages(p->page, 0);
	kfree(map->pages);
	kfree(map->bucket);
	map->pages = NULL;
	map->bucket = NULL;
	map->nextpage = NULL;
}
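/*
 * Illustration of the sector map (hypothetical numbers): each
 * bootcache_page caches one page worth of data (SECTORS_PER_PAGE sectors)
 * keyed by its starting sector. With 4 KiB pages and 512-byte sectors, a
 * page holding sectors 64..71 has p->sector == 64 and lives in
 * bucket[64 % num_buckets], chained through p->next on collisions.
 */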
static int bootcache_create_bin_file(struct bootcache *cache,
	struct bin_attribute *attr, char *name, ssize_t size,
	ssize_t (*read)(struct file *, struct kobject *,
			struct bin_attribute *, char *, loff_t, size_t),
	ssize_t (*write)(struct file *, struct kobject *,
			struct bin_attribute *, char *, loff_t, size_t))
{
	int rc = 0;

	if (attr->attr.name)
		return -EEXIST;
	attr->attr.name = name;
	attr->attr.mode = write ? 0644 : 0444;
	attr->size = size;
	attr->read = read;
	attr->write = write;

	rc = sysfs_create_bin_file(dm_kobject(dm_table_get_md(
						cache->ti->table)), attr);
	if (rc)
		DMERR("sysfs_create_bin_file %s: %d", name, rc);
	return rc;
}

/*
 * bootcache_remove_bin_file uses the file name as a flag
 * to determine if the sysfs file has been created.
 */
static void bootcache_remove_bin_file(struct bootcache *cache,
	struct bin_attribute *attr)
{
	if (attr->attr.name) {
		sysfs_remove_bin_file(dm_kobject(dm_table_get_md(
						cache->ti->table)), attr);
		attr->attr.name = NULL;
	}
}

/*
 * bootcache_remove_all_files removes all the sysfs files
 * that have been created, and only the ones that have been
 * created.
 */
static void bootcache_remove_all_files(struct bootcache *cache)
{
	bootcache_remove_bin_file(cache, &cache->blocktrace);
	bootcache_remove_bin_file(cache, &cache->header);
	bootcache_remove_bin_file(cache, &cache->free);
	bootcache_remove_bin_file(cache, &cache->valid);
}

static void bootcache_free_resources(struct kref *kref)
{
	struct bootcache *cache = container_of(kref, struct bootcache,
						kref);

	/* Will hang if we try to remove cache->free here */
	bootcache_remove_bin_file(cache, &cache->blocktrace);
	bootcache_remove_bin_file(cache, &cache->header);
	bootcache_remove_bin_file(cache, &cache->valid);
	bootcache_free_sector_map(&cache->sectors);
	kfree(cache->trace);
	cache->trace = NULL;
}

/*
 * bootcache_get_ino returns the inode number of the bio if it has one.
 * If not, it returns 0, an illegal inode number.
 * When the bio is sent down for I/O, these fields don't change
 * while the I/O is pending.
 */
static unsigned long bootcache_get_ino(struct bio *bio)
{
	if (!bio)
		return 0;
	if (!bio->bi_io_vec)
		return 0;
	if (!bio->bi_io_vec->bv_page)
		return 0;
	if (!bio->bi_io_vec->bv_page->mapping)
		return 0;
	if (!bio->bi_io_vec->bv_page->mapping->host)
		return 0;
	return bio->bi_io_vec->bv_page->mapping->host->i_ino;
}

static void bootcache_record(struct bootcache *cache, struct bio *bio)
{
	u64 sector = bio->bi_sector;
	u64 count = to_sector(bio->bi_size);
	struct bootcache_trace *tr;

	if (!cache->trace)
		return;
	spin_lock(&cache->trace_lock);
	if (cache->trace_next < cache->args.max_trace) {
		tr = &cache->trace[cache->trace_next];
		tr->sector = sector;
		tr->count = count;
		tr->ino = bootcache_get_ino(bio);
		++cache->trace_next;
	}
	spin_unlock(&cache->trace_lock);
}

static bool is_in_cache(struct bootcache *cache, struct bio *bio)
{
	u64 sector = bio->bi_sector;
	u32 count = bytes_to_pages(bio->bi_size);
	u32 i;

	for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
		if (!bootcache_get_chunk(&cache->sectors, sector))
			return false;
	}
	++cache->stats.num_hits;
	return true;
}

static void bootcache_read_from_cache(struct bootcache *cache,
					struct bio *bio)
{
	struct bootcache_page *bp;
	u64 sector = bio->bi_sector;
	u32 count = bytes_to_pages(bio->bi_size);
	u8 *dst;
	u8 *src;
	u32 i;

	for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
		bp = bootcache_get_chunk(&cache->sectors, sector);
		if (!bp) {
			/*
			 * Should have found it because we just
			 * looked for it before calling this code
			 */
			DMCRIT("Didn't find block %llx", sector);
			BUG();
		}
		dst = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
		src = kmap_atomic(bp->page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio->bi_end_io(bio, 0);
}

static void bootcache_read(struct bootcache *cache, struct bio *bio)
{
	int state;

	bio->bi_bdev = cache->dev->bdev;

	/* Only record reads below the given size */
	if ((atomic_read(&cache->state) == BC_BYPASS) ||
	    (to_sector(bio->bi_size) > cache->args.size_limit)) {
		generic_make_request(bio);
		return;
	}
	kref_get(&cache->kref);
try_again:
	state = atomic_read(&cache->state);
	switch (state) {
	case BC_INIT:
		wait_for_completion(&cache->init_complete);
		goto try_again;
	case BC_TRACING:
		bootcache_record(cache, bio);
		generic_make_request(bio);
		break;
	case BC_FILLING:
		++cache->stats.overlapped;
		/* FALLTHRU */
	case BC_FILLED:
		if (is_in_cache(cache, bio))
			bootcache_read_from_cache(cache, bio);
		else
			generic_make_request(bio);
		break;
	case BC_BYPASS:
		generic_make_request(bio);
		break;
	default:
		DMCRIT("unknown state %d", state);
		BUG();
		break;
	}
	++cache->stats.num_requests;
	if (cache->stats.num_requests % 1000 == 0) {
		DMINFO("hits = %u / %u", cache->stats.num_hits,
			cache->stats.num_requests);
	}
	kref_put(&cache->kref, bootcache_free_resources);
}
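/*
 * The four bin files below are created under the dm device's kobject.
 * A sketch of how user space might drive them (the exact path depends on
 * the dm minor number; /sys/block/dm-0 is only an example):
 *
 *	cat /sys/block/dm-0/dm/valid	  # '1' cache usable, '0' tracing
 *	cat /sys/block/dm-0/dm/header	  # raw struct bootcache_hdr
 *	cat /sys/block/dm-0/dm/blocktrace # raw bootcache_trace records
 *	echo 1 > /sys/block/dm-0/dm/free  # release the in-memory cache
 */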
{ DMINFO("hits = %u / %u", cache->stats.num_hits, cache->stats.num_requests); } kref_put(&cache->kref, bootcache_free_resources); } static ssize_t valid_read(struct file *file, struct kobject *kobp, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct bootcache *cache = container_of(bin_attr, struct bootcache, valid); if (pos > 0 || count == 0) return 0; buf[0] = cache->is_valid ? '1' : '0'; return 1; } static ssize_t free_read(struct file *file, struct kobject *kobp, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct bootcache *cache = container_of(bin_attr, struct bootcache, free); if (pos > 0 || count == 0) return 0; buf[0] = cache->is_free ? '1' : '0'; return 1; } static ssize_t free_write(struct file *file, struct kobject *kobp, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct bootcache *cache = container_of(bin_attr, struct bootcache, free); ssize_t err = 0; mutex_lock(&cache->cache_lock); if (cache->is_free) { err = 0; goto exit; } atomic_set(&cache->state, BC_BYPASS); /* * Once BC_BYPASS is set, the system * should drain quickly. */ kref_put(&cache->kref, bootcache_free_resources); cache->is_free = 1; /* Tell caller we wrote everything */ err = count; exit: mutex_unlock(&cache->cache_lock); return err; } static ssize_t header_read(struct file *file, struct kobject *kobp, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct bootcache *cache = container_of(bin_attr, struct bootcache, header); return memory_read_from_buffer(buf, count, &pos, &cache->hdr, sizeof(cache->hdr)); } static ssize_t blocktrace_read(struct file *file, struct kobject *kobp, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct bootcache *cache = container_of(bin_attr, struct bootcache, blocktrace); char *data; size_t next, size; ssize_t err = 0; kref_get(&cache->kref); if (atomic_read(&cache->state) != BC_TRACING) { err = -ENODEV; goto exit; } data = (char *)cache->trace; spin_lock(&cache->trace_lock); next = cache->trace_next; spin_unlock(&cache->trace_lock); size = next * sizeof(struct bootcache_trace); err = memory_read_from_buffer(buf, count, &pos, data, size); exit: kref_put(&cache->kref, bootcache_free_resources); return err; } static int bootcache_init_sysfs(struct bootcache *cache, struct dm_target *ti) { int rc; rc = bootcache_create_bin_file(cache, &cache->valid, "valid", 3, valid_read, NULL); if (rc) goto error; rc = bootcache_create_bin_file(cache, &cache->free, "free", 3, free_read, free_write); if (rc) goto error; rc = bootcache_create_bin_file(cache, &cache->header, "header", sizeof(cache->hdr), header_read, NULL); if (rc) goto error; rc = bootcache_create_bin_file(cache, &cache->blocktrace, "blocktrace", cache->args.max_trace * sizeof(struct bootcache_trace), blocktrace_read, NULL); if (rc) goto error; return rc; error: bootcache_remove_all_files(cache); return rc; } static void bootcache_read_sectors_end(struct bio *bio, int error) { struct bootcache_waiter *waiter = bio->bi_private; if (unlikely(error)) { waiter->error = error; DMERR("Error occurred in bootcache_read_sectors:" " %d (%llx, %x)", error, (u64)bio->bi_sector, bio->bi_size); } complete(&waiter->completion); } static int bootcache_read_sectors(struct bootcache *cache) { struct bootcache_waiter waiter; struct bio *bio; struct bootcache_page *p; struct bootcache_page *start_page; struct bio_vec *bvec; sector_t sector = cache->args.cache_start + cache->hdr.sectors_meta + SECTORS_PER_PAGE; u32 max_io = 
cache->max_io; u32 numpages = cache->sectors.num_pages; u32 chunks_to_read = (numpages + max_io - 1) / max_io; int i; int j; int rc = 0; p = cache->sectors.pages; for (i = 0; i < chunks_to_read; i++) { bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set); if (unlikely(!bio)) { DMERR("Out of memory bio_alloc_bioset"); return -ENOMEM; } bio->bi_private = &waiter; bio->bi_idx = 0; bio->bi_bdev = cache->dev->bdev; bio->bi_end_io = bootcache_read_sectors_end; bio->bi_rw = 0; bio->bi_sector = sector; bvec = bio->bi_io_vec; start_page = p; for (j = 0; j < max_io; j++, bvec++, p++) { if (p == cache->sectors.nextpage) break; bvec->bv_page = p->page; bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; } bio->bi_size = j * PAGE_SIZE; bio->bi_vcnt = j; init_completion(&waiter.completion); waiter.error = 0; generic_make_request(bio); wait_for_completion(&waiter.completion); if (waiter.error) { rc = waiter.error; bio->bi_private = cache; bio_put(bio); break; } p = start_page; for (j = 0; j < max_io; j++, p++) { if (p == cache->sectors.nextpage) break; p->is_filled = 1; } sector += pages_to_sectors(j); bio->bi_private = cache; bio_put(bio); } atomic_set(&cache->state, BC_FILLED); return rc; } static void bootcache_dev_read_end(struct bio *bio, int error) { struct bootcache_waiter *waiter = bio->bi_private; if (unlikely(error)) { waiter->error = error; DMERR("Error occurred in bootcache_dev_read: %d (%llx, %x)", error, (u64)bio->bi_sector, bio->bi_size); } complete(&waiter->completion); } static int bootcache_dev_read(struct bootcache *cache, void *data, int len, u64 sector) { struct bootcache_waiter waiter; struct bio *bio; struct bio_vec *bvec; int pages_to_read = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; int max_io = cache->max_io; int bytes_to_copy; int i; int rc = 0; int pages_read; u8 *dst = data; u8 *src; pages_read = 0; while (len) { if (pages_to_read < max_io) max_io = pages_to_read; bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set); if (unlikely(!bio)) { DMERR("Out of memory bio_alloc_bioset"); return -ENOMEM; } bvec = bio->bi_io_vec; for (i = 0; i < max_io; i++, bvec++) bvec->bv_page = alloc_page(GFP_KERNEL); bio->bi_private = &waiter; bio->bi_idx = 0; bio->bi_bdev = cache->dev->bdev; bio->bi_end_io = bootcache_dev_read_end; bio->bi_rw = 0; bio->bi_sector = sector; bvec = bio->bi_io_vec; for (i = 0; i < max_io; i++, bvec++) { bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; } pages_to_read -= max_io; bio->bi_size = max_io * PAGE_SIZE; bio->bi_vcnt = max_io; init_completion(&waiter.completion); waiter.error = 0; generic_make_request(bio); wait_for_completion(&waiter.completion); if (waiter.error) { rc = waiter.error; goto error; } for (i = 0; i < max_io; i++) { bytes_to_copy = min(len, (int)PAGE_SIZE); src = kmap_atomic(bio_iovec_idx(bio, i)->bv_page); memcpy(dst, src, bytes_to_copy); kunmap_atomic(src); len -= bytes_to_copy; if (!len) break; dst += bytes_to_copy; } sector += pages_to_sectors(max_io); bvec = bio->bi_io_vec; for (i = 0; i < max_io; i++, bvec++) __free_pages(bvec->bv_page, 0); bio->bi_private = cache; bio_put(bio); } return rc; error: bvec = bio->bi_io_vec; for (i = 0; i < max_io; i++, bvec++) __free_pages(bvec->bv_page, 0); bio->bi_private = cache; bio_put(bio); return rc; } static int is_valid_hdr(struct bootcache *cache, struct bootcache_hdr *hdr) { u64 max_sectors; u64 max_meta_sectors; u64 max_pages; if (hdr->magic != BOOTCACHE_MAGIC) return 0; if (hdr->version != BOOTCACHE_VERSION) return 0; if (hdr->max_sectors != cache->hdr.max_sectors) return 0; if (hdr->max_hw_sectors 
!= cache->hdr.max_hw_sectors) return 0; if (strncmp(hdr->date, __DATE__, strlen(__DATE__) + 1) != 0) return 0; if (strncmp(hdr->time, __TIME__, strlen(__TIME__) + 1) != 0) return 0; if (strncmp(hdr->signature, cache->hdr.signature, sizeof(hdr->signature)) != 0) return 0; /* * Check sanity: * Can't have any more meta sectors than it takes to map * the remaining parition space for bootcache. */ max_sectors = to_sector(i_size_read(cache->dev->bdev->bd_inode)) - cache->args.cache_start; max_pages = sectors_to_pages(max_sectors); max_pages = min(max_pages, (u64)INT_MAX / sizeof(*cache->trace)); max_pages = min(max_pages, (u64)totalram_pages / FRACTION_OF_TOTAL_PAGES); if (hdr->num_trace_recs > max_pages) { DMERR("too many trace records %lld", (u64)hdr->num_trace_recs); return 0; } max_meta_sectors = to_sector(round_up(max_pages * sizeof(u64), SECTOR_SIZE)); if (hdr->sectors_meta > max_meta_sectors) { DMERR("too many meta sectors %lld", (u64)hdr->sectors_meta); return 0; } if (hdr->sectors_data > max_sectors - hdr->sectors_meta - 1) { DMERR("bootcache too big %lld", (u64)hdr->sectors_data); return 0; } return 1; } static int read_trace(struct bootcache *cache) { u64 size_trace; u64 i; u64 j; int rc; int sum = 0; size_trace = sizeof(*cache->trace) * cache->hdr.num_trace_recs; cache->trace = kzalloc(size_trace, GFP_KERNEL); if (!cache->trace) { DMERR("read_trace out of memory"); return -ENOMEM; } rc = bootcache_dev_read(cache, cache->trace, size_trace, cache->hdr.sector + SECTORS_PER_PAGE); if (rc) { DMERR("bootcache_dev_read trace %d", rc); return rc; } for (i = 0; i < cache->hdr.num_trace_recs; i++) { struct bootcache_trace *tr; tr = &cache->trace[i]; for (j = 0; j < tr->count; j += SECTORS_PER_PAGE) { bootcache_new_chunk(&cache->sectors, tr->sector + j); ++sum; } } return 0; } /** * bootcache_start: * * Reads the bootcache header from disk, checks if it is valid * if valid: * read the sector trace from disk * build hash table for sector trace on page boundaries * begin reading in sectors to be cached * else: * setup to capture trace of sectors * * on error: by pass boot cache */ static void bootcache_start(struct work_struct *work) { struct bootcache *cache = container_of(work, struct bootcache, work.work); struct bootcache_hdr hdr; int rc; rc = bootcache_dev_read(cache, &hdr, sizeof(hdr), cache->hdr.sector); if (rc) { DMERR("bootcache_dev_read hdr %d", rc); goto error; } if (is_valid_hdr(cache, &hdr)) { cache->is_valid = 1; memcpy(&cache->hdr, &hdr, sizeof(cache->hdr)); rc = build_sector_map(&cache->sectors, sectors_to_pages(cache->hdr.sectors_data)); if (rc) goto error; rc = read_trace(cache); if (rc) goto error; atomic_set(&cache->state, BC_FILLING); rc = bootcache_read_sectors(cache); if (rc) goto error; } else { atomic_set(&cache->state, BC_TRACING); cache->trace = kzalloc(sizeof(*cache->trace) * cache->args.max_trace, GFP_KERNEL); if (!cache->trace) { DMERR("cache->trace out of memory"); goto error; } } exit: complete_all(&cache->init_complete); return; error: DMERR("error occured starting bootcache, setting to by pass mode"); atomic_set(&cache->state, BC_BYPASS); cache->is_valid = 0; goto exit; } /** * bootcache_max_io determines the maximum number of pages that can * be passed in one read request to the underlying device. * @cache: the max_sectors and max_hw_sectors must * be filled in. * @proposed_max_io: maxium number of pages the caller wants * to read at a time. 
/**
 * bootcache_max_io determines the maximum number of pages that can
 * be passed in one read request to the underlying device.
 * @cache:           the max_sectors and max_hw_sectors must
 *                   be filled in.
 * @proposed_max_io: maximum number of pages the caller wants
 *                   to read at a time.
 *
 * Returns the maximum number of pages that can be read, but
 * no more than proposed_max_io.
 */
static u32 bootcache_max_io(struct bootcache *cache, u32 proposed_max_io)
{
	u32 max_sectors;
	u32 max_pages;

	max_sectors = min(cache->hdr.max_sectors, cache->hdr.max_hw_sectors);
	max_pages = sectors_to_pages(max_sectors);
	if (proposed_max_io < max_pages)
		max_pages = proposed_max_io;
	return max_pages;
}

static void bootcache_init_hdr(struct bootcache_hdr *hdr, u64 cache_start,
				struct block_device *bdev,
				const char *signature)
{
	hdr->sector = cache_start;
	hdr->magic = BOOTCACHE_MAGIC;
	hdr->version = BOOTCACHE_VERSION;
	hdr->state = BC_INIT;
	hdr->alignment = PAGE_SIZE;
	hdr->max_hw_sectors = queue_max_hw_sectors(bdev_get_queue(bdev));
	hdr->max_sectors = queue_max_sectors(bdev_get_queue(bdev));
	strncpy(hdr->date, __DATE__, sizeof(hdr->date));
	strncpy(hdr->time, __TIME__, sizeof(hdr->time));
	strncpy(hdr->signature, signature, sizeof(hdr->signature));
}

static int bootcache_get_device(
	struct dm_target *ti,
	char *devname,
	sector_t dev_start,
	sector_t dev_len,
	struct dm_dev **dm_dev)
{
	do {
		/*
		 * Try the normal path first since if everything is ready,
		 * it will be the fastest.
		 */
		if (!dm_get_device(ti, devname, dm_table_get_mode(ti->table),
					dm_dev))
			return 0;

		/* No need to be too aggressive since this is a slow path. */
		msleep(500);
	} while (driver_probe_done() != 0 || *dm_dev == NULL);
	async_synchronize_full();
	return -1;
}
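/*
 * A sketch of how the target might be instantiated from user space
 * (illustrative; the target name "bootcache" and the read-only
 * requirement come from this file, the placeholders stand for the
 * positional arguments documented below):
 *
 *	dmsetup create bootcache --readonly --table \
 *	  "0 <fs_sectors> bootcache <device> <cache_start> <signature> \
 *	   <size_limit> <max_trace> <max_pages>"
 */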
/**
 * bootcache_ctr - Construct a boot cache
 * @ti:   Target being created
 * @argc: Number of elements in argv
 * @argv: Vector of arguments - All arguments are positional, this
 *	means that to set a particular argument, all of its
 *	predecessors must be present.
 *
 * Accepts the following parameters [defaults in brackets]:
 * @device:      Device being cached. The boot cache is also stored here.
 * @cache_start: Sector start on the device for the boot cache.
 * @signature:   Signature to determine if cache is valid.
 * @size_limit:  In sectors, max size reads to include in cache [128]
 * @max_trace:   Number of entries in block trace made during boot [8192]
 * @max_pages:   Maximum number of pages to cache in memory [50000]
 *
 * Argument list:
 * [<device> [<cache_start> [<signature> [<size_limit> [<max_trace>
 *	[<max_pages>]]]]]]
 *
 * Example:
 * PARTUUID=0f5dbd05-c063-a848-a296-b8b8c2c24b28/PARTNROFF=1 1741200
 *	10e8...78 80 64000 60000
 */
static int bootcache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	struct bootcache *cache = NULL;
	const char *signature = NULL;
	char *device = NULL;
	u64 cache_start = 0;
	u64 max_pages = DEFAULT_MAX_PAGES;
	u64 size_limit = DEFAULT_SIZE_LIMIT;
	u64 max_trace = DEFAULT_MAX_TRACE;
	int rc = 0;

	if (argc > 0)
		device = argv[0];
	if (argc > 1)
		if (strict_strtoull(argv[1], 10, &cache_start)) {
			ti->error = "Invalid cache_start";
			return -EINVAL;
		}
	if (argc > 2)
		signature = argv[2];
	if (argc > 3)
		if (strict_strtoull(argv[3], 10, &size_limit)) {
			ti->error = "Invalid size_limit";
			return -EINVAL;
		}
	if (argc > 4)
		if (strict_strtoull(argv[4], 10, &max_trace)) {
			ti->error = "Invalid max_trace";
			return -EINVAL;
		}
	if (argc > 5)
		if (strict_strtoull(argv[5], 10, &max_pages)) {
			ti->error = "Invalid max_pages";
			return -EINVAL;
		}

#define NEEDARG(n) \
	if (!(n)) { \
		ti->error = "Missing argument: " #n; \
		return -EINVAL; \
	}

	NEEDARG(device);
	NEEDARG(signature);
	NEEDARG(cache_start);

#undef NEEDARG

	if ((dm_table_get_mode(ti->table) & DEV_MODE) != DEV_MODE) {
		ti->error = "Must be created read only.";
		return -EINVAL;
	}

	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
	if (!cache)
		goto bad_cache;
	init_completion(&cache->init_complete);
	cache->ti = ti;

	strlcpy(cache->args.device, device, sizeof(cache->args.device));
	strlcpy(cache->args.signature, signature,
		sizeof(cache->args.signature));
	cache->args.cache_start = cache_start;
	cache->args.max_pages = max_pages;
	cache->args.size_limit = size_limit;
	if (max_trace > MAX_TRACE) {
		DMWARN("max_trace too large %llu, setting to %d\n",
			max_trace, MAX_TRACE);
		max_trace = MAX_TRACE;
	}
	cache->args.max_trace = max_trace;

	cache->begin = ti->begin;
	cache->len = ti->len;

	atomic_set(&cache->state, BC_INIT);
	kref_init(&cache->kref);
	mutex_init(&cache->cache_lock);
	spin_lock_init(&cache->trace_lock);

	/* For the name, use the device default with / changed to _ */
	cache->name = dm_disk(dm_table_get_md(ti->table))->disk_name;

	if (bootcache_init_sysfs(cache, ti))
		goto bad_sysfs;

	rc = bootcache_get_device(ti, device, ti->begin, ti->len,
					&cache->dev);
	if (rc) {
		DMERR("Failed to acquire device '%s': %d", device, rc);
		ti->error = "Device lookup failed";
		goto bad_dev;
	}
	bootcache_init_hdr(&cache->hdr, cache_start,
				cache->dev->bdev, signature);
	cache->max_io = bootcache_max_io(cache, BIO_MAX_PAGES);

	/* Allocate the bioset used for request padding */
	cache->bio_set = bioset_create(cache->max_io * 4, 0);
	if (!cache->bio_set) {
		ti->error = "Cannot allocate bootcache bioset";
		goto bad_bio_set;
	}

	ti->num_flush_bios = 1;
	ti->private = cache;

	{
		char vdev[BDEVNAME_SIZE];
		bdevname(cache->dev->bdev, vdev);
		DMINFO("dev:%s", vdev);
	}

	INIT_WORK(&cache->work.work, bootcache_start);
	schedule_work(&cache->work.work);

	DMINFO("cache:%p", cache);
	return 0;

bad_bio_set:
	dm_put_device(ti, cache->dev);
bad_dev:
	bootcache_remove_all_files(cache);
bad_sysfs:
	kfree(cache);	/* hash is not secret so no need to zero */
bad_cache:
	return -EINVAL;
}
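/*
 * Status reporting: STATUSTYPE_INFO emits "<requests> <hits> <overlapped>"
 * (see bootcache_stats); STATUSTYPE_TABLE re-emits the construction
 * parameters.
 */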
static void bootcache_status(struct dm_target *ti, status_type_t type,
				unsigned status_flags, char *result,
				uint maxlen)
{
	struct bootcache *cache = (struct bootcache *)ti->private;
	uint sz = 0;
	char vdev[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u %u %u", cache->stats.num_requests,
			cache->stats.num_hits, cache->stats.overlapped);
		break;
	case STATUSTYPE_TABLE:
		bdevname(cache->dev->bdev, vdev);
		DMEMIT("/dev/%s signature=%s cache_start=%llu max_pages=%llu"
			" size_limit=%llu max_trace=%llu\n",
			vdev, cache->args.signature, cache->args.cache_start,
			cache->args.max_pages, cache->args.size_limit,
			cache->args.max_trace);
		break;
	}
}

static void bootcache_dtr(struct dm_target *ti)
{
	/*
	 * Doesn't have to clean up the meta files in sysfs
	 * because the device mapper has already done it.
	 */
	struct bootcache *cache = (struct bootcache *)ti->private;

	DMDEBUG("Destroying bio set");
	bioset_free(cache->bio_set);

	DMDEBUG("Putting dev");
	dm_put_device(ti, cache->dev);

	DMDEBUG("Destroying config");
	kfree(cache);
}

static int bootcache_map(struct dm_target *ti, struct bio *bio)
{
	bootcache_read(ti->private, bio);
	return DM_MAPIO_SUBMITTED;
}

static int bootcache_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
			struct bio_vec *biovec, int max_size)
{
	struct bootcache *cache = ti->private;
	struct request_queue *q = bdev_get_queue(cache->dev->bdev);

	if (!q->merge_bvec_fn)
		return max_size;

	bvm->bi_bdev = cache->dev->bdev;
	bvm->bi_sector = cache->begin + bvm->bi_sector - ti->begin;

	/* Optionally, this could just return 0 to stick to single pages. */
	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

static int bootcache_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{
	struct bootcache *cache = ti->private;

	return fn(ti, cache->dev, cache->begin, ti->len, data);
}

static void bootcache_io_hints(struct dm_target *ti,
				struct queue_limits *limits)
{
	limits->logical_block_size = PAGE_SIZE;
	limits->physical_block_size = PAGE_SIZE;
	blk_limits_io_min(limits, PAGE_SIZE);
}

static struct target_type bootcache_target = {
	.name		 = "bootcache",
	.version	 = {0, 1, 0},
	.module		 = THIS_MODULE,
	.ctr		 = bootcache_ctr,
	.dtr		 = bootcache_dtr,
	.map		 = bootcache_map,
	.merge		 = bootcache_merge,
	.status		 = bootcache_status,
	.iterate_devices = bootcache_iterate_devices,
	.io_hints	 = bootcache_io_hints,
};

static int __init dm_bootcache_init(void)
{
	int rc;

	rc = dm_register_target(&bootcache_target);
	if (rc < 0) {
		DMERR("register failed %d", rc);
		return rc;
	}
	DMINFO("version %u.%u.%u loaded", bootcache_target.version[0],
		bootcache_target.version[1], bootcache_target.version[2]);
	return rc;
}

static void __exit dm_bootcache_exit(void)
{
	dm_unregister_target(&bootcache_target);
}

module_init(dm_bootcache_init);
module_exit(dm_bootcache_exit);

MODULE_AUTHOR("Paul Taysom");
MODULE_DESCRIPTION(DM_NAME " read cache");
MODULE_LICENSE("GPL");