diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3994f0bb..a59ee432 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -571,9 +571,6 @@ tcp_limit_output_bytes - INTEGER typical pfifo_fast qdiscs. tcp_limit_output_bytes limits the number of bytes on qdisc or device to reduce artificial RTT/cwnd and reduce bufferbloat. - Note: For GSO/TSO enabled flows, we try to have at least two - packets in flight. Reducing tcp_limit_output_bytes might also - reduce the size of individual GSO packet (64KB being the max) Default: 131072 tcp_challenge_ack_limit - INTEGER diff --git a/Makefile b/Makefile index c28590e4..9de5f61a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 22 +SUBLEVEL = 23 EXTRAVERSION = NAME = TOSSUG Baby Fish diff --git a/block/blk-core.c b/block/blk-core.c index acf3bf6a..2c66daba 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -741,9 +741,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->sg_reserved_size = INT_MAX; + /* Protect q->elevator from elevator_change */ + mutex_lock(&q->sysfs_lock); + /* init elevator */ - if (elevator_init(q, NULL)) + if (elevator_init(q, NULL)) { + mutex_unlock(&q->sysfs_lock); return NULL; + } + + mutex_unlock(&q->sysfs_lock); + return q; } EXPORT_SYMBOL(blk_init_allocated_queue); diff --git a/block/elevator.c b/block/elevator.c index 668394d1..6d765f7e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -186,6 +186,12 @@ int elevator_init(struct request_queue *q, char *name) struct elevator_type *e = NULL; int err; + /* + * q->sysfs_lock must be held to provide mutual exclusion between + * elevator_switch() and here. + */ + lockdep_assert_held(&q->sysfs_lock); + if (unlikely(q->elevator)) return 0; @@ -959,7 +965,7 @@ fail_init: /* * Switch this queue to the given IO scheduler. */ -int elevator_change(struct request_queue *q, const char *name) +static int __elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; @@ -981,6 +987,18 @@ int elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } + +int elevator_change(struct request_queue *q, const char *name) +{ + int ret; + + /* Protect q->elevator from elevator_init() */ + mutex_lock(&q->sysfs_lock); + ret = __elevator_change(q, name); + mutex_unlock(&q->sysfs_lock); + + return ret; +} EXPORT_SYMBOL(elevator_change); ssize_t elv_iosched_store(struct request_queue *q, const char *name, @@ -991,7 +1009,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!q->elevator) return count; - ret = elevator_change(q, name); + ret = __elevator_change(q, name); if (!ret) return count; diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 9413e3f2..a68b56a3 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, struct hash_ctx *ctx = ask->private; int err; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); sg_init_table(ctx->sgl.sg, 1); sg_set_page(ctx->sgl.sg, page, size, offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 9e4854cb..0f1cb0fc 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_sg_list *sgl; int err = -EINVAL; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); if (!ctx->more && ctx->used) goto unlock; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 272f0092..1bdf104e 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3511,7 +3511,7 @@ static int init_card(struct atm_dev *dev) tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */ if (tmp) { memcpy(card->atmdev->esi, tmp->dev_addr, 6); - + dev_put(tmp); printk("%s: ESI %pM\n", card->name, card->atmdev->esi); } /* diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 2d31281e..3165811e 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -32,11 +32,23 @@ #include #include -#include - #include -#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event)) +/* + * Size of a cn_msg followed by a proc_event structure. Since the + * sizeof struct cn_msg is a multiple of 4 bytes, but not 8 bytes, we + * add one 4-byte word to the size here, and then start the actual + * cn_msg structure 4 bytes into the stack buffer. The result is that + * the immediately following proc_event structure is aligned to 8 bytes. + */ +#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event) + 4) + +/* See comment above; we test our assumption about sizeof struct cn_msg here. */ +static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer) +{ + BUILD_BUG_ON(sizeof(struct cn_msg) != 20); + return (struct cn_msg *)(buffer + 4); +} static atomic_t proc_event_num_listeners = ATOMIC_INIT(0); static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; @@ -56,19 +68,19 @@ void proc_fork_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; struct task_struct *parent; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_FORK; rcu_read_lock(); parent = rcu_dereference(task->real_parent); @@ -91,17 +103,17 @@ void proc_exec_connector(struct task_struct *task) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; ev->event_data.exec.process_tgid = task->tgid; @@ -117,14 +129,14 @@ void proc_id_connector(struct task_struct *task, int which_id) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; const struct cred *cred; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->what = which_id; @@ -145,7 +157,7 @@ void proc_id_connector(struct task_struct *task, int which_id) rcu_read_unlock(); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ @@ -159,17 +171,17 @@ void proc_sid_connector(struct task_struct *task) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_SID; ev->event_data.sid.process_pid = task->pid; ev->event_data.sid.process_tgid = task->tgid; @@ -186,17 +198,17 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_PTRACE; ev->event_data.ptrace.process_pid = task->pid; ev->event_data.ptrace.process_tgid = task->tgid; @@ -221,17 +233,17 @@ void proc_comm_connector(struct task_struct *task) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_COMM; ev->event_data.comm.process_pid = task->pid; ev->event_data.comm.process_tgid = task->tgid; @@ -248,18 +260,18 @@ void proc_coredump_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_COREDUMP; ev->event_data.coredump.process_pid = task->pid; ev->event_data.coredump.process_tgid = task->tgid; @@ -275,18 +287,18 @@ void proc_exit_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; ev->event_data.exit.process_tgid = task->tgid; @@ -312,18 +324,18 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); msg->seq = rcvd_seq; ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->cpu = -1; ev->what = PROC_EVENT_NONE; ev->event_data.ack.err = err; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 635e8f2e..01ed8ac4 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -24,6 +24,7 @@ * Authors: Christian König */ #include +#include #include #include #include "radeon.h" @@ -57,35 +58,57 @@ enum r600_hdmi_iec_status_bits { static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = { /* 32kHz 44.1kHz 48kHz */ /* Clock N CTS N CTS N CTS */ - { 25175, 4576, 28125, 7007, 31250, 6864, 28125 }, /* 25,20/1.001 MHz */ + { 25175, 4096, 25175, 28224, 125875, 6144, 25175 }, /* 25,20/1.001 MHz */ { 25200, 4096, 25200, 6272, 28000, 6144, 25200 }, /* 25.20 MHz */ { 27000, 4096, 27000, 6272, 30000, 6144, 27000 }, /* 27.00 MHz */ { 27027, 4096, 27027, 6272, 30030, 6144, 27027 }, /* 27.00*1.001 MHz */ { 54000, 4096, 54000, 6272, 60000, 6144, 54000 }, /* 54.00 MHz */ { 54054, 4096, 54054, 6272, 60060, 6144, 54054 }, /* 54.00*1.001 MHz */ - { 74176, 11648, 210937, 17836, 234375, 11648, 140625 }, /* 74.25/1.001 MHz */ + { 74176, 4096, 74176, 5733, 75335, 6144, 74176 }, /* 74.25/1.001 MHz */ { 74250, 4096, 74250, 6272, 82500, 6144, 74250 }, /* 74.25 MHz */ - { 148352, 11648, 421875, 8918, 234375, 5824, 140625 }, /* 148.50/1.001 MHz */ + { 148352, 4096, 148352, 5733, 150670, 6144, 148352 }, /* 148.50/1.001 MHz */ { 148500, 4096, 148500, 6272, 165000, 6144, 148500 }, /* 148.50 MHz */ - { 0, 4096, 0, 6272, 0, 6144, 0 } /* Other */ }; -/* - * calculate CTS value if it's not found in the table - */ -static void r600_hdmi_calc_cts(uint32_t clock, int *CTS, int N, int freq) -{ - u64 n; - u32 d; - if (*CTS == 0) { - n = (u64)clock * (u64)N * 1000ULL; - d = 128 * freq; - do_div(n, d); - *CTS = n; - } - DRM_DEBUG("Using ACR timing N=%d CTS=%d for frequency %d\n", - N, *CTS, freq); +/* + * calculate CTS and N values if they are not found in the table + */ +static void r600_hdmi_calc_cts(uint32_t clock, int *CTS, int *N, int freq) +{ + int n, cts; + unsigned long div, mul; + + /* Safe, but overly large values */ + n = 128 * freq; + cts = clock * 1000; + + /* Smallest valid fraction */ + div = gcd(n, cts); + + n /= div; + cts /= div; + + /* + * The optimal N is 128*freq/1000. Calculate the closest larger + * value that doesn't truncate any bits. + */ + mul = ((128*freq/1000) + (n-1))/n; + + n *= mul; + cts *= mul; + + /* Check that we are in spec (not always possible) */ + if (n < (128*freq/1500)) + printk(KERN_WARNING "Calculated ACR N value is too small. You may experience audio problems.\n"); + if (n > (128*freq/300)) + printk(KERN_WARNING "Calculated ACR N value is too large. You may experience audio problems.\n"); + + *N = n; + *CTS = cts; + + DRM_DEBUG("Calculated ACR timing N=%d CTS=%d for frequency %d\n", + *N, *CTS, freq); } struct radeon_hdmi_acr r600_hdmi_acr(uint32_t clock) @@ -93,15 +116,16 @@ struct radeon_hdmi_acr r600_hdmi_acr(uint32_t clock) struct radeon_hdmi_acr res; u8 i; - for (i = 0; r600_hdmi_predefined_acr[i].clock != clock && - r600_hdmi_predefined_acr[i].clock != 0; i++) - ; - res = r600_hdmi_predefined_acr[i]; + /* Precalculated values for common clocks */ + for (i = 0; i < ARRAY_SIZE(r600_hdmi_predefined_acr); i++) { + if (r600_hdmi_predefined_acr[i].clock == clock) + return r600_hdmi_predefined_acr[i]; + } - /* In case some CTS are missing */ - r600_hdmi_calc_cts(clock, &res.cts_32khz, res.n_32khz, 32000); - r600_hdmi_calc_cts(clock, &res.cts_44_1khz, res.n_44_1khz, 44100); - r600_hdmi_calc_cts(clock, &res.cts_48khz, res.n_48khz, 48000); + /* And odd clocks get manually calculated */ + r600_hdmi_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000); + r600_hdmi_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100); + r600_hdmi_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000); return res; } diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index 1bfd292c..06eb45fa 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -47,6 +47,7 @@ #define DFP_RDESC_ORIG_SIZE 97 #define FV_RDESC_ORIG_SIZE 130 #define MOMO_RDESC_ORIG_SIZE 87 +#define MOMO2_RDESC_ORIG_SIZE 87 /* Fixed report descriptors for Logitech Driving Force (and Pro) * wheel controllers @@ -284,6 +285,54 @@ static __u8 momo_rdesc_fixed[] = { 0xC0 /* End Collection */ }; +static __u8 momo2_rdesc_fixed[] = { +0x05, 0x01, /* Usage Page (Desktop), */ +0x09, 0x04, /* Usage (Joystik), */ +0xA1, 0x01, /* Collection (Application), */ +0xA1, 0x02, /* Collection (Logical), */ +0x95, 0x01, /* Report Count (1), */ +0x75, 0x0A, /* Report Size (10), */ +0x15, 0x00, /* Logical Minimum (0), */ +0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ +0x35, 0x00, /* Physical Minimum (0), */ +0x46, 0xFF, 0x03, /* Physical Maximum (1023), */ +0x09, 0x30, /* Usage (X), */ +0x81, 0x02, /* Input (Variable), */ +0x95, 0x0A, /* Report Count (10), */ +0x75, 0x01, /* Report Size (1), */ +0x25, 0x01, /* Logical Maximum (1), */ +0x45, 0x01, /* Physical Maximum (1), */ +0x05, 0x09, /* Usage Page (Button), */ +0x19, 0x01, /* Usage Minimum (01h), */ +0x29, 0x0A, /* Usage Maximum (0Ah), */ +0x81, 0x02, /* Input (Variable), */ +0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ +0x09, 0x00, /* Usage (00h), */ +0x95, 0x04, /* Report Count (4), */ +0x81, 0x02, /* Input (Variable), */ +0x95, 0x01, /* Report Count (1), */ +0x75, 0x08, /* Report Size (8), */ +0x26, 0xFF, 0x00, /* Logical Maximum (255), */ +0x46, 0xFF, 0x00, /* Physical Maximum (255), */ +0x09, 0x01, /* Usage (01h), */ +0x81, 0x02, /* Input (Variable), */ +0x05, 0x01, /* Usage Page (Desktop), */ +0x09, 0x31, /* Usage (Y), */ +0x81, 0x02, /* Input (Variable), */ +0x09, 0x32, /* Usage (Z), */ +0x81, 0x02, /* Input (Variable), */ +0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ +0x09, 0x00, /* Usage (00h), */ +0x81, 0x02, /* Input (Variable), */ +0xC0, /* End Collection, */ +0xA1, 0x02, /* Collection (Logical), */ +0x09, 0x02, /* Usage (02h), */ +0x95, 0x07, /* Report Count (7), */ +0x91, 0x02, /* Output (Variable), */ +0xC0, /* End Collection, */ +0xC0 /* End Collection */ +}; + /* * Certain Logitech keyboards send in report #3 keys which are far * above the logical maximum described in descriptor. This extends @@ -343,6 +392,15 @@ static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, } break; + case USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2: + if (*rsize == MOMO2_RDESC_ORIG_SIZE) { + hid_info(hdev, + "fixing up Logitech Momo Racing Force (Black) report descriptor\n"); + rdesc = momo2_rdesc_fixed; + *rsize = sizeof(momo2_rdesc_fixed); + } + break; + case USB_DEVICE_ID_LOGITECH_VIBRATION_WHEEL: if (*rsize == FV_RDESC_ORIG_SIZE) { hid_info(hdev, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fa004b11..25943a68 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -782,7 +782,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, int offset; BUG_ON(!domain->pgd); - BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); + + if (addr_width < BITS_PER_LONG && pfn >> addr_width) + /* Address beyond IOMMU's addressing capabilities. */ + return NULL; + parent = domain->pgd; while (level > 0) { diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 5b19b2d6..45011f63 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -525,12 +525,13 @@ static int __init intel_irq_remapping_supported(void) if (disable_irq_remap) return 0; if (irq_remap_broken) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, - "This system BIOS has enabled interrupt remapping\n" - "on a chipset that contains an erratum making that\n" - "feature unstable. To maintain system stability\n" - "interrupt remapping is being disabled. Please\n" - "contact your BIOS vendor for an update\n"); + printk(KERN_WARNING + "This system BIOS has enabled interrupt remapping\n" + "on a chipset that contains an erratum making that\n" + "feature unstable. To maintain system stability\n" + "interrupt remapping is being disabled. Please\n" + "contact your BIOS vendor for an update\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); disable_irq_remap = 1; return 0; } diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index baf2686a..02125e6a 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -1083,8 +1083,10 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp) spin_unlock_irqrestore(&card->isdnloop_lock, flags); return -ENOMEM; } - for (i = 0; i < 3; i++) - strcpy(card->s0num[i], sdef.num[i]); + for (i = 0; i < 3; i++) { + strlcpy(card->s0num[i], sdef.num[i], + sizeof(card->s0num[0])); + } break; case ISDN_PTYPE_1TR6: if (isdnloop_fake(card, "DRV1.04TC-1TR6-CAPI-CNS-BASIS-29.11.95", @@ -1097,7 +1099,7 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp) spin_unlock_irqrestore(&card->isdnloop_lock, flags); return -ENOMEM; } - strcpy(card->s0num[0], sdef.num[0]); + strlcpy(card->s0num[0], sdef.num[0], sizeof(card->s0num[0])); card->s0num[1][0] = '\0'; card->s0num[2][0] = '\0'; break; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 79d385ed..3e618734 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -784,7 +784,7 @@ static int mmc_blk_cmd_error(struct request *req, const char *name, int error, * Otherwise we don't understand what happened, so abort. */ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, - struct mmc_blk_request *brq, int *ecc_err) + struct mmc_blk_request *brq, int *ecc_err, int *gen_err) { bool prev_cmd_status_valid = true; u32 status, stop_status = 0; @@ -822,6 +822,16 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, (brq->cmd.resp[0] & R1_CARD_ECC_FAILED)) *ecc_err = 1; + /* Flag General errors */ + if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) + if ((status & R1_ERROR) || + (brq->stop.resp[0] & R1_ERROR)) { + pr_err("%s: %s: general error sending stop or status command, stop cmd response %#x, card status %#x\n", + req->rq_disk->disk_name, __func__, + brq->stop.resp[0], status); + *gen_err = 1; + } + /* * Check the current card state. If it is in some data transfer * mode, tell it to stop (and hopefully transition back to TRAN.) @@ -841,6 +851,13 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, return ERR_ABORT; if (stop_status & R1_CARD_ECC_FAILED) *ecc_err = 1; + if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) + if (stop_status & R1_ERROR) { + pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n", + req->rq_disk->disk_name, __func__, + stop_status); + *gen_err = 1; + } } /* Check for set block count errors */ @@ -1084,7 +1101,7 @@ static int mmc_blk_err_check(struct mmc_card *card, mmc_active); struct mmc_blk_request *brq = &mq_mrq->brq; struct request *req = mq_mrq->req; - int ecc_err = 0; + int ecc_err = 0, gen_err = 0; /* * sbc.error indicates a problem with the set block count @@ -1098,7 +1115,7 @@ static int mmc_blk_err_check(struct mmc_card *card, */ if (brq->sbc.error || brq->cmd.error || brq->stop.error || brq->data.error) { - switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err)) { + switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) { case ERR_RETRY: return MMC_BLK_RETRY; case ERR_ABORT: @@ -1130,6 +1147,14 @@ static int mmc_blk_err_check(struct mmc_card *card, u32 status; unsigned long timeout; + /* Check stop command response */ + if (brq->stop.resp[0] & R1_ERROR) { + pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n", + req->rq_disk->disk_name, __func__, + brq->stop.resp[0]); + gen_err = 1; + } + timeout = jiffies + msecs_to_jiffies(MMC_BLK_TIMEOUT_MS); do { int err = get_card_status(card, &status, 5); @@ -1139,6 +1164,13 @@ static int mmc_blk_err_check(struct mmc_card *card, return MMC_BLK_CMD_ERR; } + if (status & R1_ERROR) { + pr_err("%s: %s: general error sending status command, card status %#x\n", + req->rq_disk->disk_name, __func__, + status); + gen_err = 1; + } + /* Timeout if the device never becomes ready for data * and never leaves the program state. */ @@ -1158,6 +1190,13 @@ static int mmc_blk_err_check(struct mmc_card *card, (R1_CURRENT_STATE(status) == R1_STATE_PRG)); } + /* if general error occurs, retry the write operation. */ + if (gen_err) { + pr_warn("%s: retrying write for general error\n", + req->rq_disk->disk_name); + return MMC_BLK_RETRY; + } + if (brq->data.error) { pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n", req->rq_disk->disk_name, brq->data.error, diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index d7434e0a..a88d04b3 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -537,8 +537,9 @@ static ssize_t bonding_store_arp_interval(struct device *d, goto out; } if (bond->params.mode == BOND_MODE_ALB || - bond->params.mode == BOND_MODE_TLB) { - pr_info("%s: ARP monitoring cannot be used with ALB/TLB. Only MII monitoring is supported on %s.\n", + bond->params.mode == BOND_MODE_TLB || + bond->params.mode == BOND_MODE_8023AD) { + pr_info("%s: ARP monitoring cannot be used with ALB/TLB/802.3ad. Only MII monitoring is supported on %s.\n", bond->dev->name, bond->dev->name); ret = -EINVAL; goto out; @@ -696,6 +697,8 @@ static ssize_t bonding_store_downdelay(struct device *d, int new_value, ret = count; struct bonding *bond = to_bond(d); + if (!rtnl_trylock()) + return restart_syscall(); if (!(bond->params.miimon)) { pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", bond->dev->name); @@ -729,6 +732,7 @@ static ssize_t bonding_store_downdelay(struct device *d, } out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR, @@ -751,6 +755,8 @@ static ssize_t bonding_store_updelay(struct device *d, int new_value, ret = count; struct bonding *bond = to_bond(d); + if (!rtnl_trylock()) + return restart_syscall(); if (!(bond->params.miimon)) { pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", bond->dev->name); @@ -784,6 +790,7 @@ static ssize_t bonding_store_updelay(struct device *d, } out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index a5c9df07..c72e214e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -264,6 +264,10 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) mdev->port_cnt++; + /* Initialize time stamp mechanism */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_init_timestamp(mdev); + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { if (!dev->caps.comp_pool) { mdev->profile.prof[i].rx_ring_num = @@ -301,10 +305,6 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mdev->pndev[i] = NULL; } - /* Initialize time stamp mechanism */ - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - mlx4_en_init_timestamp(mdev); - return mdev; err_mr: diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 9095ff93..064425d3 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -678,9 +678,6 @@ static void cp_tx (struct cp_private *cp) le32_to_cpu(txd->opts1) & 0xffff, PCI_DMA_TODEVICE); - bytes_compl += skb->len; - pkts_compl++; - if (status & LastFrag) { if (status & (TxError | TxFIFOUnder)) { netif_dbg(cp, tx_err, cp->dev, @@ -702,6 +699,8 @@ static void cp_tx (struct cp_private *cp) netif_dbg(cp, tx_done, cp->dev, "tx done, slot %d\n", tx_tail); } + bytes_compl += skb->len; + pkts_compl++; dev_kfree_skb_irq(skb); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 8245b896..6e3f7c65 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -3461,6 +3461,11 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x14, 0x9065); rtl_writephy(tp, 0x14, 0x1065); + /* Check ALDPS bit, disable it if enabled */ + rtl_writephy(tp, 0x1f, 0x0a43); + if (rtl_readphy(tp, 0x10) & 0x0004) + rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0004); + rtl_writephy(tp, 0x1f, 0x0000); } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 523d6b24..c70ff7da 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -661,6 +661,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, const struct iovec *iv, unsigned long total_len, size_t count, int noblock) { + int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN); struct sk_buff *skb; struct macvlan_dev *vlan; unsigned long len = total_len; @@ -703,6 +704,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; + if (copylen > good_linear) + copylen = good_linear; linear = copylen; if (iov_pages(iv, vnet_hdr_len + copylen, count) <= MAX_SKB_FRAGS) @@ -711,7 +714,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (!zerocopy) { copylen = len; - linear = vnet_hdr.hdr_len; + if (vnet_hdr.hdr_len > good_linear) + linear = good_linear; + else + linear = vnet_hdr.hdr_len; } skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b3051052..fe3fd778 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1217,6 +1217,8 @@ static int team_user_linkup_option_get(struct team *team, return 0; } +static void __team_carrier_check(struct team *team); + static int team_user_linkup_option_set(struct team *team, struct team_gsetter_ctx *ctx) { @@ -1224,6 +1226,7 @@ static int team_user_linkup_option_set(struct team *team, port->user.linkup = ctx->data.bool_val; team_refresh_port_linkup(port); + __team_carrier_check(port->team); return 0; } @@ -1243,6 +1246,7 @@ static int team_user_linkup_en_option_set(struct team *team, port->user.linkup_enabled = ctx->data.bool_val; team_refresh_port_linkup(port); + __team_carrier_check(port->team); return 0; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b18ead55..9ef85fea 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1069,6 +1069,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *skb; size_t len = total_len, align = NET_SKB_PAD, linear; struct virtio_net_hdr gso = { 0 }; + int good_linear; int offset = 0; int copylen; bool zerocopy = false; @@ -1109,12 +1110,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EINVAL; } + good_linear = SKB_MAX_HEAD(align); + if (msg_control) { /* There are 256 bytes to be copied in skb, so there is * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; + if (copylen > good_linear) + copylen = good_linear; linear = copylen; if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) zerocopy = true; @@ -1122,7 +1127,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (!zerocopy) { copylen = len; - linear = gso.hdr_len; + if (gso.hdr_len > good_linear) + linear = good_linear; + else + linear = gso.hdr_len; } skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index b3c215a4..0afda031 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -203,9 +203,6 @@ static void intr_complete (struct urb *urb) break; } - if (!netif_running (dev->net)) - return; - status = usb_submit_urb (urb, GFP_ATOMIC); if (status != 0) netif_err(dev, timer, dev->net, diff --git a/drivers/video/kyro/fbdev.c b/drivers/video/kyro/fbdev.c index 6157f74a..ec7fc87f 100644 --- a/drivers/video/kyro/fbdev.c +++ b/drivers/video/kyro/fbdev.c @@ -625,15 +625,15 @@ static int kyrofb_ioctl(struct fb_info *info, } break; case KYRO_IOCTL_UVSTRIDE: - if (copy_to_user(argp, &deviceInfo.ulOverlayUVStride, sizeof(unsigned long))) + if (copy_to_user(argp, &deviceInfo.ulOverlayUVStride, sizeof(deviceInfo.ulOverlayUVStride))) return -EFAULT; break; case KYRO_IOCTL_STRIDE: - if (copy_to_user(argp, &deviceInfo.ulOverlayStride, sizeof(unsigned long))) + if (copy_to_user(argp, &deviceInfo.ulOverlayStride, sizeof(deviceInfo.ulOverlayStride))) return -EFAULT; break; case KYRO_IOCTL_OVERLAY_OFFSET: - if (copy_to_user(argp, &deviceInfo.ulOverlayOffset, sizeof(unsigned long))) + if (copy_to_user(argp, &deviceInfo.ulOverlayOffset, sizeof(deviceInfo.ulOverlayOffset))) return -EFAULT; break; } diff --git a/fs/aio.c b/fs/aio.c index e0e9a97e..bdcb2ef2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -423,10 +423,12 @@ static void kill_ioctx_rcu(struct rcu_head *head) * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct kioctx *ctx) +static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) { if (!atomic_xchg(&ctx->dead, 1)) { + spin_lock(&mm->ioctx_lock); hlist_del_rcu(&ctx->list); + spin_unlock(&mm->ioctx_lock); /* * It'd be more correct to do this in free_ioctx(), after all @@ -494,7 +496,7 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(ctx); + kill_ioctx(mm, ctx); } } @@ -854,7 +856,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - kill_ioctx(ioctx); + kill_ioctx(current->mm, ioctx); put_ioctx(ioctx); } @@ -872,7 +874,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - kill_ioctx(ioctx); + kill_ioctx(current->mm, ioctx); put_ioctx(ioctx); return 0; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5e999680..ca01d830 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1612,6 +1612,12 @@ xfs_file_ioctl( case XFS_IOC_FREE_EOFBLOCKS: { struct xfs_eofblocks eofb; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + if (copy_from_user(&eofb, arg, sizeof(eofb))) return -XFS_ERROR(EFAULT); diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 963d7143..ae1193bc 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -30,6 +30,7 @@ enum clock_event_nofitiers { #include struct clock_event_device; +struct module; /* Clock event mode commands */ enum clock_event_mode { @@ -83,6 +84,7 @@ enum clock_event_mode { * @irq: IRQ number (only for non CPU local devices) * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code + * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); @@ -112,6 +114,7 @@ struct clock_event_device { int irq; const struct cpumask *cpumask; struct list_head list; + struct module *owner; } ____cacheline_aligned; /* @@ -150,7 +153,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); -extern int clockevents_register_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 99a652fe..a5bcc1d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -331,11 +331,6 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif -#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \ - defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) -#define NET_SKBUFF_NF_DEFRAG_NEEDED 1 -#endif - /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -368,7 +363,6 @@ typedef unsigned char *sk_buff_data_t; * @protocol: Packet protocol from driver * @destructor: Destruct function * @nfct: Associated connection, if any - * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -455,9 +449,6 @@ struct sk_buff { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack *nfct; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - struct sk_buff *nfct_reasm; -#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif @@ -2724,18 +2715,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED -static inline void nf_conntrack_get_reasm(struct sk_buff *skb) -{ - if (skb) - atomic_inc(&skb->users); -} -static inline void nf_conntrack_put_reasm(struct sk_buff *skb) -{ - if (skb) - kfree_skb(skb); -} -#endif #ifdef CONFIG_BRIDGE_NETFILTER static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { @@ -2754,10 +2733,6 @@ static inline void nf_reset(struct sk_buff *skb) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); - skb->nfct_reasm = NULL; -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; @@ -2779,10 +2754,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) nf_conntrack_get(src->nfct); dst->nfctinfo = src->nfctinfo; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - dst->nfct_reasm = src->nfct_reasm; - nf_conntrack_get_reasm(src->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); @@ -2794,9 +2765,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(dst->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(dst->nf_bridge); #endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6097eff2..d56562ce 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, OOM_KILL, #ifdef CONFIG_NUMA_BALANCING NUMA_PTE_UPDATES, + NUMA_HUGE_PTE_UPDATES, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4c062ccf..f0c13a38 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size; struct ip_vs_iphdr { __u32 len; /* IPv4 simply where L4 starts IPv6 where L4 Transport Header starts */ - __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */ __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ __s16 protocol; __s32 flags; @@ -117,34 +116,12 @@ struct ip_vs_iphdr { union nf_inet_addr daddr; }; -/* Dependency to module: nf_defrag_ipv6 */ -#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) -static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) -{ - return skb->nfct_reasm; -} -static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, - int len, void *buffer, - const struct ip_vs_iphdr *ipvsh) -{ - if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb))) - return skb_header_pointer(skb_nfct_reasm(skb), - ipvsh->thoff_reasm, len, buffer); - - return skb_header_pointer(skb, offset, len, buffer); -} -#else -static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) -{ - return NULL; -} static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, int len, void *buffer, const struct ip_vs_iphdr *ipvsh) { return skb_header_pointer(skb, offset, len, buffer); } -#endif static inline void ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr) @@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) (struct ipv6hdr *)skb_network_header(skb); iphdr->saddr.in6 = iph->saddr; iphdr->daddr.in6 = iph->daddr; - /* ipv6_find_hdr() updates len, flags, thoff_reasm */ - iphdr->thoff_reasm = 0; + /* ipv6_find_hdr() updates len, flags */ iphdr->len = 0; iphdr->flags = 0; iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, &iphdr->fragoffs, &iphdr->flags); - /* get proto from re-assembled packet and it's offset */ - if (skb_nfct_reasm(skb)) - iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb), - &iphdr->thoff_reasm, - -1, NULL, NULL); - } else #endif { diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fd79c9a1..17920d84 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -6,10 +6,7 @@ extern void nf_defrag_ipv6_enable(void); extern int nf_ct_frag6_init(void); extern void nf_ct_frag6_cleanup(void); extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user); -extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, - struct net_device *out, - int (*okfn)(struct sk_buff *)); +extern void nf_ct_frag6_consume_orig(struct sk_buff *skb); struct inet_frags_ctl; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 6a23c6c5..9df0e3b1 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "tick-internal.h" @@ -23,10 +22,6 @@ /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); - -/* Notification for clock events */ -static RAW_NOTIFIER_HEAD(clockevents_chain); - /* Protection for the above */ static DEFINE_RAW_SPINLOCK(clockevents_lock); @@ -267,30 +262,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, return (rc && force) ? clockevents_program_min_delta(dev) : rc; } -/** - * clockevents_register_notifier - register a clock events change listener - */ -int clockevents_register_notifier(struct notifier_block *nb) -{ - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&clockevents_lock, flags); - ret = raw_notifier_chain_register(&clockevents_chain, nb); - raw_spin_unlock_irqrestore(&clockevents_lock, flags); - - return ret; -} - -/* - * Notify about a clock event change. Called with clockevents_lock - * held. - */ -static void clockevents_do_notify(unsigned long reason, void *dev) -{ - raw_notifier_call_chain(&clockevents_chain, reason, dev); -} - /* * Called after a notify add to make devices available which were * released from the notifier call. @@ -304,7 +275,7 @@ static void clockevents_notify_released(void) struct clock_event_device, list); list_del(&dev->list); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); } } @@ -325,7 +296,7 @@ void clockevents_register_device(struct clock_event_device *dev) raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); clockevents_notify_released(); raw_spin_unlock_irqrestore(&clockevents_lock, flags); @@ -421,6 +392,7 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { + module_put(old->owner); clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); @@ -468,7 +440,7 @@ void clockevents_notify(unsigned long reason, void *arg) int cpu; raw_spin_lock_irqsave(&clockevents_lock, flags); - clockevents_do_notify(reason, arg); + tick_notify(reason, arg); switch (reason) { case CLOCK_EVT_NOTIFY_CPU_DEAD: diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index bb221517..af8d1d4f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -475,6 +475,7 @@ static void sync_cmos_clock(struct work_struct *work) * called as close as possible to 500 ms before the new second starts. * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... + * We want the clock to be within a couple of ticks from the target. */ if (!ntp_synced()) { /* @@ -485,7 +486,7 @@ static void sync_cmos_clock(struct work_struct *work) } getnstimeofday(&now); - if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) { + if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { struct timespec adjust = now; fail = -ENODEV; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 297b90b5..52d4827c 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "tick-internal.h" @@ -65,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) /* * Check, if the device can be utilized as broadcast device: */ -int tick_check_broadcast_device(struct clock_event_device *dev) +static bool tick_check_broadcast_device(struct clock_event_device *curdev, + struct clock_event_device *newdev) +{ + if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || + (newdev->features & CLOCK_EVT_FEAT_C3STOP)) + return false; + + if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT && + !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) + return false; + + return !curdev || newdev->rating > curdev->rating; +} + +/* + * Conditionally install/replace broadcast device + */ +void tick_install_broadcast_device(struct clock_event_device *dev) { struct clock_event_device *cur = tick_broadcast_device.evtdev; - if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || - (tick_broadcast_device.evtdev && - tick_broadcast_device.evtdev->rating >= dev->rating) || - (dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; + if (!tick_check_broadcast_device(cur, dev)) + return; - clockevents_exchange_device(tick_broadcast_device.evtdev, dev); + if (!try_module_get(dev->owner)) + return; + + clockevents_exchange_device(cur, dev); if (cur) cur->event_handler = clockevents_handle_noop; tick_broadcast_device.evtdev = dev; @@ -91,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev) */ if (dev->features & CLOCK_EVT_FEAT_ONESHOT) tick_clock_notify(); - return 1; } /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7ce5e5a4..086216c4 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -206,14 +207,50 @@ static void tick_setup_device(struct tick_device *td, tick_setup_oneshot(newdev, handler, next_event); } +static bool tick_check_percpu(struct clock_event_device *curdev, + struct clock_event_device *newdev, int cpu) +{ + if (!cpumask_test_cpu(cpu, newdev->cpumask)) + return false; + if (cpumask_equal(newdev->cpumask, cpumask_of(cpu))) + return true; + /* Check if irq affinity can be set */ + if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq)) + return false; + /* Prefer an existing cpu local device */ + if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) + return false; + return true; +} + +static bool tick_check_preferred(struct clock_event_device *curdev, + struct clock_event_device *newdev) +{ + /* Prefer oneshot capable device */ + if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) { + if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT)) + return false; + if (tick_oneshot_mode_active()) + return false; + } + + /* + * Use the higher rated one, but prefer a CPU local device with a lower + * rating than a non-CPU local device + */ + return !curdev || + newdev->rating > curdev->rating || + !cpumask_equal(curdev->cpumask, newdev->cpumask); +} + /* * Check, if the new registered device should be used. */ -static int tick_check_new_device(struct clock_event_device *newdev) +void tick_check_new_device(struct clock_event_device *newdev) { struct clock_event_device *curdev; struct tick_device *td; - int cpu, ret = NOTIFY_OK; + int cpu; unsigned long flags; raw_spin_lock_irqsave(&tick_device_lock, flags); @@ -226,40 +263,15 @@ static int tick_check_new_device(struct clock_event_device *newdev) curdev = td->evtdev; /* cpu local device ? */ - if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { + if (!tick_check_percpu(curdev, newdev, cpu)) + goto out_bc; - /* - * If the cpu affinity of the device interrupt can not - * be set, ignore it. - */ - if (!irq_can_set_affinity(newdev->irq)) - goto out_bc; + /* Preference decision */ + if (!tick_check_preferred(curdev, newdev)) + goto out_bc; - /* - * If we have a cpu local device already, do not replace it - * by a non cpu local device - */ - if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) - goto out_bc; - } - - /* - * If we have an active device, then check the rating and the oneshot - * feature. - */ - if (curdev) { - /* - * Prefer one shot capable devices ! - */ - if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && - !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) - goto out_bc; - /* - * Check the rating - */ - if (curdev->rating >= newdev->rating) - goto out_bc; - } + if (!try_module_get(newdev->owner)) + return; /* * Replace the eventually existing device by the new @@ -276,18 +288,14 @@ static int tick_check_new_device(struct clock_event_device *newdev) tick_oneshot_notify(); raw_spin_unlock_irqrestore(&tick_device_lock, flags); - return NOTIFY_STOP; + return; out_bc: /* * Can the new device be used as a broadcast device ? */ - if (tick_check_broadcast_device(newdev)) - ret = NOTIFY_STOP; - + tick_install_broadcast_device(newdev); raw_spin_unlock_irqrestore(&tick_device_lock, flags); - - return ret; } /* @@ -361,17 +369,10 @@ static void tick_resume(void) raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -/* - * Notification about clock event devices - */ -static int tick_notify(struct notifier_block *nb, unsigned long reason, - void *dev) +void tick_notify(unsigned long reason, void *dev) { switch (reason) { - case CLOCK_EVT_NOTIFY_ADD: - return tick_check_new_device(dev); - case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_OFF: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: @@ -405,21 +406,12 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, default: break; } - - return NOTIFY_OK; } -static struct notifier_block tick_notifier = { - .notifier_call = tick_notify, -}; - /** * tick_init - initialize the tick control - * - * Register the notifier with the clockevents framework */ void __init tick_init(void) { - clockevents_register_notifier(&tick_notifier); tick_broadcast_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f0299eae..60742fe6 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -18,6 +18,8 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); +extern void tick_notify(unsigned long reason, void *dev); +extern void tick_check_new_device(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); @@ -90,7 +92,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; } */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); -extern int tick_check_broadcast_device(struct clock_event_device *dev); +extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); @@ -102,9 +104,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); #else /* !BROADCAST */ -static inline int tick_check_broadcast_device(struct clock_event_device *dev) +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { - return 0; } static inline int tick_is_broadcast_device(struct clock_event_device *dev) diff --git a/mm/mprotect.c b/mm/mprotect.c index 2bbb648e..d4d5399c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -135,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pmd_t *pmd; unsigned long next; unsigned long pages = 0; + unsigned long nr_huge_updates = 0; bool all_same_node; pmd = pmd_offset(pud, addr); @@ -145,7 +146,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, split_huge_page_pmd(vma, addr, pmd); else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) { - pages++; + pages += HPAGE_PMD_NR; + nr_huge_updates++; continue; } /* fall through */ @@ -165,6 +167,9 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, change_pmd_protnuma(vma->vm_mm, addr, pmd); } while (pmd++, addr = next, addr != end); + if (nr_huge_updates) + count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); + return pages; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 3c612605..d07c5efd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -783,6 +783,7 @@ const char * const vmstat_text[] = { #ifdef CONFIG_NUMA_BALANCING "numa_pte_updates", + "numa_huge_pte_updates", "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba609..32bd1e87 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); diff --git a/net/compat.c b/net/compat.c index 344cc757..2a546269 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); @@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else { kern_msg->msg_name = NULL; kern_msg->msg_namelen = 0; diff --git a/net/core/dev.c b/net/core/dev.c index bfdc9ce4..14f92c64 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4478,7 +4478,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 03da61bd..31a12f28 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -507,7 +507,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && diff --git a/net/core/iovec.c b/net/core/iovec.c index 1117a26a..04e43ef0 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; m->msg_namelen = 0; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c..ebbea537 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2515,6 +2515,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2536,6 +2538,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b98f358d..df5f8aa5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -585,9 +585,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b..31b127e8 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -862,7 +862,7 @@ lowpan_process_data(struct sk_buff *skb) * Traffic class carried in-line * ECN + DSCP (1 byte), Flow Label is elided */ - case 1: /* 10b */ + case 2: /* 10b */ if (lowpan_fetch_skb_u8(skb, &tmp)) goto drop; @@ -875,7 +875,7 @@ lowpan_process_data(struct sk_buff *skb) * Flow Label carried in-line * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided */ - case 2: /* 01b */ + case 1: /* 01b */ if (lowpan_fetch_skb_u8(skb, &tmp)) goto drop; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 093b5713..5f3dc1df 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -57,7 +57,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68005480..3e51a52c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -351,6 +351,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!rt->dst.xfrm || rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { dev->stats.tx_carrier_errors++; + ip_rt_put(rt); goto tx_error_icmp; } tdev = rt->dst.dev; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 81e7b1aa..fcb63dcd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -583,7 +583,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 642d5779..0addeb41 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1725,8 +1725,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 54f441ca..1f49f056 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -813,12 +813,6 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, xmit_size_goal = min_t(u32, gso_size, sk->sk_gso_max_size - 1 - hlen); - /* TSQ : try to have at least two segments in flight - * (one in NIC TX ring, another in Qdisc) - */ - xmit_size_goal = min_t(u32, xmit_size_goal, - sysctl_tcp_limit_output_bytes >> 1); - xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); /* We try hard to avoid divides here */ @@ -2915,6 +2909,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int sum_truesize = 0; struct tcphdr *th; unsigned int thlen; unsigned int seq; @@ -2998,13 +2993,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { skb->destructor = gso_skb->destructor; skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; + sum_truesize += skb->truesize; } skb = skb->next; th = tcp_hdr(skb); @@ -3021,7 +3010,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); + sum_truesize += skb->truesize; + atomic_add(sum_truesize - gso_skb->truesize, + &skb->sk->sk_wmem_alloc); } delta = htonl(oldlen + (skb->tail - skb->transport_header) + diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e24e2f89..55fec18e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -176,7 +176,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7ef93515..cae73997 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1877,8 +1877,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * - better RTT estimation and ACK scheduling * - faster recovery * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) */ - limit = max(skb->truesize, sk->sk_pacing_rate >> 10); + limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, + sk->sk_pacing_rate >> 10); if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); @@ -3111,7 +3115,6 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; tcp_xmit_probe_skb(sk, 0); } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f6153978..5b3fb0aa 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -972,7 +972,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -1071,6 +1071,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 46e88433..f0ccdb78 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -453,8 +453,10 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; + rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) count++; + rcu_read_unlock_bh(); if (room <= 0 || ((count >= FL_MAX_PER_SOCK || diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8f45bafa..3dddff2f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -141,8 +141,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -150,7 +150,8 @@ static int ip6_finish_output2(struct sk_buff *skb) static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) + dst_allfrag(skb_dst(skb)) || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(skb, ip6_finish_output2); else return ip6_finish_output2(skb); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c9b6a6e6..97cd7507 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -172,63 +172,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int __ipv6_conntrack_in(struct net *net, - unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb->nfct_reasm; - const struct nf_conn_help *help; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - /* This packet is fragmented and has reassembled packet. */ - if (reasm) { - /* Reassembled packet isn't parsed yet ? */ - if (!reasm->nfct) { - unsigned int ret; - - ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); - if (ret != NF_ACCEPT) - return ret; - } - - /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. In case of unconfirmed connections NAT - * might reassign a helper, so the entire packet is also - * required. - */ - ct = nf_ct_get(reasm, &ctinfo); - if (ct != NULL && !nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { - nf_conntrack_get_reasm(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, - (struct net_device *)in, - (struct net_device *)out, - okfn, NF_IP6_PRI_CONNTRACK + 1); - return NF_DROP_ERR(-ECANCELED); - } - } - - nf_conntrack_get(reasm->nfct); - skb->nfct = reasm->nfct; - skb->nfctinfo = reasm->nfctinfo; - return NF_ACCEPT; - } - - return nf_conntrack_in(net, PF_INET6, hooknum, skb); -} - static unsigned int ipv6_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return nf_conntrack_in(dev_net(in), PF_INET6, hooknum, skb); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -242,7 +192,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return nf_conntrack_in(dev_net(out), PF_INET6, hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index dffdc1a3..253566a8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -621,31 +621,16 @@ ret_orig: return skb; } -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) +void nf_ct_frag6_consume_orig(struct sk_buff *skb) { struct sk_buff *s, *s2; - unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - nf_conntrack_put_reasm(s->nfct_reasm); - nf_conntrack_get_reasm(skb); - s->nfct_reasm = skb; - s2 = s->next; s->next = NULL; - - if (ret != -ECANCELED) - ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, - in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - else - kfree_skb(s); - + consume_skb(s); s = s2; } - nf_conntrack_put_reasm(skb); } static int nf_ct_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121f..581dd9ed 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, - (struct net_device *)out, okfn); + nf_ct_frag6_consume_orig(reasm); + + NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, + (struct net_device *) in, (struct net_device *) out, + okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f7b017d9..e0917a62 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -728,8 +728,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + if (rinfo->prefix_len == 0) + rt = rt6_get_dflt_router(gwaddr, dev); + else + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, + gwaddr, dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 23b8eb53..21a3a475 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1131,12 +1131,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, @@ -1606,12 +1600,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags. */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, @@ -1663,9 +1651,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs && !skb_nfct_reasm(skb)) { + if (iph.fragoffs) { /* Fragment that couldn't be mapped to a conn entry - * and don't have any pointer to a reasm skb * is missing module nf_defrag_ipv6 */ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); @@ -1747,38 +1734,6 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 -/* - * AF_INET6 fragment handling - * Copy info from first fragment, to the rest of them. - */ -static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb_nfct_reasm(skb); - struct net *net; - - /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. - * ipvs_property is set when checking first fragment - * in ip_vs_in() and ip_vs_out(). - */ - if (reasm) - IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); - if (!reasm || !reasm->ipvs_property) - return NF_ACCEPT; - - net = skb_net(skb); - if (!net_ipvs(net)->enable) - return NF_ACCEPT; - - /* Copy stored fw mark, saved in ip_vs_{in,out} */ - skb->mark = reasm->mark; - - return NF_ACCEPT; -} - /* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients @@ -1916,14 +1871,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 - /* After mangle & nat fetch 2:nd fragment and following */ - { - .hook = ip_vs_preroute_frag6, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST + 1, - }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 9ef22bdc..bed5f704 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { - struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; @@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS */ - if (reasm) { - skb = reasm; - dataoff = iph.thoff_reasm + sizeof(struct udphdr); - } else - dataoff = iph.len + sizeof(struct udphdr); + dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - /* todo: Check if this will mess-up the reasm skb !!! /HS */ retc = skb_linearize(skb); if (retc < 0) return retc; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ebde1ef..798e3f25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -432,9 +439,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } @@ -2043,12 +2050,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2061,7 +2080,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2075,19 +2094,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2164,8 +2181,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2203,7 +2219,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2219,7 +2234,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2231,19 +2246,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; + err = -ENETDOWN; + if (unlikely(!(dev->flags & IFF_UP))) + goto out_unlock; + if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; - - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_unlock; - if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2377,15 +2390,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2605,6 +2617,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b456..1035fa2d 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/socket.c b/net/socket.c index 40dbfe5a..1539c44f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -215,12 +215,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -1995,7 +1996,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } @@ -2265,6 +2266,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, /* We assume all kernel code knows the size of sockaddr_storage */ msg_sys->msg_namelen = 0; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,