diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21041898157a1f..971cb4bbf32362 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -265,6 +265,7 @@ extern bool __read_mostly sysctl_sched_itmt_enabled;
 
 /* Interface to set priority of a cpu */
 void sched_set_itmt_core_prio(int prio, int core_cpu);
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu);
 
 /* Interface to notify scheduler that system supports ITMT */
 int sched_set_itmt_support(void);
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index bc7671f920a7ed..c69afdb06d2ccc 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	/* update the ITMT scheduler logic to use the power policy data */
+	/* scale val (EPB 0..15) up by 2 so the ratio range is 226 - 256 */
+	sched_set_itmt_power_ratio(256 - val * 2, cpu);
+
 	return count;
 }
 
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 243a769fdd97b9..3d4bce75bbc434 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -26,6 +26,7 @@ static DEFINE_MUTEX(itmt_update_mutex);
 
 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio);
 
 /* Boolean to track if system has ITMT capabilities */
 static bool __read_mostly sched_itmt_capable;
 
@@ -167,7 +168,12 @@ void sched_clear_itmt_support(void)
 
 int arch_asym_cpu_priority(int cpu)
 {
-	return per_cpu(sched_core_priority, cpu);
+	int power_ratio = per_cpu(sched_power_ratio, cpu);
+
+	/* a power ratio of 0 (uninitialized) is treated as the default EPB of 6 */
+	if (power_ratio == 0)
+		power_ratio = 256 - 2 * 6;
+	return per_cpu(sched_core_priority, cpu) * power_ratio / 256;
 }
 
 /**
@@ -188,3 +194,24 @@ void sched_set_itmt_core_prio(int prio, int cpu)
 {
 	per_cpu(sched_core_priority, cpu) = prio;
 }
+
+/**
+ * sched_set_itmt_power_ratio() - Set the power scaling ratio used by ITMT
+ * @power_ratio: The power scaling ratio [1..256] for the core
+ * @core_cpu: The cpu number associated with the core
+ *
+ * Set a scaling to the cpu performance based on long term power
+ * settings (like EPB).
+ *
+ * Note this is for the policy, not for the actual dynamic frequency;
+ * the frequency will increase itself as workloads run on a core.
+ */
+
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu)
+{
+	int cpu;
+
+	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+		per_cpu(sched_power_ratio, cpu) = power_ratio;
+	}
+}
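For reference, the arithmetic in the intel_epb.c and itmt.c hunks above composes like this. A hypothetical standalone sketch, not part of the patch: the function names are illustrative, and it assumes EPB's documented 0..15 range.

#include <stdio.h>

/* EPB 0 (performance) .. 15 (powersave) maps to a ratio of 256 .. 226 */
static int power_ratio_from_epb(int epb)
{
	return 256 - epb * 2;
}

/* mirrors arch_asym_cpu_priority(): scale the core priority by the ratio */
static int scaled_priority(int core_prio, int power_ratio)
{
	if (power_ratio == 0)			/* uninitialized: default EPB of 6 */
		power_ratio = 256 - 2 * 6;
	return core_prio * power_ratio / 256;
}

int main(void)
{
	for (int epb = 0; epb <= 15; epb += 5)
		printf("epb=%2d ratio=%3d prio 255 -> %3d\n", epb,
		       power_ratio_from_epb(epb),
		       scaled_priority(255, power_ratio_from_epb(epb)));
	return 0;
}

So an EPB of 0 leaves a priority of 255 untouched, while the most power-biased setting scales it down to 225, enough to reorder cores without zeroing their priority.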
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 87e749106dda66..0388a7173104a1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1597,6 +1597,9 @@ unsigned long calibrate_delay_is_known(void)
 	if (!constant_tsc || !mask)
 		return 0;
 
+	if (cpu != 0)
+		return cpu_data(0).loops_per_jiffy;
+
 	sibling = cpumask_any_but(mask, cpu);
 	if (sibling < nr_cpu_ids)
 		return cpu_data(sibling).loops_per_jiffy;
diff --git a/block/early-lookup.c b/block/early-lookup.c
index 3fb57f7d2b1276..243ad0ca102121 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -5,6 +5,7 @@
  */
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 struct uuidcmp {
 	const char *uuid;
@@ -243,8 +244,20 @@ static int __init devt_from_devnum(const char *name, dev_t *devt)
  */
 int __init early_lookup_bdev(const char *name, dev_t *devt)
 {
-	if (strncmp(name, "PARTUUID=", 9) == 0)
-		return devt_from_partuuid(name + 9, devt);
+	if (strncmp(name, "PARTUUID=", 9) == 0) {
+		int res;
+		int needtowait = 40 << 1;
+
+		res = devt_from_partuuid(name + 9, devt);
+		if (!res)
+			return res;
+		while (res && needtowait) {
+			msleep(500);
+			res = devt_from_partuuid(name + 9, devt);
+			needtowait--;
+		}
+		return res;
+	}
 	if (strncmp(name, "PARTLABEL=", 10) == 0)
 		return devt_from_partlabel(name + 10, devt);
 	if (strncmp(name, "/dev/", 5) == 0)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 492a10f1bdbfa3..1cdc189c79ca1f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -382,6 +382,13 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	 * update them at any time after it has been called.
 	 */
 	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
+	/*
+	 * On some systems with overclocking enabled, CPPC.highest_perf is
+	 * hardcoded to 0xff and cannot be used to enable ITMT. In that case,
+	 * look at MSR_HWP_CAPABILITIES bits [7:0] instead.
+	 */
+	if (cppc_perf.highest_perf == 0xff)
+		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
 
 	if (max_highest_perf <= min_highest_perf) {
 		if (cppc_perf.highest_perf > max_highest_perf)
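The intel_pstate.c hunk above falls back from the CPPC sentinel value to the cached HWP capabilities. A hypothetical standalone sketch of just that selection; usable_highest_perf() and msr_val are illustrative stand-ins for the patch's logic and a MSR_HWP_CAPABILITIES read, whose highest-performance field occupies bits 7:0.

#include <stdint.h>
#include <stdio.h>

static unsigned int usable_highest_perf(unsigned int cppc_highest, uint64_t msr_val)
{
	if (cppc_highest == 0xff)	/* overclocking sentinel: use HWP instead */
		return msr_val & 0xff;	/* HWP highest_perf lives in bits 7:0 */
	return cppc_highest;
}

int main(void)
{
	/* made-up MSR value whose highest_perf field is 0xfc */
	printf("0x%x\n", usable_highest_perf(0xff, 0x00000000c0e0f0fcULL));
	return 0;
}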
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 9ba83954c25558..48392bf119158e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -594,7 +594,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -602,7 +602,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 33,
-		.target_residency = 100,
+		.target_residency = 900,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -610,7 +610,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 133,
-		.target_residency = 400,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -618,7 +618,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x32",
 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 166,
-		.target_residency = 500,
+		.target_residency = 1500,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -626,7 +626,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 300,
-		.target_residency = 900,
+		.target_residency = 2000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -634,7 +634,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 600,
-		.target_residency = 1800,
+		.target_residency = 5000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -642,7 +642,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 2600,
-		.target_residency = 7700,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -662,7 +662,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -670,7 +670,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 40,
-		.target_residency = 100,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -678,7 +678,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 133,
-		.target_residency = 400,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -686,7 +686,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x32",
 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 166,
-		.target_residency = 500,
+		.target_residency = 2000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -694,7 +694,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 300,
-		.target_residency = 900,
+		.target_residency = 4000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -702,7 +702,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 600,
-		.target_residency = 1800,
+		.target_residency = 7000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -710,7 +710,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 2600,
-		.target_residency = 7700,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -731,7 +731,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -739,7 +739,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 70,
-		.target_residency = 100,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -747,7 +747,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 85,
-		.target_residency = 200,
+		.target_residency = 600,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -755,7 +755,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x33",
 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 124,
-		.target_residency = 800,
+		.target_residency = 3000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -763,7 +763,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 200,
-		.target_residency = 800,
+		.target_residency = 3200,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -771,7 +771,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 480,
-		.target_residency = 5000,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -779,7 +779,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 890,
-		.target_residency = 5000,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -800,7 +800,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 300,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -829,7 +829,7 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 4,
-		.target_residency = 4,
+		.target_residency = 40,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -837,7 +837,7 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 170,
-		.target_residency = 600,
+		.target_residency = 900,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -987,7 +987,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 2,
-		.target_residency = 4,
+		.target_residency = 40,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -1041,7 +1041,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
 					   CPUIDLE_FLAG_INIT_XSTATE,
 		.exit_latency = 290,
-		.target_residency = 800,
+		.target_residency = 1200,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -1072,7 +1072,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
 					   CPUIDLE_FLAG_INIT_XSTATE |
 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
 		.exit_latency = 170,
-		.target_residency = 650,
+		.target_residency = 1250,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -1082,7 +1082,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
 					   CPUIDLE_FLAG_INIT_XSTATE |
 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
 		.exit_latency = 210,
-		.target_residency = 1000,
+		.target_residency = 2000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
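The intel_idle.c hunks above only raise target_residency values. To see why that biases state selection shallower, here is a hypothetical, much-simplified model of how a cpuidle governor consumes the field; pick_state() is illustrative (real governors also weigh exit latency and interrupt statistics), but the C1E/C3 numbers match the new hsw table.

#include <stdio.h>

struct state {
	const char *name;
	unsigned int exit_latency;	/* us */
	unsigned int target_residency;	/* us */
};

/* deepest state whose target_residency fits the predicted idle period */
static int pick_state(const struct state *s, int n, unsigned int predicted_us)
{
	int best = 0;

	for (int i = 1; i < n; i++)
		if (s[i].target_residency <= predicted_us)
			best = i;
	return best;
}

int main(void)
{
	const struct state hsw[] = {
		{ "C1",  2,  2 },	/* unchanged by the patch */
		{ "C1E", 10, 120 },	/* was 20 */
		{ "C3",  33, 900 },	/* was 100 */
	};

	/* a 500 us idle prediction now stops at C1E instead of reaching C3 */
	printf("predicted 500 us -> %s\n", hsw[pick_state(hsw, 3, 500)].name);
	return 0;
}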
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index c135254665b6b4..c29c0bd04edf7d 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -622,7 +622,7 @@ static int i8042_enable_kbd_port(void)
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
 		i8042_ctr &= ~I8042_CTR_KBDINT;
 		i8042_ctr |= I8042_CTR_KBDDIS;
-		pr_err("Failed to enable KBD port\n");
+		pr_info("Failed to enable KBD port\n");
 		return -EIO;
 	}
 
@@ -641,7 +641,7 @@ static int i8042_enable_aux_port(void)
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
 		i8042_ctr &= ~I8042_CTR_AUXINT;
 		i8042_ctr |= I8042_CTR_AUXDIS;
-		pr_err("Failed to enable AUX port\n");
+		pr_info("Failed to enable AUX port\n");
 		return -EIO;
 	}
 
@@ -733,7 +733,7 @@ static int i8042_check_mux(void)
 	i8042_ctr &= ~I8042_CTR_AUXINT;
 
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
-		pr_err("Failed to disable AUX port, can't use MUX\n");
+		pr_info("Failed to disable AUX port, can't use MUX\n");
 		return -EIO;
 	}
 
@@ -950,7 +950,7 @@ static int i8042_controller_selftest(void)
 
 	do {
 		if (i8042_command(&param, I8042_CMD_CTL_TEST)) {
-			pr_err("i8042 controller selftest timeout\n");
+			pr_info("i8042 controller selftest timeout\n");
 			return -ENODEV;
 		}
 
@@ -972,7 +972,7 @@ static int i8042_controller_selftest(void)
 	pr_info("giving up on controller selftest, continuing anyway...\n");
 	return 0;
 #else
-	pr_err("i8042 controller selftest failed\n");
+	pr_info("i8042 controller selftest failed\n");
 	return -EIO;
 #endif
 }
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index d6bdad4baadd80..6171a4ab1b512f 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -44,7 +44,7 @@
 
 #define DRV_NAME	"dummy"
 
-static int numdummies = 1;
+static int numdummies = 0;
 
 /* fake multicast ability */
 static void set_multicast_list(struct net_device *dev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006cca..fedc8444d0fb60 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -60,7 +60,7 @@ struct pci_pme_device {
 	struct pci_dev *dev;
 };
 
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
 
 /*
  * Following exit from Conventional Reset, devices must be ready within 1 sec
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e4653bb99946b1..fc3093dad1924c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1301,6 +1301,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	mm = current->mm;
 	mm->end_code = end_code;
 	mm->start_code = start_code;
+	if (start_code >= ELF_ET_DYN_BASE)
+		mm->mmap_base = start_code;
 	mm->start_data = start_data;
 	mm->end_data = end_data;
 	mm->start_stack = bprm->p;
diff --git a/fs/readdir.c b/fs/readdir.c
index 7764b863897888..c501155ed99a31 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -147,7 +147,7 @@ EXPORT_SYMBOL(iterate_dir);
  */
 static int verify_dirent_name(const char *name, int len)
 {
-	if (len <= 0 || len >= PATH_MAX)
+	if (unlikely(len <= 0 || len >= PATH_MAX))
 		return -EIO;
 	if (memchr(name, '/', len))
 		return -EIO;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index f648044466d5f5..61c333708cbacf 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 
 extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
@@ -1209,6 +1210,7 @@ do {									\
  */
 void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/include/net/sock.h b/include/net/sock.h
index 60bcb13f045c31..7fdd69f40151df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1611,10 +1611,17 @@ static inline void sk_mem_charge(struct sock *sk, int size)
 
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
+	int reclaimable, reclaim_threshold;
+
+	reclaim_threshold = 64 * 1024;
 	if (!sk_has_account(sk))
 		return;
 	sk_forward_alloc_add(sk, size);
-	sk_mem_reclaim(sk);
+	reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
+	if (reclaimable > reclaim_threshold) {
+		reclaimable -= reclaim_threshold;
+		__sk_mem_reclaim(sk, reclaimable);
+	}
 }
 
@@ -2999,7 +3006,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
  * platforms.  This makes socket queueing behavior and performance
  * not depend upon such differences.
  */
-#define _SK_MEM_PACKETS		256
+#define _SK_MEM_PACKETS		1024
 #define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
 #define SK_WMEM_DEFAULT		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 #define SK_RMEM_DEFAULT		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
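The sk_mem_uncharge() change above stops reclaiming on every uncharge and instead keeps a 64 KiB cushion per socket. A hypothetical userspace model of that hysteresis; forward_alloc and mem_uncharge() stand in for sk->sk_forward_alloc and the socket helpers.

#include <stdio.h>

#define RECLAIM_THRESHOLD (64 * 1024)

static int forward_alloc;	/* stand-in for sk->sk_forward_alloc */

static void mem_uncharge(int size)
{
	forward_alloc += size;
	if (forward_alloc > RECLAIM_THRESHOLD) {
		int reclaim = forward_alloc - RECLAIM_THRESHOLD;

		forward_alloc -= reclaim;	/* stand-in for __sk_mem_reclaim() */
		printf("reclaimed %6d, cushion %6d\n", reclaim, forward_alloc);
	}
}

int main(void)
{
	/* the first 16 uncharges only build the cushion; reclaim starts after */
	for (int i = 0; i < 20; i++)
		mem_uncharge(4096);
	return 0;
}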
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index 3bcc03f3aa4f07..43f794329ee599 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -82,7 +82,7 @@
 #define BOND_STATE_ACTIVE	0   /* link is active */
 #define BOND_STATE_BACKUP	1   /* link is backup */
 
-#define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
+#define BOND_DEFAULT_MAX_BONDS  0   /* Default maximum number of devices to support */
 
 #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 6af29da8889ebf..a83c82ad7150a2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -476,7 +476,9 @@ void __init prepare_namespace(void)
 	 * For example, it is not atypical to wait 5 seconds here
 	 * for the touchpad of a laptop to initialize.
 	 */
+	async_synchronize_full();
 	wait_for_device_probe();
+	async_synchronize_full();
 
 	md_run_setup();
 
diff --git a/init/init_task.c b/init/init_task.c
index a55e2189206fa4..be4ced220eb747 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -143,7 +143,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
-	.timer_slack_ns	= 50000, /* 50 usec default slack */
+	.timer_slack_ns	= 50, /* 50 nsec default slack */
 	.thread_pid	= &init_struct_pid,
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDIT
diff --git a/init/main.c b/init/main.c
index 07a3116811c5d7..c8e07ac605e904 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1220,10 +1220,13 @@ static __init_or_module void
 trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
 {
 	ktime_t rettime, *calltime = data;
+	long long delta;
 
 	rettime = ktime_get();
-	printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
-		 fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
+	delta = ktime_us_delta(rettime, *calltime);
+	if (ret || delta)
+		printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
+			 fn, ret, delta);
 }
 
 static __init_or_module void
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 24df4d98f7d200..1d5923996fa5e2 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -746,6 +746,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 	struct task_struct *new, *owner;
 	unsigned long flags, new_flags;
 	enum owner_state state;
+	int i = 0;
 
 	lockdep_assert_preemption_disabled();
 
@@ -782,7 +783,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 			break;
 		}
 
-		cpu_relax();
+		if (i++ > 1000)
+			cpu_relax();
 	}
 
 	return state;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5b752324270b08..df39ba1906003e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -191,7 +191,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
  */
 static unsigned int get_update_sysctl_factor(void)
 {
-	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
+	unsigned int cpus = num_online_cpus();
 	unsigned int factor;
 
 	switch (sysctl_sched_tunable_scaling) {
@@ -12847,7 +12847,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 
 		update_next_balance(sd, &next_balance);
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle / 2 < curr_cost + sd->max_newidle_lb_cost)
 			break;
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
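The get_update_sysctl_factor() change above drops the 8-CPU cap. Under the default logarithmic scaling policy the factor is 1 + ilog2(ncpus), so the cap froze granularity scaling beyond 8 CPUs; a small standalone sketch of the difference (ilog2_u() is an illustrative stand-in for the kernel's ilog2()).

#include <stdio.h>

static unsigned int ilog2_u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	for (unsigned int cpus = 1; cpus <= 64; cpus *= 4) {
		unsigned int capped = cpus > 8 ? 8 : cpus;

		printf("cpus=%2u  old factor=%u  new factor=%u\n",
		       cpus, 1 + ilog2_u(capped), 1 + ilog2_u(cpus));
	}
	return 0;
}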
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 77ae87f36e8412..caa7f9629160d8 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1343,10 +1343,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 	return ret;
 }
 
+static DEFINE_PER_CPU(unsigned long, last_yield);
+
 static void do_sched_yield(void)
 {
 	struct rq_flags rf;
 	struct rq *rq;
+	int cpu = raw_smp_processor_id();
+
+	cond_resched();
+
+	/* rate limit yielding to something sensible */
+
+	if (!time_after(jiffies, per_cpu(last_yield, cpu)))
+		return;
+
+	per_cpu(last_yield, cpu) = jiffies;
 
 	rq = this_rq_lock_irq(&rf);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 20f27e2cf7aec6..9ddd02e7551ccf 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -66,6 +66,17 @@ int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head,
 }
 EXPORT_SYMBOL_GPL(add_wait_queue_priority_exclusive);
 
+void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+	unsigned long flags;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
+
 void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
@@ -277,6 +288,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+	unsigned long flags;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue(wq_head, wq_entry);
+	set_current_state(state);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
+
 void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
 	wq_entry->flags = flags;
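The only functional difference between the _lifo helpers above and the stock exclusive ones is __add_wait_queue() (head insertion) in place of tail insertion, so the most recently queued exclusive waiter, likely the most cache-warm one, is woken first. A hypothetical singly-linked-list model of the resulting wake order:

#include <stdio.h>

struct waiter {
	int id;
	struct waiter *next;
};

static void add_head(struct waiter **q, struct waiter *w)	/* LIFO */
{
	w->next = *q;
	*q = w;
}

static void add_tail(struct waiter **q, struct waiter *w)	/* FIFO */
{
	w->next = NULL;
	while (*q)
		q = &(*q)->next;
	*q = w;
}

int main(void)
{
	struct waiter a = { .id = 1 }, b = { .id = 2 };
	struct waiter c = { .id = 1 }, d = { .id = 2 };
	struct waiter *fifo = NULL, *lifo = NULL;

	add_tail(&fifo, &a);	/* queue order: 1, 2 */
	add_tail(&fifo, &b);
	add_head(&lifo, &c);	/* queue order: 2, 1 */
	add_head(&lifo, &d);

	/* wakeups take exclusive waiters from the head of the list */
	printf("FIFO wakes %d first, LIFO wakes %d first\n", fifo->id, lifo->id);
	return 0;
}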
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 799e0e5eac26db..cb0f0ad85ea243 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,7 @@
 #else
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/sched/clock.h>
 #endif
 
 struct raid6_calls raid6_call;
@@ -138,8 +139,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 
 	for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
 		if (!best || (*algo)->priority > best->priority)
-			if (!(*algo)->valid || (*algo)->valid())
+			if (!(*algo)->valid || (*algo)->valid()) {
 				best = *algo;
+				break;
+			}
 
 	if (best) {
 		raid6_2data_recov = best->data2;
@@ -155,12 +158,15 @@ static inline const struct raid6_calls *raid6_choose_gen(
 	void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
 {
-	unsigned long perf, bestgenperf, j0, j1;
+	unsigned long perf;
+	const unsigned long max_perf = 2500;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
+	const u64 ns_per_mb = 1000000000 >> 20;
+	u64 n, ns, t, ns_best = ~0ULL;
 
-	for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->priority >= best->priority) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -170,26 +176,20 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				break;
 			}
 
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-			    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		if (perf > bestgenperf) {
-			bestgenperf = perf;
+		if (ns < ns_best) {
+			ns_best = ns;
 			best = *algo;
 		}
-		pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-			(perf * HZ * (disks-2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns);
 	}
 }
 
@@ -206,31 +206,23 @@ static inline const struct raid6_calls *raid6_choose_gen(
 		goto out;
 	}
 
-	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		best->name,
-		(bestgenperf * HZ * (disks - 2)) >>
-		(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+	n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+	pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
+		best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
 
 	if (best->xor_syndrome) {
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-			    j1 + (1 << RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			best->xor_syndrome(disks, start, stop, PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			(perf * HZ * (disks - 2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? n / ns : 0, ns);
 	}
-
 out:
 	return best;
 }
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index cdd1e12aac8c03..b72336d324451f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -633,7 +633,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 	 * having to remove and re-insert us on the wait queue.
 	 */
 	for (;;) {
-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+		prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8a18aeca7ab074..8e0d6b92b48d63 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5267,7 +5267,7 @@ void __init tcp_init(void)
 	tcp_init_mem();
 	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-	max_wshare = min(4UL*1024*1024, limit);
+	max_wshare = min(16UL*1024*1024, limit);
 	max_rshare = min(32UL*1024*1024, limit);
 
 	init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
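Looking back at the lib/raid6/algos.c hunks: with ns_per_mb = 10^9 / 2^20 (about 953), the reported rate works out to bytes_processed * ns_per_mb / ns. A standalone sanity check of that formula with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t ns_per_mb = 1000000000ULL >> 20;	/* 953 */
	const uint64_t max_perf = 2500, page_size = 4096, disks = 12;
	const uint64_t ns = 5000000;	/* pretend the timed loop took 5 ms */
	uint64_t bytes = max_perf * page_size * (disks - 2);

	/* (bytes >> 20) / (ns / 1e9) rearranged to avoid integer truncation */
	printf("%llu MB/s\n", (unsigned long long)(bytes * ns_per_mb / ns));
	return 0;
}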