diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21041898157a1f..971cb4bbf32362 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -265,6 +265,7 @@ extern bool __read_mostly sysctl_sched_itmt_enabled;
 
 /* Interface to set priority of a cpu */
 void sched_set_itmt_core_prio(int prio, int core_cpu);
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu);
 
 /* Interface to notify scheduler that system supports ITMT */
 int sched_set_itmt_support(void);
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index bc7671f920a7ed..c69afdb06d2ccc 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	/* update the ITMT scheduler logic to use the power policy data */
+	/* scale val (EPB 0..15) up by 2 so the ratio range is 226 - 256 */
+	sched_set_itmt_power_ratio(256 - val * 2, cpu);
+
 	return count;
 }
 
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 243a769fdd97b9..3d4bce75bbc434 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -26,6 +26,7 @@ static DEFINE_MUTEX(itmt_update_mutex);
 
 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio);
 
 /* Boolean to track if system has ITMT capabilities */
 static bool __read_mostly sched_itmt_capable;
 
@@ -167,7 +168,12 @@ void sched_clear_itmt_support(void)
 
 int arch_asym_cpu_priority(int cpu)
 {
-	return per_cpu(sched_core_priority, cpu);
+	int power_ratio = per_cpu(sched_power_ratio, cpu);
+
+	/* a power ratio of 0 (uninitialized) is treated as the default EPB of 6 */
+	if (power_ratio == 0)
+		power_ratio = 256 - 2 * 6;
+	return per_cpu(sched_core_priority, cpu) * power_ratio / 256;
 }
 
 /**
@@ -188,3 +194,24 @@ void sched_set_itmt_core_prio(int prio, int cpu)
 {
 	per_cpu(sched_core_priority, cpu) = prio;
 }
+
+/**
+ * sched_set_itmt_power_ratio() - Set the power scaling ratio used by ITMT
+ * @power_ratio: The power scaling ratio [1..256] for the core
+ * @core_cpu: The cpu number associated with the core
+ *
+ * Set a scaling to the cpu performance based on long term power
+ * settings (like EPB).
+ *
+ * Note this is for the policy, not for the actual dynamic frequency;
+ * the frequency will increase itself as workloads run on a core.
+ */
+
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu)
+{
+	int cpu;
+
+	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+		per_cpu(sched_power_ratio, cpu) = power_ratio;
+	}
+}
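For reference, the arithmetic in the intel_epb.c and itmt.c hunks above composes like this. A hypothetical standalone sketch, not part of the patch: the function names are illustrative, and it assumes EPB's documented 0..15 range.

#include <stdio.h>

/* EPB 0 (performance) .. 15 (powersave) maps to a ratio of 256 .. 226 */
static int power_ratio_from_epb(int epb)
{
	return 256 - epb * 2;
}

/* mirrors arch_asym_cpu_priority(): scale the core priority by the ratio */
static int scaled_priority(int core_prio, int power_ratio)
{
	if (power_ratio == 0)			/* uninitialized: default EPB of 6 */
		power_ratio = 256 - 2 * 6;
	return core_prio * power_ratio / 256;
}

int main(void)
{
	for (int epb = 0; epb <= 15; epb += 5)
		printf("epb=%2d ratio=%3d prio 255 -> %3d\n", epb,
		       power_ratio_from_epb(epb),
		       scaled_priority(255, power_ratio_from_epb(epb)));
	return 0;
}

So an EPB of 0 leaves a priority of 255 untouched, while the most power-biased setting scales it down to 225, enough to reorder cores without zeroing their priority.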
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 87e749106dda66..0388a7173104a1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1597,6 +1597,9 @@ unsigned long calibrate_delay_is_known(void)
 	if (!constant_tsc || !mask)
 		return 0;
 
+	if (cpu != 0)
+		return cpu_data(0).loops_per_jiffy;
+
 	sibling = cpumask_any_but(mask, cpu);
 	if (sibling < nr_cpu_ids)
 		return cpu_data(sibling).loops_per_jiffy;
diff --git a/block/early-lookup.c b/block/early-lookup.c
index 3fb57f7d2b1276..243ad0ca102121 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -5,6 +5,7 @@
  */
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 struct uuidcmp {
 	const char *uuid;
@@ -243,8 +244,20 @@ static int __init devt_from_devnum(const char *name, dev_t *devt)
  */
 int __init early_lookup_bdev(const char *name, dev_t *devt)
 {
-	if (strncmp(name, "PARTUUID=", 9) == 0)
-		return devt_from_partuuid(name + 9, devt);
+	if (strncmp(name, "PARTUUID=", 9) == 0) {
+		int res;
+		int needtowait = 40 << 1;
+
+		res = devt_from_partuuid(name + 9, devt);
+		if (!res)
+			return res;
+		while (res && needtowait) {
+			msleep(500);
+			res = devt_from_partuuid(name + 9, devt);
+			needtowait--;
+		}
+		return res;
+	}
 	if (strncmp(name, "PARTLABEL=", 10) == 0)
 		return devt_from_partlabel(name + 10, devt);
 	if (strncmp(name, "/dev/", 5) == 0)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 492a10f1bdbfa3..1cdc189c79ca1f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -382,6 +382,13 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	 * update them at any time after it has been called.
 	 */
 	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
+	/*
+	 * On some systems with overclocking enabled, CPPC.highest_perf is
+	 * hardcoded to 0xff and cannot be used to enable ITMT. In that case,
+	 * look at MSR_HWP_CAPABILITIES bits [7:0] instead.
+	 */
+	if (cppc_perf.highest_perf == 0xff)
+		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
 
 	if (max_highest_perf <= min_highest_perf) {
 		if (cppc_perf.highest_perf > max_highest_perf)
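The intel_pstate.c hunk above falls back from the CPPC sentinel value to the cached HWP capabilities. A hypothetical standalone sketch of just that selection; usable_highest_perf() and msr_val are illustrative stand-ins for the patch's logic and a MSR_HWP_CAPABILITIES read, whose highest-performance field occupies bits 7:0.

#include <stdint.h>
#include <stdio.h>

static unsigned int usable_highest_perf(unsigned int cppc_highest, uint64_t msr_val)
{
	if (cppc_highest == 0xff)	/* overclocking sentinel: use HWP instead */
		return msr_val & 0xff;	/* HWP highest_perf lives in bits 7:0 */
	return cppc_highest;
}

int main(void)
{
	/* made-up MSR value whose highest_perf field is 0xfc */
	printf("0x%x\n", usable_highest_perf(0xff, 0x00000000c0e0f0fcULL));
	return 0;
}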
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 9ba83954c25558..48392bf119158e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -594,7 +594,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -602,7 +602,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 33,
-		.target_residency = 100,
+		.target_residency = 900,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -610,7 +610,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 133,
-		.target_residency = 400,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -618,7 +618,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x32",
 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 166,
-		.target_residency = 500,
+		.target_residency = 1500,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -626,7 +626,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 300,
-		.target_residency = 900,
+		.target_residency = 2000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -634,7 +634,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 600,
-		.target_residency = 1800,
+		.target_residency = 5000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -642,7 +642,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 2600,
-		.target_residency = 7700,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -662,7 +662,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -670,7 +670,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 40,
-		.target_residency = 100,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -678,7 +678,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 133,
-		.target_residency = 400,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -686,7 +686,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x32",
 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 166,
-		.target_residency = 500,
+		.target_residency = 2000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -694,7 +694,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 300,
-		.target_residency = 900,
+		.target_residency = 4000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -702,7 +702,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 600,
-		.target_residency = 1800,
+		.target_residency = 7000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -710,7 +710,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 2600,
-		.target_residency = 7700,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -731,7 +731,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -739,7 +739,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 70,
-		.target_residency = 100,
+		.target_residency = 1000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -747,7 +747,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 85,
-		.target_residency = 200,
+		.target_residency = 600,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -755,7 +755,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x33",
 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 124,
-		.target_residency = 800,
+		.target_residency = 3000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -763,7 +763,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 200,
-		.target_residency = 800,
+		.target_residency = 3200,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -771,7 +771,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 480,
-		.target_residency = 5000,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -779,7 +779,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 890,
-		.target_residency = 5000,
+		.target_residency = 9000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -800,7 +800,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 300,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -829,7 +829,7 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 4,
-		.target_residency = 4,
+		.target_residency = 40,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -837,7 +837,7 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 170,
-		.target_residency = 600,
+		.target_residency = 900,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -987,7 +987,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 2,
-		.target_residency = 4,
+		.target_residency = 40,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -1041,7 +1041,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
 					   CPUIDLE_FLAG_INIT_XSTATE,
 		.exit_latency = 290,
-		.target_residency = 800,
+		.target_residency = 1200,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -1072,7 +1072,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
 					   CPUIDLE_FLAG_INIT_XSTATE |
 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
 		.exit_latency = 170,
-		.target_residency = 650,
+		.target_residency = 1250,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -1082,7 +1082,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
 					   CPUIDLE_FLAG_INIT_XSTATE |
 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
 		.exit_latency = 210,
-		.target_residency = 1000,
+		.target_residency = 2000,
 		.enter = intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
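The intel_idle.c hunks above only raise target_residency values. To see why that biases state selection shallower, here is a hypothetical, much-simplified model of how a cpuidle governor consumes the field; pick_state() is illustrative (real governors also weigh exit latency and interrupt statistics), but the C1E/C3 numbers match the new hsw table.

#include <stdio.h>

struct state {
	const char *name;
	unsigned int exit_latency;	/* us */
	unsigned int target_residency;	/* us */
};

/* deepest state whose target_residency fits the predicted idle period */
static int pick_state(const struct state *s, int n, unsigned int predicted_us)
{
	int best = 0;

	for (int i = 1; i < n; i++)
		if (s[i].target_residency <= predicted_us)
			best = i;
	return best;
}

int main(void)
{
	const struct state hsw[] = {
		{ "C1",  2,  2 },	/* unchanged by the patch */
		{ "C1E", 10, 120 },	/* was 20 */
		{ "C3",  33, 900 },	/* was 100 */
	};

	/* a 500 us idle prediction now stops at C1E instead of reaching C3 */
	printf("predicted 500 us -> %s\n", hsw[pick_state(hsw, 3, 500)].name);
	return 0;
}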
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index c135254665b6b4..c29c0bd04edf7d 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -622,7 +622,7 @@ static int i8042_enable_kbd_port(void)
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
 		i8042_ctr &= ~I8042_CTR_KBDINT;
 		i8042_ctr |= I8042_CTR_KBDDIS;
-		pr_err("Failed to enable KBD port\n");
+		pr_info("Failed to enable KBD port\n");
 		return -EIO;
 	}
 
@@ -641,7 +641,7 @@ static int i8042_enable_aux_port(void)
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
 		i8042_ctr &= ~I8042_CTR_AUXINT;
 		i8042_ctr |= I8042_CTR_AUXDIS;
-		pr_err("Failed to enable AUX port\n");
+		pr_info("Failed to enable AUX port\n");
 		return -EIO;
 	}
 
@@ -733,7 +733,7 @@ static int i8042_check_mux(void)
 	i8042_ctr &= ~I8042_CTR_AUXINT;
 
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
-		pr_err("Failed to disable AUX port, can't use MUX\n");
+		pr_info("Failed to disable AUX port, can't use MUX\n");
 		return -EIO;
 	}
 
@@ -950,7 +950,7 @@ static int i8042_controller_selftest(void)
 
 	do {
 		if (i8042_command(&param, I8042_CMD_CTL_TEST)) {
-			pr_err("i8042 controller selftest timeout\n");
+			pr_info("i8042 controller selftest timeout\n");
 			return -ENODEV;
 		}
 
@@ -972,7 +972,7 @@ static int i8042_controller_selftest(void)
 	pr_info("giving up on controller selftest, continuing anyway...\n");
 	return 0;
 #else
-	pr_err("i8042 controller selftest failed\n");
+	pr_info("i8042 controller selftest failed\n");
 	return -EIO;
 #endif
 }
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index d6bdad4baadd80..6171a4ab1b512f 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -44,7 +44,7 @@
 
 #define DRV_NAME	"dummy"
 
-static int numdummies = 1;
+static int numdummies = 0;
 
 /* fake multicast ability */
 static void set_multicast_list(struct net_device *dev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006cca..fedc8444d0fb60 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -60,7 +60,7 @@ struct pci_pme_device {
 	struct pci_dev *dev;
 };
 
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
 
 /*
  * Following exit from Conventional Reset, devices must be ready within 1 sec
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e4653bb99946b1..fc3093dad1924c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1301,6 +1301,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	mm = current->mm;
 	mm->end_code = end_code;
 	mm->start_code = start_code;
+	if (start_code >= ELF_ET_DYN_BASE)
+		mm->mmap_base = start_code;
 	mm->start_data = start_data;
 	mm->end_data = end_data;
 	mm->start_stack = bprm->p;
diff --git a/fs/readdir.c b/fs/readdir.c
index 7764b863897888..c501155ed99a31 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -147,7 +147,7 @@ EXPORT_SYMBOL(iterate_dir);
  */
 static int verify_dirent_name(const char *name, int len)
 {
-	if (len <= 0 || len >= PATH_MAX)
+	if (unlikely(len <= 0 || len >= PATH_MAX))
 		return -EIO;
 	if (memchr(name, '/', len))
 		return -EIO;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index f648044466d5f5..61c333708cbacf 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 
 extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
@@ -1209,6 +1210,7 @@ do {									\
  */
 void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/include/net/sock.h b/include/net/sock.h
index 60bcb13f045c31..7fdd69f40151df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1611,10 +1611,17 @@ static inline void sk_mem_charge(struct sock *sk, int size)
 
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
+	int reclaimable, reclaim_threshold;
+
+	reclaim_threshold = 64 * 1024;
 	if (!sk_has_account(sk))
 		return;
 	sk_forward_alloc_add(sk, size);
-	sk_mem_reclaim(sk);
+	reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
+	if (reclaimable > reclaim_threshold) {
+		reclaimable -= reclaim_threshold;
+		__sk_mem_reclaim(sk, reclaimable);
+	}
 }
 
@@ -2999,7 +3006,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
  * platforms.  This makes socket queueing behavior and performance
  * not depend upon such differences.
  */
-#define _SK_MEM_PACKETS		256
+#define _SK_MEM_PACKETS		1024
 #define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
 #define SK_WMEM_DEFAULT		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 #define SK_RMEM_DEFAULT		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
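The sk_mem_uncharge() change above stops reclaiming on every uncharge and instead keeps a 64 KiB cushion per socket. A hypothetical userspace model of that hysteresis; forward_alloc and mem_uncharge() stand in for sk->sk_forward_alloc and the socket helpers.

#include <stdio.h>

#define RECLAIM_THRESHOLD (64 * 1024)

static int forward_alloc;	/* stand-in for sk->sk_forward_alloc */

static void mem_uncharge(int size)
{
	forward_alloc += size;
	if (forward_alloc > RECLAIM_THRESHOLD) {
		int reclaim = forward_alloc - RECLAIM_THRESHOLD;

		forward_alloc -= reclaim;	/* stand-in for __sk_mem_reclaim() */
		printf("reclaimed %6d, cushion %6d\n", reclaim, forward_alloc);
	}
}

int main(void)
{
	/* the first 16 uncharges only build the cushion; reclaim starts after */
	for (int i = 0; i < 20; i++)
		mem_uncharge(4096);
	return 0;
}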
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index 3bcc03f3aa4f07..43f794329ee599 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -82,7 +82,7 @@
 #define BOND_STATE_ACTIVE	0   /* link is active */
 #define BOND_STATE_BACKUP	1   /* link is backup */
 
-#define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
+#define BOND_DEFAULT_MAX_BONDS  0   /* Default maximum number of devices to support */
 
 #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 6af29da8889ebf..a83c82ad7150a2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -476,7 +476,9 @@ void __init prepare_namespace(void)
 	 * For example, it is not atypical to wait 5 seconds here
 	 * for the touchpad of a laptop to initialize.
 	 */
+	async_synchronize_full();
 	wait_for_device_probe();
+	async_synchronize_full();
 
 	md_run_setup();
 
diff --git a/init/init_task.c b/init/init_task.c
index a55e2189206fa4..be4ced220eb747 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -143,7 +143,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
-	.timer_slack_ns	= 50000, /* 50 usec default slack */
+	.timer_slack_ns	= 50, /* 50 nsec default slack */
 	.thread_pid	= &init_struct_pid,
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDIT
diff --git a/init/main.c b/init/main.c
index 07a3116811c5d7..c8e07ac605e904 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1220,10 +1220,13 @@ static __init_or_module void
 trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
 {
 	ktime_t rettime, *calltime = data;
+	long long delta;
 
 	rettime = ktime_get();
-	printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
-		 fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
+	delta = ktime_us_delta(rettime, *calltime);
+	if (ret || delta)
+		printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
+			 fn, ret, delta);
 }
 
 static __init_or_module void
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 24df4d98f7d200..1d5923996fa5e2 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -746,6 +746,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 	struct task_struct *new, *owner;
 	unsigned long flags, new_flags;
 	enum owner_state state;
+	int i = 0;
 
 	lockdep_assert_preemption_disabled();
 
@@ -782,7 +783,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 			break;
 		}
 
-		cpu_relax();
+		if (i++ > 1000)
+			cpu_relax();
 	}
 
 	return state;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5b752324270b08..df39ba1906003e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -191,7 +191,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
  */
 static unsigned int get_update_sysctl_factor(void)
 {
-	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
+	unsigned int cpus = num_online_cpus();
 	unsigned int factor;
 
 	switch (sysctl_sched_tunable_scaling) {
@@ -12847,7 +12847,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 
 		update_next_balance(sd, &next_balance);
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle / 2 < curr_cost + sd->max_newidle_lb_cost)
 			break;
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
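The get_update_sysctl_factor() change above drops the 8-CPU cap. Under the default logarithmic scaling policy the factor is 1 + ilog2(ncpus), so the cap froze granularity scaling beyond 8 CPUs; a small standalone sketch of the difference (ilog2_u() is an illustrative stand-in for the kernel's ilog2()).

#include <stdio.h>

static unsigned int ilog2_u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	for (unsigned int cpus = 1; cpus <= 64; cpus *= 4) {
		unsigned int capped = cpus > 8 ? 8 : cpus;

		printf("cpus=%2u  old factor=%u  new factor=%u\n",
		       cpus, 1 + ilog2_u(capped), 1 + ilog2_u(cpus));
	}
	return 0;
}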
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 77ae87f36e8412..caa7f9629160d8 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1343,10 +1343,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 	return ret;
 }
 
+static DEFINE_PER_CPU(unsigned long, last_yield);
+
 static void do_sched_yield(void)
 {
 	struct rq_flags rf;
 	struct rq *rq;
+	int cpu = raw_smp_processor_id();
+
+	cond_resched();
+
+	/* rate limit yielding to something sensible */
+
+	if (!time_after(jiffies, per_cpu(last_yield, cpu)))
+		return;
+
+	per_cpu(last_yield, cpu) = jiffies;
 
 	rq = this_rq_lock_irq(&rf);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 20f27e2cf7aec6..9ddd02e7551ccf 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -66,6 +66,17 @@ int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head,
 }
 EXPORT_SYMBOL_GPL(add_wait_queue_priority_exclusive);
 
+void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+	unsigned long flags;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
+
 void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
@@ -277,6 +288,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+	unsigned long flags;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue(wq_head, wq_entry);
+	set_current_state(state);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
+
 void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
 	wq_entry->flags = flags;
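The only functional difference between the _lifo helpers above and the stock exclusive ones is __add_wait_queue() (head insertion) in place of tail insertion, so the most recently queued exclusive waiter, likely the most cache-warm one, is woken first. A hypothetical singly-linked-list model of the resulting wake order:

#include <stdio.h>

struct waiter {
	int id;
	struct waiter *next;
};

static void add_head(struct waiter **q, struct waiter *w)	/* LIFO */
{
	w->next = *q;
	*q = w;
}

static void add_tail(struct waiter **q, struct waiter *w)	/* FIFO */
{
	w->next = NULL;
	while (*q)
		q = &(*q)->next;
	*q = w;
}

int main(void)
{
	struct waiter a = { .id = 1 }, b = { .id = 2 };
	struct waiter c = { .id = 1 }, d = { .id = 2 };
	struct waiter *fifo = NULL, *lifo = NULL;

	add_tail(&fifo, &a);	/* queue order: 1, 2 */
	add_tail(&fifo, &b);
	add_head(&lifo, &c);	/* queue order: 2, 1 */
	add_head(&lifo, &d);

	/* wakeups take exclusive waiters from the head of the list */
	printf("FIFO wakes %d first, LIFO wakes %d first\n", fifo->id, lifo->id);
	return 0;
}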
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 799e0e5eac26db..cb0f0ad85ea243 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,7 @@
 #else
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/sched/clock.h>
 #endif
 
 struct raid6_calls raid6_call;
@@ -138,8 +139,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 
 	for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
 		if (!best || (*algo)->priority > best->priority)
-			if (!(*algo)->valid || (*algo)->valid())
+			if (!(*algo)->valid || (*algo)->valid()) {
 				best = *algo;
+				break;
+			}
 
 	if (best) {
 		raid6_2data_recov = best->data2;
@@ -155,12 +158,15 @@ static inline const struct raid6_calls *raid6_choose_gen(
 	void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
 {
-	unsigned long perf, bestgenperf, j0, j1;
+	unsigned long perf;
+	const unsigned long max_perf = 2500;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
+	const u64 ns_per_mb = 1000000000 >> 20;
+	u64 n, ns, t, ns_best = ~0ULL;
 
-	for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->priority >= best->priority) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -170,26 +176,20 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				break;
 			}
 
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-			    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		if (perf > bestgenperf) {
-			bestgenperf = perf;
+		if (ns < ns_best) {
+			ns_best = ns;
 			best = *algo;
 		}
-		pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-			(perf * HZ * (disks-2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns);
 	}
 }
 
@@ -206,31 +206,23 @@ static inline const struct raid6_calls *raid6_choose_gen(
 		goto out;
 	}
 
-	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		best->name,
-		(bestgenperf * HZ * (disks - 2)) >>
-		(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+	n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+	pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
+		best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
 
 	if (best->xor_syndrome) {
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-			    j1 + (1 << RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			best->xor_syndrome(disks, start, stop, PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			(perf * HZ * (disks - 2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? n / ns : 0, ns);
 	}
-
 out:
 	return best;
 }
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index cdd1e12aac8c03..b72336d324451f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -633,7 +633,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 	 * having to remove and re-insert us on the wait queue.
 	 */
 	for (;;) {
-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+		prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8a18aeca7ab074..8e0d6b92b48d63 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5267,7 +5267,7 @@ void __init tcp_init(void)
 	tcp_init_mem();
 	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-	max_wshare = min(4UL*1024*1024, limit);
+	max_wshare = min(16UL*1024*1024, limit);
 	max_rshare = min(32UL*1024*1024, limit);
 
 	init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
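Looking back at the lib/raid6/algos.c hunks: with ns_per_mb = 10^9 / 2^20 (about 953), the reported rate works out to bytes_processed * ns_per_mb / ns. A standalone sanity check of that formula with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t ns_per_mb = 1000000000ULL >> 20;	/* 953 */
	const uint64_t max_perf = 2500, page_size = 4096, disks = 12;
	const uint64_t ns = 5000000;	/* pretend the timed loop took 5 ms */
	uint64_t bytes = max_perf * page_size * (disks - 2);

	/* (bytes >> 20) / (ns / 1e9) rearranged to avoid integer truncation */
	printf("%llu MB/s\n", (unsigned long long)(bytes * ns_per_mb / ns));
	return 0;
}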