From e550f7dbffdf00b136f7a736a74557c2488d9f73 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 23 Jun 2015 01:26:52 -0500 Subject: [PATCH 01/22] i8042: decrease debug message level to info Author: Arjan van de Ven Signed-off-by: Miguel Bernal Marin Signed-off-by: Jose Carlos Venegas Munoz --- drivers/input/serio/i8042.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index c135254665b6b4..c29c0bd04edf7d 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -622,7 +622,7 @@ static int i8042_enable_kbd_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_KBDINT; i8042_ctr |= I8042_CTR_KBDDIS; - pr_err("Failed to enable KBD port\n"); + pr_info("Failed to enable KBD port\n"); return -EIO; } @@ -641,7 +641,7 @@ static int i8042_enable_aux_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_AUXINT; i8042_ctr |= I8042_CTR_AUXDIS; - pr_err("Failed to enable AUX port\n"); + pr_info("Failed to enable AUX port\n"); return -EIO; } @@ -733,7 +733,7 @@ static int i8042_check_mux(void) i8042_ctr &= ~I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - pr_err("Failed to disable AUX port, can't use MUX\n"); + pr_info("Failed to disable AUX port, can't use MUX\n"); return -EIO; } @@ -950,7 +950,7 @@ static int i8042_controller_selftest(void) do { if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { - pr_err("i8042 controller selftest timeout\n"); + pr_info("i8042 controller selftest timeout\n"); return -ENODEV; } @@ -972,7 +972,7 @@ static int i8042_controller_selftest(void) pr_info("giving up on controller selftest, continuing anyway...\n"); return 0; #else - pr_err("i8042 controller selftest failed\n"); + pr_info("i8042 controller selftest failed\n"); return -EIO; #endif } From d1267f217a0249a9d9ec7c9a41082fbc9abb9aed Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 14 Mar 2016 11:10:58 -0600 Subject: [PATCH 02/22] pci pme wakeups Reduce wakeups for PME checks, which are a workaround for miswired boards (sadly, too many of them) in laptops. 
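For context, the consumer of PME_TIMEOUT is a self-rearming poll; below is a simplified sketch of pci_pme_list_scan() from drivers/pci/pci.c (device-state checks elided). Because each pass re-queues itself, raising the interval from 1000 ms to 4000 ms cuts this wakeup source to a quarter:

static void pci_pme_list_scan(struct work_struct *work)
{
	struct pci_pme_device *pme_dev, *n;

	mutex_lock(&pci_pme_list_mutex);
	list_for_each_entry_safe(pme_dev, n, &pci_pme_list, list) {
		/* poll devices whose PME interrupt line may be miswired */
		pci_pme_wakeup(pme_dev->dev, NULL);
	}
	if (!list_empty(&pci_pme_list))
		queue_delayed_work(system_freezable_wq, &pci_pme_work,
				   msecs_to_jiffies(PME_TIMEOUT));
	mutex_unlock(&pci_pme_list_mutex);
}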
Signed-off-by: Kai Krakow --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b14dd064006cca..fedc8444d0fb60 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -60,7 +60,7 @@ struct pci_pme_device { struct pci_dev *dev; }; -#define PME_TIMEOUT 1000 /* How long between PME checks */ +#define PME_TIMEOUT 4000 /* How long between PME checks */ /* * Following exit from Conventional Reset, devices must be ready within 1 sec From 3eeed85975e7a69226a868d38101488e4c6f84a7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 19 Mar 2016 21:32:19 -0400 Subject: [PATCH 03/22] intel_idle: tweak cpuidle cstates Increase target_residency in cpuidle cstates. Tune intel_idle to be a bit less aggressive; Clear Linux is cleaner in wakeup hygiene than the average Linux system, so we can afford to change these in a way that increases performance while keeping power efficiency. Signed-off-by: Kai Krakow --- drivers/idle/intel_idle.c | 56 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 9ba83954c25558..48392bf119158e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -594,7 +594,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -602,7 +602,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, - .target_residency = 100, + .target_residency = 900, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -610,7 +610,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -618,7 +618,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 1500, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -626,7 +626,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 2000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -634,7 +634,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 5000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -642,7 +642,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -662,7 +662,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = intel_idle, 
.enter_s2idle = intel_idle_s2idle, }, { @@ -670,7 +670,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, - .target_residency = 100, + .target_residency = 1000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -678,7 +678,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -686,7 +686,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 2000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -694,7 +694,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 4000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -702,7 +702,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 7000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -710,7 +710,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -731,7 +731,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -739,7 +739,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, - .target_residency = 100, + .target_residency = 1000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -747,7 +747,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, - .target_residency = 200, + .target_residency = 600, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -755,7 +755,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, - .target_residency = 800, + .target_residency = 3000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -763,7 +763,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, - .target_residency = 800, + .target_residency = 3200, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -771,7 +771,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, - .target_residency = 5000, + .target_residency = 9000, .enter = intel_idle, 
.enter_s2idle = intel_idle_s2idle, }, { @@ -779,7 +779,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, - .target_residency = 5000, + .target_residency = 9000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -800,7 +800,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 300, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -829,7 +829,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 4, - .target_residency = 4, + .target_residency = 40, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -837,7 +837,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 170, - .target_residency = 600, + .target_residency = 900, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -987,7 +987,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, - .target_residency = 4, + .target_residency = 40, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -1041,7 +1041,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_INIT_XSTATE, .exit_latency = 290, - .target_residency = 800, + .target_residency = 1200, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -1072,7 +1072,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = { CPUIDLE_FLAG_INIT_XSTATE | CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 170, - .target_residency = 650, + .target_residency = 1250, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -1082,7 +1082,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = { CPUIDLE_FLAG_INIT_XSTATE | CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 210, - .target_residency = 1000, + .target_residency = 2000, .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { From 3962ef9415577771d8874d2c2a6f8944837df0ab Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 11 Feb 2015 17:28:14 -0600 Subject: [PATCH 04/22] smpboot: reuse timer calibration No point recalibrating for a known-constant TSC ... saves 200ms+ of boot time. 
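For reference, a simplified sketch of the generic caller in init/calibrate.c that this arch hook short-circuits (the real function also honors lpj= presets and prints the result); with the patch, every CPU except CPU 0 returns immediately instead of spending time in the convergence loop:

void calibrate_delay(void)
{
	unsigned long lpj = calibrate_delay_is_known();	/* arch hook patched below */

	if (!lpj)
		lpj = calibrate_delay_converge();	/* slow, measured fallback */
	loops_per_jiffy = lpj;
}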
Signed-off-by: Kai Krakow --- arch/x86/kernel/tsc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 87e749106dda66..0388a7173104a1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1597,6 +1597,9 @@ unsigned long calibrate_delay_is_known(void) if (!constant_tsc || !mask) return 0; + if (cpu != 0) + return cpu_data(0).loops_per_jiffy; + sibling = cpumask_any_but(mask, cpu); if (sibling < nr_cpu_ids) return cpu_data(sibling).loops_per_jiffy; From b009615331573b5ca20c4335aa6f6b52fb2d7a66 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2017 15:34:09 +0000 Subject: [PATCH 05/22] ipv4/tcp: allow the memory tuning for tcp to go a little bigger than default Signed-off-by: Kai Krakow --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a18aeca7ab074..8e0d6b92b48d63 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5267,7 +5267,7 @@ void __init tcp_init(void) tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_wshare = min(4UL*1024*1024, limit); + max_wshare = min(16UL*1024*1024, limit); max_rshare = min(32UL*1024*1024, limit); init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE; From 038a03f91c8190005c3323f098bb361beae3dd68 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 17 May 2017 01:52:11 +0000 Subject: [PATCH 06/22] init: wait for partition and retry scan Because Clear Linux boots fast, the root device may not be ready when the mounting code is reached, so retry the device scan every 0.5 sec for up to 40 sec and synchronize with the async tasks around the probe. Signed-off-by: Miguel Bernal Marin --- block/early-lookup.c | 15 +++++++++++++-- init/do_mounts.c | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/block/early-lookup.c b/block/early-lookup.c index 3fb57f7d2b1276..243ad0ca102121 100644 --- a/block/early-lookup.c +++ b/block/early-lookup.c @@ -5,6 +5,7 @@ */ #include <linux/blkdev.h> #include <linux/ctype.h> +#include <linux/delay.h> struct uuidcmp { const char *uuid; @@ -243,8 +244,18 @@ static int __init devt_from_devnum(const char *name, dev_t *devt) */ int __init early_lookup_bdev(const char *name, dev_t *devt) { - if (strncmp(name, "PARTUUID=", 9) == 0) - return devt_from_partuuid(name + 9, devt); + if (strncmp(name, "PARTUUID=", 9) == 0) { + int res; + int needtowait = 40<<1; + res = devt_from_partuuid(name + 9, devt); + if (!res) return res; + while (res && needtowait) { + msleep(500); + res = devt_from_partuuid(name + 9, devt); + needtowait--; + } + return res; + } if (strncmp(name, "PARTLABEL=", 10) == 0) return devt_from_partlabel(name + 10, devt); if (strncmp(name, "/dev/", 5) == 0) diff --git a/init/do_mounts.c b/init/do_mounts.c index 6af29da8889ebf..a83c82ad7150a2 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -476,7 +476,9 @@ void __init prepare_namespace(void) * For example, it is not atypical to wait 5 seconds here * for the touchpad of a laptop to initialize. */ + async_synchronize_full(); wait_for_device_probe(); + async_synchronize_full(); md_run_setup(); From 751138d3d861017f17a0be51fd29bc50b48d72c4 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Thu, 2 Aug 2018 12:03:22 -0700 Subject: [PATCH 07/22] migrate some systemd defaults to the kernel defaults. These settings are needed to prevent networking issues when the networking modules come up with their built-in defaults and no explicit settings, which breaks some cases. We don't want modprobe settings that have to be read at boot time if we're never going to do anything further with them. 
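To see why the numdummies default matters, here is a simplified sketch of the module init in drivers/net/dummy.c: with numdummies = 0, merely loading the module no longer auto-creates a dummy0 device, while explicit configuration (numdummies=N on the module command line, or ip link add) still works:

static int __init dummy_init_module(void)
{
	int i, err = 0;

	for (i = 0; i < numdummies && !err; i++) {
		err = dummy_init_one();	/* registers one "dummy%d" netdev */
		cond_resched();
	}
	return err;
}
module_param(numdummies, int, 0);
MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");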
Signed-off-by: Kai Krakow --- drivers/net/dummy.c | 2 +- include/uapi/linux/if_bonding.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index d6bdad4baadd80..6171a4ab1b512f 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -44,7 +44,7 @@ #define DRV_NAME "dummy" -static int numdummies = 1; +static int numdummies = 0; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index 3bcc03f3aa4f07..43f794329ee599 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -82,7 +82,7 @@ #define BOND_STATE_ACTIVE 0 /* link is active */ #define BOND_STATE_BACKUP 1 /* link is backup */ -#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ +#define BOND_DEFAULT_MAX_BONDS 0 /* Default maximum number of devices to support */ #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ From 758d0c9d3a347c26bf4f86bffaa7eacb5056d21e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 13 Dec 2018 01:00:49 +0000 Subject: [PATCH 08/22] do accept() in LIFO order for cache efficiency Signed-off-by: Kai Krakow --- include/linux/wait.h | 2 ++ kernel/sched/wait.c | 24 ++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index f648044466d5f5..61c333708cbacf 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); @@ -1209,6 +1210,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 20f27e2cf7aec6..9ddd02e7551ccf 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -66,6 +66,17 @@ int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head, } EXPORT_SYMBOL_GPL(add_wait_queue_priority_exclusive); +void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, 
flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -277,6 +288,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent } EXPORT_SYMBOL(prepare_to_wait_exclusive); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + if (list_empty(&wq_entry->entry)) + __add_wait_queue(wq_head, wq_entry); + set_current_state(state); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { wq_entry->flags = flags; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index cdd1e12aac8c03..b72336d324451f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -633,7 +633,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) From 2313aba395e7ebbff53db95625bf91ed9b3c6c35 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 18 Feb 2018 23:35:41 +0000 Subject: [PATCH 09/22] locking: rwsem: spin faster Tweak rwsem owner spinning a bit: spin hard for the first ~1000 iterations before starting to cpu_relax(). Signed-off-by: Kai Krakow --- kernel/locking/rwsem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 24df4d98f7d200..1d5923996fa5e2 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -746,6 +746,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; lockdep_assert_preemption_disabled(); @@ -782,7 +783,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } return state; From 7c720d56b0841814b894d5ed01bb1a2aa2b9925e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 27 Sep 2021 17:43:01 +0000 Subject: [PATCH 10/22] lib/raid6: stop the recovery algorithm scan at the first valid candidate Signed-off-by: Kai Krakow --- lib/raid6/algos.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 799e0e5eac26db..7205b666ea3fa6 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -138,8 +138,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) for (best = NULL, algo = raid6_recov_algos; *algo; algo++) if (!best || (*algo)->priority > best->priority) - if (!(*algo)->valid || (*algo)->valid()) + if (!(*algo)->valid || (*algo)->valid()) { best = *algo; + break; + } if (best) { raid6_2data_recov = best->data2; From f36cb5cabfbe336290fde6073fd52ccd6db4cf4e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 16 Nov 2021 17:39:25 +0000 Subject: [PATCH 11/22] itmt_epb: use epb to scale itmt Scale the ITMT core priority by a power ratio derived from the Energy Performance Bias setting. Signed-off-by: Kai Krakow --- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/intel_epb.c | 4 ++++ arch/x86/kernel/itmt.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 21041898157a1f..971cb4bbf32362 100644 --- 
a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -265,6 +265,7 @@ extern bool __read_mostly sysctl_sched_itmt_enabled; /* Interface to set priority of a cpu */ void sched_set_itmt_core_prio(int prio, int core_cpu); +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu); /* Interface to notify scheduler that system supports ITMT */ int sched_set_itmt_support(void); diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index bc7671f920a7ed..c69afdb06d2ccc 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev, if (ret < 0) return ret; + /* update the ITMT scheduler logic to use the power policy data */ + /* scale the val up by 2 so the range is 224 - 256 */ + sched_set_itmt_power_ratio(256 - val * 2, cpu); + return count; } diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 243a769fdd97b9..3d4bce75bbc434 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -26,6 +26,7 @@ static DEFINE_MUTEX(itmt_update_mutex); DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); +DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio); /* Boolean to track if system has ITMT capabilities */ static bool __read_mostly sched_itmt_capable; @@ -167,7 +168,12 @@ void sched_clear_itmt_support(void) int arch_asym_cpu_priority(int cpu) { - return per_cpu(sched_core_priority, cpu); + int power_ratio = per_cpu(sched_power_ratio, cpu); + + /* a power ratio of 0 (uninitialized) is assumed to be maximum */ + if (power_ratio == 0) + power_ratio = 256 - 2 * 6; + return per_cpu(sched_core_priority, cpu) * power_ratio / 256; } /** @@ -188,3 +194,24 @@ void sched_set_itmt_core_prio(int prio, int cpu) { per_cpu(sched_core_priority, cpu) = prio; } + +/** + * sched_set_itmt_power_ratio() - Set CPU priority based on ITMT + * @power_ratio: The power scaling ratio [1..256] for the core + * @core_cpu: The cpu number associated with the core + * + * Set a scaling to the cpu performance based on long term power + * settings (like EPB). + * + * Note this is for the policy not for the actual dynamic frequency; + * the frequency will increase itself as workloads run on a core. + */ + +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu) +{ + int cpu; + + for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { + per_cpu(sched_power_ratio, cpu) = power_ratio; + } +} From f36cb5cabfbe336290fde6073fd52ccd6db4cf4e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 18 Nov 2021 16:09:47 +0000 Subject: [PATCH 12/22] itmt2 ADL fixes On systems with overclocking enabled, CPPC Highest Performance can be hard coded to 0xff. In this case even if we have cores with different highest performance, ITMT can't be enabled as the current implementation depends on CPPC Highest Performance. On such systems we can use MSR_HWP_CAPABILITIES maximum performance field when CPPC.Highest Performance is 0xff. Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as in some older systems CPPC Highest Performance is the only way to identify different performing cores. 
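For clarity, the fallback decode used below is a plain byte extract; the relevant definitions (as in arch/x86/include/asm/msr-index.h) are:

#define MSR_HWP_CAPABILITIES	0x00000771
#define HWP_HIGHEST_PERF(x)	(((x) >> 0) & 0xff)	/* the low byte of the MSR */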
Signed-off-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 492a10f1bdbfa3..1cdc189c79ca1f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -382,6 +382,13 @@ static void intel_pstate_set_itmt_prio(int cpu) * update them at any time after it has been called. */ sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu); + /* + * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. + * In this case we can't use CPPC.highest_perf to enable ITMT. + * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. + */ + if (cppc_perf.highest_perf == 0xff) + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); if (max_highest_perf <= min_highest_perf) { if (cppc_perf.highest_perf > max_highest_perf) From b4e832bed39994ca390497d6d42ec821424d2963 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Apr 2023 17:01:35 +0100 Subject: [PATCH 13/22] md/raid6 algorithms: scale test duration for speedier boots Instead of using jiffies and waiting for jiffies to wrap before measuring, use the higher-precision local_clock() for benchmarking. Measure 2500 loops, which works out to be accurate enough for benchmarking the RAID algorithm data rates. Also add division-by-zero checking in case timing measurements are bogus. Speeds up raid benchmarking from 48,000 usecs to 4000 usecs, saving 0.044 seconds on boot. 
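To sanity-check the reporting formula used below: MB/s is bytes / 2^20 per second, i.e. bytes * (10^9 / 2^20) / ns, which is where ns_per_mb = 1000000000 >> 20 (= 953) comes from. A worked example with hypothetical numbers:

/* n  = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2)
 *    = 2500 * 4096 * 953 * 14		~= 1.37e11
 * ns = 20,000,000 (2500 syndrome passes in 20 ms)
 * n / ns ~= 6830 MB/s reported
 */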
Signed-off-by: Colin Ian King --- lib/raid6/algos.c | 52 +++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 7205b666ea3fa6..cb0f0ad85ea243 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,6 +18,7 @@ #else #include <linux/module.h> #include <linux/gfp.h> +#include <linux/sched/clock.h> #endif struct raid6_calls raid6_call; @@ -157,12 +158,15 @@ static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { - unsigned long perf, bestgenperf, j0, j1; + unsigned long perf; + const unsigned long max_perf = 2500; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; + const u64 ns_per_mb = 1000000000 >> 20; + u64 n, ns, t, ns_best = ~0ULL; - for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->priority >= best->priority) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -172,26 +176,20 @@ static inline const struct raid6_calls *raid6_choose_gen( break; } - perf = 0; - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + preempt_disable(); + t = local_clock(); + for (perf = 0; perf < max_perf; perf++) { (*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs); - perf++; } + ns = local_clock() - t; preempt_enable(); - if (perf > bestgenperf) { - bestgenperf = perf; + if (ns < ns_best) { + ns_best = ns; best = *algo; } - pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, - (perf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); + pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns); } } @@ -208,31 +206,23 @@ static inline const struct raid6_calls *raid6_choose_gen( goto out; } - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", - best->name, - (bestgenperf * HZ * (disks - 2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); + pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n", + best->name, (ns_best > 0) ? n / ns_best : 0, ns_best); if (best->xor_syndrome) { - perf = 0; - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { + t = local_clock(); + for (perf = 0; perf < max_perf; perf++) { best->xor_syndrome(disks, start, stop, PAGE_SIZE, *dptrs); - perf++; } + ns = local_clock() - t; preempt_enable(); - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", - (perf * HZ * (disks - 2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); + pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? n / ns : 0, ns); } - out: return best; } From e22161ce29f4ca7e409288160b3697b6cb8c7ec0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Jan 2023 11:16:42 +0000 Subject: [PATCH 14/22] initcall: only print non-zero initcall debug to speed up boot Printing timings for initcalls that successfully return after 0 usecs provides little useful information and takes a small amount of time to do so. Disable the initcall timing message for these specific cases. On an Alder Lake i9-12900 this reduces kernel boot time by 0.67% (timed up to the invocation of systemd starting) based on 10 boot measurements. Signed-off-by: Colin Ian King --- init/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 07a3116811c5d7..c8e07ac605e904 100644 --- a/init/main.c +++ b/init/main.c @@ -1220,10 +1220,13 @@ static __init_or_module void trace_initcall_finish_cb(void *data, initcall_t fn, int ret) { ktime_t rettime, *calltime = data; + long long delta; rettime = ktime_get(); - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); + delta = ktime_us_delta(rettime, *calltime); + if (ret || delta) + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } static __init_or_module void From db172545576f19116ff8b2fdf121f5ac3537f975 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Sun, 14 Dec 2025 15:26:02 +0100 Subject: [PATCH 15/22] Place libraries right below the binary for PIE binaries Author: Intel ClearLinux Place libraries right below the binary for PIE binaries; this helps code locality (and thus performance). 
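An illustrative userspace check (a hypothetical test program, not part of the patch): on a kernel with this change, a PIE binary's code and its shared libraries map close together, which can be observed by comparing a program symbol with a libc symbol:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* with libraries placed right below the binary, these two
	 * addresses should be within a few dozen MiB of each other */
	printf("main  : %p\n", (void *)&main);
	printf("memcpy: %p\n", (void *)&memcpy);
	return 0;
}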
Signed-off-by: Kai Krakow --- fs/binfmt_elf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e4653bb99946b1..fc3093dad1924c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1301,6 +1301,8 @@ static int load_elf_binary(struct linux_binprm *bprm) mm = current->mm; mm->end_code = end_code; mm->start_code = start_code; + if (start_code >= ELF_ET_DYN_BASE) + mm->mmap_base = start_code; mm->start_data = start_data; mm->end_data = end_data; mm->start_stack = bprm->p; From 52781eae857078b32c186e1a75eaebb842e13e9e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Oct 2024 16:29:50 +0100 Subject: [PATCH 16/22] handle sched_yield gracefully when being hammered Some misguided apps hammer sched_yield() in a tight loop (they should be using futexes instead), which causes massive lock contention even if there is little work to do or to yield to. Rate-limit yielding, since the base scheduler already does a pretty good job of running the right things. 
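The userspace anti-pattern being defended against looks roughly like this (illustrative snippet; a futex or condition variable would block instead of burning CPU time and runqueue locks):

#include <sched.h>
#include <stdatomic.h>

static atomic_int ready;

static void spin_wait(void)
{
	while (!atomic_load(&ready))
		sched_yield();	/* syscall + runqueue lock on every iteration */
}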
Signed-off-by: Colin Ian King --- kernel/sched/syscalls.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 77ae87f36e8412..caa7f9629160d8 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -1343,10 +1343,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, return ret; } +static DEFINE_PER_CPU(unsigned long, last_yield); + static void do_sched_yield(void) { struct rq_flags rf; struct rq *rq; + int cpu = raw_smp_processor_id(); + + cond_resched(); + + /* rate limit yielding to something sensible */ + + if (!time_after(jiffies, per_cpu(last_yield, cpu))) + return; + + per_cpu(last_yield, cpu) = jiffies; rq = this_rq_lock_irq(&rf); From a8f18d36aaeb34ce0ed23d8101beb555bcf68999 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Sun, 14 Dec 2025 15:26:04 +0100 Subject: [PATCH 17/22] scale net alloc Author: Intel ClearLinux Batch socket memory reclaim in sk_mem_uncharge(): only reclaim forward-allocated memory beyond a 64 KiB threshold instead of reclaiming on every uncharge. Signed-off-by: Kai Krakow --- include/net/sock.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 60bcb13f045c31..9f290ba104635c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1611,10 +1611,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) static inline void sk_mem_uncharge(struct sock *sk, int size) { + int reclaimable, reclaim_threshold; + + reclaim_threshold = 64 * 1024; if (!sk_has_account(sk)) return; sk_forward_alloc_add(sk, size); - sk_mem_reclaim(sk); + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + if (reclaimable > reclaim_threshold) { + reclaimable -= reclaim_threshold; + __sk_mem_reclaim(sk, reclaimable); + } } #if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) From af6435dddfa7d3ff725681b95ec4e484e8a9aedd Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Sun, 14 Dec 2025 15:26:04 +0100 Subject: [PATCH 18/22] better idle balance Author: Intel ClearLinux Bail out of newidle balancing sooner: only keep balancing while the estimated balancing cost stays below half of the runqueue's average idle time. Signed-off-by: Kai Krakow --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5b752324270b08..09d909c8b2ecc1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12847,7 +12847,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) update_next_balance(sd, &next_balance); - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) + if (this_rq->avg_idle/2 < curr_cost + sd->max_newidle_lb_cost) break; if (sd->flags & SD_BALANCE_NEWIDLE) { From a01ae52d2ce4aa2260823bc1bce075ba577499b6 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Sun, 14 Dec 2025 15:27:08 +0100 Subject: [PATCH 19/22] timer slack Author: Intel ClearLinux Reduce the default task timer slack from 50 usec to 50 nsec for more precise timer expiry. Signed-off-by: Kai Krakow --- init/init_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/init_task.c b/init/init_task.c index a55e2189206fa4..be4ced220eb747 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -143,7 +143,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { .journal_info = NULL, INIT_CPU_TIMERS(init_task) .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), - .timer_slack_ns = 50000, /* 50 usec default slack */ + .timer_slack_ns = 50, /* 50 nsec default slack */ .thread_pid = &init_struct_pid, .thread_node = LIST_HEAD_INIT(init_signals.thread_head), #ifdef CONFIG_AUDIT From db1d2e3a1d4fc5f71e78df29f6b3d791575b8f37 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2023 13:29:45 +0000 Subject: [PATCH 20/22] sched/fair: remove upper limit on cpu number Signed-off-by: Colin Ian King --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 09d909c8b2ecc1..df39ba1906003e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -191,7 +191,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) */ static unsigned int get_update_sysctl_factor(void) { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); + unsigned int cpus = num_online_cpus(); unsigned int factor; switch (sysctl_sched_tunable_scaling) { From 9d60df44241969164ea31a8ac110bed2f33428fd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Apr 2024 16:45:47 +0100 Subject: [PATCH 21/22] net: sock: increase default number of _SK_MEM_PACKETS to 1024 Scale these by a factor of 4 to improve socket performance. Signed-off-by: Colin Ian King --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 9f290ba104635c..7fdd69f40151df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -3006,7 +3006,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); * platforms. This makes socket queueing behavior and performance * not depend upon such differences. */ -#define _SK_MEM_PACKETS 256 +#define _SK_MEM_PACKETS 1024 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) From 9ee58f98a9eea965b5788152be4e098a964c115f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 May 2025 15:12:58 +0100 Subject: [PATCH 22/22] readdir: add unlikely hint on len check Currently the out of bounds check on the length is very unlikely to be true for valid name strings. Analysis with gcov coverage shows this to be so. Add an unlikely hint on the error return path check. This improves performance when testing with single-instance stress-ng dentry and dirent stressors. Tested with a 6.15 kernel built with gcc 14.2.0 on a Debian system with an Intel Core Ultra 9 285K, with turbo disabled to reduce test jitter, on tmpfs. Each test case was run 25 times and the % standard deviation was less than 0.4%. The geometric mean of the 25 results shows the following stress-ng bogo-ops performance improvements: getdent: 1.1%, dentry: 0.9%. 
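For reference, unlikely() is the kernel's branch-layout annotation from include/linux/compiler.h; it asks the compiler to treat the condition as cold so the common path falls through:

#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)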
Signed-off-by: Colin Ian King --- fs/readdir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/readdir.c b/fs/readdir.c index 7764b863897888..c501155ed99a31 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -147,7 +147,7 @@ EXPORT_SYMBOL(iterate_dir); */ static int verify_dirent_name(const char *name, int len) { - if (len <= 0 || len >= PATH_MAX) + if (unlikely(len <= 0 || len >= PATH_MAX)) return -EIO; if (memchr(name, '/', len)) return -EIO;