From af300b7aa1a07ef2d61b43b527cb187912ce2716 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 10:50:45 -0600 Subject: [PATCH 01/48] winesync: Introduce the winesync driver and character device. Signed-off-by: Kai Krakow --- drivers/misc/Kconfig | 11 +++++++ drivers/misc/Makefile | 1 + drivers/misc/winesync.c | 64 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 drivers/misc/winesync.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 0cef98319f0e5f..c8a776455a995d 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -497,6 +497,17 @@ config VCPU_STALL_DETECTOR If you do not intend to run this kernel as a guest, say N. +config WINESYNC + tristate "Synchronization primitives for Wine" + help + This module provides kernel support for synchronization primitives + used by Wine. It is not a hardware driver. + + To compile this driver as a module, choose M here: the + module will be called winesync. + + If unsure, say N. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index ac9b3e757ba1df..5b82622a001733 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_HABANA_AI) += habanalabs/ obj-$(CONFIG_UACCE) += uacce/ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o +obj-$(CONFIG_WINESYNC) += winesync.o obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o obj-$(CONFIG_OPEN_DICE) += open-dice.o obj-$(CONFIG_GP_PCI1XXXX) += mchp_pci1xxxx/ diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c new file mode 100644 index 00000000000000..111f33c5676e6c --- /dev/null +++ b/drivers/misc/winesync.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * winesync.c - Kernel driver for Wine synchronization primitives + * + * Copyright (C) 2021 Zebediah Figura + */ + +#include +#include +#include + +#define WINESYNC_NAME "winesync" + +static int winesync_char_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + +static int winesync_char_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static long winesync_char_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + switch (cmd) { + default: + return -ENOSYS; + } +} + +static const struct file_operations winesync_fops = { + .owner = THIS_MODULE, + .open = winesync_char_open, + .release = winesync_char_release, + .unlocked_ioctl = winesync_char_ioctl, + .compat_ioctl = winesync_char_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice winesync_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = WINESYNC_NAME, + .fops = &winesync_fops, +}; + +static int __init winesync_init(void) +{ + return misc_register(&winesync_misc); +} + +static void __exit winesync_exit(void) +{ + misc_deregister(&winesync_misc); +} + +module_init(winesync_init); +module_exit(winesync_exit); + +MODULE_AUTHOR("Zebediah Figura"); +MODULE_DESCRIPTION("Kernel driver for Wine synchronization primitives"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("devname:" WINESYNC_NAME); From ab85c28968b289a03fc8d44f8652fcab8bfe2d24 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 10:57:06 -0600 Subject: [PATCH 02/48] winesync: Reserve a minor device number and ioctl range. Signed-off-by: Kai Krakow --- Documentation/admin-guide/devices.txt | 3 ++- Documentation/userspace-api/ioctl/ioctl-number.rst | 2 ++ drivers/misc/winesync.c | 3 ++- include/linux/miscdevice.h | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt index 9764d6edb18928..a4696d3b4a5a0d 100644 --- a/Documentation/admin-guide/devices.txt +++ b/Documentation/admin-guide/devices.txt @@ -376,8 +376,9 @@ 240 = /dev/userio Serio driver testing device 241 = /dev/vhost-vsock Host kernel driver for virtio vsock 242 = /dev/rfkill Turning off radio transmissions (rfkill) + 243 = /dev/winesync Wine synchronization primitive device - 243-254 Reserved for local use + 244-254 Reserved for local use 255 Reserved for MISC_DYNAMIC_MINOR 11 char Raw keyboard device (Linux/SPARC only) diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 5f81e2a24a5c04..cc9d57b7fd8636 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -374,6 +374,8 @@ Code Seq# Include File Comments 0xF6 all LTTng Linux Trace Toolkit Next Generation +0xF7 00-0F uapi/linux/winesync.h Wine synchronization primitives + 0xF8 all arch/x86/include/uapi/asm/amd_hsmp.h AMD HSMP EPYC system management interface driver 0xFD all linux/dm-ioctl.h diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 111f33c5676e6c..85cb6ccaa077db 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -40,7 +40,7 @@ static const struct file_operations winesync_fops = { }; static struct miscdevice winesync_misc = { - .minor = MISC_DYNAMIC_MINOR, + .minor = WINESYNC_MINOR, .name = WINESYNC_NAME, .fops = &winesync_fops, }; @@ -62,3 +62,4 @@ MODULE_AUTHOR("Zebediah Figura"); MODULE_DESCRIPTION("Kernel driver for Wine synchronization primitives"); MODULE_LICENSE("GPL"); MODULE_ALIAS("devname:" WINESYNC_NAME); +MODULE_ALIAS_MISCDEV(WINESYNC_MINOR); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index c0fea6ca507681..36fc5d5315a414 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -71,6 +71,7 @@ #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 #define RFKILL_MINOR 242 +#define WINESYNC_MINOR 243 #define MISC_DYNAMIC_MINOR 255 struct device; From da8c53f8b2384ab5e9b5bc92c7c526aadbd6a8e5 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:15:39 -0600 Subject: [PATCH 03/48] winesync: Introduce WINESYNC_IOC_CREATE_SEM and WINESYNC_IOC_DELETE. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 117 ++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 25 ++++++++ 2 files changed, 142 insertions(+) create mode 100644 include/uapi/linux/winesync.h diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 85cb6ccaa077db..36e31bbe039037 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -8,23 +8,140 @@ #include #include #include +#include +#include +#include #define WINESYNC_NAME "winesync" +enum winesync_type { + WINESYNC_TYPE_SEM, +}; + +struct winesync_obj { + struct rcu_head rhead; + struct kref refcount; + + enum winesync_type type; + + union { + struct { + __u32 count; + __u32 max; + } sem; + } u; +}; + +struct winesync_device { + struct xarray objects; +}; + +static void destroy_obj(struct kref *ref) +{ + struct winesync_obj *obj = container_of(ref, struct winesync_obj, refcount); + + kfree_rcu(obj, rhead); +} + +static void put_obj(struct winesync_obj *obj) +{ + kref_put(&obj->refcount, destroy_obj); +} + static int winesync_char_open(struct inode *inode, struct file *file) { + struct winesync_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + xa_init_flags(&dev->objects, XA_FLAGS_ALLOC); + + file->private_data = dev; return nonseekable_open(inode, file); } static int winesync_char_release(struct inode *inode, struct file *file) { + struct winesync_device *dev = file->private_data; + struct winesync_obj *obj; + unsigned long id; + + xa_for_each(&dev->objects, id, obj) + put_obj(obj); + + xa_destroy(&dev->objects); + + kfree(dev); + + return 0; +} + +static void init_obj(struct winesync_obj *obj) +{ + kref_init(&obj->refcount); +} + +static int winesync_create_sem(struct winesync_device *dev, void __user *argp) +{ + struct winesync_sem_args __user *user_args = argp; + struct winesync_sem_args args; + struct winesync_obj *sem; + __u32 id; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + if (args.count > args.max) + return -EINVAL; + + sem = kzalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return -ENOMEM; + + init_obj(sem); + sem->type = WINESYNC_TYPE_SEM; + sem->u.sem.count = args.count; + sem->u.sem.max = args.max; + + ret = xa_alloc(&dev->objects, &id, sem, xa_limit_32b, GFP_KERNEL); + if (ret < 0) { + kfree(sem); + return ret; + } + + return put_user(id, &user_args->sem); +} + +static int winesync_delete(struct winesync_device *dev, void __user *argp) +{ + struct winesync_obj *obj; + __u32 id; + + if (get_user(id, (__u32 __user *)argp)) + return -EFAULT; + + obj = xa_erase(&dev->objects, id); + if (!obj) + return -EINVAL; + + put_obj(obj); return 0; } static long winesync_char_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { + struct winesync_device *dev = file->private_data; + void __user *argp = (void __user *)parm; + switch (cmd) { + case WINESYNC_IOC_CREATE_SEM: + return winesync_create_sem(dev, argp); + case WINESYNC_IOC_DELETE: + return winesync_delete(dev, argp); default: return -ENOSYS; } diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h new file mode 100644 index 00000000000000..aabb491f39d2dc --- /dev/null +++ b/include/uapi/linux/winesync.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Kernel support for Wine synchronization primitives + * + * Copyright (C) 2021 Zebediah Figura + */ + +#ifndef __LINUX_WINESYNC_H +#define __LINUX_WINESYNC_H + +#include + +struct winesync_sem_args { + __u32 sem; + __u32 count; + __u32 max; +}; + +#define WINESYNC_IOC_BASE 0xf7 + +#define WINESYNC_IOC_CREATE_SEM _IOWR(WINESYNC_IOC_BASE, 0, \ + struct winesync_sem_args) +#define WINESYNC_IOC_DELETE _IOW (WINESYNC_IOC_BASE, 1, __u32) + +#endif From 6089e295c6c90e70218986f50e99176b4ef007c5 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:22:42 -0600 Subject: [PATCH 04/48] winesync: Introduce WINESYNC_IOC_PUT_SEM. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 76 +++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 + 2 files changed, 78 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 36e31bbe039037..84b5a5c9e0ce72 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -21,9 +21,11 @@ enum winesync_type { struct winesync_obj { struct rcu_head rhead; struct kref refcount; + spinlock_t lock; enum winesync_type type; + /* The following fields are protected by the object lock. */ union { struct { __u32 count; @@ -36,6 +38,19 @@ struct winesync_device { struct xarray objects; }; +static struct winesync_obj *get_obj(struct winesync_device *dev, __u32 id) +{ + struct winesync_obj *obj; + + rcu_read_lock(); + obj = xa_load(&dev->objects, id); + if (obj && !kref_get_unless_zero(&obj->refcount)) + obj = NULL; + rcu_read_unlock(); + + return obj; +} + static void destroy_obj(struct kref *ref) { struct winesync_obj *obj = container_of(ref, struct winesync_obj, refcount); @@ -48,6 +63,18 @@ static void put_obj(struct winesync_obj *obj) kref_put(&obj->refcount, destroy_obj); } +static struct winesync_obj *get_obj_typed(struct winesync_device *dev, __u32 id, + enum winesync_type type) +{ + struct winesync_obj *obj = get_obj(dev, id); + + if (obj && obj->type != type) { + put_obj(obj); + return NULL; + } + return obj; +} + static int winesync_char_open(struct inode *inode, struct file *file) { struct winesync_device *dev; @@ -81,6 +108,7 @@ static int winesync_char_release(struct inode *inode, struct file *file) static void init_obj(struct winesync_obj *obj) { kref_init(&obj->refcount); + spin_lock_init(&obj->lock); } static int winesync_create_sem(struct winesync_device *dev, void __user *argp) @@ -131,6 +159,52 @@ static int winesync_delete(struct winesync_device *dev, void __user *argp) return 0; } +/* + * Actually change the semaphore state, returning -EOVERFLOW if it is made + * invalid. + */ +static int put_sem_state(struct winesync_obj *sem, __u32 count) +{ + lockdep_assert_held(&sem->lock); + + if (sem->u.sem.count + count < sem->u.sem.count || + sem->u.sem.count + count > sem->u.sem.max) + return -EOVERFLOW; + + sem->u.sem.count += count; + return 0; +} + +static int winesync_put_sem(struct winesync_device *dev, void __user *argp) +{ + struct winesync_sem_args __user *user_args = argp; + struct winesync_sem_args args; + struct winesync_obj *sem; + __u32 prev_count; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + sem = get_obj_typed(dev, args.sem, WINESYNC_TYPE_SEM); + if (!sem) + return -EINVAL; + + spin_lock(&sem->lock); + + prev_count = sem->u.sem.count; + ret = put_sem_state(sem, args.count); + + spin_unlock(&sem->lock); + + put_obj(sem); + + if (!ret && put_user(prev_count, &user_args->count)) + ret = -EFAULT; + + return ret; +} + static long winesync_char_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { @@ -142,6 +216,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_create_sem(dev, argp); case WINESYNC_IOC_DELETE: return winesync_delete(dev, argp); + case WINESYNC_IOC_PUT_SEM: + return winesync_put_sem(dev, argp); default: return -ENOSYS; } diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index aabb491f39d2dc..7681a168eb92ec 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -21,5 +21,7 @@ struct winesync_sem_args { #define WINESYNC_IOC_CREATE_SEM _IOWR(WINESYNC_IOC_BASE, 0, \ struct winesync_sem_args) #define WINESYNC_IOC_DELETE _IOW (WINESYNC_IOC_BASE, 1, __u32) +#define WINESYNC_IOC_PUT_SEM _IOWR(WINESYNC_IOC_BASE, 2, \ + struct winesync_sem_args) #endif From 90274ee2585777906c2eb6655e64ddde41a7a31a Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:31:44 -0600 Subject: [PATCH 05/48] winesync: Introduce WINESYNC_IOC_WAIT_ANY. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 226 ++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 11 ++ 2 files changed, 237 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 84b5a5c9e0ce72..d9b5ab159520df 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -23,6 +23,8 @@ struct winesync_obj { struct kref refcount; spinlock_t lock; + struct list_head any_waiters; + enum winesync_type type; /* The following fields are protected by the object lock. */ @@ -34,6 +36,28 @@ struct winesync_obj { } u; }; +struct winesync_q_entry { + struct list_head node; + struct winesync_q *q; + struct winesync_obj *obj; + __u32 index; +}; + +struct winesync_q { + struct task_struct *task; + __u32 owner; + + /* + * Protected via atomic_cmpxchg(). Only the thread that wins the + * compare-and-swap may actually change object states and wake this + * task. + */ + atomic_t signaled; + + __u32 count; + struct winesync_q_entry entries[]; +}; + struct winesync_device { struct xarray objects; }; @@ -109,6 +133,26 @@ static void init_obj(struct winesync_obj *obj) { kref_init(&obj->refcount); spin_lock_init(&obj->lock); + INIT_LIST_HEAD(&obj->any_waiters); +} + +static void try_wake_any_sem(struct winesync_obj *sem) +{ + struct winesync_q_entry *entry; + + lockdep_assert_held(&sem->lock); + + list_for_each_entry(entry, &sem->any_waiters, node) { + struct winesync_q *q = entry->q; + + if (!sem->u.sem.count) + break; + + if (atomic_cmpxchg(&q->signaled, -1, entry->index) == -1) { + sem->u.sem.count--; + wake_up_process(q->task); + } + } } static int winesync_create_sem(struct winesync_device *dev, void __user *argp) @@ -194,6 +238,8 @@ static int winesync_put_sem(struct winesync_device *dev, void __user *argp) prev_count = sem->u.sem.count; ret = put_sem_state(sem, args.count); + if (!ret) + try_wake_any_sem(sem); spin_unlock(&sem->lock); @@ -205,6 +251,184 @@ static int winesync_put_sem(struct winesync_device *dev, void __user *argp) return ret; } +static int winesync_schedule(const struct winesync_q *q, ktime_t *timeout) +{ + int ret = 0; + + do { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + set_current_state(TASK_INTERRUPTIBLE); + if (atomic_read(&q->signaled) != -1) { + ret = 0; + break; + } + ret = schedule_hrtimeout(timeout, HRTIMER_MODE_ABS); + } while (ret < 0); + __set_current_state(TASK_RUNNING); + + return ret; +} + +/* + * Allocate and initialize the winesync_q structure, but do not queue us yet. + * Also, calculate the relative timeout. + */ +static int setup_wait(struct winesync_device *dev, + const struct winesync_wait_args *args, + ktime_t *ret_timeout, struct winesync_q **ret_q) +{ + const __u32 count = args->count; + struct winesync_q *q; + ktime_t timeout = 0; + __u32 *ids; + __u32 i, j; + + if (!args->owner || args->pad) + return -EINVAL; + + if (args->timeout) { + struct timespec64 to; + + if (get_timespec64(&to, u64_to_user_ptr(args->timeout))) + return -EFAULT; + if (!timespec64_valid(&to)) + return -EINVAL; + + timeout = timespec64_to_ns(&to); + } + + ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); + if (!ids) + return -ENOMEM; + if (copy_from_user(ids, u64_to_user_ptr(args->objs), + array_size(count, sizeof(*ids)))) { + kfree(ids); + return -EFAULT; + } + + q = kmalloc(struct_size(q, entries, count), GFP_KERNEL); + if (!q) { + kfree(ids); + return -ENOMEM; + } + q->task = current; + q->owner = args->owner; + atomic_set(&q->signaled, -1); + q->count = count; + + for (i = 0; i < count; i++) { + struct winesync_q_entry *entry = &q->entries[i]; + struct winesync_obj *obj = get_obj(dev, ids[i]); + + if (!obj) + goto err; + + entry->obj = obj; + entry->q = q; + entry->index = i; + } + + kfree(ids); + + *ret_q = q; + *ret_timeout = timeout; + return 0; + +err: + for (j = 0; j < i; j++) + put_obj(q->entries[j].obj); + kfree(ids); + kfree(q); + return -EINVAL; +} + +static void try_wake_any_obj(struct winesync_obj *obj) +{ + switch (obj->type) { + case WINESYNC_TYPE_SEM: + try_wake_any_sem(obj); + break; + } +} + +static int winesync_wait_any(struct winesync_device *dev, void __user *argp) +{ + struct winesync_wait_args args; + struct winesync_q *q; + ktime_t timeout; + int signaled; + __u32 i; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + ret = setup_wait(dev, &args, &timeout, &q); + if (ret < 0) + return ret; + + /* queue ourselves */ + + for (i = 0; i < args.count; i++) { + struct winesync_q_entry *entry = &q->entries[i]; + struct winesync_obj *obj = entry->obj; + + spin_lock(&obj->lock); + list_add_tail(&entry->node, &obj->any_waiters); + spin_unlock(&obj->lock); + } + + /* check if we are already signaled */ + + for (i = 0; i < args.count; i++) { + struct winesync_obj *obj = q->entries[i].obj; + + if (atomic_read(&q->signaled) != -1) + break; + + spin_lock(&obj->lock); + try_wake_any_obj(obj); + spin_unlock(&obj->lock); + } + + /* sleep */ + + ret = winesync_schedule(q, args.timeout ? &timeout : NULL); + + /* and finally, unqueue */ + + for (i = 0; i < args.count; i++) { + struct winesync_q_entry *entry = &q->entries[i]; + struct winesync_obj *obj = entry->obj; + + spin_lock(&obj->lock); + list_del(&entry->node); + spin_unlock(&obj->lock); + + put_obj(obj); + } + + signaled = atomic_read(&q->signaled); + if (signaled != -1) { + struct winesync_wait_args __user *user_args = argp; + + /* even if we caught a signal, we need to communicate success */ + ret = 0; + + if (put_user(signaled, &user_args->index)) + ret = -EFAULT; + } else if (!ret) { + ret = -ETIMEDOUT; + } + + kfree(q); + return ret; +} + static long winesync_char_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { @@ -218,6 +442,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_delete(dev, argp); case WINESYNC_IOC_PUT_SEM: return winesync_put_sem(dev, argp); + case WINESYNC_IOC_WAIT_ANY: + return winesync_wait_any(dev, argp); default: return -ENOSYS; } diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 7681a168eb92ec..f57ebfbe1dd928 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -16,6 +16,15 @@ struct winesync_sem_args { __u32 max; }; +struct winesync_wait_args { + __u64 timeout; + __u64 objs; + __u32 count; + __u32 owner; + __u32 index; + __u32 pad; +}; + #define WINESYNC_IOC_BASE 0xf7 #define WINESYNC_IOC_CREATE_SEM _IOWR(WINESYNC_IOC_BASE, 0, \ @@ -23,5 +32,7 @@ struct winesync_sem_args { #define WINESYNC_IOC_DELETE _IOW (WINESYNC_IOC_BASE, 1, __u32) #define WINESYNC_IOC_PUT_SEM _IOWR(WINESYNC_IOC_BASE, 2, \ struct winesync_sem_args) +#define WINESYNC_IOC_WAIT_ANY _IOWR(WINESYNC_IOC_BASE, 3, \ + struct winesync_wait_args) #endif From 75a6a4ed805acd94e69b98d1470d68d5254ed9b1 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:36:09 -0600 Subject: [PATCH 06/48] winesync: Introduce WINESYNC_IOC_WAIT_ALL. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 242 ++++++++++++++++++++++++++++++++-- include/uapi/linux/winesync.h | 2 + 2 files changed, 236 insertions(+), 8 deletions(-) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index d9b5ab159520df..2b708c5b88a678 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -23,7 +23,34 @@ struct winesync_obj { struct kref refcount; spinlock_t lock; + /* + * any_waiters is protected by the object lock, but all_waiters is + * protected by the device wait_all_lock. + */ struct list_head any_waiters; + struct list_head all_waiters; + + /* + * Hint describing how many tasks are queued on this object in a + * wait-all operation. + * + * Any time we do a wake, we may need to wake "all" waiters as well as + * "any" waiters. In order to atomically wake "all" waiters, we must + * lock all of the objects, and that means grabbing the wait_all_lock + * below (and, due to lock ordering rules, before locking this object). + * However, wait-all is a rare operation, and grabbing the wait-all + * lock for every wake would create unnecessary contention. Therefore we + * first check whether all_hint is zero, and, if it is, we skip trying + * to wake "all" waiters. + * + * This hint isn't protected by any lock. It might change during the + * course of a wake, but there's no meaningful race there; it's only a + * hint. + * + * Since wait requests must originate from user-space threads, we're + * limited here by PID_MAX_LIMIT, so there's no risk of saturation. + */ + atomic_t all_hint; enum winesync_type type; @@ -54,11 +81,25 @@ struct winesync_q { */ atomic_t signaled; + bool all; __u32 count; struct winesync_q_entry entries[]; }; struct winesync_device { + /* + * Wait-all operations must atomically grab all objects, and be totally + * ordered with respect to each other and wait-any operations. If one + * thread is trying to acquire several objects, another thread cannot + * touch the object at the same time. + * + * We achieve this by grabbing multiple object locks at the same time. + * However, this creates a lock ordering problem. To solve that problem, + * wait_all_lock is taken first whenever multiple objects must be locked + * at the same time. + */ + spinlock_t wait_all_lock; + struct xarray objects; }; @@ -107,6 +148,8 @@ static int winesync_char_open(struct inode *inode, struct file *file) if (!dev) return -ENOMEM; + spin_lock_init(&dev->wait_all_lock); + xa_init_flags(&dev->objects, XA_FLAGS_ALLOC); file->private_data = dev; @@ -132,8 +175,82 @@ static int winesync_char_release(struct inode *inode, struct file *file) static void init_obj(struct winesync_obj *obj) { kref_init(&obj->refcount); + atomic_set(&obj->all_hint, 0); spin_lock_init(&obj->lock); INIT_LIST_HEAD(&obj->any_waiters); + INIT_LIST_HEAD(&obj->all_waiters); +} + +static bool is_signaled(struct winesync_obj *obj, __u32 owner) +{ + lockdep_assert_held(&obj->lock); + + switch (obj->type) { + case WINESYNC_TYPE_SEM: + return !!obj->u.sem.count; + } + + WARN(1, "bad object type %#x\n", obj->type); + return false; +} + +/* + * "locked_obj" is an optional pointer to an object which is already locked and + * should not be locked again. This is necessary so that changing an object's + * state and waking it can be a single atomic operation. + */ +static void try_wake_all(struct winesync_device *dev, struct winesync_q *q, + struct winesync_obj *locked_obj) +{ + __u32 count = q->count; + bool can_wake = true; + __u32 i; + + lockdep_assert_held(&dev->wait_all_lock); + if (locked_obj) + lockdep_assert_held(&locked_obj->lock); + + for (i = 0; i < count; i++) { + if (q->entries[i].obj != locked_obj) + spin_lock(&q->entries[i].obj->lock); + } + + for (i = 0; i < count; i++) { + if (!is_signaled(q->entries[i].obj, q->owner)) { + can_wake = false; + break; + } + } + + if (can_wake && atomic_cmpxchg(&q->signaled, -1, 0) == -1) { + for (i = 0; i < count; i++) { + struct winesync_obj *obj = q->entries[i].obj; + + switch (obj->type) { + case WINESYNC_TYPE_SEM: + obj->u.sem.count--; + break; + } + } + wake_up_process(q->task); + } + + for (i = 0; i < count; i++) { + if (q->entries[i].obj != locked_obj) + spin_unlock(&q->entries[i].obj->lock); + } +} + +static void try_wake_all_obj(struct winesync_device *dev, + struct winesync_obj *obj) +{ + struct winesync_q_entry *entry; + + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert_held(&obj->lock); + + list_for_each_entry(entry, &obj->all_waiters, node) + try_wake_all(dev, entry->q, obj); } static void try_wake_any_sem(struct winesync_obj *sem) @@ -234,14 +351,29 @@ static int winesync_put_sem(struct winesync_device *dev, void __user *argp) if (!sem) return -EINVAL; - spin_lock(&sem->lock); + if (atomic_read(&sem->all_hint) > 0) { + spin_lock(&dev->wait_all_lock); + spin_lock(&sem->lock); + + prev_count = sem->u.sem.count; + ret = put_sem_state(sem, args.count); + if (!ret) { + try_wake_all_obj(dev, sem); + try_wake_any_sem(sem); + } - prev_count = sem->u.sem.count; - ret = put_sem_state(sem, args.count); - if (!ret) - try_wake_any_sem(sem); + spin_unlock(&sem->lock); + spin_unlock(&dev->wait_all_lock); + } else { + spin_lock(&sem->lock); - spin_unlock(&sem->lock); + prev_count = sem->u.sem.count; + ret = put_sem_state(sem, args.count); + if (!ret) + try_wake_any_sem(sem); + + spin_unlock(&sem->lock); + } put_obj(sem); @@ -278,7 +410,7 @@ static int winesync_schedule(const struct winesync_q *q, ktime_t *timeout) * Also, calculate the relative timeout. */ static int setup_wait(struct winesync_device *dev, - const struct winesync_wait_args *args, + const struct winesync_wait_args *args, bool all, ktime_t *ret_timeout, struct winesync_q **ret_q) { const __u32 count = args->count; @@ -318,6 +450,7 @@ static int setup_wait(struct winesync_device *dev, q->task = current; q->owner = args->owner; atomic_set(&q->signaled, -1); + q->all = all; q->count = count; for (i = 0; i < count; i++) { @@ -327,6 +460,16 @@ static int setup_wait(struct winesync_device *dev, if (!obj) goto err; + if (all) { + /* Check that the objects are all distinct. */ + for (j = 0; j < i; j++) { + if (obj == q->entries[j].obj) { + put_obj(obj); + goto err; + } + } + } + entry->obj = obj; entry->q = q; entry->index = i; @@ -367,7 +510,7 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - ret = setup_wait(dev, &args, &timeout, &q); + ret = setup_wait(dev, &args, false, &timeout, &q); if (ret < 0) return ret; @@ -429,6 +572,87 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) return ret; } +static int winesync_wait_all(struct winesync_device *dev, void __user *argp) +{ + struct winesync_wait_args args; + struct winesync_q *q; + ktime_t timeout; + int signaled; + __u32 i; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + ret = setup_wait(dev, &args, true, &timeout, &q); + if (ret < 0) + return ret; + + /* queue ourselves */ + + spin_lock(&dev->wait_all_lock); + + for (i = 0; i < args.count; i++) { + struct winesync_q_entry *entry = &q->entries[i]; + struct winesync_obj *obj = entry->obj; + + atomic_inc(&obj->all_hint); + + /* + * obj->all_waiters is protected by dev->wait_all_lock rather + * than obj->lock, so there is no need to acquire it here. + */ + list_add_tail(&entry->node, &obj->all_waiters); + } + + /* check if we are already signaled */ + + try_wake_all(dev, q, NULL); + + spin_unlock(&dev->wait_all_lock); + + /* sleep */ + + ret = winesync_schedule(q, args.timeout ? &timeout : NULL); + + /* and finally, unqueue */ + + spin_lock(&dev->wait_all_lock); + + for (i = 0; i < args.count; i++) { + struct winesync_q_entry *entry = &q->entries[i]; + struct winesync_obj *obj = entry->obj; + + /* + * obj->all_waiters is protected by dev->wait_all_lock rather + * than obj->lock, so there is no need to acquire it here. + */ + list_del(&entry->node); + + atomic_dec(&obj->all_hint); + + put_obj(obj); + } + + spin_unlock(&dev->wait_all_lock); + + signaled = atomic_read(&q->signaled); + if (signaled != -1) { + struct winesync_wait_args __user *user_args = argp; + + /* even if we caught a signal, we need to communicate success */ + ret = 0; + + if (put_user(signaled, &user_args->index)) + ret = -EFAULT; + } else if (!ret) { + ret = -ETIMEDOUT; + } + + kfree(q); + return ret; +} + static long winesync_char_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { @@ -442,6 +666,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_delete(dev, argp); case WINESYNC_IOC_PUT_SEM: return winesync_put_sem(dev, argp); + case WINESYNC_IOC_WAIT_ALL: + return winesync_wait_all(dev, argp); case WINESYNC_IOC_WAIT_ANY: return winesync_wait_any(dev, argp); default: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index f57ebfbe1dd928..44025a510cb99b 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -34,5 +34,7 @@ struct winesync_wait_args { struct winesync_sem_args) #define WINESYNC_IOC_WAIT_ANY _IOWR(WINESYNC_IOC_BASE, 3, \ struct winesync_wait_args) +#define WINESYNC_IOC_WAIT_ALL _IOWR(WINESYNC_IOC_BASE, 4, \ + struct winesync_wait_args) #endif From d163b59bffe28e2a5fc6ea7f0d9f57c62cc9404b Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:41:10 -0600 Subject: [PATCH 07/48] winesync: Introduce WINESYNC_IOC_CREATE_MUTEX. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 72 +++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 8 ++++ 2 files changed, 80 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 2b708c5b88a678..18eb0597590732 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -16,6 +16,7 @@ enum winesync_type { WINESYNC_TYPE_SEM, + WINESYNC_TYPE_MUTEX, }; struct winesync_obj { @@ -60,6 +61,10 @@ struct winesync_obj { __u32 count; __u32 max; } sem; + struct { + __u32 count; + __u32 owner; + } mutex; } u; }; @@ -188,6 +193,10 @@ static bool is_signaled(struct winesync_obj *obj, __u32 owner) switch (obj->type) { case WINESYNC_TYPE_SEM: return !!obj->u.sem.count; + case WINESYNC_TYPE_MUTEX: + if (obj->u.mutex.owner && obj->u.mutex.owner != owner) + return false; + return obj->u.mutex.count < UINT_MAX; } WARN(1, "bad object type %#x\n", obj->type); @@ -230,6 +239,10 @@ static void try_wake_all(struct winesync_device *dev, struct winesync_q *q, case WINESYNC_TYPE_SEM: obj->u.sem.count--; break; + case WINESYNC_TYPE_MUTEX: + obj->u.mutex.count++; + obj->u.mutex.owner = q->owner; + break; } } wake_up_process(q->task); @@ -272,6 +285,28 @@ static void try_wake_any_sem(struct winesync_obj *sem) } } +static void try_wake_any_mutex(struct winesync_obj *mutex) +{ + struct winesync_q_entry *entry; + + lockdep_assert_held(&mutex->lock); + + list_for_each_entry(entry, &mutex->any_waiters, node) { + struct winesync_q *q = entry->q; + + if (mutex->u.mutex.count == UINT_MAX) + break; + if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner) + continue; + + if (atomic_cmpxchg(&q->signaled, -1, entry->index) == -1) { + mutex->u.mutex.count++; + mutex->u.mutex.owner = q->owner; + wake_up_process(q->task); + } + } +} + static int winesync_create_sem(struct winesync_device *dev, void __user *argp) { struct winesync_sem_args __user *user_args = argp; @@ -304,6 +339,38 @@ static int winesync_create_sem(struct winesync_device *dev, void __user *argp) return put_user(id, &user_args->sem); } +static int winesync_create_mutex(struct winesync_device *dev, void __user *argp) +{ + struct winesync_mutex_args __user *user_args = argp; + struct winesync_mutex_args args; + struct winesync_obj *mutex; + __u32 id; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + if (!args.owner != !args.count) + return -EINVAL; + + mutex = kzalloc(sizeof(*mutex), GFP_KERNEL); + if (!mutex) + return -ENOMEM; + + init_obj(mutex); + mutex->type = WINESYNC_TYPE_MUTEX; + mutex->u.mutex.count = args.count; + mutex->u.mutex.owner = args.owner; + + ret = xa_alloc(&dev->objects, &id, mutex, xa_limit_32b, GFP_KERNEL); + if (ret < 0) { + kfree(mutex); + return ret; + } + + return put_user(id, &user_args->mutex); +} + static int winesync_delete(struct winesync_device *dev, void __user *argp) { struct winesync_obj *obj; @@ -495,6 +562,9 @@ static void try_wake_any_obj(struct winesync_obj *obj) case WINESYNC_TYPE_SEM: try_wake_any_sem(obj); break; + case WINESYNC_TYPE_MUTEX: + try_wake_any_mutex(obj); + break; } } @@ -660,6 +730,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { + case WINESYNC_IOC_CREATE_MUTEX: + return winesync_create_mutex(dev, argp); case WINESYNC_IOC_CREATE_SEM: return winesync_create_sem(dev, argp); case WINESYNC_IOC_DELETE: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 44025a510cb99b..23606a3b1546ae 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -16,6 +16,12 @@ struct winesync_sem_args { __u32 max; }; +struct winesync_mutex_args { + __u32 mutex; + __u32 owner; + __u32 count; +}; + struct winesync_wait_args { __u64 timeout; __u64 objs; @@ -36,5 +42,7 @@ struct winesync_wait_args { struct winesync_wait_args) #define WINESYNC_IOC_WAIT_ALL _IOWR(WINESYNC_IOC_BASE, 4, \ struct winesync_wait_args) +#define WINESYNC_IOC_CREATE_MUTEX _IOWR(WINESYNC_IOC_BASE, 5, \ + struct winesync_mutex_args) #endif From cdebeb4a1bbd926c1ee4087cd42f5a1b431e21ff Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:44:41 -0600 Subject: [PATCH 08/48] winesync: Introduce WINESYNC_IOC_PUT_MUTEX. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 67 +++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 ++ 2 files changed, 69 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 18eb0597590732..d18d08a6854663 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -450,6 +450,71 @@ static int winesync_put_sem(struct winesync_device *dev, void __user *argp) return ret; } +/* + * Actually change the mutex state, returning -EPERM if not the owner. + */ +static int put_mutex_state(struct winesync_obj *mutex, + const struct winesync_mutex_args *args) +{ + lockdep_assert_held(&mutex->lock); + + if (mutex->u.mutex.owner != args->owner) + return -EPERM; + + if (!--mutex->u.mutex.count) + mutex->u.mutex.owner = 0; + return 0; +} + +static int winesync_put_mutex(struct winesync_device *dev, void __user *argp) +{ + struct winesync_mutex_args __user *user_args = argp; + struct winesync_mutex_args args; + struct winesync_obj *mutex; + __u32 prev_count; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (!args.owner) + return -EINVAL; + + mutex = get_obj_typed(dev, args.mutex, WINESYNC_TYPE_MUTEX); + if (!mutex) + return -EINVAL; + + if (atomic_read(&mutex->all_hint) > 0) { + spin_lock(&dev->wait_all_lock); + spin_lock(&mutex->lock); + + prev_count = mutex->u.mutex.count; + ret = put_mutex_state(mutex, &args); + if (!ret) { + try_wake_all_obj(dev, mutex); + try_wake_any_mutex(mutex); + } + + spin_unlock(&mutex->lock); + spin_unlock(&dev->wait_all_lock); + } else { + spin_lock(&mutex->lock); + + prev_count = mutex->u.mutex.count; + ret = put_mutex_state(mutex, &args); + if (!ret) + try_wake_any_mutex(mutex); + + spin_unlock(&mutex->lock); + } + + put_obj(mutex); + + if (!ret && put_user(prev_count, &user_args->count)) + ret = -EFAULT; + + return ret; +} + static int winesync_schedule(const struct winesync_q *q, ktime_t *timeout) { int ret = 0; @@ -736,6 +801,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_create_sem(dev, argp); case WINESYNC_IOC_DELETE: return winesync_delete(dev, argp); + case WINESYNC_IOC_PUT_MUTEX: + return winesync_put_mutex(dev, argp); case WINESYNC_IOC_PUT_SEM: return winesync_put_sem(dev, argp); case WINESYNC_IOC_WAIT_ALL: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 23606a3b1546ae..fde08cb8ab959d 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -44,5 +44,7 @@ struct winesync_wait_args { struct winesync_wait_args) #define WINESYNC_IOC_CREATE_MUTEX _IOWR(WINESYNC_IOC_BASE, 5, \ struct winesync_mutex_args) +#define WINESYNC_IOC_PUT_MUTEX _IOWR(WINESYNC_IOC_BASE, 6, \ + struct winesync_mutex_args) #endif From 5df341f41a77a3130d38274e02988f978888ef61 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:46:46 -0600 Subject: [PATCH 09/48] winesync: Introduce WINESYNC_IOC_KILL_OWNER. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 80 ++++++++++++++++++++++++++++++++++- include/uapi/linux/winesync.h | 1 + 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index d18d08a6854663..891537063bb658 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -64,6 +64,7 @@ struct winesync_obj { struct { __u32 count; __u32 owner; + bool ownerdead; } mutex; } u; }; @@ -87,6 +88,7 @@ struct winesync_q { atomic_t signaled; bool all; + bool ownerdead; __u32 count; struct winesync_q_entry entries[]; }; @@ -240,6 +242,9 @@ static void try_wake_all(struct winesync_device *dev, struct winesync_q *q, obj->u.sem.count--; break; case WINESYNC_TYPE_MUTEX: + if (obj->u.mutex.ownerdead) + q->ownerdead = true; + obj->u.mutex.ownerdead = false; obj->u.mutex.count++; obj->u.mutex.owner = q->owner; break; @@ -300,6 +305,9 @@ static void try_wake_any_mutex(struct winesync_obj *mutex) continue; if (atomic_cmpxchg(&q->signaled, -1, entry->index) == -1) { + if (mutex->u.mutex.ownerdead) + q->ownerdead = true; + mutex->u.mutex.ownerdead = false; mutex->u.mutex.count++; mutex->u.mutex.owner = q->owner; wake_up_process(q->task); @@ -515,6 +523,71 @@ static int winesync_put_mutex(struct winesync_device *dev, void __user *argp) return ret; } +/* + * Actually change the mutex state to mark its owner as dead. + */ +static void put_mutex_ownerdead_state(struct winesync_obj *mutex) +{ + lockdep_assert_held(&mutex->lock); + + mutex->u.mutex.ownerdead = true; + mutex->u.mutex.owner = 0; + mutex->u.mutex.count = 0; +} + +static int winesync_kill_owner(struct winesync_device *dev, void __user *argp) +{ + struct winesync_obj *obj; + unsigned long id; + __u32 owner; + + if (get_user(owner, (__u32 __user *)argp)) + return -EFAULT; + if (!owner) + return -EINVAL; + + rcu_read_lock(); + + xa_for_each(&dev->objects, id, obj) { + if (!kref_get_unless_zero(&obj->refcount)) + continue; + + if (obj->type != WINESYNC_TYPE_MUTEX) { + put_obj(obj); + continue; + } + + if (atomic_read(&obj->all_hint) > 0) { + spin_lock(&dev->wait_all_lock); + spin_lock(&obj->lock); + + if (obj->u.mutex.owner == owner) { + put_mutex_ownerdead_state(obj); + try_wake_all_obj(dev, obj); + try_wake_any_mutex(obj); + } + + spin_unlock(&obj->lock); + spin_unlock(&dev->wait_all_lock); + } else { + spin_lock(&obj->lock); + + if (obj->u.mutex.owner == owner) { + put_mutex_ownerdead_state(obj); + try_wake_any_mutex(obj); + } + + spin_unlock(&obj->lock); + } + + put_obj(obj); + } + + rcu_read_unlock(); + + return 0; +} + static int winesync_schedule(const struct winesync_q *q, ktime_t *timeout) { int ret = 0; @@ -583,6 +656,7 @@ static int setup_wait(struct winesync_device *dev, q->owner = args->owner; atomic_set(&q->signaled, -1); q->all = all; + q->ownerdead = false; q->count = count; for (i = 0; i < count; i++) { @@ -695,7 +769,7 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) struct winesync_wait_args __user *user_args = argp; /* even if we caught a signal, we need to communicate success */ - ret = 0; + ret = q->ownerdead ? -EOWNERDEAD : 0; if (put_user(signaled, &user_args->index)) ret = -EFAULT; @@ -776,7 +850,7 @@ static int winesync_wait_all(struct winesync_device *dev, void __user *argp) struct winesync_wait_args __user *user_args = argp; /* even if we caught a signal, we need to communicate success */ - ret = 0; + ret = q->ownerdead ? -EOWNERDEAD : 0; if (put_user(signaled, &user_args->index)) ret = -EFAULT; @@ -801,6 +875,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_create_sem(dev, argp); case WINESYNC_IOC_DELETE: return winesync_delete(dev, argp); + case WINESYNC_IOC_KILL_OWNER: + return winesync_kill_owner(dev, argp); case WINESYNC_IOC_PUT_MUTEX: return winesync_put_mutex(dev, argp); case WINESYNC_IOC_PUT_SEM: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index fde08cb8ab959d..f57aa76d57f54d 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -46,5 +46,6 @@ struct winesync_wait_args { struct winesync_mutex_args) #define WINESYNC_IOC_PUT_MUTEX _IOWR(WINESYNC_IOC_BASE, 6, \ struct winesync_mutex_args) +#define WINESYNC_IOC_KILL_OWNER _IOW (WINESYNC_IOC_BASE, 7, __u32) #endif From 8601753f467e60a505b1ac41888b3fed84ff3849 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:47:55 -0600 Subject: [PATCH 10/48] winesync: Introduce WINESYNC_IOC_READ_SEM. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 29 +++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 ++ 2 files changed, 31 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 891537063bb658..98bedda2f8eb9c 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -523,6 +523,33 @@ static int winesync_put_mutex(struct winesync_device *dev, void __user *argp) return ret; } +static int winesync_read_sem(struct winesync_device *dev, void __user *argp) +{ + struct winesync_sem_args __user *user_args = argp; + struct winesync_sem_args args; + struct winesync_obj *sem; + __u32 id; + + if (get_user(id, &user_args->sem)) + return -EFAULT; + + sem = get_obj_typed(dev, id, WINESYNC_TYPE_SEM); + if (!sem) + return -EINVAL; + + args.sem = id; + spin_lock(&sem->lock); + args.count = sem->u.sem.count; + args.max = sem->u.sem.max; + spin_unlock(&sem->lock); + + put_obj(sem); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return 0; +} + /* * Actually change the mutex state to mark its owner as dead. */ @@ -881,6 +908,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_put_mutex(dev, argp); case WINESYNC_IOC_PUT_SEM: return winesync_put_sem(dev, argp); + case WINESYNC_IOC_READ_SEM: + return winesync_read_sem(dev, argp); case WINESYNC_IOC_WAIT_ALL: return winesync_wait_all(dev, argp); case WINESYNC_IOC_WAIT_ANY: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index f57aa76d57f54d..311eb810647d22 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -47,5 +47,7 @@ struct winesync_wait_args { #define WINESYNC_IOC_PUT_MUTEX _IOWR(WINESYNC_IOC_BASE, 6, \ struct winesync_mutex_args) #define WINESYNC_IOC_KILL_OWNER _IOW (WINESYNC_IOC_BASE, 7, __u32) +#define WINESYNC_IOC_READ_SEM _IOWR(WINESYNC_IOC_BASE, 8, \ + struct winesync_sem_args) #endif From 1134b2f24374f58055b97ac1c3706ae2cf43b818 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:48:10 -0600 Subject: [PATCH 11/48] winesync: Introduce WINESYNC_IOC_READ_MUTEX. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 31 +++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 ++ 2 files changed, 33 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 98bedda2f8eb9c..eae272663abe7c 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -550,6 +550,35 @@ static int winesync_read_sem(struct winesync_device *dev, void __user *argp) return 0; } +static int winesync_read_mutex(struct winesync_device *dev, void __user *argp) +{ + struct winesync_mutex_args __user *user_args = argp; + struct winesync_mutex_args args; + struct winesync_obj *mutex; + __u32 id; + int ret; + + if (get_user(id, &user_args->mutex)) + return -EFAULT; + + mutex = get_obj_typed(dev, id, WINESYNC_TYPE_MUTEX); + if (!mutex) + return -EINVAL; + + args.mutex = id; + spin_lock(&mutex->lock); + args.count = mutex->u.mutex.count; + args.owner = mutex->u.mutex.owner; + ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0; + spin_unlock(&mutex->lock); + + put_obj(mutex); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return ret; +} + /* * Actually change the mutex state to mark its owner as dead. */ @@ -908,6 +937,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_put_mutex(dev, argp); case WINESYNC_IOC_PUT_SEM: return winesync_put_sem(dev, argp); + case WINESYNC_IOC_READ_MUTEX: + return winesync_read_mutex(dev, argp); case WINESYNC_IOC_READ_SEM: return winesync_read_sem(dev, argp); case WINESYNC_IOC_WAIT_ALL: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 311eb810647d22..3371a303a9270a 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -49,5 +49,7 @@ struct winesync_wait_args { #define WINESYNC_IOC_KILL_OWNER _IOW (WINESYNC_IOC_BASE, 7, __u32) #define WINESYNC_IOC_READ_SEM _IOWR(WINESYNC_IOC_BASE, 8, \ struct winesync_sem_args) +#define WINESYNC_IOC_READ_MUTEX _IOWR(WINESYNC_IOC_BASE, 9, \ + struct winesync_mutex_args) #endif From 0a9f622b6918a9e9841e35bc284c083cab916a9c Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 11:50:49 -0600 Subject: [PATCH 12/48] docs: winesync: Add documentation for the winesync uAPI. Signed-off-by: Kai Krakow --- Documentation/userspace-api/index.rst | 1 + Documentation/userspace-api/winesync.rst | 324 +++++++++++++++++++++++ 2 files changed, 325 insertions(+) create mode 100644 Documentation/userspace-api/winesync.rst diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index c78da9ce0ec44e..ffdfd8dbec73f1 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -30,6 +30,7 @@ place where this information is gathered. sysfs-platform_profile vduse futex2 + winesync .. only:: subproject and html diff --git a/Documentation/userspace-api/winesync.rst b/Documentation/userspace-api/winesync.rst new file mode 100644 index 00000000000000..34e54be229cfc2 --- /dev/null +++ b/Documentation/userspace-api/winesync.rst @@ -0,0 +1,324 @@ +===================================== +Wine synchronization primitive driver +===================================== + +This page documents the user-space API for the winesync driver. + +winesync is a support driver for emulation of NT synchronization +primitives by the Wine project or other NT emulators. It exists +because implementation in user-space, using existing tools, cannot +simultaneously satisfy performance, correctness, and security +constraints. It is implemented entirely in software, and does not +drive any hardware device. + +This interface is meant as a compatibility tool only, and should not +be used for general synchronization. Instead use generic, versatile +interfaces such as futex(2) and poll(2). + +Synchronization primitives +========================== + +The winesync driver exposes two types of synchronization primitives, +semaphores and mutexes. + +A semaphore holds a single volatile 32-bit counter, and a static +32-bit integer denoting the maximum value. It is considered signaled +when the counter is nonzero. The counter is decremented by one when a +wait is satisfied. Both the initial and maximum count are established +when the semaphore is created. + +A mutex holds a volatile 32-bit recursion count, and a volatile 32-bit +identifier denoting its owner. A mutex is considered signaled when its +owner is zero (indicating that it is not owned). The recursion count +is incremented when a wait is satisfied, and ownership is set to the +given identifier. + +A mutex also holds an internal flag denoting whether its previous +owner has died; such a mutex is said to be inconsistent. Owner death +is not tracked automatically based on thread death, but rather must be +communicated using ``WINESYNC_IOC_KILL_OWNER``. An inconsistent mutex +is inherently considered unowned. + +Except for the "unowned" semantics of zero, the actual value of the +owner identifier is not interpreted by the winesync driver at all. The +intended use is to store a thread identifier; however, the winesync +driver does not actually validate that a calling thread provides +consistent or unique identifiers. + +Unless specified otherwise, all operations on an object are atomic and +totally ordered with respect to other operations on the same object. + +Objects are represented by unsigned 32-bit integers. + +Char device +=========== + +The winesync driver creates a single char device /dev/winesync. Each +file description opened on the device represents a unique namespace. +That is, objects created on one open file description are shared +across all its individual descriptors, but are not shared with other +open() calls on the same device. The same file description may be +shared across multiple processes. + +ioctl reference +=============== + +All operations on the device are done through ioctls. There are three +structures used in ioctl calls:: + + struct winesync_sem_args { + __u32 sem; + __u32 count; + __u32 max; + }; + + struct winesync_mutex_args { + __u32 mutex; + __u32 owner; + __u32 count; + }; + + struct winesync_wait_args { + __u64 timeout; + __u64 objs; + __u32 count; + __u32 owner; + __u32 index; + __u32 pad; + }; + +Depending on the ioctl, members of the structure may be used as input, +output, or not at all. All ioctls return 0 on success. + +The ioctls are as follows: + +.. c:macro:: WINESYNC_IOC_CREATE_SEM + + Create a semaphore object. Takes a pointer to struct + :c:type:`winesync_sem_args`, which is used as follows: + + .. list-table:: + + * - ``sem`` + - On output, contains the identifier of the created semaphore. + * - ``count`` + - Initial count of the semaphore. + * - ``max`` + - Maximum count of the semaphore. + + Fails with ``EINVAL`` if ``count`` is greater than ``max``. + +.. c:macro:: WINESYNC_IOC_CREATE_MUTEX + + Create a mutex object. Takes a pointer to struct + :c:type:`winesync_mutex_args`, which is used as follows: + + .. list-table:: + + * - ``mutex`` + - On output, contains the identifier of the created mutex. + * - ``count`` + - Initial recursion count of the mutex. + * - ``owner`` + - Initial owner of the mutex. + + If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is + zero and ``count`` is nonzero, the function fails with ``EINVAL``. + +.. c:macro:: WINESYNC_IOC_DELETE + + Delete an object of any type. Takes an input-only pointer to a + 32-bit integer denoting the object to delete. + + Wait ioctls currently in progress are not interrupted, and behave as + if the object remains valid. + +.. c:macro:: WINESYNC_IOC_PUT_SEM + + Post to a semaphore object. Takes a pointer to struct + :c:type:`winesync_sem_args`, which is used as follows: + + .. list-table:: + + * - ``sem`` + - Semaphore object to post to. + * - ``count`` + - Count to add to the semaphore. On output, contains the + previous count of the semaphore. + * - ``max`` + - Not used. + + If adding ``count`` to the semaphore's current count would raise the + latter past the semaphore's maximum count, the ioctl fails with + ``EOVERFLOW`` and the semaphore is not affected. If raising the + semaphore's count causes it to become signaled, eligible threads + waiting on this semaphore will be woken and the semaphore's count + decremented appropriately. + +.. c:macro:: WINESYNC_IOC_PUT_MUTEX + + Release a mutex object. Takes a pointer to struct + :c:type:`winesync_mutex_args`, which is used as follows: + + .. list-table:: + + * - ``mutex`` + - Mutex object to release. + * - ``owner`` + - Mutex owner identifier. + * - ``count`` + - On output, contains the previous recursion count. + + If ``owner`` is zero, the ioctl fails with ``EINVAL``. If ``owner`` + is not the current owner of the mutex, the ioctl fails with + ``EPERM``. + + The mutex's count will be decremented by one. If decrementing the + mutex's count causes it to become zero, the mutex is marked as + unowned and signaled, and eligible threads waiting on it will be + woken as appropriate. + +.. c:macro:: WINESYNC_IOC_READ_SEM + + Read the current state of a semaphore object. Takes a pointer to + struct :c:type:`winesync_sem_args`, which is used as follows: + + .. list-table:: + + * - ``sem`` + - Semaphore object to read. + * - ``count`` + - On output, contains the current count of the semaphore. + * - ``max`` + - On output, contains the maximum count of the semaphore. + +.. c:macro:: WINESYNC_IOC_READ_MUTEX + + Read the current state of a mutex object. Takes a pointer to struct + :c:type:`winesync_mutex_args`, which is used as follows: + + .. list-table:: + + * - ``mutex`` + - Mutex object to read. + * - ``owner`` + - On output, contains the current owner of the mutex, or zero + if the mutex is not currently owned. + * - ``count`` + - On output, contains the current recursion count of the mutex. + + If the mutex is marked as inconsistent, the function fails with + ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to + zero. + +.. c:macro:: WINESYNC_IOC_KILL_OWNER + + Mark any mutexes owned by the given owner as unowned and + inconsistent. Takes an input-only pointer to a 32-bit integer + denoting the owner. If the owner is zero, the ioctl fails with + ``EINVAL``. + + For each mutex currently owned by the given owner, eligible threads + waiting on said mutex will be woken as appropriate (and such waits + will fail with ``EOWNERDEAD``, as described below). + + The operation as a whole is not atomic; however, the modification of + each mutex is atomic and totally ordered with respect to other + operations on the same mutex. + +.. c:macro:: WINESYNC_IOC_WAIT_ANY + + Poll on any of a list of objects, atomically acquiring at most one. + Takes a pointer to struct :c:type:`winesync_wait_args`, which is + used as follows: + + .. list-table:: + + * - ``timeout`` + - Optional pointer to a 64-bit struct :c:type:`timespec` + (specified as an integer so that the structure has the same + size regardless of architecture). The timeout is specified in + absolute format, as measured against the MONOTONIC clock. If + the timeout is equal to or earlier than the current time, the + function returns immediately without sleeping. If ``timeout`` + is zero, i.e. NULL, the function will sleep until an object + is signaled, and will not fail with ``ETIMEDOUT``. + * - ``objs`` + - Pointer to an array of ``count`` 32-bit object identifiers + (specified as an integer so that the structure has the same + size regardless of architecture). If any identifier is + invalid, the function fails with ``EINVAL``. + * - ``count`` + - Number of object identifiers specified in the ``objs`` array. + * - ``owner`` + - Mutex owner identifier. If any object in ``objs`` is a mutex, + the ioctl will attempt to acquire that mutex on behalf of + ``owner``. If ``owner`` is zero, the ioctl fails with + ``EINVAL``. + * - ``index`` + - On success, contains the index (into ``objs``) of the object + which was signaled. + * - ``pad`` + - This field is not used and must be set to zero. + + This function attempts to acquire one of the given objects. If + unable to do so, it sleeps until an object becomes signaled, + subsequently acquiring it, or the timeout expires. In the latter + case the ioctl fails with ``ETIMEDOUT``. The function only acquires + one object, even if multiple objects are signaled. + + A semaphore is considered to be signaled if its count is nonzero, + and is acquired by decrementing its count by one. A mutex is + considered to be signaled if it is unowned or if its owner matches + the ``owner`` argument, and is acquired by incrementing its + recursion count by one and setting its owner to the ``owner`` + argument. + + Acquisition is atomic and totally ordered with respect to other + operations on the same object. If two wait operations (with + different ``owner`` identifiers) are queued on the same mutex, only + one is signaled. If two wait operations are queued on the same + semaphore, and a value of one is posted to it, only one is signaled. + The order in which threads are signaled is not specified. + + If an inconsistent mutex is acquired, the ioctl fails with + ``EOWNERDEAD``. Although this is a failure return, the function may + otherwise be considered successful. The mutex is marked as owned by + the given owner (with a recursion count of 1) and as no longer + inconsistent, and ``index`` is still set to the index of the mutex. + + It is valid to pass the same object more than once. If a wakeup + occurs due to that object being signaled, ``index`` is set to the + lowest index corresponding to that object. + + The function may fail with ``EINTR`` if a signal is received. + +.. c:macro:: WINESYNC_IOC_WAIT_ALL + + Poll on a list of objects, atomically acquiring all of them. Takes a + pointer to struct :c:type:`winesync_wait_args`, which is used + identically to ``WINESYNC_IOC_WAIT_ANY``, except that ``index`` is + always filled with zero on success. + + This function attempts to simultaneously acquire all of the given + objects. If unable to do so, it sleeps until all objects become + simultaneously signaled, subsequently acquiring them, or the timeout + expires. In the latter case the ioctl fails with ``ETIMEDOUT`` and + no objects are modified. + + Objects may become signaled and subsequently designaled (through + acquisition by other threads) while this thread is sleeping. Only + once all objects are simultaneously signaled does the ioctl acquire + them and return. The entire acquisition is atomic and totally + ordered with respect to other operations on any of the given + objects. + + If an inconsistent mutex is acquired, the ioctl fails with + ``EOWNERDEAD``. Similarly to ``WINESYNC_IOC_WAIT_ANY``, all objects + are nevertheless marked as acquired. Note that if multiple mutex + objects are specified, there is no way to know which were marked as + inconsistent. + + Unlike ``WINESYNC_IOC_WAIT_ANY``, it is not valid to pass the same + object more than once. If this is attempted, the function fails with + ``EINVAL``. From 933b7244b5a8ea01fc4faad376779853573b83e7 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:06:23 -0600 Subject: [PATCH 13/48] selftests: winesync: Add some tests for semaphore state. Signed-off-by: Kai Krakow --- tools/testing/selftests/Makefile | 1 + .../selftests/drivers/winesync/Makefile | 8 + .../testing/selftests/drivers/winesync/config | 1 + .../selftests/drivers/winesync/winesync.c | 153 ++++++++++++++++++ 4 files changed, 163 insertions(+) create mode 100644 tools/testing/selftests/drivers/winesync/Makefile create mode 100644 tools/testing/selftests/drivers/winesync/config create mode 100644 tools/testing/selftests/drivers/winesync/winesync.c diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index aae64eb2ae737f..f04298f1f6b175 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -15,6 +15,7 @@ TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net/bonding TARGETS += drivers/net/team +TARGETS += drivers/winesync TARGETS += efivarfs TARGETS += exec TARGETS += filesystems diff --git a/tools/testing/selftests/drivers/winesync/Makefile b/tools/testing/selftests/drivers/winesync/Makefile new file mode 100644 index 00000000000000..43b39fdeea10ec --- /dev/null +++ b/tools/testing/selftests/drivers/winesync/Makefile @@ -0,0 +1,8 @@ +# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only +TEST_GEN_PROGS := winesync + +top_srcdir =../../../../.. +CFLAGS += -I$(top_srcdir)/usr/include +LDLIBS += -lpthread + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/winesync/config b/tools/testing/selftests/drivers/winesync/config new file mode 100644 index 00000000000000..60539c826d0624 --- /dev/null +++ b/tools/testing/selftests/drivers/winesync/config @@ -0,0 +1 @@ +CONFIG_WINESYNC=y diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c new file mode 100644 index 00000000000000..58ade297fef90d --- /dev/null +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Various unit tests for the "winesync" synchronization primitive driver. + * + * Copyright (C) 2021 Zebediah Figura + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include "../../kselftest_harness.h" + +static int read_sem_state(int fd, __u32 sem, __u32 *count, __u32 *max) +{ + struct winesync_sem_args args; + int ret; + + args.sem = sem; + args.count = 0xdeadbeef; + args.max = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_READ_SEM, &args); + *count = args.count; + *max = args.max; + return ret; +} + +#define check_sem_state(fd, sem, count, max) \ + ({ \ + __u32 __count, __max; \ + int ret = read_sem_state((fd), (sem), &__count, &__max); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((count), __count); \ + EXPECT_EQ((max), __max); \ + }) + +static int put_sem(int fd, __u32 sem, __u32 *count) +{ + struct winesync_sem_args args; + int ret; + + args.sem = sem; + args.count = *count; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &args); + *count = args.count; + return ret; +} + +static int wait_any(int fd, __u32 count, const __u32 *objs, __u32 owner, + __u32 *index) +{ + struct winesync_wait_args args = {0}; + struct timespec timeout; + int ret; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + args.timeout = (uintptr_t)&timeout; + args.count = count; + args.objs = (uintptr_t)objs; + args.owner = owner; + args.index = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_WAIT_ANY, &args); + *index = args.index; + return ret; +} + +TEST(semaphore_state) +{ + struct winesync_sem_args sem_args; + struct timespec timeout; + __u32 sem, count, index; + int fd, ret; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 3; + sem_args.max = 2; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + sem_args.count = 2; + sem_args.max = 2; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + check_sem_state(fd, sem, 2, 2); + + count = 0; + ret = put_sem(fd, sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_sem_state(fd, sem, 2, 2); + + count = 1; + ret = put_sem(fd, sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(fd, sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem, 1, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem, 0, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + count = 3; + ret = put_sem(fd, sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(fd, sem, 0, 2); + + count = 2; + ret = put_sem(fd, sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(fd, sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + + count = 1; + ret = put_sem(fd, sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(fd, sem, 1, 2); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &sem); + EXPECT_EQ(0, ret); + + close(fd); +} + +TEST_HARNESS_MAIN From 224ff64f97d9ea68af16b932434773420a11c61b Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:07:04 -0600 Subject: [PATCH 14/48] selftests: winesync: Add some tests for mutex state. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 58ade297fef90d..801b776da5aa4f 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -49,6 +49,42 @@ static int put_sem(int fd, __u32 sem, __u32 *count) return ret; } +static int read_mutex_state(int fd, __u32 mutex, __u32 *count, __u32 *owner) +{ + struct winesync_mutex_args args; + int ret; + + args.mutex = mutex; + args.count = 0xdeadbeef; + args.owner = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_READ_MUTEX, &args); + *count = args.count; + *owner = args.owner; + return ret; +} + +#define check_mutex_state(fd, mutex, count, owner) \ + ({ \ + __u32 __count, __owner; \ + int ret = read_mutex_state((fd), (mutex), &__count, &__owner); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((count), __count); \ + EXPECT_EQ((owner), __owner); \ + }) + +static int put_mutex(int fd, __u32 mutex, __u32 owner, __u32 *count) +{ + struct winesync_mutex_args args; + int ret; + + args.mutex = mutex; + args.owner = owner; + args.count = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_PUT_MUTEX, &args); + *count = args.count; + return ret; +} + static int wait_any(int fd, __u32 count, const __u32 *objs, __u32 owner, __u32 *index) { @@ -150,4 +186,156 @@ TEST(semaphore_state) close(fd); } +TEST(mutex_state) +{ + struct winesync_mutex_args mutex_args; + __u32 mutex, owner, count, index; + struct timespec timeout; + int fd, ret; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + mutex_args.owner = 123; + mutex_args.count = 0; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 0; + mutex_args.count = 2; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 123; + mutex_args.count = 2; + mutex_args.mutex = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, mutex_args.mutex); + mutex = mutex_args.mutex; + check_mutex_state(fd, mutex, 2, 123); + + ret = put_mutex(fd, mutex, 0, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = put_mutex(fd, mutex, 456, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(fd, mutex, 2, 123); + + ret = put_mutex(fd, mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(fd, mutex, 1, 123); + + ret = put_mutex(fd, mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + check_mutex_state(fd, mutex, 0, 0); + + ret = put_mutex(fd, mutex, 123, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(fd, mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(fd, mutex, 2, 456); + + ret = put_mutex(fd, mutex, 456, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(fd, mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 0; + ret = ioctl(fd, WINESYNC_IOC_KILL_OWNER, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + owner = 123; + ret = ioctl(fd, WINESYNC_IOC_KILL_OWNER, &owner); + EXPECT_EQ(0, ret); + check_mutex_state(fd, mutex, 1, 456); + + owner = 456; + ret = ioctl(fd, WINESYNC_IOC_KILL_OWNER, &owner); + EXPECT_EQ(0, ret); + + mutex_args.count = 0xdeadbeef; + mutex_args.owner = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_READ_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + mutex_args.count = 0xdeadbeef; + mutex_args.owner = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_READ_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(fd, mutex, 1, 123); + + owner = 123; + ret = ioctl(fd, WINESYNC_IOC_KILL_OWNER, &owner); + EXPECT_EQ(0, ret); + + mutex_args.count = 0xdeadbeef; + mutex_args.owner = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_READ_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(fd, mutex, 1, 123); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex); + EXPECT_EQ(0, ret); + + mutex_args.owner = 0; + mutex_args.count = 0; + mutex_args.mutex = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, mutex_args.mutex); + mutex = mutex_args.mutex; + check_mutex_state(fd, mutex, 0, 0); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(fd, mutex, 1, 123); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST_HARNESS_MAIN From 5fcf18312dda27b25c1f297b3d95ed175198a434 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:07:45 -0600 Subject: [PATCH 15/48] selftests: winesync: Add some tests for WINESYNC_IOC_WAIT_ANY. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 801b776da5aa4f..5903061d38b6b5 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -338,4 +338,111 @@ TEST(mutex_state) close(fd); } +TEST(test_wait_any) +{ + struct winesync_mutex_args mutex_args = {0}; + struct winesync_wait_args wait_args = {0}; + struct winesync_sem_args sem_args = {0}; + __u32 objs[2], owner, index; + struct timespec timeout; + int fd, ret; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + + mutex_args.owner = 0; + mutex_args.count = 0; + mutex_args.mutex = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, mutex_args.mutex); + + objs[0] = sem_args.sem; + objs[1] = mutex_args.mutex; + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 1, 3); + check_mutex_state(fd, mutex_args.mutex, 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 0, 3); + check_mutex_state(fd, mutex_args.mutex, 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(fd, sem_args.sem, 0, 3); + check_mutex_state(fd, mutex_args.mutex, 1, 123); + + sem_args.count = 1; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, sem_args.count); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 0, 3); + check_mutex_state(fd, mutex_args.mutex, 1, 123); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(fd, sem_args.sem, 0, 3); + check_mutex_state(fd, mutex_args.mutex, 2, 123); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 123; + ret = ioctl(fd, WINESYNC_IOC_KILL_OWNER, &owner); + EXPECT_EQ(0, ret); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(1, index); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + /* test waiting on the same object twice */ + sem_args.count = 2; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, sem_args.count); + + objs[0] = objs[1] = sem_args.sem; + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, wait_args.index); + check_sem_state(fd, sem_args.sem, 1, 3); + + ret = wait_any(fd, 0, NULL, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &sem_args.sem); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST_HARNESS_MAIN From 5550802d655b44413c7845a518e5af0e3d7d7876 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:08:25 -0600 Subject: [PATCH 16/48] selftests: winesync: Add some tests for WINESYNC_IOC_WAIT_ALL. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 104 +++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 5903061d38b6b5..0718219f54bf34 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -85,8 +85,8 @@ static int put_mutex(int fd, __u32 mutex, __u32 owner, __u32 *count) return ret; } -static int wait_any(int fd, __u32 count, const __u32 *objs, __u32 owner, - __u32 *index) +static int wait_objs(int fd, unsigned long request, __u32 count, + const __u32 *objs, __u32 owner, __u32 *index) { struct winesync_wait_args args = {0}; struct timespec timeout; @@ -99,11 +99,23 @@ static int wait_any(int fd, __u32 count, const __u32 *objs, __u32 owner, args.objs = (uintptr_t)objs; args.owner = owner; args.index = 0xdeadbeef; - ret = ioctl(fd, WINESYNC_IOC_WAIT_ANY, &args); + ret = ioctl(fd, request, &args); *index = args.index; return ret; } +static int wait_any(int fd, __u32 count, const __u32 *objs, + __u32 owner, __u32 *index) +{ + return wait_objs(fd, WINESYNC_IOC_WAIT_ANY, count, objs, owner, index); +} + +static int wait_all(int fd, __u32 count, const __u32 *objs, + __u32 owner, __u32 *index) +{ + return wait_objs(fd, WINESYNC_IOC_WAIT_ALL, count, objs, owner, index); +} + TEST(semaphore_state) { struct winesync_sem_args sem_args; @@ -445,4 +457,90 @@ TEST(test_wait_any) close(fd); } +TEST(test_wait_all) +{ + struct winesync_mutex_args mutex_args = {0}; + struct winesync_sem_args sem_args = {0}; + __u32 objs[2], owner, index; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + + mutex_args.owner = 0; + mutex_args.count = 0; + mutex_args.mutex = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, mutex_args.mutex); + + objs[0] = sem_args.sem; + objs[1] = mutex_args.mutex; + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 1, 3); + check_mutex_state(fd, mutex_args.mutex, 1, 123); + + ret = wait_all(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(fd, sem_args.sem, 1, 3); + check_mutex_state(fd, mutex_args.mutex, 1, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 0, 3); + check_mutex_state(fd, mutex_args.mutex, 2, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(fd, sem_args.sem, 0, 3); + check_mutex_state(fd, mutex_args.mutex, 2, 123); + + sem_args.count = 3; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, sem_args.count); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 2, 3); + check_mutex_state(fd, mutex_args.mutex, 3, 123); + + owner = 123; + ret = ioctl(fd, WINESYNC_IOC_KILL_OWNER, &owner); + EXPECT_EQ(0, ret); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + check_sem_state(fd, sem_args.sem, 1, 3); + check_mutex_state(fd, mutex_args.mutex, 1, 123); + + /* test waiting on the same object twice */ + objs[0] = objs[1] = sem_args.sem; + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &sem_args.sem); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST_HARNESS_MAIN From 5baf95247536ba34432a0b3fe1031c6b6f875415 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:08:54 -0600 Subject: [PATCH 17/48] selftests: winesync: Add some tests for invalid object handling. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 0718219f54bf34..8a9fb496f5e04d 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -543,4 +543,97 @@ TEST(test_wait_all) close(fd); } +TEST(invalid_objects) +{ + struct winesync_mutex_args mutex_args = {0}; + struct winesync_wait_args wait_args = {0}; + struct winesync_sem_args sem_args = {0}; + __u32 objs[2] = {0}; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_READ_SEM, &sem_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_PUT_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_READ_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + wait_args.objs = (uintptr_t)objs; + wait_args.count = 1; + ret = ioctl(fd, WINESYNC_IOC_WAIT_ANY, &wait_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + ret = ioctl(fd, WINESYNC_IOC_WAIT_ALL, &wait_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &objs[0]); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + sem_args.max = 1; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + + mutex_args.mutex = sem_args.sem; + ret = ioctl(fd, WINESYNC_IOC_PUT_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_READ_MUTEX, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + objs[0] = sem_args.sem; + objs[1] = sem_args.sem + 1; + wait_args.count = 2; + ret = ioctl(fd, WINESYNC_IOC_WAIT_ANY, &wait_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + ret = ioctl(fd, WINESYNC_IOC_WAIT_ALL, &wait_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + objs[0] = sem_args.sem + 1; + objs[1] = sem_args.sem; + ret = ioctl(fd, WINESYNC_IOC_WAIT_ANY, &wait_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + ret = ioctl(fd, WINESYNC_IOC_WAIT_ALL, &wait_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &sem_args.sem); + EXPECT_EQ(0, ret); + + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + + sem_args.sem = mutex_args.mutex; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_READ_SEM, &sem_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST_HARNESS_MAIN From b3d542553d88da8c569f2ba2a51e16d2f77625d8 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:09:32 -0600 Subject: [PATCH 18/48] selftests: winesync: Add some tests for wakeup signaling with WINESYNC_IOC_WAIT_ANY. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 8a9fb496f5e04d..04855df0089402 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -636,4 +636,158 @@ TEST(invalid_objects) close(fd); } +struct wake_args +{ + int fd; + __u32 obj; +}; + +struct wait_args +{ + int fd; + unsigned long request; + struct winesync_wait_args *args; + int ret; + int err; +}; + +static void *wait_thread(void *arg) +{ + struct wait_args *args = arg; + + args->ret = ioctl(args->fd, args->request, args->args); + args->err = errno; + return NULL; +} + +static void get_abs_timeout(struct timespec *timeout, clockid_t clock, + unsigned int ms) +{ + clock_gettime(clock, timeout); + timeout->tv_nsec += ms * 1000000; + timeout->tv_sec += (timeout->tv_nsec / 1000000000); + timeout->tv_nsec %= 1000000000; +} + +static int wait_for_thread(pthread_t thread, unsigned int ms) +{ + struct timespec timeout; + get_abs_timeout(&timeout, CLOCK_REALTIME, ms); + return pthread_timedjoin_np(thread, NULL, &timeout); +} + +TEST(wake_any) +{ + struct winesync_mutex_args mutex_args = {0}; + struct winesync_wait_args wait_args = {0}; + struct winesync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 objs[2], count, index; + struct timespec timeout; + pthread_t thread; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + + mutex_args.owner = 123; + mutex_args.count = 1; + mutex_args.mutex = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, mutex_args.mutex); + + objs[0] = sem_args.sem; + objs[1] = mutex_args.mutex; + + /* test waking the semaphore */ + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + wait_args.timeout = (uintptr_t)&timeout; + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 456; + wait_args.index = 0xdeadbeef; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = WINESYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + sem_args.count = 1; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, sem_args.count); + check_sem_state(fd, sem_args.sem, 0, 3); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(0, wait_args.index); + + /* test waking the mutex */ + + /* first grab it again for owner 123 */ + ret = wait_any(fd, 1, &mutex_args.mutex, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + wait_args.owner = 456; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = put_mutex(fd, mutex_args.mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + ret = put_mutex(fd, mutex_args.mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, mutex_args.count); + check_mutex_state(fd, mutex_args.mutex, 1, 456); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + /* delete an object while it's being waited on */ + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &sem_args.sem); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + TEST_HARNESS_MAIN From 6016d6fa5698979e7a2348c1484950aac732da68 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:09:36 -0600 Subject: [PATCH 19/48] selftests: winesync: Add some tests for wakeup signaling with WINESYNC_IOC_WAIT_ALL. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 04855df0089402..ad6d0f9a2a35b5 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -790,4 +790,106 @@ TEST(wake_any) close(fd); } +TEST(wake_all) +{ + struct winesync_mutex_args mutex_args = {0}; + struct winesync_wait_args wait_args = {0}; + struct winesync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 objs[2], count, index; + struct timespec timeout; + pthread_t thread; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + + mutex_args.owner = 123; + mutex_args.count = 1; + mutex_args.mutex = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, mutex_args.mutex); + + objs[0] = sem_args.sem; + objs[1] = mutex_args.mutex; + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + wait_args.timeout = (uintptr_t)&timeout; + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 456; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = WINESYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + sem_args.count = 1; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, sem_args.count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_sem_state(fd, sem_args.sem, 1, 3); + + ret = wait_any(fd, 1, &sem_args.sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = put_mutex(fd, mutex_args.mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_mutex_state(fd, mutex_args.mutex, 0, 0); + + sem_args.count = 2; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, sem_args.count); + check_sem_state(fd, sem_args.sem, 1, 3); + check_mutex_state(fd, mutex_args.mutex, 1, 456); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + + /* delete an object while it's being waited on */ + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &sem_args.sem); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + TEST_HARNESS_MAIN From e7b33ff77db040acd55fb99e1e2917326df6e3e9 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Fri, 5 Mar 2021 12:22:55 -0600 Subject: [PATCH 20/48] maintainers: Add an entry for winesync. Signed-off-by: Kai Krakow --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 07a9c274c0e298..7dc1c1144fd5b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22173,6 +22173,15 @@ M: David Härdeman S: Maintained F: drivers/media/rc/winbond-cir.c +WINESYNC SYNCHRONIZATION PRIMITIVE DRIVER +M: Zebediah Figura +L: wine-devel@winehq.org +S: Supported +F: Documentation/userspace-api/winesync.rst +F: drivers/misc/winesync.c +F: include/uapi/linux/winesync.h +F: tools/testing/selftests/drivers/winesync/ + WINSYSTEMS EBC-C384 WATCHDOG DRIVER M: William Breathitt Gray L: linux-watchdog@vger.kernel.org From 951a50b481d26613e19fc7d1e1693a9ba30999a5 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 18:21:03 -0600 Subject: [PATCH 21/48] winesync: Introduce WINESYNC_IOC_CREATE_EVENT. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 65 +++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 8 +++++ 2 files changed, 73 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index eae272663abe7c..eaba41510784ec 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -17,6 +17,7 @@ enum winesync_type { WINESYNC_TYPE_SEM, WINESYNC_TYPE_MUTEX, + WINESYNC_TYPE_EVENT, }; struct winesync_obj { @@ -66,6 +67,10 @@ struct winesync_obj { __u32 owner; bool ownerdead; } mutex; + struct { + bool manual; + bool signaled; + } event; } u; }; @@ -199,6 +204,8 @@ static bool is_signaled(struct winesync_obj *obj, __u32 owner) if (obj->u.mutex.owner && obj->u.mutex.owner != owner) return false; return obj->u.mutex.count < UINT_MAX; + case WINESYNC_TYPE_EVENT: + return obj->u.event.signaled; } WARN(1, "bad object type %#x\n", obj->type); @@ -248,6 +255,10 @@ static void try_wake_all(struct winesync_device *dev, struct winesync_q *q, obj->u.mutex.count++; obj->u.mutex.owner = q->owner; break; + case WINESYNC_TYPE_EVENT: + if (!obj->u.event.manual) + obj->u.event.signaled = false; + break; } } wake_up_process(q->task); @@ -315,6 +326,26 @@ static void try_wake_any_mutex(struct winesync_obj *mutex) } } +static void try_wake_any_event(struct winesync_obj *event) +{ + struct winesync_q_entry *entry; + + lockdep_assert_held(&event->lock); + + list_for_each_entry(entry, &event->any_waiters, node) { + struct winesync_q *q = entry->q; + + if (!event->u.event.signaled) + break; + + if (atomic_cmpxchg(&q->signaled, -1, entry->index) == -1) { + if (!event->u.event.manual) + event->u.event.signaled = false; + wake_up_process(q->task); + } + } +} + static int winesync_create_sem(struct winesync_device *dev, void __user *argp) { struct winesync_sem_args __user *user_args = argp; @@ -379,6 +410,35 @@ static int winesync_create_mutex(struct winesync_device *dev, void __user *argp) return put_user(id, &user_args->mutex); } +static int winesync_create_event(struct winesync_device *dev, void __user *argp) +{ + struct winesync_event_args __user *user_args = argp; + struct winesync_event_args args; + struct winesync_obj *event; + __u32 id; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + init_obj(event); + event->type = WINESYNC_TYPE_EVENT; + event->u.event.manual = args.manual; + event->u.event.signaled = args.signaled; + + ret = xa_alloc(&dev->objects, &id, event, xa_limit_32b, GFP_KERNEL); + if (ret < 0) { + kfree(event); + return ret; + } + + return put_user(id, &user_args->event); +} + static int winesync_delete(struct winesync_device *dev, void __user *argp) { struct winesync_obj *obj; @@ -760,6 +820,9 @@ static void try_wake_any_obj(struct winesync_obj *obj) case WINESYNC_TYPE_MUTEX: try_wake_any_mutex(obj); break; + case WINESYNC_TYPE_EVENT: + try_wake_any_event(obj); + break; } } @@ -925,6 +988,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { + case WINESYNC_IOC_CREATE_EVENT: + return winesync_create_event(dev, argp); case WINESYNC_IOC_CREATE_MUTEX: return winesync_create_mutex(dev, argp); case WINESYNC_IOC_CREATE_SEM: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 3371a303a9270a..3999407534e099 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -22,6 +22,12 @@ struct winesync_mutex_args { __u32 count; }; +struct winesync_event_args { + __u32 event; + __u32 manual; + __u32 signaled; +}; + struct winesync_wait_args { __u64 timeout; __u64 objs; @@ -51,5 +57,7 @@ struct winesync_wait_args { struct winesync_sem_args) #define WINESYNC_IOC_READ_MUTEX _IOWR(WINESYNC_IOC_BASE, 9, \ struct winesync_mutex_args) +#define WINESYNC_IOC_CREATE_EVENT _IOWR(WINESYNC_IOC_BASE, 10, \ + struct winesync_event_args) #endif From 7f3746bca5101d11a26a004c6357f65b42666394 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 18:43:30 -0600 Subject: [PATCH 22/48] winesync: Introduce WINESYNC_IOC_SET_EVENT. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 45 +++++++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 ++ 2 files changed, 47 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index eaba41510784ec..658ad7b80c291b 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -704,6 +704,49 @@ static int winesync_kill_owner(struct winesync_device *dev, void __user *argp) return 0; } +static int winesync_set_event(struct winesync_device *dev, void __user *argp) +{ + struct winesync_event_args __user *user_args = argp; + struct winesync_event_args args; + struct winesync_obj *event; + bool prev_state; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + event = get_obj_typed(dev, args.event, WINESYNC_TYPE_EVENT); + if (!event) + return -EINVAL; + + if (atomic_read(&event->all_hint) > 0) { + spin_lock(&dev->wait_all_lock); + spin_lock(&event->lock); + + prev_state = event->u.event.signaled; + event->u.event.signaled = true; + try_wake_all_obj(dev, event); + try_wake_any_event(event); + + spin_unlock(&event->lock); + spin_unlock(&dev->wait_all_lock); + } else { + spin_lock(&event->lock); + + prev_state = event->u.event.signaled; + event->u.event.signaled = true; + try_wake_any_event(event); + + spin_unlock(&event->lock); + } + + put_obj(event); + + if (put_user(prev_state, &user_args->signaled)) + return -EFAULT; + + return 0; +} + static int winesync_schedule(const struct winesync_q *q, ktime_t *timeout) { int ret = 0; @@ -1006,6 +1049,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_read_mutex(dev, argp); case WINESYNC_IOC_READ_SEM: return winesync_read_sem(dev, argp); + case WINESYNC_IOC_SET_EVENT: + return winesync_set_event(dev, argp); case WINESYNC_IOC_WAIT_ALL: return winesync_wait_all(dev, argp); case WINESYNC_IOC_WAIT_ANY: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 3999407534e099..34cd65d879a847 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -59,5 +59,7 @@ struct winesync_wait_args { struct winesync_mutex_args) #define WINESYNC_IOC_CREATE_EVENT _IOWR(WINESYNC_IOC_BASE, 10, \ struct winesync_event_args) +#define WINESYNC_IOC_SET_EVENT _IOWR(WINESYNC_IOC_BASE, 11, \ + struct winesync_event_args) #endif From 344afe7a58303bf2f3b1b1d9b30685089e8a4039 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 19:00:25 -0600 Subject: [PATCH 23/48] winesync: Introduce WINESYNC_IOC_RESET_EVENT. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 31 +++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 ++ 2 files changed, 33 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 658ad7b80c291b..a93f173127f44c 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -747,6 +747,35 @@ static int winesync_set_event(struct winesync_device *dev, void __user *argp) return 0; } +static int winesync_reset_event(struct winesync_device *dev, void __user *argp) +{ + struct winesync_event_args __user *user_args = argp; + struct winesync_event_args args; + struct winesync_obj *event; + bool prev_state; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + event = get_obj_typed(dev, args.event, WINESYNC_TYPE_EVENT); + if (!event) + return -EINVAL; + + spin_lock(&event->lock); + + prev_state = event->u.event.signaled; + event->u.event.signaled = false; + + spin_unlock(&event->lock); + + put_obj(event); + + if (put_user(prev_state, &user_args->signaled)) + return -EFAULT; + + return 0; +} + static int winesync_schedule(const struct winesync_q *q, ktime_t *timeout) { int ret = 0; @@ -1049,6 +1078,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_read_mutex(dev, argp); case WINESYNC_IOC_READ_SEM: return winesync_read_sem(dev, argp); + case WINESYNC_IOC_RESET_EVENT: + return winesync_reset_event(dev, argp); case WINESYNC_IOC_SET_EVENT: return winesync_set_event(dev, argp); case WINESYNC_IOC_WAIT_ALL: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 34cd65d879a847..e71271fc44ba87 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -61,5 +61,7 @@ struct winesync_wait_args { struct winesync_event_args) #define WINESYNC_IOC_SET_EVENT _IOWR(WINESYNC_IOC_BASE, 11, \ struct winesync_event_args) +#define WINESYNC_IOC_RESET_EVENT _IOWR(WINESYNC_IOC_BASE, 12, \ + struct winesync_event_args) #endif From 51e15853338b65b2fe3209ad70c9e83ceea1462d Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 19:10:12 -0600 Subject: [PATCH 24/48] winesync: Introduce WINESYNC_IOC_PULSE_EVENT. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 11 +++++++++-- include/uapi/linux/winesync.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index a93f173127f44c..27d5baa457dfcc 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -704,7 +704,8 @@ static int winesync_kill_owner(struct winesync_device *dev, void __user *argp) return 0; } -static int winesync_set_event(struct winesync_device *dev, void __user *argp) +static int winesync_set_event(struct winesync_device *dev, void __user *argp, + bool pulse) { struct winesync_event_args __user *user_args = argp; struct winesync_event_args args; @@ -726,6 +727,8 @@ static int winesync_set_event(struct winesync_device *dev, void __user *argp) event->u.event.signaled = true; try_wake_all_obj(dev, event); try_wake_any_event(event); + if (pulse) + event->u.event.signaled = false; spin_unlock(&event->lock); spin_unlock(&dev->wait_all_lock); @@ -735,6 +738,8 @@ static int winesync_set_event(struct winesync_device *dev, void __user *argp) prev_state = event->u.event.signaled; event->u.event.signaled = true; try_wake_any_event(event); + if (pulse) + event->u.event.signaled = false; spin_unlock(&event->lock); } @@ -1070,6 +1075,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_delete(dev, argp); case WINESYNC_IOC_KILL_OWNER: return winesync_kill_owner(dev, argp); + case WINESYNC_IOC_PULSE_EVENT: + return winesync_set_event(dev, argp, true); case WINESYNC_IOC_PUT_MUTEX: return winesync_put_mutex(dev, argp); case WINESYNC_IOC_PUT_SEM: @@ -1081,7 +1088,7 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, case WINESYNC_IOC_RESET_EVENT: return winesync_reset_event(dev, argp); case WINESYNC_IOC_SET_EVENT: - return winesync_set_event(dev, argp); + return winesync_set_event(dev, argp, false); case WINESYNC_IOC_WAIT_ALL: return winesync_wait_all(dev, argp); case WINESYNC_IOC_WAIT_ANY: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index e71271fc44ba87..7c09d0e9733cdf 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -63,5 +63,7 @@ struct winesync_wait_args { struct winesync_event_args) #define WINESYNC_IOC_RESET_EVENT _IOWR(WINESYNC_IOC_BASE, 12, \ struct winesync_event_args) +#define WINESYNC_IOC_PULSE_EVENT _IOWR(WINESYNC_IOC_BASE, 13, \ + struct winesync_event_args) #endif From 76a83ac803ebc0728ea5973dcc450adfe406ba09 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 19:14:00 -0600 Subject: [PATCH 25/48] winesync: Introduce WINESYNC_IOC_READ_EVENT. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 30 ++++++++++++++++++++++++++++++ include/uapi/linux/winesync.h | 2 ++ 2 files changed, 32 insertions(+) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 27d5baa457dfcc..0f8a8a94eef833 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -639,6 +639,34 @@ static int winesync_read_mutex(struct winesync_device *dev, void __user *argp) return ret; } +static int winesync_read_event(struct winesync_device *dev, void __user *argp) +{ + struct winesync_event_args __user *user_args = argp; + struct winesync_event_args args; + struct winesync_obj *event; + __u32 id; + int ret; + + if (get_user(id, &user_args->event)) + return -EFAULT; + + event = get_obj_typed(dev, id, WINESYNC_TYPE_EVENT); + if (!event) + return -EINVAL; + + args.event = id; + spin_lock(&event->lock); + args.manual = event->u.event.manual; + args.signaled = event->u.event.signaled; + spin_unlock(&event->lock); + + put_obj(event); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return ret; +} + /* * Actually change the mutex state to mark its owner as dead. */ @@ -1081,6 +1109,8 @@ static long winesync_char_ioctl(struct file *file, unsigned int cmd, return winesync_put_mutex(dev, argp); case WINESYNC_IOC_PUT_SEM: return winesync_put_sem(dev, argp); + case WINESYNC_IOC_READ_EVENT: + return winesync_read_event(dev, argp); case WINESYNC_IOC_READ_MUTEX: return winesync_read_mutex(dev, argp); case WINESYNC_IOC_READ_SEM: diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index 7c09d0e9733cdf..fb3788339ffe9a 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -65,5 +65,7 @@ struct winesync_wait_args { struct winesync_event_args) #define WINESYNC_IOC_PULSE_EVENT _IOWR(WINESYNC_IOC_BASE, 13, \ struct winesync_event_args) +#define WINESYNC_IOC_READ_EVENT _IOWR(WINESYNC_IOC_BASE, 14, \ + struct winesync_event_args) #endif From 8854d6d4d8728548b2b622a6db4a82eb3fef7444 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 19:34:47 -0600 Subject: [PATCH 26/48] selftests: winesync: Add some tests for manual-reset event state. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index ad6d0f9a2a35b5..7e99f09b113b8d 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -85,6 +85,30 @@ static int put_mutex(int fd, __u32 mutex, __u32 owner, __u32 *count) return ret; } +static int read_event_state(int fd, __u32 event, __u32 *signaled, __u32 *manual) +{ + struct winesync_event_args args; + int ret; + + args.event = event; + args.signaled = 0xdeadbeef; + args.manual = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_READ_EVENT, &args); + *signaled = args.signaled; + *manual = args.manual; + return ret; +} + +#define check_event_state(fd, event, signaled, manual) \ + ({ \ + __u32 __signaled, __manual; \ + int ret = read_event_state((fd), (event), \ + &__signaled, &__manual); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((signaled), __signaled); \ + EXPECT_EQ((manual), __manual); \ + }) + static int wait_objs(int fd, unsigned long request, __u32 count, const __u32 *objs, __u32 owner, __u32 *index) { @@ -350,6 +374,74 @@ TEST(mutex_state) close(fd); } +TEST(manual_event_state) +{ + struct winesync_event_args event_args; + __u32 index; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 1; + event_args.signaled = 0; + event_args.event = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, event_args.event); + check_event_state(fd, event_args.event, 0, 1); + + event_args.signaled = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 1, 1); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, event_args.signaled); + check_event_state(fd, event_args.event, 1, 1); + + ret = wait_any(fd, 1, &event_args.event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(fd, event_args.event, 1, 1); + + event_args.signaled = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, event_args.signaled); + check_event_state(fd, event_args.event, 0, 1); + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 1); + + ret = wait_any(fd, 1, &event_args.event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, event_args.signaled); + check_event_state(fd, event_args.event, 0, 1); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 1); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST(test_wait_any) { struct winesync_mutex_args mutex_args = {0}; From ec4618bf4932e73056e00503fa6c4ce6d5763b3b Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 19:45:39 -0600 Subject: [PATCH 27/48] selftests: winesync: Add some tests for auto-reset event state. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 7e99f09b113b8d..3a9ac69308afc0 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -442,6 +442,65 @@ TEST(manual_event_state) close(fd); } +TEST(auto_event_state) +{ + struct winesync_event_args event_args; + __u32 index; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 0; + event_args.signaled = 1; + event_args.event = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, event_args.event); + + check_event_state(fd, event_args.event, 1, 0); + + event_args.signaled = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, event_args.signaled); + check_event_state(fd, event_args.event, 1, 0); + + ret = wait_any(fd, 1, &event_args.event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(fd, event_args.event, 0, 0); + + event_args.signaled = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 0); + + ret = wait_any(fd, 1, &event_args.event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, event_args.signaled); + check_event_state(fd, event_args.event, 0, 0); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 0); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST(test_wait_any) { struct winesync_mutex_args mutex_args = {0}; From d51e698e68362c62af955ef7f947004b94b2ca4e Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 21:00:50 -0600 Subject: [PATCH 28/48] selftests: winesync: Add some tests for wakeup signaling with events. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 152 +++++++++++++++++- 1 file changed, 150 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 3a9ac69308afc0..2ccc51510230bf 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -610,6 +610,7 @@ TEST(test_wait_any) TEST(test_wait_all) { + struct winesync_event_args event_args = {0}; struct winesync_mutex_args mutex_args = {0}; struct winesync_sem_args sem_args = {0}; __u32 objs[2], owner, index; @@ -632,6 +633,11 @@ TEST(test_wait_all) EXPECT_EQ(0, ret); EXPECT_NE(0xdeadbeef, mutex_args.mutex); + event_args.manual = true; + event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + objs[0] = sem_args.sem; objs[1] = mutex_args.mutex; @@ -680,6 +686,14 @@ TEST(test_wait_all) check_sem_state(fd, sem_args.sem, 1, 3); check_mutex_state(fd, mutex_args.mutex, 1, 123); + objs[0] = sem_args.sem; + objs[1] = event_args.event; + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(fd, sem_args.sem, 0, 3); + check_event_state(fd, event_args.event, 1, 1); + /* test waiting on the same object twice */ objs[0] = objs[1] = sem_args.sem; ret = wait_all(fd, 2, objs, 123, &index); @@ -690,6 +704,8 @@ TEST(test_wait_all) EXPECT_EQ(0, ret); ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); close(fd); } @@ -829,6 +845,7 @@ static int wait_for_thread(pthread_t thread, unsigned int ms) TEST(wake_any) { + struct winesync_event_args event_args = {0}; struct winesync_mutex_args mutex_args = {0}; struct winesync_wait_args wait_args = {0}; struct winesync_sem_args sem_args = {0}; @@ -918,10 +935,103 @@ TEST(wake_any) EXPECT_EQ(0, thread_args.ret); EXPECT_EQ(1, wait_args.index); + /* test waking events */ + + event_args.manual = false; + event_args.signaled = false; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + + objs[1] = event_args.event; + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + event_args.manual = true; + event_args.signaled = false; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + + objs[1] = event_args.event; + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 1, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, event_args.signaled); + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, event_args.signaled); + check_event_state(fd, event_args.event, 0, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + /* delete an object while it's being waited on */ get_abs_timeout(&timeout, CLOCK_MONOTONIC, 200); wait_args.owner = 123; + objs[1] = mutex_args.mutex; ret = pthread_create(&thread, NULL, wait_thread, &thread_args); EXPECT_EQ(0, ret); @@ -943,11 +1053,13 @@ TEST(wake_any) TEST(wake_all) { + struct winesync_event_args manual_event_args = {0}; + struct winesync_event_args auto_event_args = {0}; struct winesync_mutex_args mutex_args = {0}; struct winesync_wait_args wait_args = {0}; struct winesync_sem_args sem_args = {0}; struct wait_args thread_args; - __u32 objs[2], count, index; + __u32 objs[4], count, index; struct timespec timeout; pthread_t thread; int fd, ret; @@ -969,13 +1081,25 @@ TEST(wake_all) EXPECT_EQ(0, ret); EXPECT_NE(0xdeadbeef, mutex_args.mutex); + manual_event_args.manual = true; + manual_event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &manual_event_args); + EXPECT_EQ(0, ret); + + auto_event_args.manual = false; + auto_event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &auto_event_args); + EXPECT_EQ(0, ret); + objs[0] = sem_args.sem; objs[1] = mutex_args.mutex; + objs[2] = manual_event_args.event; + objs[3] = auto_event_args.event; get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); wait_args.timeout = (uintptr_t)&timeout; wait_args.objs = (uintptr_t)objs; - wait_args.count = 2; + wait_args.count = 4; wait_args.owner = 456; thread_args.fd = fd; thread_args.args = &wait_args; @@ -1009,12 +1133,32 @@ TEST(wake_all) check_mutex_state(fd, mutex_args.mutex, 0, 0); + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &manual_event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, manual_event_args.signaled); + sem_args.count = 2; ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); EXPECT_EQ(0, ret); EXPECT_EQ(0, sem_args.count); + check_sem_state(fd, sem_args.sem, 2, 3); + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &auto_event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, auto_event_args.signaled); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &manual_event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, manual_event_args.signaled); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &auto_event_args); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, auto_event_args.signaled); + check_sem_state(fd, sem_args.sem, 1, 3); check_mutex_state(fd, mutex_args.mutex, 1, 456); + check_event_state(fd, manual_event_args.event, 1, 1); + check_event_state(fd, auto_event_args.event, 0, 0); ret = wait_for_thread(thread, 100); EXPECT_EQ(0, ret); @@ -1034,6 +1178,10 @@ TEST(wake_all) EXPECT_EQ(0, ret); ret = ioctl(fd, WINESYNC_IOC_DELETE, &mutex_args.mutex); EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &manual_event_args.event); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &auto_event_args.event); + EXPECT_EQ(0, ret); ret = wait_for_thread(thread, 200); EXPECT_EQ(0, ret); From a6136d46f2f06c8dcb3d1561c0d4cb3f48188944 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 21:06:22 -0600 Subject: [PATCH 29/48] selftests: winesync: Add some tests for invalid object handling with events. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index 2ccc51510230bf..f2e18836c73372 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -712,6 +712,7 @@ TEST(test_wait_all) TEST(invalid_objects) { + struct winesync_event_args event_args = {0}; struct winesync_mutex_args mutex_args = {0}; struct winesync_wait_args wait_args = {0}; struct winesync_sem_args sem_args = {0}; @@ -737,6 +738,22 @@ TEST(invalid_objects) EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno); + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_READ_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + wait_args.objs = (uintptr_t)objs; wait_args.count = 1; ret = ioctl(fd, WINESYNC_IOC_WAIT_ANY, &wait_args); @@ -763,6 +780,23 @@ TEST(invalid_objects) EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno); + event_args.event = sem_args.sem; + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_PULSE_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = ioctl(fd, WINESYNC_IOC_READ_EVENT, &event_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + objs[0] = sem_args.sem; objs[1] = sem_args.sem + 1; wait_args.count = 2; From 09c86f23e90f7136150b5f1c45e56b4d1675f3a5 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 19 Jan 2022 22:01:46 -0600 Subject: [PATCH 30/48] docs: winesync: Document event APIs. Signed-off-by: Kai Krakow --- Documentation/userspace-api/winesync.rst | 104 ++++++++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/Documentation/userspace-api/winesync.rst b/Documentation/userspace-api/winesync.rst index 34e54be229cfc2..ffa2f8fbc7e3d0 100644 --- a/Documentation/userspace-api/winesync.rst +++ b/Documentation/userspace-api/winesync.rst @@ -18,8 +18,8 @@ interfaces such as futex(2) and poll(2). Synchronization primitives ========================== -The winesync driver exposes two types of synchronization primitives, -semaphores and mutexes. +The winesync driver exposes three types of synchronization primitives: +semaphores, mutexes, and events. A semaphore holds a single volatile 32-bit counter, and a static 32-bit integer denoting the maximum value. It is considered signaled @@ -45,6 +45,12 @@ intended use is to store a thread identifier; however, the winesync driver does not actually validate that a calling thread provides consistent or unique identifiers. +An event holds a volatile boolean state denoting whether it is +signaled or not. There are two types of events, auto-reset and +manual-reset. An auto-reset event is designaled when a wait is +satisfied; a manual-reset event is not. The event type is specified +when the event is created. + Unless specified otherwise, all operations on an object are atomic and totally ordered with respect to other operations on the same object. @@ -78,6 +84,12 @@ structures used in ioctl calls:: __u32 count; }; + struct winesync_event_args { + __u32 event; + __u32 signaled; + __u32 manual; + }; + struct winesync_wait_args { __u64 timeout; __u64 objs; @@ -125,6 +137,22 @@ The ioctls are as follows: If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is zero and ``count`` is nonzero, the function fails with ``EINVAL``. +.. c:macro:: WINESYNC_IOC_CREATE_EVENT + + Create an event object. Takes a pointer to struct + :c:type:`winesync_event_args`, which is used as follows: + + .. list-table:: + + * - ``event`` + - On output, contains the identifier of the created event. + * - ``signaled`` + - If nonzero, the event is initially signaled, otherwise + nonsignaled. + * - ``manual`` + - If nonzero, the event is a manual-reset event, otherwise + auto-reset. + .. c:macro:: WINESYNC_IOC_DELETE Delete an object of any type. Takes an input-only pointer to a @@ -178,6 +206,60 @@ The ioctls are as follows: unowned and signaled, and eligible threads waiting on it will be woken as appropriate. +.. c:macro:: WINESYNC_IOC_SET_EVENT + + Signal an event object. Takes a pointer to struct + :c:type:`winesync_event_args`, which is used as follows: + + .. list-table:: + + * - ``event`` + - Event object to set. + * - ``signaled`` + - On output, contains the previous state of the event. + * - ``manual`` + - Unused. + + Eligible threads will be woken, and auto-reset events will be + designaled appropriately. + +.. c:macro:: WINESYNC_IOC_RESET_EVENT + + Designal an event object. Takes a pointer to struct + :c:type:`winesync_event_args`, which is used as follows: + + .. list-table:: + + * - ``event`` + - Event object to reset. + * - ``signaled`` + - On output, contains the previous state of the event. + * - ``manual`` + - Unused. + +.. c:macro:: WINESYNC_IOC_PULSE_EVENT + + Wake threads waiting on an event object without leaving it in a + signaled state. Takes a pointer to struct + :c:type:`winesync_event_args`, which is used as follows: + + .. list-table:: + + * - ``event`` + - Event object to pulse. + * - ``signaled`` + - On output, contains the previous state of the event. + * - ``manual`` + - Unused. + + A pulse operation can be thought of as a set followed by a reset, + performed as a single atomic operation. If two threads are waiting + on an auto-reset event which is pulsed, only one will be woken. If + two threads are waiting a manual-reset event which is pulsed, both + will be woken. However, in both cases, the event will be unsignaled + afterwards, and a simultaneous read operation will always report the + event as unsignaled. + .. c:macro:: WINESYNC_IOC_READ_SEM Read the current state of a semaphore object. Takes a pointer to @@ -211,6 +293,21 @@ The ioctls are as follows: ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to zero. +.. c:macro:: WINESYNC_IOC_READ_EVENT + + Read the current state of an event object. Takes a pointer to struct + :c:type:`winesync_event_args`, which is used as follows: + + .. list-table:: + + * - ``event`` + - Event object. + * - ``signaled`` + - On output, contains the current state of the event. + * - ``manual`` + - On output, contains 1 if the event is a manual-reset event, + and 0 otherwise. + .. c:macro:: WINESYNC_IOC_KILL_OWNER Mark any mutexes owned by the given owner as unowned and @@ -272,7 +369,8 @@ The ioctls are as follows: considered to be signaled if it is unowned or if its owner matches the ``owner`` argument, and is acquired by incrementing its recursion count by one and setting its owner to the ``owner`` - argument. + argument. An auto-reset event is acquired by designaling it; a + manual-reset event is not affected by acquisition. Acquisition is atomic and totally ordered with respect to other operations on the same object. If two wait operations (with From b128c316cdd36db92532b95cf2f2aa4175933f6c Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 13 Apr 2022 20:02:39 -0500 Subject: [PATCH 31/48] winesync: Introduce alertable waits. Signed-off-by: Kai Krakow --- drivers/misc/winesync.c | 68 ++++++++++++++++++++++++++++++----- include/uapi/linux/winesync.h | 2 +- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/misc/winesync.c b/drivers/misc/winesync.c index 0f8a8a94eef833..64b379d846dbee 100644 --- a/drivers/misc/winesync.c +++ b/drivers/misc/winesync.c @@ -842,10 +842,11 @@ static int setup_wait(struct winesync_device *dev, const __u32 count = args->count; struct winesync_q *q; ktime_t timeout = 0; + __u32 total_count; __u32 *ids; __u32 i, j; - if (!args->owner || args->pad) + if (!args->owner) return -EINVAL; if (args->timeout) { @@ -859,7 +860,11 @@ static int setup_wait(struct winesync_device *dev, timeout = timespec64_to_ns(&to); } - ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); + total_count = count; + if (args->alert) + total_count++; + + ids = kmalloc_array(total_count, sizeof(*ids), GFP_KERNEL); if (!ids) return -ENOMEM; if (copy_from_user(ids, u64_to_user_ptr(args->objs), @@ -867,8 +872,10 @@ static int setup_wait(struct winesync_device *dev, kfree(ids); return -EFAULT; } + if (args->alert) + ids[count] = args->alert; - q = kmalloc(struct_size(q, entries, count), GFP_KERNEL); + q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL); if (!q) { kfree(ids); return -ENOMEM; @@ -880,7 +887,7 @@ static int setup_wait(struct winesync_device *dev, q->ownerdead = false; q->count = count; - for (i = 0; i < count; i++) { + for (i = 0; i < total_count; i++) { struct winesync_q_entry *entry = &q->entries[i]; struct winesync_obj *obj = get_obj(dev, ids[i]); @@ -935,9 +942,9 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) { struct winesync_wait_args args; struct winesync_q *q; + __u32 i, total_count; ktime_t timeout; int signaled; - __u32 i; int ret; if (copy_from_user(&args, argp, sizeof(args))) @@ -947,9 +954,13 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) if (ret < 0) return ret; + total_count = args.count; + if (args.alert) + total_count++; + /* queue ourselves */ - for (i = 0; i < args.count; i++) { + for (i = 0; i < total_count; i++) { struct winesync_q_entry *entry = &q->entries[i]; struct winesync_obj *obj = entry->obj; @@ -958,9 +969,15 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) spin_unlock(&obj->lock); } - /* check if we are already signaled */ + /* + * Check if we are already signaled. + * + * Note that the API requires that normal objects are checked before + * the alert event. Hence we queue the alert event last, and check + * objects in order. + */ - for (i = 0; i < args.count; i++) { + for (i = 0; i < total_count; i++) { struct winesync_obj *obj = q->entries[i].obj; if (atomic_read(&q->signaled) != -1) @@ -977,7 +994,7 @@ static int winesync_wait_any(struct winesync_device *dev, void __user *argp) /* and finally, unqueue */ - for (i = 0; i < args.count; i++) { + for (i = 0; i < total_count; i++) { struct winesync_q_entry *entry = &q->entries[i]; struct winesync_obj *obj = entry->obj; @@ -1037,6 +1054,14 @@ static int winesync_wait_all(struct winesync_device *dev, void __user *argp) */ list_add_tail(&entry->node, &obj->all_waiters); } + if (args.alert) { + struct winesync_q_entry *entry = &q->entries[args.count]; + struct winesync_obj *obj = entry->obj; + + spin_lock(&obj->lock); + list_add_tail(&entry->node, &obj->any_waiters); + spin_unlock(&obj->lock); + } /* check if we are already signaled */ @@ -1044,6 +1069,21 @@ static int winesync_wait_all(struct winesync_device *dev, void __user *argp) spin_unlock(&dev->wait_all_lock); + /* + * Check if the alert event is signaled, making sure to do so only + * after checking if the other objects are signaled. + */ + + if (args.alert) { + struct winesync_obj *obj = q->entries[args.count].obj; + + if (atomic_read(&q->signaled) == -1) { + spin_lock(&obj->lock); + try_wake_any_obj(obj); + spin_unlock(&obj->lock); + } + } + /* sleep */ ret = winesync_schedule(q, args.timeout ? &timeout : NULL); @@ -1066,6 +1106,16 @@ static int winesync_wait_all(struct winesync_device *dev, void __user *argp) put_obj(obj); } + if (args.alert) { + struct winesync_q_entry *entry = &q->entries[args.count]; + struct winesync_obj *obj = entry->obj; + + spin_lock(&obj->lock); + list_del(&entry->node); + spin_unlock(&obj->lock); + + put_obj(obj); + } spin_unlock(&dev->wait_all_lock); diff --git a/include/uapi/linux/winesync.h b/include/uapi/linux/winesync.h index fb3788339ffe9a..5b4e369f74693a 100644 --- a/include/uapi/linux/winesync.h +++ b/include/uapi/linux/winesync.h @@ -34,7 +34,7 @@ struct winesync_wait_args { __u32 count; __u32 owner; __u32 index; - __u32 pad; + __u32 alert; }; #define WINESYNC_IOC_BASE 0xf7 From 9e2ea60a4a2e4dacae8368ef3996f5199d86b8b1 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 20 Apr 2022 18:08:37 -0500 Subject: [PATCH 32/48] selftests: winesync: Add tests for alertable waits. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 191 +++++++++++++++++- 1 file changed, 188 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index f2e18836c73372..a87e3c48709b7c 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -110,7 +110,7 @@ static int read_event_state(int fd, __u32 event, __u32 *signaled, __u32 *manual) }) static int wait_objs(int fd, unsigned long request, __u32 count, - const __u32 *objs, __u32 owner, __u32 *index) + const __u32 *objs, __u32 owner, __u32 alert, __u32 *index) { struct winesync_wait_args args = {0}; struct timespec timeout; @@ -123,6 +123,7 @@ static int wait_objs(int fd, unsigned long request, __u32 count, args.objs = (uintptr_t)objs; args.owner = owner; args.index = 0xdeadbeef; + args.alert = alert; ret = ioctl(fd, request, &args); *index = args.index; return ret; @@ -131,13 +132,29 @@ static int wait_objs(int fd, unsigned long request, __u32 count, static int wait_any(int fd, __u32 count, const __u32 *objs, __u32 owner, __u32 *index) { - return wait_objs(fd, WINESYNC_IOC_WAIT_ANY, count, objs, owner, index); + return wait_objs(fd, WINESYNC_IOC_WAIT_ANY, + count, objs, owner, 0, index); } static int wait_all(int fd, __u32 count, const __u32 *objs, __u32 owner, __u32 *index) { - return wait_objs(fd, WINESYNC_IOC_WAIT_ALL, count, objs, owner, index); + return wait_objs(fd, WINESYNC_IOC_WAIT_ALL, + count, objs, owner, 0, index); +} + +static int wait_any_alert(int fd, __u32 count, const __u32 *objs, + __u32 owner, __u32 alert, __u32 *index) +{ + return wait_objs(fd, WINESYNC_IOC_WAIT_ANY, + count, objs, owner, alert, index); +} + +static int wait_all_alert(int fd, __u32 count, const __u32 *objs, + __u32 owner, __u32 alert, __u32 *index) +{ + return wait_objs(fd, WINESYNC_IOC_WAIT_ALL, + count, objs, owner, alert, index); } TEST(semaphore_state) @@ -1225,4 +1242,172 @@ TEST(wake_all) close(fd); } +TEST(alert_any) +{ + struct winesync_event_args event_args = {0}; + struct winesync_sem_args sem_args = {0}; + __u32 objs[2], index; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 2; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + objs[0] = sem_args.sem; + + sem_args.count = 1; + sem_args.max = 2; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + objs[1] = sem_args.sem; + + event_args.manual = true; + event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + + sem_args.sem = objs[0]; + sem_args.count = 1; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &objs[0]); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &objs[1]); + EXPECT_EQ(0, ret); + + close(fd); +} + +TEST(alert_all) +{ + struct winesync_event_args event_args = {0}; + struct winesync_sem_args sem_args = {0}; + __u32 objs[2], index; + int fd, ret; + + fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 2; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + objs[0] = sem_args.sem; + + sem_args.count = 1; + sem_args.max = 2; + sem_args.sem = 0xdeadbeef; + ret = ioctl(fd, WINESYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(0, ret); + EXPECT_NE(0xdeadbeef, sem_args.sem); + objs[1] = sem_args.sem; + + event_args.manual = true; + event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + + ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + ret = ioctl(fd, WINESYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_EQ(0, ret); + + sem_args.sem = objs[1]; + sem_args.count = 2; + ret = ioctl(fd, WINESYNC_IOC_PUT_SEM, &sem_args); + EXPECT_EQ(0, ret); + + ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); + EXPECT_EQ(0, ret); + + ret = ioctl(fd, WINESYNC_IOC_DELETE, &objs[0]); + EXPECT_EQ(0, ret); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &objs[1]); + EXPECT_EQ(0, ret); + + close(fd); +} + TEST_HARNESS_MAIN From e5702378c952ef437aa8f66cd97ca1664b970946 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 20 Apr 2022 18:24:43 -0500 Subject: [PATCH 33/48] serftests: winesync: Add some tests for wakeup signaling via alerts. Signed-off-by: Kai Krakow --- .../selftests/drivers/winesync/winesync.c | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tools/testing/selftests/drivers/winesync/winesync.c b/tools/testing/selftests/drivers/winesync/winesync.c index a87e3c48709b7c..169e922484b008 100644 --- a/tools/testing/selftests/drivers/winesync/winesync.c +++ b/tools/testing/selftests/drivers/winesync/winesync.c @@ -1245,8 +1245,12 @@ TEST(wake_all) TEST(alert_any) { struct winesync_event_args event_args = {0}; + struct winesync_wait_args wait_args = {0}; struct winesync_sem_args sem_args = {0}; + struct wait_args thread_args; + struct timespec timeout; __u32 objs[2], index; + pthread_t thread; int fd, ret; fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); @@ -1295,6 +1299,35 @@ TEST(alert_any) EXPECT_EQ(0, ret); EXPECT_EQ(2, index); + /* test wakeup via alert */ + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + wait_args.timeout = (uintptr_t)&timeout; + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event_args.event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = WINESYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); EXPECT_EQ(0, ret); @@ -1336,8 +1369,12 @@ TEST(alert_any) TEST(alert_all) { struct winesync_event_args event_args = {0}; + struct winesync_wait_args wait_args = {0}; struct winesync_sem_args sem_args = {0}; + struct wait_args thread_args; + struct timespec timeout; __u32 objs[2], index; + pthread_t thread; int fd, ret; fd = open("/dev/winesync", O_CLOEXEC | O_RDONLY); @@ -1372,6 +1409,35 @@ TEST(alert_all) EXPECT_EQ(0, ret); EXPECT_EQ(2, index); + /* test wakeup via alert */ + + ret = ioctl(fd, WINESYNC_IOC_RESET_EVENT, &event_args); + EXPECT_EQ(0, ret); + + get_abs_timeout(&timeout, CLOCK_MONOTONIC, 1000); + wait_args.timeout = (uintptr_t)&timeout; + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event_args.event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = WINESYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(fd, WINESYNC_IOC_SET_EVENT, &event_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + ret = ioctl(fd, WINESYNC_IOC_DELETE, &event_args.event); EXPECT_EQ(0, ret); From d9231f50b695fc6c530977f249b2295ef01a926e Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Wed, 20 Apr 2022 18:58:17 -0500 Subject: [PATCH 34/48] docs: winesync: Document alertable waits. Signed-off-by: Kai Krakow --- Documentation/userspace-api/winesync.rst | 40 ++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/Documentation/userspace-api/winesync.rst b/Documentation/userspace-api/winesync.rst index ffa2f8fbc7e3d0..f0110d2744c709 100644 --- a/Documentation/userspace-api/winesync.rst +++ b/Documentation/userspace-api/winesync.rst @@ -354,9 +354,13 @@ The ioctls are as follows: ``EINVAL``. * - ``index`` - On success, contains the index (into ``objs``) of the object - which was signaled. - * - ``pad`` - - This field is not used and must be set to zero. + which was signaled. If ``alert`` was signaled instead, + this contains ``count``. + * - ``alert`` + - Optional event object identifier. If nonzero, this specifies + an "alert" event object which, if signaled, will terminate + the wait. If nonzero, the identifier must point to a valid + event. This function attempts to acquire one of the given objects. If unable to do so, it sleeps until an object becomes signaled, @@ -385,9 +389,19 @@ The ioctls are as follows: the given owner (with a recursion count of 1) and as no longer inconsistent, and ``index`` is still set to the index of the mutex. - It is valid to pass the same object more than once. If a wakeup - occurs due to that object being signaled, ``index`` is set to the - lowest index corresponding to that object. + The ``alert`` argument is an "extra" event which can terminate the + wait, independently of all other objects. If members of ``objs`` and + ``alert`` are both simultaneously signaled, a member of ``objs`` + will always be given priority and acquired first. Aside from this, + for "any" waits, there is no difference between passing an event as + this parameter, and passing it as an additional object at the end of + the ``objs`` array. For "all" waits, there is an additional + difference, as described below. + + It is valid to pass the same object more than once, including by + passing the same event in the ``objs`` array and in ``alert``. If a + wakeup occurs due to that object being signaled, ``index`` is set to + the lowest index corresponding to that object. The function may fail with ``EINTR`` if a signal is received. @@ -396,7 +410,7 @@ The ioctls are as follows: Poll on a list of objects, atomically acquiring all of them. Takes a pointer to struct :c:type:`winesync_wait_args`, which is used identically to ``WINESYNC_IOC_WAIT_ANY``, except that ``index`` is - always filled with zero on success. + always filled with zero on success if not woken via alert. This function attempts to simultaneously acquire all of the given objects. If unable to do so, it sleeps until all objects become @@ -417,6 +431,14 @@ The ioctls are as follows: objects are specified, there is no way to know which were marked as inconsistent. + As with "any" waits, the ``alert`` argument is an "extra" event + which can terminate the wait. Critically, however, an "all" wait + will succeed if all members in ``objs`` are signaled, *or* if + ``alert`` is signaled. In the latter case ``index`` will be set to + ``count``. As with "any" waits, if both conditions are filled, the + former takes priority, and objects in ``objs`` will be acquired. + Unlike ``WINESYNC_IOC_WAIT_ANY``, it is not valid to pass the same - object more than once. If this is attempted, the function fails with - ``EINVAL``. + object more than once, nor is it valid to pass the same object in + ``objs`` and in ``alert`` If this is attempted, the function fails + with ``EINVAL``. From bd14bef0d025a74d2329d72aa34f68bb7c968cd1 Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 27 Jan 2020 18:21:09 +0100 Subject: [PATCH 35/48] ZEN: INTERACTIVE: Enable background reclaim of hugepages Use [defer+madvise] as default khugepaged defrag strategy: For some reason, the default strategy to respond to THP fault fallbacks is still just madvise, meaning stall if the program wants transparent hugepages, but don't trigger a background reclaim / compaction if THP begins to fail allocations. This creates a snowball affect where we still use the THP code paths, but we almost always fail once a system has been active and busy for a while. The option "defer" was created for interactive systems where THP can still improve performance. If we have to fallback to a regular page due to an allocation failure or anything else, we will trigger a background reclaim and compaction so future THP attempts succeed and previous attempts eventually have their smaller pages combined without stalling running applications. We still want madvise to stall applications that explicitely want THP, so defer+madvise _does_ make a ton of sense. Make it the default for interactive systems, especially if the kernel maintainer left transparent hugepages on "always". Reasoning and details in the original patch: https://lwn.net/Articles/711248/ Signed-off-by: Kai Krakow --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b20fef29e5bb5d..29800361475dd9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -61,7 +61,7 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1< Date: Sun, 7 Nov 2021 15:10:03 +0200 Subject: [PATCH 36/48] blk: elevator: always use bfq unless overridden by flag Also add ifdefs so that elevator_get_default() remains unchanged with respect to upstream if CONFIG_IOSCHED_BFQ is disabled. Signed-off-by: Juuso Alasuutari --- block/elevator.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index bd71f0fc4e4b67..97466a2601129c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -630,19 +630,25 @@ static inline bool elv_support_iosched(struct request_queue *q) } /* - * For single queue devices, default to using mq-deadline. If we have multiple - * queues or mq-deadline is not available, default to "none". + * Always use bfq as our elevator if possible. If bfq is unavailable default + * to mq-deadline for single-queue and none for multi-queue devices. */ static struct elevator_type *elevator_get_default(struct request_queue *q) { if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return NULL; +#ifndef CONFIG_IOSCHED_BFQ if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) return NULL; +#endif +#if defined(CONFIG_IOSCHED_BFQ) + return elevator_get(q, "bfq", false); +#else return elevator_get(q, "mq-deadline", false); +#endif } /* From b8c44ff6bfdfdc30f6db48dc559a589b861e566b Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Wed, 7 Dec 2016 21:13:16 +1100 Subject: [PATCH 37/48] Make threaded IRQs optionally the default which can be disabled. Signed-off-by: Kai Krakow --- include/linux/interrupt.h | 3 +++ kernel/irq/Kconfig | 17 +++++++++++++++++ kernel/irq/manage.c | 11 +++++++++++ 3 files changed, 31 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 4a1dc88ddbff9a..74d33218cada80 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -509,6 +509,9 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, #ifdef CONFIG_IRQ_FORCED_THREADING # ifdef CONFIG_PREEMPT_RT # define force_irqthreads() (true) +# elif defined(CONFIG_FORCE_IRQ_THREADING) +DECLARE_STATIC_KEY_TRUE(force_irqthreads_key); +# define force_irqthreads() (static_branch_likely(&force_irqthreads_key)) # else DECLARE_STATIC_KEY_FALSE(force_irqthreads_key); # define force_irqthreads() (static_branch_unlikely(&force_irqthreads_key)) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index db3d174c53d484..6c926e417c3511 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -112,6 +112,23 @@ config GENERIC_IRQ_RESERVATION_MODE config IRQ_FORCED_THREADING bool +config FORCE_IRQ_THREADING + bool "Make IRQ threading compulsory" + depends on IRQ_FORCED_THREADING + default n + help + + Make IRQ threading mandatory for any IRQ handlers that support it + instead of being optional and requiring the threadirqs kernel + parameter. Instead they can be optionally disabled with the + nothreadirqs kernel parameter. + + Enabling this may make some architectures not boot with runqueue + sharing and MuQSS. + + Enable if you are building for a desktop or low latency system, + otherwise say N. + config SPARSE_IRQ bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ help diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 40fe7806cc8c99..d9923417fc3983 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -25,7 +25,18 @@ #include "internals.h" #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_FORCE_IRQ_THREADING +DEFINE_STATIC_KEY_TRUE(force_irqthreads_key); +#else DEFINE_STATIC_KEY_FALSE(force_irqthreads_key); +#endif + +static int __init setup_noforced_irqthreads(char *arg) +{ + static_branch_disable(&force_irqthreads_key); + return 0; +} +early_param("nothreadirqs", setup_noforced_irqthreads); static int __init setup_forced_irqthreads(char *arg) { From 34292abe50827a3062a823c6b55486db5c5cfdee Mon Sep 17 00:00:00 2001 From: Paul Gofman Date: Wed, 6 May 2020 14:37:44 +0300 Subject: [PATCH 38/48] mm: Support soft dirty flag reset for VA range. Signed-off-by: Kai Krakow --- fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 26 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a954305fbc31b4..ad3582193f46df 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1073,6 +1073,8 @@ enum clear_refs_types { struct clear_refs_private { enum clear_refs_types type; + unsigned long start, end; + bool clear_range; }; #ifdef CONFIG_MEM_SOFT_DIRTY @@ -1164,6 +1166,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; + BUG_ON(addr < cp->start || end > cp->end); + ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (cp->type == CLEAR_REFS_SOFT_DIRTY) { @@ -1220,9 +1224,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; - if (vma->vm_flags & VM_PFNMAP) + if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP)) return 1; + BUG_ON(start < cp->start || end > cp->end); + /* * Writing 1 to /proc/pid/clear_refs affects all pages. * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. @@ -1245,10 +1251,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF]; + char buffer[18]; struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; + unsigned long start, end; + bool clear_range; int itype; int rv; @@ -1257,12 +1265,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - rv = kstrtoint(strstrip(buffer), 10, &itype); - if (rv < 0) - return rv; - type = (enum clear_refs_types)itype; - if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) - return -EINVAL; + + if (buffer[0] == '6') + { + static int once; + + if (!once++) + printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n"); + + if (count != 17) + return -EINVAL; + + type = CLEAR_REFS_SOFT_DIRTY; + start = *(unsigned long *)(buffer + 1); + end = *(unsigned long *)(buffer + 1 + 8); + } + else + { + rv = kstrtoint(strstrip(buffer), 10, &itype); + if (rv < 0) + return rv; + type = (enum clear_refs_types)itype; + + if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) + return -EINVAL; + + start = 0; + end = -1UL; + } task = get_proc_task(file_inode(file)); if (!task) @@ -1275,40 +1305,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .type = type, }; - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; + if (start || end != -1UL) + { + start = min(start, -1UL) & PAGE_MASK; + end = min(end, -1UL) & PAGE_MASK; + + if (start >= end) + { + count = -EINVAL; + goto out_mm; + } + clear_range = true; } + else + { + clear_range = false; + } + + cp.start = start; + cp.end = end; + cp.clear_range = clear_range; + if (type == CLEAR_REFS_MM_HIWATER_RSS) { + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } + /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - goto out_unlock; + mmap_write_unlock(mm); + goto out_mm; } if (type == CLEAR_REFS_SOFT_DIRTY) { - mas_for_each(&mas, vma, ULONG_MAX) { - if (!(vma->vm_flags & VM_SOFTDIRTY)) - continue; - vma->vm_flags &= ~VM_SOFTDIRTY; - vma_set_page_prot(vma); + if (mmap_read_lock_killable(mm)) { + count = -EINTR; + goto out_mm; } - + if (!clear_range) + mas_for_each(&mas, vma, ULONG_MAX) { + if (!(vma->vm_flags & VM_SOFTDIRTY)) + continue; + mmap_read_unlock(mm); + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } + mas_for_each(&mas, vma, ULONG_MAX) { + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); + } + mmap_write_downgrade(mm); + break; + } inc_tlb_flush_pending(mm); mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, - 0, NULL, mm, 0, -1UL); + 0, NULL, mm, start, end); mmu_notifier_invalidate_range_start(&range); } - walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp); + else + { + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } + } + walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp); if (type == CLEAR_REFS_SOFT_DIRTY) { mmu_notifier_invalidate_range_end(&range); flush_tlb_mm(mm); dec_tlb_flush_pending(mm); + mmap_read_unlock(mm); + } + else + { + mmap_write_unlock(mm); } -out_unlock: - mmap_write_unlock(mm); out_mm: mmput(mm); } @@ -1340,6 +1416,7 @@ struct pagemapread { #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) +#define PM_SOFT_DIRTY_PAGE BIT_ULL(57) #define PM_UFFD_WP BIT_ULL(57) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) @@ -1414,13 +1491,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_PRESENT; page = vm_normal_page(vma, addr, pte); if (pte_soft_dirty(pte)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; } else if (is_swap_pte(pte)) { swp_entry_t entry; if (pte_swp_soft_dirty(pte)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pte_swp_uffd_wp(pte)) flags |= PM_UFFD_WP; entry = pte_to_swp_entry(pte); @@ -1480,7 +1557,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, flags |= PM_PRESENT; if (pmd_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pmd_uffd_wp(pmd)) flags |= PM_UFFD_WP; if (pm->show_pfn) @@ -1504,7 +1581,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, } flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); From 28cc36e404ab2b46d9b4a1c1b908c18cf2b95d96 Mon Sep 17 00:00:00 2001 From: Paul Gofman Date: Thu, 7 May 2020 14:05:31 +0300 Subject: [PATCH 39/48] mm: Support soft dirty flag read with reset. Signed-off-by: Kai Krakow --- fs/proc/base.c | 3 + fs/proc/internal.h | 1 + fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------ 3 files changed, 130 insertions(+), 18 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 74442e01793f33..b81c23bfaf29af 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3281,6 +3281,9 @@ static const struct pid_entry tgid_base_stuff[] = { REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), +#ifdef CONFIG_MEM_SOFT_DIRTY + REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations), +#endif #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 6b921826d85b62..c1329edb2d1bd0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; +extern const struct file_operations proc_pagemap_reset_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ad3582193f46df..fc504528f16fd2 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1095,8 +1095,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, return page_maybe_dma_pinned(page); } -static inline void clear_soft_dirty(struct vm_area_struct *vma, - unsigned long addr, pte_t *pte) +static inline bool clear_soft_dirty(struct vm_area_struct *vma, + unsigned long addr, pte_t *pte) { /* * The soft-dirty tracker uses #PF-s to catch writes @@ -1105,37 +1105,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, * of how soft-dirty works. */ pte_t ptent = *pte; + bool ret = false; if (pte_present(ptent)) { pte_t old_pte; if (pte_is_pinned(vma, addr, ptent)) - return; + return ret; old_pte = ptep_modify_prot_start(vma, addr, pte); + ret = pte_soft_dirty(old_pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { + ret = pte_swp_soft_dirty(ptent); ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } + return ret; } #else -static inline void clear_soft_dirty(struct vm_area_struct *vma, +static inline bool clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { + return false; } #endif #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, +static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; + bool ret = false; if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); + + ret = pmd_soft_dirty(old); + if (pmd_dirty(old)) pmd = pmd_mkdirty(pmd); if (pmd_young(old)) @@ -1146,14 +1155,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + ret = pmd_swp_soft_dirty(pmd); pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } + return ret; } #else -static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, +static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { + return false; } #endif @@ -1406,6 +1418,7 @@ struct pagemapread { int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; bool show_pfn; + bool reset; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) @@ -1438,6 +1451,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme, return 0; } +static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm) +{ + ((unsigned long *)pm->buffer)[pm->pos++] = addr; + if (pm->pos >= pm->len) + return PM_END_OF_BUFFER; + return 0; +} + static int pagemap_pte_hole(unsigned long start, unsigned long end, __always_unused int depth, struct mm_walk *walk) { @@ -1445,6 +1466,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, unsigned long addr = start; int err = 0; + if (pm->reset) + goto out; + while (addr < end) { struct vm_area_struct *vma = find_vma(walk->mm, addr); pagemap_entry_t pme = make_pme(0, 0); @@ -1479,8 +1503,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, } static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, - struct vm_area_struct *vma, unsigned long addr, pte_t pte) + struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr) { + pte_t pte = *pte_addr; u64 frame = 0, flags = 0; struct page *page = NULL; bool migration = false; @@ -1549,6 +1574,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; struct page *page = NULL; + if (pm->reset) + { + if (clear_soft_dirty_pmd(vma, addr, pmdp)) + { + for (; addr != end; addr += PAGE_SIZE) + { + err = add_addr_to_pagemap(addr, pm); + if (err) + break; + } + } + goto trans_huge_done; + } + if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1606,6 +1645,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, frame += (1 << MAX_SWAPFILES_SHIFT); } } +trans_huge_done: spin_unlock(ptl); return err; } @@ -1620,10 +1660,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, */ orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl); for (; addr < end; pte++, addr += PAGE_SIZE) { - pagemap_entry_t pme; + if (pm->reset) + { + if (clear_soft_dirty(vma, addr, pte)) + err = add_addr_to_pagemap(addr, pm); + } + else + { + pagemap_entry_t pme; - pme = pte_to_pagemap_entry(pm, vma, addr, *pte); - err = add_to_pagemap(addr, &pme, pm); + pme = pte_to_pagemap_entry(pm, vma, addr, pte); + err = add_to_pagemap(addr, &pme, pm); + } if (err) break; } @@ -1721,8 +1769,8 @@ static const struct mm_walk_ops pagemap_ops = { * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. */ -static ssize_t pagemap_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t do_pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos, bool reset) { struct mm_struct *mm = file->private_data; struct pagemapread pm; @@ -1731,6 +1779,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, unsigned long start_vaddr; unsigned long end_vaddr; int ret = 0, copied = 0; + struct mmu_notifier_range range; + size_t buffer_len; if (!mm || !mmget_not_zero(mm)) goto out; @@ -1746,19 +1796,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, /* do not disclose physical addresses: attack vector */ pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN); + pm.reset = reset; - pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL); + buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES); + + pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL); ret = -ENOMEM; if (!pm.buffer) goto out_mm; src = *ppos; svpfn = src / PM_ENTRY_BYTES; - end_vaddr = mm->task_size; + + start_vaddr = svpfn << PAGE_SHIFT; + + if (reset) + { + if (count < sizeof(end_vaddr)) + { + ret = -EINVAL; + goto out_mm; + } + if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr))) + return -EFAULT; + end_vaddr = min(end_vaddr, mm->task_size); + } + else + { + end_vaddr = mm->task_size; + start_vaddr = end_vaddr; + } /* watch out for wraparound */ - start_vaddr = end_vaddr; if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) start_vaddr = untagged_addr(svpfn << PAGE_SHIFT); @@ -1778,18 +1847,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, unsigned long end; pm.pos = 0; - end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; + pm.len = min(buffer_len, count / PM_ENTRY_BYTES); + + end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT)); /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; + ret = mmap_read_lock_killable(mm); if (ret) goto out_free; + + if (reset) + { + inc_tlb_flush_pending(mm); + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, + 0, NULL, mm, start_vaddr, end); + mmu_notifier_invalidate_range_start(&range); + } ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); + if (reset) + { + mmu_notifier_invalidate_range_end(&range); + flush_tlb_mm(mm); + dec_tlb_flush_pending(mm); + } mmap_read_unlock(mm); - start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); + BUG_ON(ret && ret != PM_END_OF_BUFFER); if (copy_to_user(buf, pm.buffer, len)) { ret = -EFAULT; goto out_free; @@ -1797,6 +1883,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, copied += len; buf += len; count -= len; + + start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end; } *ppos += copied; if (!ret || ret == PM_END_OF_BUFFER) @@ -1810,6 +1898,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, return ret; } +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return do_pagemap_read(file, buf, count, ppos, false); +} + +static ssize_t pagemap_reset_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return do_pagemap_read(file, buf, count, ppos, true); +} + static int pagemap_open(struct inode *inode, struct file *file) { struct mm_struct *mm; @@ -1836,6 +1936,14 @@ const struct file_operations proc_pagemap_operations = { .open = pagemap_open, .release = pagemap_release, }; + +const struct file_operations proc_pagemap_reset_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap_reset_read, + .open = pagemap_open, + .release = pagemap_release, +}; + #endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA From 3784f6646c4bbd4bc2164bfcff0c0b597aa7360d Mon Sep 17 00:00:00 2001 From: Mark Weiman Date: Sun, 12 Aug 2018 11:36:21 -0400 Subject: [PATCH 40/48] pci: Enable overrides for missing ACS capabilities This an updated version of Alex Williamson's patch from: https://lkml.org/lkml/2013/5/30/513 Original commit message follows: PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that allows us to control whether transactions are allowed to be redirected in various subnodes of a PCIe topology. For instance, if two endpoints are below a root port or downsteam switch port, the downstream port may optionally redirect transactions between the devices, bypassing upstream devices. The same can happen internally on multifunction devices. The transaction may never be visible to the upstream devices. One upstream device that we particularly care about is the IOMMU. If a redirection occurs in the topology below the IOMMU, then the IOMMU cannot provide isolation between devices. This is why the PCIe spec encourages topologies to include ACS support. Without it, we have to assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. Unfortunately, far too many topologies do not support ACS to make this a steadfast requirement. Even the latest chipsets from Intel are only sporadically supporting ACS. We have trouble getting interconnect vendors to include the PCIe spec required PCIe capability, let alone suggested features. Therefore, we need to add some flexibility. The pcie_acs_override= boot option lets users opt-in specific devices or sets of devices to assume ACS support. The "downstream" option assumes full ACS support on root ports and downstream switch ports. The "multifunction" option assumes the subset of ACS features available on multifunction endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" option enables ACS support on devices matching the provided vendor and device IDs, allowing more strategic ACS overrides. These options may be combined in any order. A maximum of 16 id specific overrides are available. It's suggested to use the most limited set of options necessary to avoid completely disabling ACS across the topology. Note to hardware vendors, we have facilities to permanently quirk specific devices which enforce isolation but not provide an ACS capability. Please contact me to have your devices added and save your customers the hassle of this boot option. Signed-off-by: Mark Weiman --- .../admin-guide/kernel-parameters.txt | 9 ++ drivers/pci/quirks.c | 103 ++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 31af352b4762d4..fac84c1d3f8d32 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4184,6 +4184,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + pcie_acs_override = + [PCIE] Override missing PCIe ACS support for: + downstream + All downstream ports - full ACS capabilities + multifunction + All multifunction devices - multifunction ACS subset + id:nnnn:nnnn + Specific device - full ACS capabilities + Specified as vid:did (vendor/device ID) in hex noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 472fa2c8ebcecd..e22fa5360dce04 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3388,6 +3388,107 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f8, quirk_intel_mc_errata); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, quirk_intel_mc_errata); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata); +static bool acs_on_downstream; +static bool acs_on_multifunction; + +#define NUM_ACS_IDS 16 +struct acs_on_id { + unsigned short vendor; + unsigned short device; +}; +static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; +static u8 max_acs_id; + +static __init int pcie_acs_override_setup(char *p) +{ + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "downstream", 10)) + acs_on_downstream = true; + if (!strncmp(p, "multifunction", 13)) + acs_on_multifunction = true; + if (!strncmp(p, "id:", 3)) { + char opt[5]; + int ret; + long val; + + if (max_acs_id >= NUM_ACS_IDS - 1) { + pr_warn("Out of PCIe ACS override slots (%d)\n", + NUM_ACS_IDS); + goto next; + } + + p += 3; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].vendor = val; + + p += strcspn(p, ":"); + if (*p != ':') { + pr_warn("PCIe ACS invalid ID\n"); + goto next; + } + + p++; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].device = val; + max_acs_id++; + } +next: + p += strcspn(p, ","); + if (*p == ',') + p++; + } + + if (acs_on_downstream || acs_on_multifunction || max_acs_id) + pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); + + return 0; +} +early_param("pcie_acs_override", pcie_acs_override_setup); + +static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) +{ + int i; + + /* Never override ACS for legacy devices or devices with ACS caps */ + if (!pci_is_pcie(dev) || + pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) + return -ENOTTY; + + for (i = 0; i < max_acs_id; i++) + if (acs_on_ids[i].vendor == dev->vendor && + acs_on_ids[i].device == dev->device) + return 1; + + switch (pci_pcie_type(dev)) { + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_ROOT_PORT: + if (acs_on_downstream) + return 1; + break; + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + if (acs_on_multifunction && dev->multifunction) + return 1; + } + + return -ENOTTY; +} + /* * Ivytown NTB BAR sizes are misreported by the hardware due to an erratum. * To work around this, query the size it should be configured to by the @@ -5017,6 +5118,8 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, + /* custom ACS overrides for any PCIe device */ + { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, { 0 } }; From 26631d118528a5a515528ca661dc539d0dbacc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 25 Oct 2021 09:49:42 -0300 Subject: [PATCH 41/48] futex: Add entry point for FUTEX_WAIT_MULTIPLE (opcode 31) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an option to wait on multiple futexes using the old interface, that uses opcode 31 through futex() syscall. Do that by just translation the old interface to use the new code. This allows old and stable versions of Proton to still use fsync in new kernel releases. Signed-off-by: André Almeida --- include/uapi/linux/futex.h | 13 +++++++ kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 71a5df8d26898b..d375ab21cbf83f 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -22,6 +22,7 @@ #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_LOCK_PI2 13 +#define FUTEX_WAIT_MULTIPLE 31 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -68,6 +69,18 @@ struct futex_waitv { __u32 __reserved; }; +/** + * struct futex_wait_block - Block of futexes to be waited for + * @uaddr: User address of the futex + * @val: Futex value expected by userspace + * @bitset: Bitset for the optional bitmasked wakeup + */ +struct futex_wait_block { + __u32 __user *uaddr; + __u32 val; + __u32 bitset; +}; + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index a8074079b09e87..26d6da72d494c6 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -142,6 +142,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd) case FUTEX_LOCK_PI2: case FUTEX_WAIT_BITSET: case FUTEX_WAIT_REQUEUE_PI: + case FUTEX_WAIT_MULTIPLE: return true; } return false; @@ -154,13 +155,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) return -EINVAL; *t = timespec64_to_ktime(*ts); - if (cmd == FUTEX_WAIT) + if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) *t = ktime_add_safe(ktime_get(), *t); else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); return 0; } +/** + * futex_read_wait_block - Read an array of futex_wait_block from userspace + * @uaddr: Userspace address of the block + * @count: Number of blocks to be read + * + * This function creates and allocate an array of futex_q (we zero it to + * initialize the fields) and then, for each futex_wait_block element from + * userspace, fill a futex_q element with proper values. + */ +inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count) +{ + unsigned int i; + struct futex_vector *futexv; + struct futex_wait_block fwb; + struct futex_wait_block __user *entry = + (struct futex_wait_block __user *)uaddr; + + if (!count || count > FUTEX_WAITV_MAX) + return ERR_PTR(-EINVAL); + + futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL); + if (!futexv) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { + kfree(futexv); + return ERR_PTR(-EFAULT); + } + + futexv[i].w.flags = FUTEX_32; + futexv[i].w.val = fwb.val; + futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr); + futexv[i].q = futex_q_init; + } + + return futexv; +} + +int futex_wait_multiple(struct futex_vector *vs, unsigned int count, + struct hrtimer_sleeper *to); + +int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count) +{ + int ret; + struct futex_vector *vs; + struct hrtimer_sleeper *to = NULL, timeout; + + to = futex_setup_timer(abs_time, &timeout, 0, 0); + + vs = futex_read_wait_block(uaddr, count); + + if (IS_ERR(vs)) + return PTR_ERR(vs); + + ret = futex_wait_multiple(vs, count, abs_time ? to : NULL); + kfree(vs); + + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + + return ret; +} + SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, const struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, u32, val3) @@ -180,6 +247,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, tp = &t; } + if (cmd == FUTEX_WAIT_MULTIPLE) + return futex_opcode_31(tp, uaddr, val); + return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } @@ -373,6 +443,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, tp = &t; } + if (cmd == FUTEX_WAIT_MULTIPLE) + return futex_opcode_31(tp, uaddr, val); + return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */ From ef2bf78af7c0be7cf3fb0fc4d30a25e96e0028d2 Mon Sep 17 00:00:00 2001 From: Kai Krakow Date: Sun, 7 May 2023 23:33:41 +0200 Subject: [PATCH 42/48] cfs: Tune CFS for lower scheduling latency Link: https://codeberg.org/pf-kernel/linux Link: https://github.com/Frogging-Family/linux-tkg/tree/master/linux-tkg-patches Link: https://github.com/pop-os/system76-scheduler Signed-off-by: Kai Krakow --- kernel/sched/fair.c | 20 ++++++++++---------- kernel/sched/rt.c | 4 ++-- kernel/sched/topology.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 612873ec2197fc..4c90b27d355516 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -69,8 +69,8 @@ * * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) */ -unsigned int sysctl_sched_latency = 6000000ULL; -static unsigned int normalized_sysctl_sched_latency = 6000000ULL; +unsigned int sysctl_sched_latency = 3000000ULL; +static unsigned int normalized_sysctl_sched_latency = 3000000ULL; /* * The initial- and re-scaling of tunables is configurable @@ -90,8 +90,8 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; * * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) */ -unsigned int sysctl_sched_min_granularity = 750000ULL; -static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; +unsigned int sysctl_sched_min_granularity = 300000ULL; +static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; /* * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks. @@ -99,12 +99,12 @@ static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; * * (default: 0.75 msec) */ -unsigned int sysctl_sched_idle_min_granularity = 750000ULL; +unsigned int sysctl_sched_idle_min_granularity = 300000ULL; /* * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity */ -static unsigned int sched_nr_latency = 8; +static unsigned int sched_nr_latency = 10; /* * After fork, child runs first. If set to 0 (default) then @@ -121,10 +121,10 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; * * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) */ -unsigned int sysctl_sched_wakeup_granularity = 1000000UL; -static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; +unsigned int sysctl_sched_wakeup_granularity = 500000UL; +static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +const_debug unsigned int sysctl_sched_migration_cost = 50000UL; int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) @@ -175,7 +175,7 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ -static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; #endif #ifdef CONFIG_SYSCTL diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 576eb2f51f0430..4ade34caaddbf3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -20,9 +20,9 @@ unsigned int sysctl_sched_rt_period = 1000000; /* * part of the period that we allow rt tasks to run in us. - * default: 0.95s + * XanMod default: 0.98s */ -int sysctl_sched_rt_runtime = 950000; +int sysctl_sched_rt_runtime = 980000; #ifdef CONFIG_SYSCTL static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 8739c2a5a54eab..18ac2214bef23f 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -206,7 +206,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) DEFINE_STATIC_KEY_FALSE(sched_energy_present); -static unsigned int sysctl_sched_energy_aware = 1; +static unsigned int sysctl_sched_energy_aware = 0; DEFINE_MUTEX(sched_energy_mutex); bool sched_energy_update; From 76d59e7c0ae4470e3e10fd37b90cec32de676c73 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 3 Aug 2020 17:05:04 +0000 Subject: [PATCH 43/48] mm: set 2 megabytes for address_space-level file read-ahead pages size Signed-off-by: Alexandre Frade --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bbccb404422247..2bba0123f8f9d2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1181,7 +1181,7 @@ struct readahead_control { ._index = i, \ } -#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) +#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); From 22f7f75202a15040306554e2622d08e16cc40197 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Wed, 20 Oct 2021 20:50:11 -0700 Subject: [PATCH 44/48] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce scheduling delays The page allocator processes free pages in groups of pageblocks, where the size of a pageblock is typically quite large (1024 pages without hugetlbpage support). Pageblocks are processed atomically with the zone lock held, which can cause severe scheduling delays on both the CPU going through the pageblock and any other CPUs waiting to acquire the zone lock. A frequent offender is move_freepages_block(), which is used by rmqueue() for page allocation. As it turns out, there's no requirement for pageblocks to be so large, so the pageblock order can simply be reduced to ease the scheduling delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a reasonable setting to ensure non-costly page allocation requests can still be serviced without always needing to free up more than one pageblock's worth of pages at a time. This has a noticeable effect on overall system latency when memory pressure is elevated. The various mm functions which operate on pageblocks no longer appear in the preemptoff tracer, where previously they would spend up to 100 ms on a mobile arm64 CPU processing a pageblock with preemption disabled and the zone lock held. Signed-off-by: Sultan Alsawaf --- include/linux/pageblock-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 5f1ae07d724b88..97cda629c9e909 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -48,7 +48,7 @@ extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE */ /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order (MAX_ORDER-1) +#define pageblock_order PAGE_ALLOC_COSTLY_ORDER #endif /* CONFIG_HUGETLB_PAGE */ From 74023cdf04b97cd7d8eb9974450dfd2a898d6012 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Wed, 20 Oct 2021 20:50:32 -0700 Subject: [PATCH 45/48] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk() There is noticeable scheduling latency and heavy zone lock contention stemming from rmqueue_bulk's single hold of the zone lock while doing its work, as seen with the preemptoff tracer. There's no actual need for rmqueue_bulk() to hold the zone lock the entire time; it only does so for supposed efficiency. As such, we can relax the zone lock and even reschedule when IRQs are enabled in order to keep the scheduling delays and zone lock contention at bay. Forward progress is still guaranteed, as the zone lock can only be relaxed after page removal. With this change, rmqueue_bulk() no longer appears as a serious offender in the preemptoff tracer, and system latency is noticeably improved. Signed-off-by: Sultan Alsawaf --- mm/page_alloc.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ca017c6008b7c9..e8391f8d7c70d7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3098,16 +3098,17 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, } /* - * Obtain a specified number of elements from the buddy allocator, all under - * a single hold of the lock, for efficiency. Add them to the supplied list. - * Returns the number of new pages which were placed at *list. + * Obtain a specified number of elements from the buddy allocator, and relax the + * zone lock when needed. Add them to the supplied list. Returns the number of + * new pages which were placed at *list. */ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { unsigned long flags; - int i, allocated = 0; + const bool can_resched = !preempt_count() && !irqs_disabled(); + int i, allocated = 0, last_mod = 0; spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { @@ -3116,6 +3117,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, if (unlikely(page == NULL)) break; + /* Reschedule and ease the contention on the lock if needed */ + if (i + 1 < count && ((can_resched && need_resched()) || + spin_needbreak(&zone->lock))) { + __mod_zone_page_state(zone, NR_FREE_PAGES, + -((i + 1 - last_mod) << order)); + last_mod = i + 1; + spin_unlock_irqrestore(&zone->lock, flags); + if (can_resched) + cond_resched(); + spin_lock_irqsave(&zone->lock, flags); + } + if (unlikely(check_pcp_refill(page, order))) continue; @@ -3142,7 +3155,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * on i. Do not confuse with 'allocated' which is the number of * pages added to the pcp list. */ - __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); + __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order)); spin_unlock_irqrestore(&zone->lock, flags); return allocated; } From bce98aecce8171578dcc1b8fc4abc617d754c22f Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Fri, 19 Apr 2019 12:33:38 +0200 Subject: [PATCH 46/48] Set vm.max_map_count to 262144 by default The value is still pretty low, and AMD64-ABI and ELF extended numbering supports that, so we should be fine on modern x86 systems. This fixes crashes in some applications using more than 65535 vmas (also affects some windows games running in wine, such as Star Citizen). Signed-off-by: Kai Krakow --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index eefb0948110ae8..11bb0e73ecb599 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -198,8 +198,7 @@ static inline void __mm_zero_struct_page(struct page *page) * not a hard limit any more. Although some userspace tools can be surprised by * that. */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) +#define DEFAULT_MAX_MAP_COUNT (262144) extern int sysctl_max_map_count; From 97ad6cf66da0e513c1f127676989bd8d4f7956b5 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Mon, 27 Jul 2020 00:19:18 +0200 Subject: [PATCH 47/48] mm: bump DEFAULT_MAX_MAP_COUNT Some games such as Detroit: Become Human tend to be very crash prone with lower values. Signed-off-by: Kai Krakow --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 11bb0e73ecb599..d4ae8db7733972 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -198,7 +198,7 @@ static inline void __mm_zero_struct_page(struct page *page) * not a hard limit any more. Although some userspace tools can be surprised by * that. */ -#define DEFAULT_MAX_MAP_COUNT (262144) +#define DEFAULT_MAX_MAP_COUNT (16777216) extern int sysctl_max_map_count; From 5bc7728c20790bf602f1706f96ab1ec9a03f0c50 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 May 2023 13:20:46 +0200 Subject: [PATCH 48/48] ZEN: sched/fair: Multi-LLC select_idle_sibling() Tejun reported that when he targets workqueues towards a specific LLC on his Zen2 machine with 3 cores / LLC and 4 LLCs in total, he gets significant idle time. This is, of course, because of how select_idle_sibling() will not consider anything outside of the local LLC, and since all these tasks are short running the periodic idle load balancer is ineffective. And while it is good to keep work cache local, it is better to not have significant idle time. Therefore, have select_idle_sibling() try other LLCs inside the same node when the local one comes up empty. Reported-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/fair.c | 37 +++++++++++++++++++++++++++++++++++++ kernel/sched/features.h | 1 + 2 files changed, 38 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4c90b27d355516..58afb0f71a5abe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6824,6 +6824,37 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool return idle_cpu; } +/* + * For the multiple-LLC per node case, make sure to try the other LLC's if the + * local LLC comes up empty. + */ +static int +select_idle_node(struct task_struct *p, struct sched_domain *sd, int target) +{ + struct sched_domain *parent = sd->parent; + struct sched_group *sg; + + /* Make sure to not cross nodes. */ + if (!parent || parent->flags & SD_NUMA) + return -1; + + sg = parent->groups; + do { + int cpu = cpumask_first(sched_group_span(sg)); + + if (!cpus_share_cache(cpu, target)) { + int i = select_idle_cpu(p, per_cpu(sd_llc, cpu), + test_idle_cores(cpu), cpu); + if ((unsigned)i < nr_cpumask_bits) + return i; + } + + sg = sg->next; + } while (sg != parent->groups); + + return -1; +} + /* * Scan the asym_capacity domain for idle CPUs; pick the first idle one on which * the task fits. If no CPU is big enough, but there are idle ones, try to @@ -6996,6 +7027,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if ((unsigned)i < nr_cpumask_bits) return i; + if (sched_feat(SIS_NODE)) { + i = select_idle_node(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; + } + return target; } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index ee7f23c76bd336..9e390eb82e384e 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -62,6 +62,7 @@ SCHED_FEAT(TTWU_QUEUE, true) */ SCHED_FEAT(SIS_PROP, false) SCHED_FEAT(SIS_UTIL, true) +SCHED_FEAT(SIS_NODE, true) /* * Issue a WARN when we do multiple update_rq_clock() calls