Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/makefile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Build test for the btrfs patch set: configure a minimal kernel, enable
# btrfs plus the optional features added by the patches, and compile it.
name: Makefile CI

on:
  push:
    branches:
      - 'rebase-*/btrfs-patches'
  pull_request:
    branches:
      - 'rebase-*/btrfs-patches'
  workflow_dispatch:

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Configure minimal kernel
        run: make tinyconfig

      - name: Configure btrfs
        run: |
          echo "CONFIG_BLOCK=y" >>.config
          echo "CONFIG_BTRFS_FS=y" >>.config
          echo "CONFIG_BTRFS_FS_POSIX_ACL=y" >>.config
          echo "CONFIG_BTRFS_ALLOCATOR_HINTS=y" >>.config
          echo "CONFIG_BTRFS_PER_DEVICE_IO_STATS=y" >>.config
          echo "CONFIG_BTRFS_READ_POLICIES=y" >>.config
          # Enabling the options above makes new symbols visible. Use the
          # non-interactive target so they take their defaults instead of
          # prompting on a CI runner that has no usable stdin.
          make olddefconfig

      - name: Compile kernel
        run: make -j$(nproc) all
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,6 @@ sphinx_*/

# Rust analyzer configuration
/rust-project.json

# Allow Github workflows
!/.github
72 changes: 72 additions & 0 deletions fs/btrfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,78 @@ config BTRFS_ASSERT

If unsure, say N.

config BTRFS_ALLOCATOR_HINTS
bool "Btrfs allocator hints"
depends on BTRFS_FS
default n
help
Enable support for allocator hints. This feature allows selecting
dedicated or preferred devices for metadata vs. data, or preventing
allocation from a device altogether. This feature does not interact
well with free space calculation because the formula expects space
to always be allocated from the device with the most free space,
which is not true when hints are applied. It may also cause issues
if a device in the pool dies, resulting in a situation where there
are still enough RAID mirror members but the allocation hints do
not allow allocating from specific devices.

You are advised to watch your free space closely with btrfs tools
instead of relying on df only.

Mounting a btrfs file system with this feature on or off is always
possible; there are no incompatible changes to the file system.
However, running without this feature may place new chunks on
unwanted devices, and you may want to clean up later by balancing
the affected chunks.

Supported hint types in /sys/fs/btrfs/BTRFS-UUID/devinfo/ID/type:

- type = 0 - allocate data chunks from this ID first (recommended
for big disks with good sequential performance, e.g.
HDDs), prefers data on this device
- type = 1 - allocate meta data chunks from this ID first
(recommended for fast and small disks with good
latency, e.g. SSD/NVMe), prefers meta data on this
device
- type = 2 - allocate only meta data chunks from this ID, no data
chunks will ever be allocated from this device
- type = 3 - allocate only data chunks from this ID, no meta data
chunks will ever be allocated from this device
- type = 4 - allocate any chunks from this device last, will never
allocate any space from this device unless there isn't
enough space on other devices
- type = 5 - never allocate any new chunks, useful when putting a
device out of use and to avoid redundant chunk writes
during balance/replace

If unsure, say N.

config BTRFS_PER_DEVICE_IO_STATS
bool "Btrfs per-device I/O stats"
depends on BTRFS_FS
default n
help
Enable collecting per-device I/O read statistics to better evaluate
the effects of different read policies.

This adds a new file /sys/fs/btrfs/BTRFS-UUID/devinfo/ID/read_stats.

If unsure, say N.

config BTRFS_READ_POLICIES
bool "Btrfs read policies"
depends on BTRFS_FS
default n
help
This enables btrfs read policies to control how btrfs selects stripes
from a mirror during read operations. This was originally part of
the experimental feature set but it is safe to use and can provide
huge performance benefits in certain scenarios without causing any
performance regressions.

This adds a new file /sys/fs/btrfs/BTRFS-UUID/read_policy.

If unsure, say N.

config BTRFS_EXPERIMENTAL
bool "Btrfs experimental features"
depends on BTRFS_FS
Expand Down
3 changes: 2 additions & 1 deletion fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,7 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
if (ret)
return ret;
ptr += btrfs_extent_inline_ref_size(type);
cond_resched();
}

return 0;
Expand Down Expand Up @@ -1229,7 +1230,7 @@ static int add_keyed_refs(struct btrfs_backref_walk_ctx *ctx,
}
if (ret)
return ret;

cond_resched();
}

return ret;
Expand Down
4 changes: 2 additions & 2 deletions fs/btrfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -2498,7 +2498,7 @@ static int __init btrfs_print_mod_info(void)
#endif
;

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
if (btrfs_get_mod_read_policy() == NULL)
pr_info("Btrfs loaded%s\n", options);
else
Expand Down Expand Up @@ -2565,7 +2565,7 @@ static const struct init_sequence mod_init_seq[] = {
}, {
.init_func = btrfs_extent_map_init,
.exit_func = btrfs_extent_map_exit,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
}, {
.init_func = btrfs_read_policy_init,
.exit_func = NULL,
Expand Down
120 changes: 113 additions & 7 deletions fs/btrfs/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
#include "fs.h"
#include "accessors.h"

#ifdef CONFIG_BTRFS_PER_DEVICE_IO_STATS
#include <linux/part_stat.h>
#endif

/*
* Structure name Path
* --------------------------------------------------------------------------
Expand Down Expand Up @@ -1319,13 +1323,14 @@ BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);

static const char *btrfs_read_policy_name[] = {
"pid",
#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
"round-robin",
"queue",
"devid",
#endif
};

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES

/* Global module configuration parameters. */
static char *read_policy;
Expand All @@ -1337,7 +1342,7 @@ char *btrfs_get_mod_read_policy(void)
/* Set perms to 0, disable /sys/module/btrfs/parameters/read_policy interface. */
module_param(read_policy, charp, 0);
MODULE_PARM_DESC(read_policy,
"Global read policy: pid (default), round-robin[:<min_contig_read>], devid[:<devid>]");
"Global read policy: pid (default), round-robin[:<min_contig_read>], queue, devid[:<devid>]");
#endif

int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)
Expand All @@ -1350,7 +1355,7 @@ int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)

strscpy(param, str);

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
/* Separate value from input in policy:value format. */
value_str = strchr(param, ':');
if (value_str) {
Expand All @@ -1372,7 +1377,7 @@ int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)
return sysfs_match_string(btrfs_read_policy_name, param);
}

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
int __init btrfs_read_policy_init(void)
{
s64 value;
Expand Down Expand Up @@ -1403,7 +1408,7 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,

ret += sysfs_emit_at(buf, ret, "%s", btrfs_read_policy_name[i]);

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
if (i == BTRFS_READ_POLICY_RR)
ret += sysfs_emit_at(buf, ret, ":%u",
READ_ONCE(fs_devices->rr_min_contig_read));
Expand Down Expand Up @@ -1433,7 +1438,7 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
if (index < 0)
return -EINVAL;

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
/* If moving from RR then disable collecting fs stats. */
if (fs_devices->read_policy == BTRFS_READ_POLICY_RR && index != BTRFS_READ_POLICY_RR)
fs_devices->collect_fs_stats = false;
Expand Down Expand Up @@ -2140,19 +2145,120 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
}
BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show);

#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
/*
 * Show handler for /sys/fs/btrfs/<uuid>/devinfo/<devid>/type.
 *
 * Prints the device's type field as a "0x"-prefixed hexadecimal number,
 * zero-padded to at least 8 digits, followed by a newline.
 *
 * Return: number of bytes written to @buf.
 */
static ssize_t btrfs_devinfo_type_show(struct kobject *kobj,
				       struct kobj_attribute *a, char *buf)
{
	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
						   devid_kobj);

	/* sysfs_emit() is the bounded formatter intended for show() callbacks. */
	return sysfs_emit(buf, "0x%08llx\n", device->type);
}

/*
 * Store handler for /sys/fs/btrfs/<uuid>/devinfo/<devid>/type.
 *
 * Parses an unsigned 64-bit integer from @buf (base auto-detected), rejects
 * any value with bits set outside the allocation hint mask, and writes the
 * new type to the device item via btrfs_update_device() inside its own
 * transaction, which is committed before returning. If the update or the
 * commit fails, the in-memory device->type is restored to its previous value.
 *
 * Return: @len on success; -EPERM if the device has no fs_info, -EBUSY if the
 * file system is not fully open, -EROFS if it is read-only, -EINVAL for
 * unparsable or out-of-range input, or the error returned while starting,
 * updating or committing the transaction.
 */
static ssize_t btrfs_devinfo_type_store(struct kobject *kobj,
					struct kobj_attribute *a,
					const char *buf, size_t len)
{
	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
						   devid_kobj);
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_trans_handle *trans;
	u64 type, prev_type;
	int ret;

	if (!fs_info)
		return -EPERM;

	/*
	 * Changing the type field requires starting a transaction, which will
	 * cause a NULL dereference in __reserve_bytes if the file system is
	 * not fully open. Thus, return EBUSY if the file system is not fully
	 * initialized.
	 */
	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
		return -EBUSY;

	if (sb_rdonly(fs_info->sb))
		return -EROFS;

	ret = kstrtou64(buf, 0, &type);
	if (ret < 0)
		return -EINVAL;

	/*
	 * For now, only allow touching the 'allocation hint' bits. The mask
	 * is built as a 64-bit value so that it matches the width of @type
	 * instead of relying on sign extension of an int.
	 */
	if (type & ~((1ULL << BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) - 1))
		return -EINVAL;

	trans = btrfs_start_transaction(fs_info->chunk_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	prev_type = device->type;
	device->type = type;

	ret = btrfs_update_device(trans, device);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto abort;
	}

	ret = btrfs_commit_transaction(trans);
	if (ret < 0)
		goto abort;

	return len;

abort:
	/* Nothing was persisted: roll the in-memory value back. */
	device->type = prev_type;
	return ret;
}
BTRFS_ATTR_RW(devid, type, btrfs_devinfo_type_show, btrfs_devinfo_type_store);
#endif

#ifdef CONFIG_BTRFS_PER_DEVICE_IO_STATS
/*
 * Show handler for /sys/fs/btrfs/<uuid>/devinfo/<devid>/read_stats.
 *
 * Emits one line with the block device's accumulated read I/O count
 * ("ios"), accumulated read time ("wait", taken from the nsecs[READ]
 * counter), their quotient ("avg"), and the btrfs_device counters
 * last_io_age ("age") and stripe_ignored ("ignored").
 *
 * Return: number of bytes written to @buf.
 */
static ssize_t btrfs_devinfo_read_stats_show(struct kobject *kobj,
					     struct kobj_attribute *a, char *buf)
{
	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
						   devid_kobj);
	struct block_device *bdev = device->bdev;
	unsigned long read_ios = 0;
	u64 read_wait = 0;
	u64 avg_wait = 0;

	/*
	 * NOTE(review): a device without a backing block device (presumably
	 * the case for missing devices - confirm) has no part stats to read;
	 * report zeroes instead of dereferencing a NULL bdev.
	 */
	if (bdev) {
		read_wait = part_stat_read(bdev, nsecs[READ]);
		read_ios = part_stat_read(bdev, ios[READ]);
	}

	/*
	 * read_ios is an unsigned long, so use the 64-by-64 division helper:
	 * div_u64() takes a 32-bit divisor and would truncate the count.
	 */
	if (read_ios)
		avg_wait = div64_u64(read_wait, read_ios);

	return sysfs_emit(buf, "ios %lu wait %llu avg %llu age %llu ignored %llu\n",
			  read_ios, read_wait, avg_wait,
			  (u64)atomic64_read(&device->last_io_age),
			  (u64)atomic64_read(&device->stripe_ignored));
}
BTRFS_ATTR(devid, read_stats, btrfs_devinfo_read_stats_show);
#endif

/*
 * Information about one device.
 *
 * Path: /sys/fs/btrfs/<uuid>/devinfo/<devid>/
 *
 * NULL-terminated list of the per-device attributes. The read_stats and
 * type entries are only compiled in when their respective config options
 * (CONFIG_BTRFS_PER_DEVICE_IO_STATS, CONFIG_BTRFS_ALLOCATOR_HINTS) are
 * enabled.
 */
static struct attribute *devid_attrs[] = {
#ifdef CONFIG_BTRFS_PER_DEVICE_IO_STATS
	BTRFS_ATTR_PTR(devid, read_stats),
#endif
	BTRFS_ATTR_PTR(devid, error_stats),
	BTRFS_ATTR_PTR(devid, fsid),
	BTRFS_ATTR_PTR(devid, in_fs_metadata),
	BTRFS_ATTR_PTR(devid, missing),
	BTRFS_ATTR_PTR(devid, replace_target),
	BTRFS_ATTR_PTR(devid, scrub_speed_max),
	BTRFS_ATTR_PTR(devid, writeable),
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
	BTRFS_ATTR_PTR(devid, type),
#endif
	/* Sentinel terminating the attribute list. */
	NULL
};
/* Generates the devid attribute group definitions from devid_attrs. */
ATTRIBUTE_GROUPS(devid);
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/sysfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup);
int btrfs_read_policy_to_enum(const char *str, s64 *value);

#ifdef CONFIG_BTRFS_EXPERIMENTAL
#ifdef CONFIG_BTRFS_READ_POLICIES
int __init btrfs_read_policy_init(void);
char *btrfs_get_mod_read_policy(void);
#endif
Expand Down
Loading