Skip to content

FFI calls are much slower than in godot-cpp #1276

@beicause

Description

@beicause

I noticed that processing a 1024x1024 image in godot-rust is very slow; it takes up to a few seconds.

godot-rust

    /// Benchmark: fills a 1024x1024 `R8` image in two different ways and prints
    /// how long each approach took.
    ///
    /// 1. One `set_pixel` call per pixel.
    /// 2. Writing every byte into a `PackedByteArray`, then a single `set_data` call.
    fn ready(&mut self) {
        let width = 1024;
        let height = 1024;
        // NOTE: the size is repeated here as literals rather than reusing `width`/`height`.
        let mut img =
            Image::create_empty(1024, 1024, false, godot::classes::image::Format::R8).unwrap();

        // --- Approach 1: per-pixel writes (width * height `set_pixel` calls). ---
        let t = std::time::Instant::now();
        for x in 0..width {
            for y in 0..height {
                img.set_pixel(
                    x,
                    y,
                    // Red grows with `x` (x / 1024); green, blue and alpha are all zero.
                    Color {
                        r: (x as f32) / 1024.0,
                        g: 0.0,
                        b: 0.0,
                        a: 0.0,
                    },
                );
            }
        }
        // img.save_png("res://t1.png");
        godot_print!("set pixel: {:?}", t.elapsed());

        // --- Approach 2: fill a byte buffer, then upload it with one `set_data` call. ---
        // The timer starts before the buffer is created, so allocation and resize
        // are included in the reported duration.
        let t = std::time::Instant::now();
        let mut buffer = PackedByteArray::new();
        buffer.resize((width * height).try_into().unwrap());
        for x in 0..width {
            for y in 0..height {
                // One byte per pixel at index `x + y * width` (presumably the row-major
                // layout `set_data` expects for R8 -- confirm). The value is x / 1024
                // scaled by 255; the `as u8` cast drops the fractional part.
                buffer[(x + y * width) as usize] = ((x as f32) / 1024.0 * 255.0) as u8;
            }
        }
        img.set_data(
            width,
            height,
            false,
            godot::classes::image::Format::R8,
            &buffer,
        );
        // img.save_png("res://t2.png");
        godot_print!("set data: {:?}", t.elapsed());
    }

dev build, opt-level=0

set pixel: 1.521181788s
set data: 65.424402ms

dev build, opt-level=3

set pixel: 364.199245ms
set data: 26.16949ms

godot-cpp

	// Benchmark: fills a 1024x1024 FORMAT_R8 image in two different ways and
	// prints how long each approach took, in whole milliseconds.
	int width = 1024;
	int height = 1024;
	// NOTE: the size is repeated here as literals rather than reusing width/height.
	Ref<Image> img = Image::create_empty(1024, 1024, false, Image::FORMAT_R8);

	// --- Approach 1: per-pixel writes (width * height set_pixel calls). ---
	auto t = std::chrono::high_resolution_clock::now();
	for (int x = 0; x < width; x++) {
		for (int y = 0; y < height; y++) {
			// Red grows with x (x / 1024.0 is a floating-point division); the other channels are zero.
			img->set_pixel(x, y, Color((x) / 1024.0, 0.0, 0.0, 0.0));
		}
	}
	// img->save_png("res://t1.png");
	// Elapsed time is converted to milliseconds before printing, so sub-millisecond precision is lost.
	print_line("set pixel: ", std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t).count());

	// --- Approach 2: fill a byte buffer, then upload it with one set_data call. ---
	// The timer is restarted before the buffer is created, so allocation and
	// resize are included in the reported duration.
	t = std::chrono::high_resolution_clock::now();
	PackedByteArray buffer;
	buffer.resize((width * height));
	for (int x = 0; x < width; x++) {
		for (int y = 0; y < height; y++) {
			// One byte per pixel at index x + y * width (presumably the row-major
			// layout set_data expects for FORMAT_R8 -- confirm). The uint8_t
			// conversion drops the fractional part of x / 1024.0 * 255.0.
			buffer[(x + y * width)] = uint8_t(x / 1024.0 * 255.0);
		}
	}
	img->set_data(width, height, false, Image::FORMAT_R8, buffer);
	// img->save_png("res://t2.png");
	print_line("set data: ", std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t).count());

dev build, optimize=none

set pixel: 24
set data: 7

dev build, optimize=speed

set pixel: 13
set data: 5

Metadata

Metadata

Assignees

No one assigned

    Labels

    c: ffi — Low-level components and interaction with GDExtension API · performance — Performance problems and optimizations

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions