Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQ
SET(EMBREE_ARM ON)
ENDIF()

# detect RISC-V compilation
IF (CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
MESSAGE(STATUS "Building for RISC-V 64")
SET(EMBREE_RISCV ON)
ENDIF()

SET(EMBREE_TASKING_SYSTEM "TBB" CACHE STRING "Selects tasking system")
SET(EMBREE_TBB_COMPONENT "tbb" CACHE STRING "The TBB component/library name.")

Expand Down Expand Up @@ -385,6 +391,8 @@ ENDIF()

IF (EMBREE_ARM)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE NEON NEON2X)
ELSEIF (EMBREE_RISCV)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 DEFAULT)
ELSE()
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 DEFAULT)
ENDIF()
Expand All @@ -399,6 +407,8 @@ IF (EMBREE_MAX_ISA STREQUAL "NONE")
OPTION(EMBREE_ISA_NEON "Enables NEON ISA." ON)
OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." OFF)
ENDIF()
ELSEIF (EMBREE_RISCV)
OPTION(EMBREE_ISA_RVV "Enables RVV ISA." ON)
ELSE()
TRY_COMPILE(COMPILER_SUPPORTS_AVX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX})
TRY_COMPILE(COMPILER_SUPPORTS_AVX2 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX2})
Expand All @@ -416,18 +426,21 @@ IF (EMBREE_MAX_ISA STREQUAL "NONE")
# Don't use OPTION, but still set them to OFF, so that embree-config.cmake is consisten with its definitions
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_RVV OFF)
ENDIF()

ELSEIF (EMBREE_MAX_ISA STREQUAL "DEFAULT")
UNSET(EMBREE_ISA_NEON CACHE)
UNSET(EMBREE_ISA_NEON2X CACHE)
UNSET(EMBREE_ISA_RVV CACHE)
UNSET(EMBREE_ISA_SSE2 CACHE)
UNSET(EMBREE_ISA_SSE42 CACHE)
UNSET(EMBREE_ISA_AVX CACHE)
UNSET(EMBREE_ISA_AVX2 CACHE)
UNSET(EMBREE_ISA_AVX512 CACHE)
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_RVV OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
Expand All @@ -442,6 +455,7 @@ ELSEIF (EMBREE_MAX_ISA STREQUAL "DEFAULT")
ELSE()
UNSET(EMBREE_ISA_NEON CACHE)
UNSET(EMBREE_ISA_NEON2X CACHE)
UNSET(EMBREE_ISA_RVV CACHE)
UNSET(EMBREE_ISA_SSE2 CACHE)
UNSET(EMBREE_ISA_SSE42 CACHE)
UNSET(EMBREE_ISA_AVX CACHE)
Expand All @@ -452,6 +466,8 @@ ELSE()
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "NEON2X")
SET(ISA 2)
ELSEIF(EMBREE_MAX_ISA STREQUAL "RVV")
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "SSE2")
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "SSE4.2")
Expand All @@ -470,6 +486,7 @@ ELSE()

SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_RVV OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
Expand All @@ -483,6 +500,10 @@ ELSE()
IF (ISA GREATER 1)
SET(EMBREE_ISA_NEON2X ON)
ENDIF ()
ELSEIF (EMBREE_RISCV)
IF (ISA GREATER 0)
SET(EMBREE_ISA_RVV ON)
ENDIF ()
ELSE()
IF (ISA GREATER 0)
SET(EMBREE_ISA_SSE2 ON)
Expand Down Expand Up @@ -574,6 +595,11 @@ IF (EMBREE_ISA_NEON2X)
SET(EMBREE_ISA_AVX2 ON)
ENDIF()

IF (EMBREE_ISA_RVV)
SET(EMBREE_ISA_SSE2 ON)
SET(EMBREE_ISA_SSE42 ON)
ENDIF()

IF (EMBREE_ISA_SSE2)
ADD_DEFINITIONS(-DEMBREE_TARGET_SSE2)
IF (NOT EMBREE_ARM)
Expand Down
3 changes: 3 additions & 0 deletions common/cmake/clang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ IF (EMBREE_ARM)
SET(FLAGS_AVX "-D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
SET(FLAGS_AVX2 "-D__AVX2__ -D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
ENDIF ()
ELSEIF (EMBREE_RISCV)
SET(FLAGS_SSE2 "-D__SSE__ -D__SSE2__ -march=rv64gcv_zba_zbb_zbs -mrvv-vector-bits=zvl")
SET(FLAGS_SSE42 "-D__SSE4_2__ -D__SSE4_1__ -march=rv64gcv_zba_zbb_zbs -mrvv-vector-bits=zvl")
ELSE ()
# for `thread` keyword
_SET_IF_EMPTY(FLAGS_SSE2 "-msse -msse2 -mno-sse4.2")
Expand Down
2 changes: 2 additions & 0 deletions common/math/emath.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#elif defined(__riscv_v)
#include "../simd/riscv/emulation.h"
#else
#include <emmintrin.h>
#include <xmmintrin.h>
Expand Down
2 changes: 1 addition & 1 deletion common/math/vec3.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ namespace embree
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
#elif defined(__SSE__) || defined(__ARM_NEON) || defined(__riscv_v)
template<>
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
Expand Down
2 changes: 1 addition & 1 deletion common/math/vec4.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ namespace embree
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
#elif defined(__SSE__) || defined(__ARM_NEON) || defined(__riscv_v)
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
}
Expand Down
120 changes: 120 additions & 0 deletions common/simd/riscv/emulation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#pragma once

#define SSE2RVV_PRECISE_DIV 1
#define SSE2RVV_PRECISE_SQRT 1
#define SSE2RVV_PRECISE_MINMAX 1

#include "sse2rvv.h"

#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))

/* Flush zero mode macros. */
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000

enum _mm_hint {
_MM_HINT_NTA = 0,
_MM_HINT_T0 = 1,
_MM_HINT_T1 = 2,
_MM_HINT_T2 = 3,
};

__forceinline int _mm_cvtsi128_si32(__m128i a) {
return __riscv_vmv_x_s_i32m1_i32(a);
}

__forceinline __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm8) {
vfloat32m1_t zeros = __riscv_vfmv_v_f_f32m1(0, 4);
vbool32_t high = __riscv_vreinterpret_v_i32m1_b32(__riscv_vmv_s_x_i32m1(imm8 >> 4, 1));
vbool32_t low = __riscv_vreinterpret_v_i32m1_b32(__riscv_vmv_s_x_i32m1(imm8 & 0xf, 1));
vfloat32m1_t sum = __riscv_vfredusum_vs_f32m1_f32m1_m(high, __riscv_vfmul(a, b, 4), zeros, 4);
return vreinterpretq_f32_m128(__riscv_vrgather_vx_f32m1_mu(low, zeros, sum, 0, 4));
}

__forceinline __int64 _mm_cvtsi128_si64 (__m128i a) {
return __riscv_vmv_x_s_i64m1_i64(__riscv_vreinterpret_v_i32m1_i64m1(a));
}

__forceinline unsigned int _mm_getcsr(void) {
union {
fcsr_bitfield field;
uint32_t value;
} r;

__asm__ volatile("csrr %0, fcsr" : "=r"(r));

switch (r.field.frm) {
case __RISCV_FRM_RTZ:
return _MM_ROUND_TOWARD_ZERO;
case __RISCV_FRM_RDN:
return _MM_ROUND_DOWN;
case __RISCV_FRM_RUP:
return _MM_ROUND_UP;
default:
return _MM_ROUND_NEAREST;
}
}

__forceinline void _mm_setcsr(unsigned int a) {
_MM_SET_ROUNDING_MODE(a);
}

__forceinline void _mm_mfence (void) {
__sync_synchronize();
}

__forceinline void _mm_pause (void) {
__asm__ __volatile__("fence.i\n\t"
"fence r, r\n\t");
}

__forceinline void _mm_prefetch (char const* p, int i) {
(void)i;
__builtin_prefetch(p);
}

__forceinline int _mm_popcnt_u32(unsigned int a) {
return __builtin_popcount(a);
}

__forceinline int64_t _mm_popcnt_u64(uint64_t a) {
return __builtin_popcount(a);
}

__forceinline __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfmacc_vv_f32m1(_c, _a, _b, 4));
}

__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfmsac_vv_f32m1(_c, _a, _b, 4));
}

__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfnmsac_vv_f32m1(_c, _a, _b, 4));
}

__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfnmacc_vv_f32m1(_c, _a, _b, 4));
}

/* Dummy defines for floating point control */
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_DIV_ZERO 0x200
// #define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_MASK_DENORM 0x100
#define _MM_SET_EXCEPTION_MASK(x)
#define _MM_SET_FLUSH_ZERO_MODE(x)
Loading