mirror of https://github.com/intel/gmmlib.git
Merge 97748870c4
into 20050cbe25
This commit is contained in:
commit
c9d50346e5
|
@ -167,6 +167,8 @@ endif()
|
|||
|
||||
# Choose the value later handed to the compiler as -march=${GMMLIB_MARCH}.
#   * AArch64 and RISC-V targets are pinned to a fixed baseline.
#   * Any other target keeps a GMMLIB_MARCH supplied by the caller and only
#     falls back to "corei7" when it is empty.
# CMAKE_SYSTEM_PROCESSOR is passed to if() by name rather than expanded with
# ${...}: an empty or unset value would otherwise leave if() with a missing
# left-hand operand and abort configuration.
# NOTE(review): every processor name containing "riscv" gets the 64-bit
# "rv64g" baseline; confirm 32-bit RISC-V targets are not expected here.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch")
    set(GMMLIB_MARCH "armv8-a+fp+simd")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv")
    set(GMMLIB_MARCH "rv64g")
elseif ("${GMMLIB_MARCH}" STREQUAL "")
    set(GMMLIB_MARCH "corei7")
endif()
|
||||
|
@ -302,6 +304,7 @@ set(SOURCES_
|
|||
${BS_DIR_GMMLIB}/Texture/GmmTextureSpecialCases.cpp
|
||||
${BS_DIR_GMMLIB}/Texture/GmmTextureOffset.cpp
|
||||
${BS_DIR_GMMLIB}/GlobalInfo/GmmInfo.cpp
|
||||
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
|
||||
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
|
||||
${BS_DIR_GMMLIB}/Utility/GmmLog/GmmLog.cpp
|
||||
${BS_DIR_GMMLIB}/Utility/GmmUtility.cpp
|
||||
|
@ -579,6 +582,9 @@ if(UNIX)
|
|||
FILES_MATCHING PATTERN "*.h"
|
||||
PATTERN "*.hpp")
|
||||
|
||||
# Install the RISC-V SSE2 shim header into the CpuSwizzleBlt directory of the
# development include tree (component gmmlib-devel).
install(
    FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/
    COMPONENT gmmlib-devel
)

# CpuSwizzleBlt.c is installed to the same destination and component.
install(
    FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/
    COMPONENT gmmlib-devel
)
|
||||
|
||||
|
|
|
@ -55,6 +55,40 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")
|
|||
-fPIC
|
||||
-g
|
||||
)
|
||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv")
|
||||
# Compiler flags shared by every build configuration on RISC-V targets.
set(GMMLIB_COMPILER_FLAGS_COMMON
    # --- Warnings -----------------------------------------------------------
    # -Wall is currently left switched off:
    #-Wall
    -Winit-self
    -Winvalid-pch
    -Wpointer-arith
    -Wno-unused
    -Wno-unknown-pragmas
    -Wno-comments
    -Wno-narrowing
    -Wno-overflow
    -Wno-parentheses
    -Wno-missing-braces
    -Wno-sign-compare
    # These diagnostics are promoted to hard errors.
    -Werror=address
    -Werror=format-security
    -Werror=return-type

    # --- Code generation / optimization --------------------------------------
    # Target architecture comes from GMMLIB_MARCH.
    -march=${GMMLIB_MARCH}
    -finline-functions
    -fno-short-enums
    -Wa,--noexecstack
    -fno-strict-aliasing

    # --- Miscellaneous -------------------------------------------------------
    -fstack-protector
    -fdata-sections
    -ffunction-sections
    -fmessage-length=0
    -fvisibility=hidden
    -fPIC
    -g
)
|
||||
else()
|
||||
SET (GMMLIB_COMPILER_FLAGS_COMMON
|
||||
#general warnings
|
||||
|
|
|
@ -375,6 +375,8 @@ extern void CpuSwizzleBlt(CPU_SWIZZLE_BLT_SURFACE *pDest, CPU_SWIZZLE_BLT_SURFAC
|
|||
#include <intrin.h>
|
||||
#elif defined(__ARM_ARCH)
|
||||
#include <sse2neon.h>
|
||||
#elif defined(__riscv)
|
||||
#include "riscv_sse2_support.h"
|
||||
#elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
|
||||
#include <cpuid.h>
|
||||
#include <x86intrin.h>
|
||||
|
@ -427,7 +429,10 @@ int SwizzleOffset( // ##########################################################
|
|||
|
||||
if(PDepSupported == -1)
|
||||
{
|
||||
#if(_MSC_VER >= 1700)
|
||||
#if defined(__riscv)
|
||||
#define PDEP(Src, Mask) 0
|
||||
PDepSupported = 0;
|
||||
#elif(_MSC_VER >= 1700)
|
||||
#define PDEP(Src, Mask) _pdep_u32((Src), (Mask))
|
||||
int CpuInfo[4];
|
||||
__cpuidex(CpuInfo, 7, 0);
|
||||
|
@ -692,21 +697,34 @@ void CpuSwizzleBlt( // #########################################################
|
|||
} __m24; // 24-bit/3-byte memory element.
|
||||
|
||||
// Macros intended to compile to various types of "load register from memory" instructions.
//
// MOVB_R / MOVW_R / MOV3_R / MOVD_R move a 1-, 2-, 3- or 4-byte element from
// Src into the first bytes of Reg and leave the remaining bytes of Reg
// untouched. MOVQ_R / MOVDQ_R / MOVDQU_R map onto the SSE2 load intrinsics.
#if defined(__riscv)
    // The RISC-V build performs the narrow moves with memcpy() instead of
    // pointer-cast assignment. The length is the size of the element being
    // moved: using sizeof(__m128i) here would read past the end of Src and
    // overwrite the whole register.
    #define MOVB_R(  Reg, Src) memcpy((void *)&(Reg), (const void *)(Src), sizeof(uint8_t))
    #define MOVW_R(  Reg, Src) memcpy((void *)&(Reg), (const void *)(Src), sizeof(uint16_t))
    #define MOV3_R(  Reg, Src) memcpy((void *)&(Reg), (const void *)(Src), sizeof(__m24))
    #define MOVD_R(  Reg, Src) memcpy((void *)&(Reg), (const void *)(Src), sizeof(uint32_t))
#else
    #define MOVB_R(  Reg, Src) (*(uint8_t  *)&(Reg) = *(uint8_t  *)(Src))
    #define MOVW_R(  Reg, Src) (*(uint16_t *)&(Reg) = *(uint16_t *)(Src))
    #define MOV3_R(  Reg, Src) (*(__m24    *)&(Reg) = *(__m24    *)(Src))
    #define MOVD_R(  Reg, Src) (*(uint32_t *)&(Reg) = *(uint32_t *)(Src))
#endif

#define MOVQ_R(  Reg, Src) ((Reg) = _mm_loadl_epi64((__m128i *)(Src)))
#define MOVDQ_R( Reg, Src) ((Reg) = _mm_load_si128( (__m128i *)(Src)))
#define MOVDQU_R(Reg, Src) ((Reg) = _mm_loadu_si128((__m128i *)(Src)))
|
||||
|
||||
// As above, but the other half: "store to memory from register"...
//
// MOVB_M / MOVW_M / MOV3_M / MOVD_M write the first 1, 2, 3 or 4 bytes of Reg
// to Dest and touch nothing beyond that. MOVQ_M / MOVDQ_M / MOVDQU_M map onto
// the SSE2 store intrinsics.
#if defined(__riscv)
    // The RISC-V build performs the narrow moves with memcpy() instead of
    // pointer-cast assignment. The length is the size of the element being
    // stored: using sizeof(__m128i) here would write past the element at Dest.
    #define MOVB_M(  Dest, Reg) memcpy((void *)(Dest), (const void *)&(Reg), sizeof(uint8_t))
    #define MOVW_M(  Dest, Reg) memcpy((void *)(Dest), (const void *)&(Reg), sizeof(uint16_t))
    #define MOV3_M(  Dest, Reg) memcpy((void *)(Dest), (const void *)&(Reg), sizeof(__m24))
    #define MOVD_M(  Dest, Reg) memcpy((void *)(Dest), (const void *)&(Reg), sizeof(uint32_t))
#else
    #define MOVB_M(  Dest, Reg) (*(uint8_t  *)(Dest) = *(uint8_t  *)&(Reg))
    #define MOVW_M(  Dest, Reg) (*(uint16_t *)(Dest) = *(uint16_t *)&(Reg))
    #define MOV3_M(  Dest, Reg) (*(__m24    *)(Dest) = *(__m24    *)&(Reg))
    #define MOVD_M(  Dest, Reg) (*(uint32_t *)(Dest) = *(uint32_t *)&(Reg))
#endif

#define MOVQ_M(  Dest, Reg) (_mm_storel_epi64((__m128i *)(Dest), (Reg)))
#define MOVDQ_M( Dest, Reg) (_mm_store_si128( (__m128i *)(Dest), (Reg)))
#define MOVDQU_M(Dest, Reg) (_mm_storeu_si128((__m128i *)(Dest), (Reg)))
|
||||
|
@ -749,6 +767,9 @@ void CpuSwizzleBlt( // #########################################################
|
|||
#elif(defined(__ARM_ARCH))
|
||||
#define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))
|
||||
StreamingLoadSupported = 0;
|
||||
#elif(defined(__riscv))
|
||||
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
|
||||
StreamingLoadSupported = 0;
|
||||
#elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))
|
||||
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
// Minimal stand-ins for the SSE2 intrinsics used by CpuSwizzleBlt.c, for
// RISC-V builds. The #ifndef guard opened here is closed by the last #endif
// of this header.
#pragma once
#ifndef __RISCV_SSE_SUPPORT_HPP__
#define __RISCV_SSE_SUPPORT_HPP__

#include <stdint.h> // uint16_t, uint64_t
#include <string.h> // memcpy
|
||||
#if defined(__riscv)
|
||||
|
||||
// 128-bit integer vector standing in for SSE2's __m128i
// (8 lanes x 16 bits = 16 bytes, via the GCC/Clang vector extension).
typedef uint16_t __attribute__((vector_size(16))) __m128i;

// All helpers below are `static inline` so that the header can be included
// from more than one translation unit without duplicate symbol definitions.
// They copy the bytes that mem_addr POINTS TO (never the bytes of the pointer
// variable itself) and use memcpy, so none of them requires mem_addr to be
// aligned.

// Loads the 8 bytes at mem_addr into the low half of the result; the high
// half is zero.
static inline __m128i _mm_loadl_epi64(__m128i const *mem_addr)
{
    __m128i ret = {0};
    memcpy(&ret, mem_addr, sizeof(uint64_t));
    return ret;
}

// Loads the 16 bytes at mem_addr.
static inline __m128i _mm_load_si128(__m128i const *mem_addr)
{
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(ret));
    return ret;
}

// Loads the 16 bytes at mem_addr (same implementation as _mm_load_si128).
static inline __m128i _mm_loadu_si128(__m128i const *mem_addr)
{
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(ret));
    return ret;
}

// Stores the low 8 bytes of `a` to mem_addr; bytes beyond that are untouched.
static inline void _mm_storel_epi64(__m128i *mem_addr, __m128i a)
{
    memcpy(mem_addr, &a, sizeof(uint64_t));
}

// Stores all 16 bytes of `a` to mem_addr.
static inline void _mm_store_si128(__m128i *mem_addr, __m128i a)
{
    memcpy(mem_addr, &a, sizeof(a));
}

// Stores all 16 bytes of `a` to mem_addr (same implementation as _mm_store_si128).
static inline void _mm_storeu_si128(__m128i *mem_addr, __m128i a)
{
    memcpy(mem_addr, &a, sizeof(a));
}

// Implemented as a plain 16-byte store; no non-temporal hint is emitted.
static inline void _mm_stream_si128(void *mem_addr, __m128i a)
{
    memcpy(mem_addr, &a, sizeof(a));
}

// Implemented as a plain 16-byte load; no non-temporal hint is emitted.
static inline __m128i _mm_stream_load_si128(void *mem_addr)
{
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(ret));
    return ret;
}
|
||||
|
||||
// Emits a RISC-V `fence p,s` instruction. The "memory" clobber additionally
// stops the compiler from moving memory accesses across the statement.
#define RISCV_FENCE(p, s) \
    __asm__ __volatile__ ("fence " #p "," #s : : : "memory")

// Stand-in for SSE's _mm_sfence(), implemented as `fence rw,rw`.
// Declared with a (void) prototype and as `static inline` so that including
// this header from several translation units cannot produce duplicate
// definitions.
static inline void _mm_sfence(void)
{
    RISCV_FENCE(rw, rw);
}
|
||||
#else
|
||||
#error "compiling for rv64g (riscv64) but compiler architecture macro undefined"
|
||||
#endif
|
||||
#endif
|
Loading…
Reference in New Issue