feature: add calculation of stack count for sync RT
Related-To: NEO-10830
Signed-off-by: Alicja Lukaszewicz <alicja.lukaszewicz@intel.com>
This commit is contained in:
parent
ee9af40a27
commit
68dc7fb33b
|
@ -1120,7 +1120,7 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
|||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
|
||||
dispatchGlobals.numDSSRTStacks = getHardwareInfo().capabilityTable.syncNumRTStacksPerDSS;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getNumRtStacksPerDss(*this);
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
|
||||
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
||||
|
|
|
@ -13,10 +13,12 @@
|
|||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/release_helper/release_helper.h"
|
||||
|
||||
#include "ocl_igc_shared/raytracing/ocl_raytracing_structures.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
class RayTracingHelper : public NonCopyableOrMovableClass {
|
||||
public:
|
||||
|
@ -25,12 +27,15 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
|
|||
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * MemoryConstants::kiloByte;
|
||||
static constexpr uint32_t maxBvhLevels = 8;
|
||||
|
||||
static constexpr uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
static constexpr uint32_t fixedSizeOfRtStacksPerDss = 2048;
|
||||
|
||||
static size_t getDispatchGlobalSize() {
|
||||
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getRTStackSizePerTile(const Device &device, uint32_t tiles, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
|
||||
return static_cast<size_t>(alignUp(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device.getHardwareInfo())) + extraBytesGlobal, MemoryConstants::cacheLineSize));
|
||||
return static_cast<size_t>(alignUp(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device)) + extraBytesGlobal, MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getTotalMemoryBackedFifoSize(const Device &device) {
|
||||
|
@ -41,12 +46,25 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
|
|||
return static_cast<size_t>(Math::log2(memoryBackedFifoSizePerDss / MemoryConstants::kiloByte) - 1);
|
||||
}
|
||||
|
||||
static uint32_t getNumRtStacks(const HardwareInfo &hwInfo) {
|
||||
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(hwInfo) * getNumRtStacksPerDss(hwInfo);
|
||||
static uint32_t getNumRtStacks(const Device &device) {
|
||||
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo()) * getNumRtStacksPerDss(device);
|
||||
}
|
||||
|
||||
static uint32_t getNumRtStacksPerDss(const HardwareInfo &hwInfo) {
|
||||
return hwInfo.capabilityTable.syncNumRTStacksPerDSS;
|
||||
static uint32_t getNumRtStacksPerDss(const Device &device) {
|
||||
auto releaseHelper = device.getReleaseHelper();
|
||||
|
||||
if (releaseHelper == nullptr || releaseHelper->isNumRtStacksPerDssFixedValue()) {
|
||||
return fixedSizeOfRtStacksPerDss;
|
||||
}
|
||||
|
||||
const auto &hwInfo = device.getHardwareInfo();
|
||||
UNRECOVERABLE_IF(hwInfo.gtSystemInfo.EUCount == 0)
|
||||
|
||||
uint32_t maxNumEUsPerDSS = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
uint32_t maxNumThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
|
||||
uint32_t maxSIMTThreadsPerThread = CommonConstants::maximalSimdSize;
|
||||
|
||||
return std::min(maxSizeOfRtStacksPerDss, maxNumEUsPerDSS * maxNumThreadsPerEU * maxSIMTThreadsPerThread);
|
||||
}
|
||||
|
||||
static uint32_t getStackSizePerRay(uint32_t maxBvhLevel, uint32_t extraBytesLocal) {
|
||||
|
|
|
@ -62,6 +62,7 @@ class ReleaseHelper {
|
|||
virtual bool isDisablingMsaaRequired() const = 0;
|
||||
virtual bool isDummyBlitWaRequired() const = 0;
|
||||
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
|
||||
virtual bool isNumRtStacksPerDssFixedValue() const = 0;
|
||||
|
||||
protected:
|
||||
ReleaseHelper(HardwareIpVersion hardwareIpVersion) : hardwareIpVersion(hardwareIpVersion) {}
|
||||
|
@ -103,6 +104,7 @@ class ReleaseHelperHw : public ReleaseHelper {
|
|||
bool isDisablingMsaaRequired() const override;
|
||||
bool isDummyBlitWaRequired() const override;
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
|
||||
bool isNumRtStacksPerDssFixedValue() const override;
|
||||
|
||||
protected:
|
||||
ReleaseHelperHw(HardwareIpVersion hardwareIpVersion) : ReleaseHelper(hardwareIpVersion) {}
|
||||
|
|
|
@ -158,4 +158,9 @@ const SizeToPreferredSlmValueArray &ReleaseHelperHw<releaseType>::getSizeToPrefe
|
|||
return sizeToPreferredSlmValue;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::isNumRtStacksPerDssFixedValue() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -41,6 +41,7 @@ class MockReleaseHelper : public ReleaseHelper {
|
|||
ADDMETHOD_CONST_NOBASE(isLocalOnlyAllowed, bool, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDisablingMsaaRequired, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDummyBlitWaRequired, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isNumRtStacksPerDssFixedValue, bool, true, ());
|
||||
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
|
||||
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_release_helper.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
@ -39,24 +40,16 @@ TEST(RayTracingHelperTests, whenRTStackSizeIsRequestedThenCorrectValueIsReturned
|
|||
uint32_t extraBytesGlobal = 100;
|
||||
uint32_t tiles = 2;
|
||||
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device.getHardwareInfo()) + extraBytesGlobal, MemoryConstants::cacheLineSize);
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device) + extraBytesGlobal, MemoryConstants::cacheLineSize);
|
||||
size_t size = RayTracingHelper::getRTStackSizePerTile(device, tiles, maxBvhLevel, extraBytesLocal, extraBytesGlobal);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsReturned) {
|
||||
MockDevice device;
|
||||
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacksPerDss(device.getHardwareInfo());
|
||||
uint32_t expectedValue = device.getHardwareInfo().capabilityTable.syncNumRTStacksPerDSS;
|
||||
EXPECT_EQ(expectedValue, numDssRtStacks);
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksIsQueriedThenItIsEqualToNumRtStacksPerDssMultipliedByDualSubsliceCount) {
|
||||
MockDevice device;
|
||||
|
||||
uint32_t numDssRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device.getHardwareInfo());
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device.getHardwareInfo());
|
||||
uint32_t numDssRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device);
|
||||
uint32_t subsliceCount = GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo());
|
||||
|
||||
EXPECT_LT(0u, numDssRtStacks);
|
||||
|
@ -77,3 +70,54 @@ TEST(RayTracingHelperTests, whenStackSizePerRayIsRequestedThenCorrectValueIsRetu
|
|||
// Checks that RayTracingHelper::getMemoryBackedFifoSizeToPatch() returns 2.
TEST(RayTracingHelperTests, whenGetMemoryBackedFifoSizeToPatchIsCalledThenCorrectValueIsReturned) {
    const auto fifoSizeToPatch = RayTracingHelper::getMemoryBackedFifoSizeToPatch();
    EXPECT_EQ(2u, fifoSizeToPatch);
}
|
||||
|
||||
// With a release helper that reports a fixed RT stack count, the per-DSS
// stack count must be 2048.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsTrueThenCorrectValueIsReturned) {
    MockReleaseHelper mockReleaseHelper;
    MockDevice mockDevice;

    mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = true;
    mockDevice.mockReleaseHelper = &mockReleaseHelper;

    uint32_t expectedFixedValue = 2048;
    uint32_t numRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(mockDevice);

    EXPECT_EQ(expectedFixedValue, numRtStacksPerDss);
}
|
||||
|
||||
// With a release helper that does not report a fixed value, the per-DSS stack
// count is derived from the GT system info of the device.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsFalseThenCorrectValueIsReturned) {
    MockReleaseHelper mockReleaseHelper;
    mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = false;

    uint32_t euPerSubSlice = 16;
    uint32_t totalThreads = 672;
    uint32_t totalEus = 96;

    auto hwInfo = *NEO::defaultHwInfo;
    auto &gtSystemInfo = hwInfo.gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = euPerSubSlice;
    gtSystemInfo.ThreadCount = totalThreads;
    gtSystemInfo.EUCount = totalEus;

    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    mockDevice->mockReleaseHelper = &mockReleaseHelper;

    // euPerSubSlice * (totalThreads / totalEus) * CommonConstants::maximalSimdSize = 3584u
    constexpr uint32_t expectedValue = 3584;
    const auto numRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*mockDevice);

    EXPECT_EQ(expectedValue, numRtStacksPerDss);
}
|
||||
|
||||
// When the computed per-DSS stack count is above the supported maximum, the
// helper must return the maximum (4096) instead.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssExceedsMaxThenReturnsMaxRtStacksPerDssSupported) {
    MockReleaseHelper mockReleaseHelper;
    mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = false;

    auto hwInfo = *NEO::defaultHwInfo;
    auto &gtSystemInfo = hwInfo.gtSystemInfo;
    // 512 * (2048 / 256) already equals 4096 before the SIMD factor is applied.
    gtSystemInfo.MaxEuPerSubSlice = 512;
    gtSystemInfo.ThreadCount = 2048;
    gtSystemInfo.EUCount = 256;

    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    mockDevice->mockReleaseHelper = &mockReleaseHelper;

    uint32_t expectedCappedValue = 4096;
    uint32_t numRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*mockDevice);

    EXPECT_EQ(expectedCappedValue, numRtStacksPerDss);
}
|
|
@ -41,6 +41,7 @@ TEST_F(ReleaseHelper1255Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ TEST_F(ReleaseHelper1256Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ TEST_F(ReleaseHelper1257Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ TEST_F(ReleaseHelper1260Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ TEST_F(ReleaseHelper1261Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ TEST_F(ReleaseHelper1270Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ TEST_F(ReleaseHelper1271Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ TEST_F(ReleaseHelper1274Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ TEST_F(ReleaseHelper2001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ TEST_F(ReleaseHelper2004Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
TEST_F(ReleaseHelper2004Tests, whenShouldAdjustCalledThenTrueReturned) {
|
||||
|
|
|
@ -39,6 +39,7 @@ TEST_F(ReleaseHelper3000Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(64u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(revision == 0, releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ TEST_F(ReleaseHelper3001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
|||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(revision == 0, releaseHelper->isDisablingMsaaRequired());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue