feature: add calculation of stack count for sync RT

Related-To: NEO-10830

Signed-off-by: Alicja Lukaszewicz <alicja.lukaszewicz@intel.com>
This commit is contained in:
Alicja Lukaszewicz 2024-11-18 13:24:30 +00:00 committed by Compute-Runtime-Automation
parent ee9af40a27
commit 68dc7fb33b
18 changed files with 99 additions and 17 deletions

View File

@ -1120,7 +1120,7 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
auto releaseHelper = getReleaseHelper();
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
dispatchGlobals.numDSSRTStacks = getHardwareInfo().capabilityTable.syncNumRTStacksPerDSS;
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getNumRtStacksPerDss(*this);
dispatchGlobals.maxBVHLevels = maxBvhLevels;
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);

View File

@ -13,10 +13,12 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/release_helper/release_helper.h"
#include "ocl_igc_shared/raytracing/ocl_raytracing_structures.h"
#include <cstdint>
namespace NEO {
class RayTracingHelper : public NonCopyableOrMovableClass {
public:
@ -25,12 +27,15 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
// Memory-backed FIFO size per DSS: 8 * MemoryConstants::kiloByte.
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * MemoryConstants::kiloByte;
// Maximum number of BVH levels.
static constexpr uint32_t maxBvhLevels = 8;
// Upper bound applied by getNumRtStacksPerDss() to the stack count it computes from the hardware info.
static constexpr uint32_t maxSizeOfRtStacksPerDss = 4096;
// Value returned by getNumRtStacksPerDss() when the device has no release helper
// or the release helper reports that the count is a fixed value.
static constexpr uint32_t fixedSizeOfRtStacksPerDss = 2048;
// Returns sizeof(RTDispatchGlobals) aligned up to MemoryConstants::cacheLineSize.
static size_t getDispatchGlobalSize() {
    const auto alignedSize = alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize);
    return static_cast<size_t>(alignedSize);
}
// Returns the RT stack size for one tile: the per-ray stack size (for maxBvhLevel and
// extraBytesLocal) multiplied by the number of RT stacks, plus extraBytesGlobal, with the
// sum aligned up to MemoryConstants::cacheLineSize.
// NOTE(review): the `tiles` parameter is not used in the visible body.
static size_t getRTStackSizePerTile(const Device &device, uint32_t tiles, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
// NOTE(review): two return statements follow back to back, so the second one is unreachable
// as written. They differ only in the argument passed to getNumRtStacks (HardwareInfo vs
// Device); this looks like the old and new line of a diff shown together - confirm which
// one is current and drop the other.
return static_cast<size_t>(alignUp(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device.getHardwareInfo())) + extraBytesGlobal, MemoryConstants::cacheLineSize));
return static_cast<size_t>(alignUp(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device)) + extraBytesGlobal, MemoryConstants::cacheLineSize));
}
static size_t getTotalMemoryBackedFifoSize(const Device &device) {
@ -41,12 +46,25 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
return static_cast<size_t>(Math::log2(memoryBackedFifoSizePerDss / MemoryConstants::kiloByte) - 1);
}
static uint32_t getNumRtStacks(const HardwareInfo &hwInfo) {
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(hwInfo) * getNumRtStacksPerDss(hwInfo);
// Returns the total RT stack count for the device: the per-DSS stack count multiplied by
// the value of GfxCoreHelper::getHighestEnabledDualSubSlice for the device's hardware info.
static uint32_t getNumRtStacks(const Device &device) {
    const auto highestEnabledDss = NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo());
    const auto stacksPerDss = getNumRtStacksPerDss(device);
    return highestEnabledDss * stacksPerDss;
}
static uint32_t getNumRtStacksPerDss(const HardwareInfo &hwInfo) {
return hwInfo.capabilityTable.syncNumRTStacksPerDSS;
static uint32_t getNumRtStacksPerDss(const Device &device) {
auto releaseHelper = device.getReleaseHelper();
if (releaseHelper == nullptr || releaseHelper->isNumRtStacksPerDssFixedValue()) {
return fixedSizeOfRtStacksPerDss;
}
const auto &hwInfo = device.getHardwareInfo();
UNRECOVERABLE_IF(hwInfo.gtSystemInfo.EUCount == 0)
uint32_t maxNumEUsPerDSS = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
uint32_t maxNumThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
uint32_t maxSIMTThreadsPerThread = CommonConstants::maximalSimdSize;
return std::min(maxSizeOfRtStacksPerDss, maxNumEUsPerDSS * maxNumThreadsPerEU * maxSIMTThreadsPerThread);
}
static uint32_t getStackSizePerRay(uint32_t maxBvhLevel, uint32_t extraBytesLocal) {

View File

@ -62,6 +62,7 @@ class ReleaseHelper {
virtual bool isDisablingMsaaRequired() const = 0;
virtual bool isDummyBlitWaRequired() const = 0;
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
// Reports whether the number of RT stacks per DSS is a fixed value for this release.
virtual bool isNumRtStacksPerDssFixedValue() const = 0;
protected:
ReleaseHelper(HardwareIpVersion hardwareIpVersion) : hardwareIpVersion(hardwareIpVersion) {}
@ -103,6 +104,7 @@ class ReleaseHelperHw : public ReleaseHelper {
bool isDisablingMsaaRequired() const override;
bool isDummyBlitWaRequired() const override;
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
// Per-release answer to whether the number of RT stacks per DSS is a fixed value.
bool isNumRtStacksPerDssFixedValue() const override;
protected:
ReleaseHelperHw(HardwareIpVersion hardwareIpVersion) : ReleaseHelper(hardwareIpVersion) {}

View File

@ -158,4 +158,9 @@ const SizeToPreferredSlmValueArray &ReleaseHelperHw<releaseType>::getSizeToPrefe
return sizeToPreferredSlmValue;
}
// Generic (unspecialized) implementation: reports that the number of RT stacks per DSS
// is a fixed value.
template <ReleaseType releaseType>
bool ReleaseHelperHw<releaseType>::isNumRtStacksPerDssFixedValue() const {
    constexpr bool usesFixedValue = true;
    return usesFixedValue;
}
} // namespace NEO

View File

@ -41,6 +41,7 @@ class MockReleaseHelper : public ReleaseHelper {
ADDMETHOD_CONST_NOBASE(isLocalOnlyAllowed, bool, {}, ());
ADDMETHOD_CONST_NOBASE(isDisablingMsaaRequired, bool, false, ());
ADDMETHOD_CONST_NOBASE(isDummyBlitWaRequired, bool, false, ());
ADDMETHOD_CONST_NOBASE(isNumRtStacksPerDssFixedValue, bool, true, ());
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};

View File

@ -8,6 +8,7 @@
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_release_helper.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
@ -39,24 +40,16 @@ TEST(RayTracingHelperTests, whenRTStackSizeIsRequestedThenCorrectValueIsReturned
uint32_t extraBytesGlobal = 100;
uint32_t tiles = 2;
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device.getHardwareInfo()) + extraBytesGlobal, MemoryConstants::cacheLineSize);
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device) + extraBytesGlobal, MemoryConstants::cacheLineSize);
size_t size = RayTracingHelper::getRTStackSizePerTile(device, tiles, maxBvhLevel, extraBytesLocal, extraBytesGlobal);
EXPECT_EQ(expectedSize, size);
}
// The per-DSS RT stack count queried through the helper must equal the
// syncNumRTStacksPerDSS entry of the device's capability table.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsReturned) {
    MockDevice device;
    const auto &hwInfo = device.getHardwareInfo();

    const uint32_t queriedStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(hwInfo);
    const uint32_t capabilityStacksPerDss = hwInfo.capabilityTable.syncNumRTStacksPerDSS;

    EXPECT_EQ(capabilityStacksPerDss, queriedStacksPerDss);
}
TEST(RayTracingHelperTests, whenNumRtStacksIsQueriedThenItIsEqualToNumRtStacksPerDssMultipliedByDualSubsliceCount) {
MockDevice device;
uint32_t numDssRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device.getHardwareInfo());
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device.getHardwareInfo());
uint32_t numDssRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device);
uint32_t subsliceCount = GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo());
EXPECT_LT(0u, numDssRtStacks);
@ -77,3 +70,54 @@ TEST(RayTracingHelperTests, whenStackSizePerRayIsRequestedThenCorrectValueIsRetu
TEST(RayTracingHelperTests, whenGetMemoryBackedFifoSizeToPatchIsCalledThenCorrectValueIsReturned) {
EXPECT_EQ(2u, RayTracingHelper::getMemoryBackedFifoSizeToPatch());
}
// When the release helper reports a fixed value, the helper must return 2048 stacks per DSS.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsTrueThenCorrectValueIsReturned) {
    MockReleaseHelper releaseHelper;
    releaseHelper.isNumRtStacksPerDssFixedValueResult = true;

    MockDevice device;
    device.mockReleaseHelper = &releaseHelper;

    const uint32_t expectedStacksPerDss = 2048;
    const uint32_t actualStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);

    EXPECT_EQ(expectedStacksPerDss, actualStacksPerDss);
}
// When the release helper does not report a fixed value, the per-DSS stack count is
// computed from the hardware info of the device.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsFalseThenCorrectValueIsReturned) {
    MockReleaseHelper releaseHelper;
    releaseHelper.isNumRtStacksPerDssFixedValueResult = false;

    auto hwInfo = *NEO::defaultHwInfo;
    auto &gtSystemInfo = hwInfo.gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = 16;
    gtSystemInfo.ThreadCount = 672;
    gtSystemInfo.EUCount = 96;

    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    device->mockReleaseHelper = &releaseHelper;

    // MaxEuPerSubSlice * (ThreadCount / EUCount) * CommonConstants::maximalSimdSize = 3584u
    constexpr uint32_t expectedValue = 3584;
    const uint32_t actualValue = RayTracingHelper::getNumRtStacksPerDss(*device);
    EXPECT_EQ(expectedValue, actualValue);
}
// With hardware info large enough that the computed count exceeds the cap,
// the helper must return the cap of 4096 stacks per DSS.
TEST(RayTracingHelperTests, whenNumRtStacksPerDssExceedsMaxThenReturnsMaxRtStacksPerDssSupported) {
    MockReleaseHelper releaseHelper;
    releaseHelper.isNumRtStacksPerDssFixedValueResult = false;

    auto hwInfo = *NEO::defaultHwInfo;
    auto &gtSystemInfo = hwInfo.gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = 512;
    gtSystemInfo.ThreadCount = 2048;
    gtSystemInfo.EUCount = 256;

    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    device->mockReleaseHelper = &releaseHelper;

    const uint32_t expectedCap = 4096;
    const uint32_t actualStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*device);

    EXPECT_EQ(expectedCap, actualStacksPerDss);
}

View File

@ -41,6 +41,7 @@ TEST_F(ReleaseHelper1255Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -41,6 +41,7 @@ TEST_F(ReleaseHelper1256Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -41,6 +41,7 @@ TEST_F(ReleaseHelper1257Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -38,6 +38,7 @@ TEST_F(ReleaseHelper1260Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -37,6 +37,7 @@ TEST_F(ReleaseHelper1261Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -42,6 +42,7 @@ TEST_F(ReleaseHelper1270Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -42,6 +42,7 @@ TEST_F(ReleaseHelper1271Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -40,6 +40,7 @@ TEST_F(ReleaseHelper1274Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -41,6 +41,7 @@ TEST_F(ReleaseHelper2001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -42,6 +42,7 @@ TEST_F(ReleaseHelper2004Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_FALSE(releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}
TEST_F(ReleaseHelper2004Tests, whenShouldAdjustCalledThenTrueReturned) {

View File

@ -39,6 +39,7 @@ TEST_F(ReleaseHelper3000Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_EQ(64u, releaseHelper->getStackSizePerRay());
EXPECT_EQ(revision == 0, releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}

View File

@ -39,6 +39,7 @@ TEST_F(ReleaseHelper3001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
EXPECT_EQ(revision == 0, releaseHelper->isDisablingMsaaRequired());
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
}
}