mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Allocate per-tile RTDispatchGlobals, handle ray tracing patch tokens.
Related-to: NEO-6711 Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0eb2001d8f
commit
f4879f064f
@@ -301,7 +301,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
return ZE_RESULT_ERROR_UNINITIALIZED;
|
||||
} else {
|
||||
NEO::LinearStream *linearStream = commandContainer.getCommandStream();
|
||||
NEO::EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(*linearStream, *memoryBackedBuffer);
|
||||
NEO::EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(*linearStream, memoryBackedBuffer->getGpuAddress());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -904,23 +904,23 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
|
||||
|
||||
if (this->usesRayTracing()) {
|
||||
if (this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize > 0) {
|
||||
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
|
||||
neoDevice->initializeRayTracing(bvhLevels);
|
||||
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(bvhLevels);
|
||||
if (rtDispatchGlobals == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
|
||||
this->residencyContainer.push_back(rtDispatchGlobals);
|
||||
|
||||
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
|
||||
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals,
|
||||
static_cast<uintptr_t>(rtDispatchGlobals->getGpuAddressToPatch()));
|
||||
} else {
|
||||
neoDevice->initializeRayTracing(0);
|
||||
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
|
||||
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
|
||||
neoDevice->initializeRayTracing(bvhLevels);
|
||||
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
|
||||
if (rtDispatchGlobalsInfo == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) {
|
||||
this->residencyContainer.push_back(rtDispatchGlobals);
|
||||
}
|
||||
|
||||
auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch();
|
||||
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
|
||||
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals,
|
||||
static_cast<uintptr_t>(address));
|
||||
|
||||
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -732,53 +732,9 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
|
||||
|
||||
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
|
||||
EXPECT_NE(nullptr, rtDispatchGlobals);
|
||||
|
||||
size_t residencySize = kernel->getResidencyContainer().size();
|
||||
EXPECT_NE(0u, residencySize);
|
||||
|
||||
EXPECT_EQ(kernel->getResidencyContainer()[residencySize - 1], rtDispatchGlobals);
|
||||
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation) {
|
||||
KernelDescriptor mockDescriptor = {};
|
||||
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
|
||||
mockDescriptor.kernelMetadata.kernelName = "rt_test";
|
||||
for (auto i = 0u; i < 3u; i++) {
|
||||
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
|
||||
}
|
||||
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmutableData =
|
||||
std::make_unique<MockImmutableData>(32u);
|
||||
mockKernelImmutableData->kernelDescriptor = &mockDescriptor;
|
||||
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
module = std::make_unique<MockModule>(device,
|
||||
moduleBuildLog,
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = "rt_test";
|
||||
|
||||
auto immDataVector =
|
||||
const_cast<std::vector<std::unique_ptr<KernelImmutableData>> *>(&module->getKernelImmutableDataVector());
|
||||
|
||||
immDataVector->push_back(std::move(mockKernelImmutableData));
|
||||
|
||||
auto result = kernel->initialize(&kernelDesc);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
|
||||
|
||||
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
|
||||
EXPECT_EQ(nullptr, rtDispatchGlobals);
|
||||
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) {
|
||||
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
|
||||
KernelDescriptor mockDescriptor = {};
|
||||
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
|
||||
mockDescriptor.kernelMetadata.kernelName = "rt_test";
|
||||
@@ -809,14 +765,94 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAll
|
||||
|
||||
immDataVector->push_back(std::move(mockKernelImmutableData));
|
||||
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(nullptr);
|
||||
neoDevice->rtDispatchGlobalsForceAllocation = false;
|
||||
|
||||
delete driverHandle->svmAllocsManager;
|
||||
execEnv->memoryManager.reset(new FailMemoryManager(0, *execEnv));
|
||||
driverHandle->setMemoryManager(execEnv->memoryManager.get());
|
||||
driverHandle->svmAllocsManager = new NEO::SVMAllocsManager(execEnv->memoryManager.get(), false);
|
||||
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
|
||||
otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(0, *neoDevice->executionEnvironment);
|
||||
neoDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, kernel->initialize(&kernelDesc));
|
||||
|
||||
neoDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
|
||||
}
|
||||
|
||||
// Builds a mock kernel flagged with hasRTCalls and a non-zero rtDispatchGlobals pointer size,
// swaps a FailMemoryManager into the execution environment, and expects KernelImp::initialize
// to report ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY.
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArrayAllocationFailsThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
    KernelDescriptor mockDescriptor{};
    mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    // A positive pointer size is what makes initialize() request RTDispatchGlobals.
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc{};
    kernelDesc.pKernelName = "rt_test";

    // The module exposes its immutable-data vector as const; the test needs to append to it.
    auto &immDataVector = const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    neoDevice->rtDispatchGlobalsForceAllocation = false;

    // Ensure that allocating RTDispatchGlobals succeeds, but the array allocation fails.
    std::unique_ptr<NEO::MemoryManager> otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(1, *neoDevice->executionEnvironment);
    auto &activeMemoryManager = neoDevice->executionEnvironment->memoryManager;
    activeMemoryManager.swap(otherMemoryManager);

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, initResult);

    // Put the original memory manager back before the fixture tears down.
    activeMemoryManager.swap(otherMemoryManager);
}
|
||||
|
||||
// Builds a mock kernel flagged with hasRTCalls and a non-zero rtDispatchGlobals pointer size,
// leaves the regular memory manager in place, and expects KernelImp::initialize to succeed.
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArrayAllocationSucceedsThenRayTracingIsInitialized, IsPVC) {
    KernelDescriptor mockDescriptor{};
    mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    // A positive pointer size is what makes initialize() request RTDispatchGlobals.
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc{};
    kernelDesc.pKernelName = "rt_test";

    // The module exposes its immutable-data vector as const; the test needs to append to it.
    auto &immDataVector = const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    neoDevice->rtDispatchGlobalsForceAllocation = false;

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, initResult);
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) {
|
||||
@@ -902,7 +938,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
|
||||
EXPECT_NE(nullptr, rtDispatchGlobals);
|
||||
|
||||
auto dispatchGlobalsAddressPatched = *reinterpret_cast<uint64_t *>(ptrOffset(crossThreadData.get(), rtGlobalPointerPatchOffset));
|
||||
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->getGpuAddressToPatch());
|
||||
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->rtDispatchGlobals[0]->getGpuAddressToPatch());
|
||||
EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched);
|
||||
|
||||
kernel->crossThreadData.release();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -37,17 +37,14 @@ TEST(RayTracingHelperTests, whenGlobalDispatchSizeIsRequestedThenCorrectValueIsR
|
||||
MockContext context(&device);
|
||||
|
||||
uint32_t maxBvhLevel = 2;
|
||||
uint32_t extraBytesPerThread = 20;
|
||||
uint32_t extraBytesLocal = 20;
|
||||
uint32_t extraBytesGlobal = 100;
|
||||
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getRtGlobalsSize(), MemoryConstants::cacheLineSize) +
|
||||
alignUp((RayTracingHelper::hitInfoSize +
|
||||
RayTracingHelper::bvhStackSize * maxBvhLevel +
|
||||
extraBytesPerThread),
|
||||
MemoryConstants::cacheLineSize) *
|
||||
context.getDevice(0)->getHardwareInfo().gtSystemInfo.DualSubSliceCount * RayTracingHelper::stackDssMultiplier +
|
||||
size_t expectedSize = alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize) +
|
||||
(RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel + extraBytesLocal) * RayTracingHelper::getNumRtStacks(device.getDevice()) +
|
||||
extraBytesGlobal;
|
||||
EXPECT_EQ(expectedSize, RayTracingHelper::getDispatchGlobalSize(device.getDevice(), maxBvhLevel, extraBytesPerThread, extraBytesGlobal));
|
||||
size_t size = RayTracingHelper::getDispatchGlobalSize(device.getDevice(), maxBvhLevel, extraBytesLocal, extraBytesGlobal);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsReturned) {
|
||||
@@ -60,6 +57,16 @@ TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsRe
|
||||
EXPECT_EQ(expectedValue, numDssRtStacks);
|
||||
}
|
||||
|
||||
// Checks that getNumRtStacks equals getNumRtStacksPerDss multiplied by the
// DualSubSliceCount reported in the device's hardware info.
TEST(RayTracingHelperTests, whenNumRtStacksIsQueriedThenItIsEqualToNumRtStacksPerDssMultipliedByDualSubsliceCount) {
    MockDevice device;

    const uint32_t stacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);
    const uint32_t totalStacks = RayTracingHelper::getNumRtStacks(device);
    const uint32_t dualSubSliceCount = device.getHardwareInfo().gtSystemInfo.DualSubSliceCount;

    const uint32_t expectedTotal = stacksPerDss * dualSubSliceCount;
    EXPECT_EQ(totalStacks, expectedTotal);
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumDssIsRequestedThenCorrectValueIsReturned) {
|
||||
MockDevice device;
|
||||
EXPECT_EQ(device.getHardwareInfo().gtSystemInfo.DualSubSliceCount, RayTracingHelper::getNumDss(device));
|
||||
@@ -72,9 +79,7 @@ TEST(RayTracingHelperTests, whenStackSizePerRayIsRequestedThenCorrectValueIsRetu
|
||||
uint32_t maxBvhLevel = 1234;
|
||||
uint32_t extraBytesLocal = 5678;
|
||||
|
||||
uint32_t expectedValue = alignUp((RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel +
|
||||
extraBytesLocal),
|
||||
MemoryConstants::cacheLineSize);
|
||||
uint32_t expectedValue = RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel + extraBytesLocal;
|
||||
EXPECT_EQ(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal), expectedValue);
|
||||
}
|
||||
|
||||
|
||||
@@ -422,7 +422,7 @@ struct EncodeMiArbCheck {
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct EncodeEnableRayTracing {
|
||||
static void programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer);
|
||||
static void programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer);
|
||||
static void append3dStateBtd(void *ptr3dStateBtd);
|
||||
};
|
||||
|
||||
|
||||
@@ -477,7 +477,7 @@ void EncodeSempahore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
|
||||
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
|
||||
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -25,10 +25,10 @@ void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfa
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
|
||||
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
|
||||
auto cmd = GfxFamily::cmd3dStateBtd;
|
||||
cmd.getBtdStateBody().setPerDssMemoryBackedBufferSize(static_cast<typename GfxFamily::_3DSTATE_BTD_BODY::PER_DSS_MEMORY_BACKED_BUFFER_SIZE>(RayTracingHelper::getMemoryBackedFifoSizeToPatch()));
|
||||
cmd.getBtdStateBody().setMemoryBackedBufferBasePointer(backBuffer.getGpuAddress());
|
||||
cmd.getBtdStateBody().setMemoryBackedBufferBasePointer(backBuffer);
|
||||
append3dStateBtd(&cmd);
|
||||
*commandStream.getSpaceForCmd<typename GfxFamily::_3DSTATE_BTD>() = cmd;
|
||||
}
|
||||
|
||||
@@ -609,19 +609,19 @@ EngineControl *Device::getInternalCopyEngine() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GraphicsAllocation *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
if (rtDispatchGlobals.size() == 0) {
|
||||
RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
if (rtDispatchGlobalsInfos.size() == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t last = rtDispatchGlobals.size() - 1;
|
||||
size_t last = rtDispatchGlobalsInfos.size() - 1;
|
||||
if (maxBvhLevels > last) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
for (size_t i = last; i >= maxBvhLevels; i--) {
|
||||
if (rtDispatchGlobals[i] != nullptr) {
|
||||
return rtDispatchGlobals[i];
|
||||
if (rtDispatchGlobalsInfos[i] != nullptr) {
|
||||
return rtDispatchGlobalsInfos[i];
|
||||
}
|
||||
|
||||
if (i == 0) {
|
||||
@@ -630,17 +630,22 @@ GraphicsAllocation *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
}
|
||||
|
||||
allocateRTDispatchGlobals(maxBvhLevels);
|
||||
return rtDispatchGlobals[maxBvhLevels];
|
||||
return rtDispatchGlobalsInfos[maxBvhLevels];
|
||||
}
|
||||
|
||||
void Device::initializeRayTracing(uint32_t maxBvhLevels) {
|
||||
if (rtMemoryBackedBuffer == nullptr) {
|
||||
auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
|
||||
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties({getRootDeviceIndex(), size, AllocationType::BUFFER, getDeviceBitfield()});
|
||||
|
||||
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::BUFFER, true, getDeviceBitfield());
|
||||
allocProps.flags.resource48Bit = true;
|
||||
allocProps.flags.isUSMDeviceAllocation = true;
|
||||
|
||||
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
}
|
||||
|
||||
while (rtDispatchGlobals.size() <= maxBvhLevels) {
|
||||
rtDispatchGlobals.push_back(nullptr);
|
||||
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
|
||||
rtDispatchGlobalsInfos.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -648,9 +653,21 @@ void Device::finalizeRayTracing() {
|
||||
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
|
||||
rtMemoryBackedBuffer = nullptr;
|
||||
|
||||
for (size_t i = 0; i < rtDispatchGlobals.size(); i++) {
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobals[i]);
|
||||
rtDispatchGlobals[i] = nullptr;
|
||||
for (size_t i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
|
||||
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
|
||||
if (rtDispatchGlobalsInfo == nullptr) {
|
||||
continue;
|
||||
}
|
||||
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobals[j]);
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
|
||||
}
|
||||
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation);
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
|
||||
|
||||
delete rtDispatchGlobalsInfos[i];
|
||||
rtDispatchGlobalsInfos[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -722,39 +739,89 @@ void Device::getAdapterMask(uint32_t &nodeMask) {
|
||||
}
|
||||
|
||||
void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
DEBUG_BREAK_IF(rtDispatchGlobals.size() < maxBvhLevels + 1);
|
||||
DEBUG_BREAK_IF(rtDispatchGlobals[maxBvhLevels] != nullptr);
|
||||
UNRECOVERABLE_IF(rtDispatchGlobalsInfos.size() < maxBvhLevels + 1);
|
||||
UNRECOVERABLE_IF(rtDispatchGlobalsInfos[maxBvhLevels] != nullptr);
|
||||
|
||||
uint32_t extraBytesLocal = 0;
|
||||
uint32_t extraBytesGlobal = 0;
|
||||
auto size = RayTracingHelper::getDispatchGlobalSize(*this, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
|
||||
auto dispatchGlobalsAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({getRootDeviceIndex(), size, AllocationType::BUFFER, getDeviceBitfield()});
|
||||
|
||||
if (nullptr == dispatchGlobalsAllocation) {
|
||||
const auto deviceCount = HwHelper::getSubDevicesCount(executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getHardwareInfo());
|
||||
|
||||
auto dispatchGlobalsInfo = new RTDispatchGlobalsInfo(nullptr);
|
||||
if (dispatchGlobalsInfo == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct RTDispatchGlobals dispatchGlobals = {0};
|
||||
|
||||
auto numRtStacks = RayTracingHelper::getNumRtStacks(*this);
|
||||
auto stackSizePerRay = RayTracingHelper::getStackSizePerRay(maxBvhLevels, 0);
|
||||
size_t rtMemOffset = alignUp(stackSizePerRay * numRtStacks, MemoryConstants::cacheLineSize);
|
||||
|
||||
auto &hwInfo = getHardwareInfo();
|
||||
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
|
||||
dispatchGlobals.rtMemBasePtr = rtMemOffset;
|
||||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
dispatchGlobals.stackSizePerRay = stackSizePerRay / 64;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
std::vector<uint64_t> gpuAddressVector;
|
||||
bool allocFailed = false;
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *dispatchGlobalsAllocation),
|
||||
for (unsigned int tile = 0; tile < deviceCount; tile++) {
|
||||
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::BUFFER, true, getDeviceBitfield());
|
||||
allocProps.flags.resource48Bit = true;
|
||||
allocProps.flags.isUSMDeviceAllocation = true;
|
||||
|
||||
auto dispatchGlobalsAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
|
||||
if (dispatchGlobalsAllocation == nullptr) {
|
||||
allocFailed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto dispatchGlobalsPtr = dispatchGlobalsAllocation->getGpuAddress();
|
||||
struct RTDispatchGlobals dispatchGlobals = {0};
|
||||
|
||||
dispatchGlobals.rtMemBasePtr = size + dispatchGlobalsPtr;
|
||||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
dispatchGlobals.stackSizePerRay = 0;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
|
||||
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
||||
dispatchGlobalsAsArray[7] = 1;
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsAllocation),
|
||||
*this,
|
||||
dispatchGlobalsAllocation,
|
||||
0,
|
||||
&dispatchGlobals,
|
||||
sizeof(RTDispatchGlobals));
|
||||
|
||||
dispatchGlobalsInfo->rtDispatchGlobals.push_back(dispatchGlobalsAllocation);
|
||||
gpuAddressVector.push_back(dispatchGlobalsAllocation->getGpuAddress());
|
||||
}
|
||||
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
|
||||
size_t arrayAllocSize = sizeof(uint64_t) * deviceCount;
|
||||
|
||||
if (!allocFailed) {
|
||||
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, arrayAllocSize,
|
||||
AllocationType::BUFFER, true, getDeviceBitfield());
|
||||
arrayAllocProps.flags.resource48Bit = true;
|
||||
arrayAllocProps.flags.isUSMDeviceAllocation = true;
|
||||
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
}
|
||||
|
||||
if (dispatchGlobalsArrayAllocation == nullptr) {
|
||||
for (auto allocation : dispatchGlobalsInfo->rtDispatchGlobals) {
|
||||
getMemoryManager()->freeGraphicsMemory(allocation);
|
||||
}
|
||||
delete dispatchGlobalsInfo;
|
||||
return;
|
||||
}
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsArrayAllocation),
|
||||
*this,
|
||||
dispatchGlobalsAllocation,
|
||||
dispatchGlobalsArrayAllocation,
|
||||
0,
|
||||
&dispatchGlobals,
|
||||
sizeof(RTDispatchGlobals));
|
||||
gpuAddressVector.data(),
|
||||
arrayAllocSize);
|
||||
|
||||
rtDispatchGlobals[maxBvhLevels] = dispatchGlobalsAllocation;
|
||||
dispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = dispatchGlobalsArrayAllocation;
|
||||
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -40,6 +40,13 @@ struct EngineGroupT {
|
||||
};
|
||||
using EngineGroupsT = std::vector<EngineGroupT>;
|
||||
|
||||
struct RTDispatchGlobalsInfo {
|
||||
RTDispatchGlobalsInfo(GraphicsAllocation *rtDispatchGlobalsArrayAllocation)
|
||||
: rtDispatchGlobalsArrayAllocation(rtDispatchGlobalsArrayAllocation){};
|
||||
std::vector<GraphicsAllocation *> rtDispatchGlobals; // per tile
|
||||
GraphicsAllocation *rtDispatchGlobalsArrayAllocation; // above array as visible from device
|
||||
};
|
||||
|
||||
class Device : public ReferenceTrackedObject<Device> {
|
||||
public:
|
||||
Device &operator=(const Device &) = delete;
|
||||
@@ -130,9 +137,10 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
|
||||
std::unique_ptr<SyncBufferHandler> syncBufferHandler;
|
||||
// Returns the stored rtMemoryBackedBuffer pointer as-is; nothing is allocated here, so the result can be nullptr.
GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; }
|
||||
GraphicsAllocation *getRTDispatchGlobals(uint32_t maxBvhLevels);
|
||||
RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels);
|
||||
// Reports whether rtMemoryBackedBuffer is non-null; that pointer is the only state consulted.
bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; }
|
||||
void initializeRayTracing(uint32_t maxBvhLevels);
|
||||
void allocateRTDispatchGlobals(uint32_t maxBvhLevels);
|
||||
|
||||
uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
|
||||
const std::vector<SubDevice *> getSubDevices() const { return subdevices; }
|
||||
@@ -174,7 +182,6 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
virtual bool genericSubDevicesAllowed();
|
||||
bool engineInstancedSubDevicesAllowed();
|
||||
void setAsEngineInstanced();
|
||||
void allocateRTDispatchGlobals(uint32_t maxBvhLevels);
|
||||
void finalizeRayTracing();
|
||||
|
||||
DeviceInfo deviceInfo = {};
|
||||
@@ -206,7 +213,8 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
uintptr_t specializedDevice = reinterpret_cast<uintptr_t>(nullptr);
|
||||
|
||||
GraphicsAllocation *rtMemoryBackedBuffer = nullptr;
|
||||
std::vector<GraphicsAllocation *> rtDispatchGlobals;
|
||||
std::vector<RTDispatchGlobalsInfo *> rtDispatchGlobalsInfos;
|
||||
|
||||
struct {
|
||||
bool isValid = false;
|
||||
std::array<uint8_t, HwInfoConfig::uuidSize> id;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2020-2021 Intel Corporation
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
@@ -411,6 +411,10 @@ inline bool decodeToken(const SPatchItemHeader *token, KernelFromPatchtokens &ou
|
||||
case PATCH_TOKEN_ALLOCATE_SYNC_BUFFER: {
|
||||
assignToken(out.tokens.allocateSyncBuffer, token);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_RT_GLOBAL_BUFFER:
|
||||
assignToken(out.tokens.allocateRTGlobalBuffer, token);
|
||||
break;
|
||||
}
|
||||
|
||||
return out.decodeStatus != DecodeError::InvalidBinary;
|
||||
|
||||
@@ -25,7 +25,7 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
|
||||
static constexpr uint32_t maxBvhLevels = 8;
|
||||
|
||||
static size_t getDispatchGlobalSize(const Device &device, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
|
||||
return static_cast<size_t>(alignUp(getRtGlobalsSize(), MemoryConstants::cacheLineSize) +
|
||||
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize) +
|
||||
getStackSizePerRay(maxBvhLevel, extraBytesLocal) * getNumRtStacks(device) +
|
||||
extraBytesGlobal);
|
||||
}
|
||||
@@ -38,8 +38,6 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
|
||||
return static_cast<size_t>(Math::log2(memoryBackedFifoSizePerDss / KB) - 1);
|
||||
}
|
||||
|
||||
static size_t getRtGlobalsSize() { return sizeof(RTDispatchGlobals); }
|
||||
|
||||
static uint32_t getNumRtStacks(const Device &device) {
|
||||
return device.getHardwareInfo().gtSystemInfo.DualSubSliceCount * stackDssMultiplier;
|
||||
}
|
||||
@@ -53,9 +51,7 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
|
||||
}
|
||||
|
||||
static uint32_t getStackSizePerRay(uint32_t maxBvhLevel, uint32_t extraBytesLocal) {
|
||||
return alignUp((hitInfoSize + bvhStackSize * maxBvhLevel +
|
||||
extraBytesLocal),
|
||||
MemoryConstants::cacheLineSize);
|
||||
return hitInfoSize + bvhStackSize * maxBvhLevel + extraBytesLocal;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -48,7 +48,6 @@ class MockDevice : public RootDevice {
|
||||
public:
|
||||
using Device::addEngineToEngineGroup;
|
||||
using Device::allEngines;
|
||||
using Device::allocateRTDispatchGlobals;
|
||||
using Device::commandStreamReceivers;
|
||||
using Device::createDeviceInternals;
|
||||
using Device::createEngine;
|
||||
@@ -165,6 +164,28 @@ class MockDevice : public RootDevice {
|
||||
|
||||
bool verifyAdapterLuid() override;
|
||||
|
||||
void finalizeRayTracing() {
|
||||
for (unsigned int i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
|
||||
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
|
||||
if (rtDispatchGlobalsForceAllocation == true && rtDispatchGlobalsInfo != nullptr) {
|
||||
for (unsigned int j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
|
||||
delete rtDispatchGlobalsInfo->rtDispatchGlobals[j];
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
|
||||
}
|
||||
delete rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation;
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
|
||||
delete rtDispatchGlobalsInfos[i];
|
||||
rtDispatchGlobalsInfos[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
Device::finalizeRayTracing();
|
||||
}
|
||||
|
||||
void setRTDispatchGlobalsForceAllocation() {
|
||||
rtDispatchGlobalsForceAllocation = true;
|
||||
}
|
||||
|
||||
static decltype(&createCommandStream) createCommandStreamReceiverFunc;
|
||||
|
||||
bool isDebuggerActiveParentCall = true;
|
||||
@@ -173,6 +194,7 @@ class MockDevice : public RootDevice {
|
||||
bool callBaseVerifyAdapterLuid = true;
|
||||
bool verifyAdapterLuidReturnValue = true;
|
||||
size_t maxParameterSizeFromIGC = 0u;
|
||||
bool rtDispatchGlobalsForceAllocation = true;
|
||||
};
|
||||
|
||||
template <>
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "shared/test/common/mocks/mock_compiler_interface.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -94,6 +95,19 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenRTDispatchGlobalsIsA
|
||||
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
|
||||
}
|
||||
|
||||
// Swaps a FailMemoryManager into the execution environment, initializes ray tracing for
// five BVH levels, and expects getRTDispatchGlobals to return nullptr.
TEST_F(DeviceTest, givenDispatchGlobalsAllocationFailsThenRTDispatchGlobalsInfoIsNull) {
    // NOTE(review): the leading 1 is presumably the number of allocations allowed to succeed
    // before FailMemoryManager starts failing — confirm against the mock.
    std::unique_ptr<NEO::MemoryManager> otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(1, *pDevice->getExecutionEnvironment());
    auto &activeMemoryManager = pDevice->getExecutionEnvironment()->memoryManager;
    activeMemoryManager.swap(otherMemoryManager);

    pDevice->initializeRayTracing(5);
    EXPECT_EQ(nullptr, pDevice->getRTDispatchGlobals(5));

    // Put the original memory manager back before the fixture tears down.
    activeMemoryManager.swap(otherMemoryManager);
}
|
||||
|
||||
TEST_F(DeviceTest, GivenDeviceWhenGenerateUuidThenValidValuesAreSet) {
|
||||
std::array<uint8_t, HwInfoConfig::uuidSize> uuid, expectedUuid;
|
||||
pDevice->generateUuid(uuid);
|
||||
@@ -369,3 +383,32 @@ TEST_F(DeviceGetCapsTest, givenFlagEnabled64kbPagesWhenCallConstructorMemoryMana
|
||||
memoryManager.reset(new MockMemoryManager(executionEnvironment));
|
||||
EXPECT_TRUE(memoryManager->peek64kbPagesEnabled(0u));
|
||||
}
|
||||
|
||||
// Creates a root device with two sub-devices, installs a memory manager whose third and
// later allocateGraphicsMemoryWithProperties calls return nullptr, and expects
// getRTDispatchGlobals to return nullptr.
TEST_F(DeviceTest, givenDispatchGlobalsAllocationFailsOnSecondSubDeviceThenRtDispatchGlobalsInfoIsNull) {
    // Forwards the first two allocation requests to MockMemoryManager, then fails every later one.
    class FailMockMemoryManager : public MockMemoryManager {
      public:
        FailMockMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : MockMemoryManager(false, false, executionEnvironment) {}

        GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) {
            ++allocateGraphicsMemoryWithPropertiesCount;
            if (allocateGraphicsMemoryWithPropertiesCount <= 2) {
                return MockMemoryManager::allocateGraphicsMemoryWithProperties(properties);
            }
            return nullptr;
        }
    };

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(-1);
    DebugManager.flags.CreateMultipleSubDevices.set(2u);

    UltDeviceFactory deviceFactory{1, 2};
    auto &rootDevice = deviceFactory.rootDevices[0];
    ExecutionEnvironment &executionEnvironment = *rootDevice->executionEnvironment;
    executionEnvironment.memoryManager = std::make_unique<FailMockMemoryManager>(executionEnvironment);

    rootDevice->initializeRayTracing(5);
    EXPECT_EQ(nullptr, rootDevice->getRTDispatchGlobals(5));
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -25,9 +25,7 @@ HWTEST2_F(CommandEncodeEnableRayTracing, whenEnableRayTracingIsProgrammedThen3DS
|
||||
MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
|
||||
LinearStream stream(&gfxAllocation);
|
||||
|
||||
MockGraphicsAllocation memoryBackedBuffer(static_cast<void *>(pMemoryBackedBuffer), sizeof(pMemoryBackedBuffer));
|
||||
|
||||
EncodeEnableRayTracing<FamilyType>::programEnableRayTracing(stream, memoryBackedBuffer);
|
||||
EncodeEnableRayTracing<FamilyType>::programEnableRayTracing(stream, reinterpret_cast<uint64_t>(&pMemoryBackedBuffer));
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -23,7 +23,7 @@ HWTEST_F(CommandEncodeEnableRayTracing, programEnableRayTracing) {
|
||||
MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
|
||||
LinearStream stream(&gfxAllocation);
|
||||
|
||||
MockGraphicsAllocation memoryBackedBuffer(static_cast<void *>(pMemoryBackedBuffer), sizeof(pMemoryBackedBuffer));
|
||||
uint64_t memoryBackedBuffer = reinterpret_cast<uint64_t>(&pMemoryBackedBuffer);
|
||||
|
||||
EncodeEnableRayTracing<FamilyType>::programEnableRayTracing(stream, memoryBackedBuffer);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user