Allocate per-tile RTDispatchGlobals, handle ray tracing patch tokens.

Related-to: NEO-6711

Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
This commit is contained in:
Jim Snow
2022-07-21 18:44:54 +00:00
committed by Compute-Runtime-Automation
parent 0eb2001d8f
commit f4879f064f
17 changed files with 313 additions and 134 deletions

View File

@@ -301,7 +301,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
return ZE_RESULT_ERROR_UNINITIALIZED;
} else {
NEO::LinearStream *linearStream = commandContainer.getCommandStream();
NEO::EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(*linearStream, *memoryBackedBuffer);
NEO::EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(*linearStream, memoryBackedBuffer->getGpuAddress());
}
}

View File

@@ -904,23 +904,23 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
if (this->usesRayTracing()) {
if (this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize > 0) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
neoDevice->initializeRayTracing(bvhLevels);
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobals == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
this->residencyContainer.push_back(rtDispatchGlobals);
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals,
static_cast<uintptr_t>(rtDispatchGlobals->getGpuAddressToPatch()));
} else {
neoDevice->initializeRayTracing(0);
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
neoDevice->initializeRayTracing(bvhLevels);
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobalsInfo == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) {
this->residencyContainer.push_back(rtDispatchGlobals);
}
auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch();
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals,
static_cast<uintptr_t>(address));
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
}
return ZE_RESULT_SUCCESS;

View File

@@ -732,53 +732,9 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
EXPECT_NE(nullptr, rtDispatchGlobals);
size_t residencySize = kernel->getResidencyContainer().size();
EXPECT_NE(0u, residencySize);
EXPECT_EQ(kernel->getResidencyContainer()[residencySize - 1], rtDispatchGlobals);
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation) {
KernelDescriptor mockDescriptor = {};
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
mockDescriptor.kernelMetadata.kernelName = "rt_test";
for (auto i = 0u; i < 3u; i++) {
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
}
std::unique_ptr<MockImmutableData> mockKernelImmutableData =
std::make_unique<MockImmutableData>(32u);
mockKernelImmutableData->kernelDescriptor = &mockDescriptor;
ModuleBuildLog *moduleBuildLog = nullptr;
module = std::make_unique<MockModule>(device,
moduleBuildLog,
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "rt_test";
auto immDataVector =
const_cast<std::vector<std::unique_ptr<KernelImmutableData>> *>(&module->getKernelImmutableDataVector());
immDataVector->push_back(std::move(mockKernelImmutableData));
auto result = kernel->initialize(&kernelDesc);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
EXPECT_EQ(nullptr, rtDispatchGlobals);
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) {
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
KernelDescriptor mockDescriptor = {};
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
mockDescriptor.kernelMetadata.kernelName = "rt_test";
@@ -809,14 +765,94 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAll
immDataVector->push_back(std::move(mockKernelImmutableData));
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(nullptr);
neoDevice->rtDispatchGlobalsForceAllocation = false;
delete driverHandle->svmAllocsManager;
execEnv->memoryManager.reset(new FailMemoryManager(0, *execEnv));
driverHandle->setMemoryManager(execEnv->memoryManager.get());
driverHandle->svmAllocsManager = new NEO::SVMAllocsManager(execEnv->memoryManager.get(), false);
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(0, *neoDevice->executionEnvironment);
neoDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, kernel->initialize(&kernelDesc));
neoDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
}
// Checks that KernelImp::initialize() returns ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY for a
// ray tracing kernel that carries an rtDispatchGlobals implicit argument when the
// memory manager installed for the duration of the call is a FailMemoryManager.
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArrayAllocationFailsThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
    // Kernel descriptor: uses RT calls, no required work-group size, and a
    // non-zero rtDispatchGlobals pointer size so the patching path is taken.
    KernelDescriptor mockDescriptor = {};
    mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          moduleBuildLog,
                                          ModuleType::User,
                                          32u,
                                          mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module exposes its immutable-data vector as const; cast that away so the
    // mock entry can be registered under the kernel name looked up by initialize().
    auto &immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    neoDevice->rtDispatchGlobalsForceAllocation = false;

    // Ensure that allocating RTDispatchGlobals succeeds, but the array allocation fails.
    // NOTE(review): this relies on the meaning of FailMemoryManager's first argument (1),
    // which is not visible here -- confirm against the mock.
    std::unique_ptr<NEO::MemoryManager> swappedMemoryManager =
        std::make_unique<NEO::FailMemoryManager>(1, *neoDevice->executionEnvironment);
    neoDevice->executionEnvironment->memoryManager.swap(swappedMemoryManager);

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, initResult);

    // Put the original memory manager back before the fixture tears down.
    neoDevice->executionEnvironment->memoryManager.swap(swappedMemoryManager);
}
// Checks that KernelImp::initialize() succeeds for a ray tracing kernel that carries an
// rtDispatchGlobals implicit argument when the regular memory manager is left in place.
// Restricted to PVC by the IsPVC matcher.
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArrayAllocationSucceedsThenRayTracingIsInitialized, IsPVC) {
    // Kernel descriptor: uses RT calls, no required work-group size, and a
    // non-zero rtDispatchGlobals pointer size so the patching path is taken.
    KernelDescriptor mockDescriptor = {};
    mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          moduleBuildLog,
                                          ModuleType::User,
                                          32u,
                                          mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module exposes its immutable-data vector as const; cast that away so the
    // mock entry can be registered under the kernel name looked up by initialize().
    auto &immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    neoDevice->rtDispatchGlobalsForceAllocation = false;

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, initResult);
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) {
@@ -902,7 +938,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
EXPECT_NE(nullptr, rtDispatchGlobals);
auto dispatchGlobalsAddressPatched = *reinterpret_cast<uint64_t *>(ptrOffset(crossThreadData.get(), rtGlobalPointerPatchOffset));
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->getGpuAddressToPatch());
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->rtDispatchGlobals[0]->getGpuAddressToPatch());
EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched);
kernel->crossThreadData.release();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -37,17 +37,14 @@ TEST(RayTracingHelperTests, whenGlobalDispatchSizeIsRequestedThenCorrectValueIsR
MockContext context(&device);
uint32_t maxBvhLevel = 2;
uint32_t extraBytesPerThread = 20;
uint32_t extraBytesLocal = 20;
uint32_t extraBytesGlobal = 100;
size_t expectedSize = alignUp(RayTracingHelper::getRtGlobalsSize(), MemoryConstants::cacheLineSize) +
alignUp((RayTracingHelper::hitInfoSize +
RayTracingHelper::bvhStackSize * maxBvhLevel +
extraBytesPerThread),
MemoryConstants::cacheLineSize) *
context.getDevice(0)->getHardwareInfo().gtSystemInfo.DualSubSliceCount * RayTracingHelper::stackDssMultiplier +
size_t expectedSize = alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize) +
(RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel + extraBytesLocal) * RayTracingHelper::getNumRtStacks(device.getDevice()) +
extraBytesGlobal;
EXPECT_EQ(expectedSize, RayTracingHelper::getDispatchGlobalSize(device.getDevice(), maxBvhLevel, extraBytesPerThread, extraBytesGlobal));
size_t size = RayTracingHelper::getDispatchGlobalSize(device.getDevice(), maxBvhLevel, extraBytesLocal, extraBytesGlobal);
EXPECT_EQ(expectedSize, size);
}
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsReturned) {
@@ -60,6 +57,16 @@ TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsRe
EXPECT_EQ(expectedValue, numDssRtStacks);
}
// Checks that RayTracingHelper::getNumRtStacks() equals
// getNumRtStacksPerDss() multiplied by the device's DualSubSliceCount.
TEST(RayTracingHelperTests, whenNumRtStacksIsQueriedThenItIsEqualToNumRtStacksPerDssMultipliedByDualSubsliceCount) {
    MockDevice device;

    const uint32_t stacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);
    const uint32_t totalStacks = RayTracingHelper::getNumRtStacks(device);
    const uint32_t dualSubSliceCount = device.getHardwareInfo().gtSystemInfo.DualSubSliceCount;

    EXPECT_EQ(totalStacks, stacksPerDss * dualSubSliceCount);
}
TEST(RayTracingHelperTests, whenNumDssIsRequestedThenCorrectValueIsReturned) {
MockDevice device;
EXPECT_EQ(device.getHardwareInfo().gtSystemInfo.DualSubSliceCount, RayTracingHelper::getNumDss(device));
@@ -72,9 +79,7 @@ TEST(RayTracingHelperTests, whenStackSizePerRayIsRequestedThenCorrectValueIsRetu
uint32_t maxBvhLevel = 1234;
uint32_t extraBytesLocal = 5678;
uint32_t expectedValue = alignUp((RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel +
extraBytesLocal),
MemoryConstants::cacheLineSize);
uint32_t expectedValue = RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel + extraBytesLocal;
EXPECT_EQ(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal), expectedValue);
}

View File

@@ -422,7 +422,7 @@ struct EncodeMiArbCheck {
template <typename GfxFamily>
struct EncodeEnableRayTracing {
static void programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer);
static void programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer);
static void append3dStateBtd(void *ptr3dStateBtd);
};

View File

@@ -477,7 +477,7 @@ void EncodeSempahore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
}
template <typename GfxFamily>
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
}
template <typename Family>

View File

@@ -11,7 +11,7 @@
namespace NEO {
template <typename GfxFamily>
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
}
} // namespace NEO

View File

@@ -25,10 +25,10 @@ void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfa
}
template <typename GfxFamily>
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
auto cmd = GfxFamily::cmd3dStateBtd;
cmd.getBtdStateBody().setPerDssMemoryBackedBufferSize(static_cast<typename GfxFamily::_3DSTATE_BTD_BODY::PER_DSS_MEMORY_BACKED_BUFFER_SIZE>(RayTracingHelper::getMemoryBackedFifoSizeToPatch()));
cmd.getBtdStateBody().setMemoryBackedBufferBasePointer(backBuffer.getGpuAddress());
cmd.getBtdStateBody().setMemoryBackedBufferBasePointer(backBuffer);
append3dStateBtd(&cmd);
*commandStream.getSpaceForCmd<typename GfxFamily::_3DSTATE_BTD>() = cmd;
}

View File

@@ -609,19 +609,19 @@ EngineControl *Device::getInternalCopyEngine() {
return nullptr;
}
GraphicsAllocation *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
if (rtDispatchGlobals.size() == 0) {
RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
if (rtDispatchGlobalsInfos.size() == 0) {
return nullptr;
}
size_t last = rtDispatchGlobals.size() - 1;
size_t last = rtDispatchGlobalsInfos.size() - 1;
if (maxBvhLevels > last) {
return nullptr;
}
for (size_t i = last; i >= maxBvhLevels; i--) {
if (rtDispatchGlobals[i] != nullptr) {
return rtDispatchGlobals[i];
if (rtDispatchGlobalsInfos[i] != nullptr) {
return rtDispatchGlobalsInfos[i];
}
if (i == 0) {
@@ -630,17 +630,22 @@ GraphicsAllocation *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
}
allocateRTDispatchGlobals(maxBvhLevels);
return rtDispatchGlobals[maxBvhLevels];
return rtDispatchGlobalsInfos[maxBvhLevels];
}
void Device::initializeRayTracing(uint32_t maxBvhLevels) {
if (rtMemoryBackedBuffer == nullptr) {
auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties({getRootDeviceIndex(), size, AllocationType::BUFFER, getDeviceBitfield()});
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::BUFFER, true, getDeviceBitfield());
allocProps.flags.resource48Bit = true;
allocProps.flags.isUSMDeviceAllocation = true;
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
}
while (rtDispatchGlobals.size() <= maxBvhLevels) {
rtDispatchGlobals.push_back(nullptr);
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
rtDispatchGlobalsInfos.push_back(nullptr);
}
}
@@ -648,9 +653,21 @@ void Device::finalizeRayTracing() {
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
rtMemoryBackedBuffer = nullptr;
for (size_t i = 0; i < rtDispatchGlobals.size(); i++) {
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobals[i]);
rtDispatchGlobals[i] = nullptr;
for (size_t i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
if (rtDispatchGlobalsInfo == nullptr) {
continue;
}
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobals[j]);
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
}
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation);
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
delete rtDispatchGlobalsInfos[i];
rtDispatchGlobalsInfos[i] = nullptr;
}
}
@@ -722,39 +739,89 @@ void Device::getAdapterMask(uint32_t &nodeMask) {
}
void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
DEBUG_BREAK_IF(rtDispatchGlobals.size() < maxBvhLevels + 1);
DEBUG_BREAK_IF(rtDispatchGlobals[maxBvhLevels] != nullptr);
UNRECOVERABLE_IF(rtDispatchGlobalsInfos.size() < maxBvhLevels + 1);
UNRECOVERABLE_IF(rtDispatchGlobalsInfos[maxBvhLevels] != nullptr);
uint32_t extraBytesLocal = 0;
uint32_t extraBytesGlobal = 0;
auto size = RayTracingHelper::getDispatchGlobalSize(*this, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
auto dispatchGlobalsAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({getRootDeviceIndex(), size, AllocationType::BUFFER, getDeviceBitfield()});
if (nullptr == dispatchGlobalsAllocation) {
const auto deviceCount = HwHelper::getSubDevicesCount(executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getHardwareInfo());
auto dispatchGlobalsInfo = new RTDispatchGlobalsInfo(nullptr);
if (dispatchGlobalsInfo == nullptr) {
return;
}
struct RTDispatchGlobals dispatchGlobals = {0};
auto numRtStacks = RayTracingHelper::getNumRtStacks(*this);
auto stackSizePerRay = RayTracingHelper::getStackSizePerRay(maxBvhLevels, 0);
size_t rtMemOffset = alignUp(stackSizePerRay * numRtStacks, MemoryConstants::cacheLineSize);
auto &hwInfo = getHardwareInfo();
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
dispatchGlobals.rtMemBasePtr = rtMemOffset;
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
dispatchGlobals.stackSizePerRay = stackSizePerRay / 64;
dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
dispatchGlobals.maxBVHLevels = maxBvhLevels;
std::vector<uint64_t> gpuAddressVector;
bool allocFailed = false;
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *dispatchGlobalsAllocation),
for (unsigned int tile = 0; tile < deviceCount; tile++) {
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::BUFFER, true, getDeviceBitfield());
allocProps.flags.resource48Bit = true;
allocProps.flags.isUSMDeviceAllocation = true;
auto dispatchGlobalsAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
if (dispatchGlobalsAllocation == nullptr) {
allocFailed = true;
break;
}
auto dispatchGlobalsPtr = dispatchGlobalsAllocation->getGpuAddress();
struct RTDispatchGlobals dispatchGlobals = {0};
dispatchGlobals.rtMemBasePtr = size + dispatchGlobalsPtr;
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
dispatchGlobals.stackSizePerRay = 0;
dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
dispatchGlobals.maxBVHLevels = maxBvhLevels;
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
dispatchGlobalsAsArray[7] = 1;
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsAllocation),
*this,
dispatchGlobalsAllocation,
0,
&dispatchGlobals,
sizeof(RTDispatchGlobals));
dispatchGlobalsInfo->rtDispatchGlobals.push_back(dispatchGlobalsAllocation);
gpuAddressVector.push_back(dispatchGlobalsAllocation->getGpuAddress());
}
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
size_t arrayAllocSize = sizeof(uint64_t) * deviceCount;
if (!allocFailed) {
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, arrayAllocSize,
AllocationType::BUFFER, true, getDeviceBitfield());
arrayAllocProps.flags.resource48Bit = true;
arrayAllocProps.flags.isUSMDeviceAllocation = true;
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
}
if (dispatchGlobalsArrayAllocation == nullptr) {
for (auto allocation : dispatchGlobalsInfo->rtDispatchGlobals) {
getMemoryManager()->freeGraphicsMemory(allocation);
}
delete dispatchGlobalsInfo;
return;
}
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsArrayAllocation),
*this,
dispatchGlobalsAllocation,
dispatchGlobalsArrayAllocation,
0,
&dispatchGlobals,
sizeof(RTDispatchGlobals));
gpuAddressVector.data(),
arrayAllocSize);
rtDispatchGlobals[maxBvhLevels] = dispatchGlobalsAllocation;
dispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = dispatchGlobalsArrayAllocation;
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo;
}
} // namespace NEO

View File

@@ -40,6 +40,13 @@ struct EngineGroupT {
};
using EngineGroupsT = std::vector<EngineGroupT>;
struct RTDispatchGlobalsInfo {
RTDispatchGlobalsInfo(GraphicsAllocation *rtDispatchGlobalsArrayAllocation)
: rtDispatchGlobalsArrayAllocation(rtDispatchGlobalsArrayAllocation){};
std::vector<GraphicsAllocation *> rtDispatchGlobals; // per tile
GraphicsAllocation *rtDispatchGlobalsArrayAllocation; // above array as visible from device
};
class Device : public ReferenceTrackedObject<Device> {
public:
Device &operator=(const Device &) = delete;
@@ -130,9 +137,10 @@ class Device : public ReferenceTrackedObject<Device> {
static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
std::unique_ptr<SyncBufferHandler> syncBufferHandler;
GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; }
GraphicsAllocation *getRTDispatchGlobals(uint32_t maxBvhLevels);
RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels);
bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; }
void initializeRayTracing(uint32_t maxBvhLevels);
void allocateRTDispatchGlobals(uint32_t maxBvhLevels);
uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
const std::vector<SubDevice *> getSubDevices() const { return subdevices; }
@@ -174,7 +182,6 @@ class Device : public ReferenceTrackedObject<Device> {
virtual bool genericSubDevicesAllowed();
bool engineInstancedSubDevicesAllowed();
void setAsEngineInstanced();
void allocateRTDispatchGlobals(uint32_t maxBvhLevels);
void finalizeRayTracing();
DeviceInfo deviceInfo = {};
@@ -206,7 +213,8 @@ class Device : public ReferenceTrackedObject<Device> {
uintptr_t specializedDevice = reinterpret_cast<uintptr_t>(nullptr);
GraphicsAllocation *rtMemoryBackedBuffer = nullptr;
std::vector<GraphicsAllocation *> rtDispatchGlobals;
std::vector<RTDispatchGlobalsInfo *> rtDispatchGlobalsInfos;
struct {
bool isValid = false;
std::array<uint8_t, HwInfoConfig::uuidSize> id;

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2021 Intel Corporation
# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

View File

@@ -411,6 +411,10 @@ inline bool decodeToken(const SPatchItemHeader *token, KernelFromPatchtokens &ou
case PATCH_TOKEN_ALLOCATE_SYNC_BUFFER: {
assignToken(out.tokens.allocateSyncBuffer, token);
} break;
case PATCH_TOKEN_ALLOCATE_RT_GLOBAL_BUFFER:
assignToken(out.tokens.allocateRTGlobalBuffer, token);
break;
}
return out.decodeStatus != DecodeError::InvalidBinary;

View File

@@ -25,7 +25,7 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
static constexpr uint32_t maxBvhLevels = 8;
static size_t getDispatchGlobalSize(const Device &device, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
return static_cast<size_t>(alignUp(getRtGlobalsSize(), MemoryConstants::cacheLineSize) +
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize) +
getStackSizePerRay(maxBvhLevel, extraBytesLocal) * getNumRtStacks(device) +
extraBytesGlobal);
}
@@ -38,8 +38,6 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
return static_cast<size_t>(Math::log2(memoryBackedFifoSizePerDss / KB) - 1);
}
static size_t getRtGlobalsSize() { return sizeof(RTDispatchGlobals); }
// Returns the device's DualSubSliceCount multiplied by stackDssMultiplier.
static uint32_t getNumRtStacks(const Device &device) {
    const auto &systemInfo = device.getHardwareInfo().gtSystemInfo;
    return systemInfo.DualSubSliceCount * stackDssMultiplier;
}
@@ -53,9 +51,7 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
}
static uint32_t getStackSizePerRay(uint32_t maxBvhLevel, uint32_t extraBytesLocal) {
return alignUp((hitInfoSize + bvhStackSize * maxBvhLevel +
extraBytesLocal),
MemoryConstants::cacheLineSize);
return hitInfoSize + bvhStackSize * maxBvhLevel + extraBytesLocal;
}
};
} // namespace NEO

View File

@@ -48,7 +48,6 @@ class MockDevice : public RootDevice {
public:
using Device::addEngineToEngineGroup;
using Device::allEngines;
using Device::allocateRTDispatchGlobals;
using Device::commandStreamReceivers;
using Device::createDeviceInternals;
using Device::createEngine;
@@ -165,6 +164,28 @@ class MockDevice : public RootDevice {
bool verifyAdapterLuid() override;
void finalizeRayTracing() {
for (unsigned int i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
if (rtDispatchGlobalsForceAllocation == true && rtDispatchGlobalsInfo != nullptr) {
for (unsigned int j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
delete rtDispatchGlobalsInfo->rtDispatchGlobals[j];
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
}
delete rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation;
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
delete rtDispatchGlobalsInfos[i];
rtDispatchGlobalsInfos[i] = nullptr;
}
}
Device::finalizeRayTracing();
}
void setRTDispatchGlobalsForceAllocation() {
rtDispatchGlobalsForceAllocation = true;
}
static decltype(&createCommandStream) createCommandStreamReceiverFunc;
bool isDebuggerActiveParentCall = true;
@@ -173,6 +194,7 @@ class MockDevice : public RootDevice {
bool callBaseVerifyAdapterLuid = true;
bool verifyAdapterLuidReturnValue = true;
size_t maxParameterSizeFromIGC = 0u;
bool rtDispatchGlobalsForceAllocation = true;
};
template <>

View File

@@ -14,6 +14,7 @@
#include "shared/test/common/mocks/mock_compiler_interface.h"
#include "shared/test/common/mocks/mock_compilers.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test.h"
@@ -94,6 +95,19 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenRTDispatchGlobalsIsA
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
}
// Checks that Device::getRTDispatchGlobals() returns nullptr when ray tracing is
// initialized and queried while a FailMemoryManager is installed.
TEST_F(DeviceTest, givenDispatchGlobalsAllocationFailsThenRTDispatchGlobalsInfoIsNull) {
    constexpr uint32_t maxBvhLevels = 5;

    // Temporarily replace the execution environment's memory manager.
    std::unique_ptr<NEO::MemoryManager> swappedMemoryManager =
        std::make_unique<NEO::FailMemoryManager>(1, *pDevice->getExecutionEnvironment());
    pDevice->getExecutionEnvironment()->memoryManager.swap(swappedMemoryManager);

    pDevice->initializeRayTracing(maxBvhLevels);
    auto dispatchGlobalsInfo = pDevice->getRTDispatchGlobals(maxBvhLevels);
    EXPECT_EQ(nullptr, dispatchGlobalsInfo);

    // Put the original memory manager back before the fixture tears down.
    pDevice->getExecutionEnvironment()->memoryManager.swap(swappedMemoryManager);
}
TEST_F(DeviceTest, GivenDeviceWhenGenerateUuidThenValidValuesAreSet) {
std::array<uint8_t, HwInfoConfig::uuidSize> uuid, expectedUuid;
pDevice->generateUuid(uuid);
@@ -369,3 +383,32 @@ TEST_F(DeviceGetCapsTest, givenFlagEnabled64kbPagesWhenCallConstructorMemoryMana
memoryManager.reset(new MockMemoryManager(executionEnvironment));
EXPECT_TRUE(memoryManager->peek64kbPagesEnabled(0u));
}
// Checks that Device::getRTDispatchGlobals() returns nullptr on a root device created
// with two sub-devices when the memory manager stops handing out allocations after the
// second allocateGraphicsMemoryWithProperties() call.
TEST_F(DeviceTest, givenDispatchGlobalsAllocationFailsOnSecondSubDeviceThenRtDispatchGlobalsInfoIsNull) {
    // Memory manager that serves the first two allocation requests through
    // MockMemoryManager and returns nullptr for every request after that.
    class FailMockMemoryManager : public MockMemoryManager {
      public:
        FailMockMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : MockMemoryManager(false, false, executionEnvironment) {}

        GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) {
            ++allocateGraphicsMemoryWithPropertiesCount;
            if (allocateGraphicsMemoryWithPropertiesCount > 2) {
                return nullptr;
            }
            return MockMemoryManager::allocateGraphicsMemoryWithProperties(properties);
        }
    };

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(-1);
    DebugManager.flags.CreateMultipleSubDevices.set(2u);

    UltDeviceFactory deviceFactory{1, 2};
    auto &rootDevice = *deviceFactory.rootDevices[0];

    ExecutionEnvironment &executionEnvironment = *rootDevice.executionEnvironment;
    executionEnvironment.memoryManager = std::make_unique<FailMockMemoryManager>(executionEnvironment);

    rootDevice.initializeRayTracing(5);
    auto dispatchGlobalsInfo = rootDevice.getRTDispatchGlobals(5);
    EXPECT_EQ(nullptr, dispatchGlobalsInfo);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,7 @@ HWTEST2_F(CommandEncodeEnableRayTracing, whenEnableRayTracingIsProgrammedThen3DS
MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
LinearStream stream(&gfxAllocation);
MockGraphicsAllocation memoryBackedBuffer(static_cast<void *>(pMemoryBackedBuffer), sizeof(pMemoryBackedBuffer));
EncodeEnableRayTracing<FamilyType>::programEnableRayTracing(stream, memoryBackedBuffer);
EncodeEnableRayTracing<FamilyType>::programEnableRayTracing(stream, reinterpret_cast<uint64_t>(&pMemoryBackedBuffer));
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,7 @@ HWTEST_F(CommandEncodeEnableRayTracing, programEnableRayTracing) {
MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
LinearStream stream(&gfxAllocation);
MockGraphicsAllocation memoryBackedBuffer(static_cast<void *>(pMemoryBackedBuffer), sizeof(pMemoryBackedBuffer));
uint64_t memoryBackedBuffer = reinterpret_cast<uint64_t>(&pMemoryBackedBuffer);
EncodeEnableRayTracing<FamilyType>::programEnableRayTracing(stream, memoryBackedBuffer);
}