Revert "Allocate RTDispatchGlobals as unboxed array"

This reverts commit eaa4965ae8.

Signed-off-by: Jim Snow <jim.m.snow@intel.com>
This commit is contained in:
Jim Snow
2022-10-22 04:28:30 +00:00
committed by Compute-Runtime-Automation
parent d653779098
commit f976c7a313
8 changed files with 68 additions and 94 deletions

View File

@@ -663,13 +663,13 @@ void Device::finalizeRayTracing() {
if (rtDispatchGlobalsInfo == nullptr) {
continue;
}
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtStacks.size(); j++) {
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtStacks[j]);
rtDispatchGlobalsInfo->rtStacks[j] = nullptr;
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobals[j]);
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
}
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr;
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation);
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
delete rtDispatchGlobalsInfos[i];
rtDispatchGlobalsInfos[i] = nullptr;
@@ -749,16 +749,11 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
uint32_t extraBytesLocal = 0;
uint32_t extraBytesGlobal = 0;
uint32_t dispatchGlobalsStride = MemoryConstants::pageSize64k;
UNRECOVERABLE_IF(RayTracingHelper::getDispatchGlobalSize() > dispatchGlobalsStride);
bool allocFailed = false;
auto size = RayTracingHelper::getDispatchGlobalSize(*this, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
const auto deviceCount = HwHelper::getSubDevicesCount(executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getHardwareInfo());
auto dispatchGlobalsSize = deviceCount * dispatchGlobalsStride;
auto rtStackSize = RayTracingHelper::getRTStackSizePerTile(*this, deviceCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
std::unique_ptr<RTDispatchGlobalsInfo> dispatchGlobalsInfo = std::make_unique<RTDispatchGlobalsInfo>();
auto dispatchGlobalsInfo = new RTDispatchGlobalsInfo(nullptr);
if (dispatchGlobalsInfo == nullptr) {
return;
}
@@ -766,38 +761,25 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
auto &hwInfo = getHardwareInfo();
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, dispatchGlobalsSize,
AllocationType::BUFFER, true, getDeviceBitfield());
arrayAllocProps.flags.resource48Bit = true;
arrayAllocProps.flags.isUSMDeviceAllocation = true;
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
if (dispatchGlobalsArrayAllocation == nullptr) {
return;
}
std::vector<uint64_t> gpuAddressVector;
bool allocFailed = false;
for (unsigned int tile = 0; tile < deviceCount; tile++) {
DeviceBitfield deviceBitfield =
(deviceCount == 1)
? this->getDeviceBitfield()
: subdevices[tile]->getDeviceBitfield();
AllocationProperties allocProps(getRootDeviceIndex(), true, rtStackSize, AllocationType::BUFFER, true, deviceBitfield);
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::BUFFER, true, getDeviceBitfield());
allocProps.flags.resource48Bit = true;
allocProps.flags.isUSMDeviceAllocation = true;
auto rtStackAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
auto dispatchGlobalsAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
if (rtStackAllocation == nullptr) {
if (dispatchGlobalsAllocation == nullptr) {
allocFailed = true;
break;
}
auto dispatchGlobalsPtr = dispatchGlobalsAllocation->getGpuAddress();
struct RTDispatchGlobals dispatchGlobals = {0};
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress();
dispatchGlobals.rtMemBasePtr = size + dispatchGlobalsPtr;
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
dispatchGlobals.stackSizePerRay = 0;
dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
@@ -806,27 +788,45 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
dispatchGlobalsAsArray[7] = 1;
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsArrayAllocation),
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsAllocation),
*this,
dispatchGlobalsArrayAllocation,
tile * dispatchGlobalsStride,
dispatchGlobalsAllocation,
0,
&dispatchGlobals,
sizeof(RTDispatchGlobals));
dispatchGlobalsInfo->rtStacks.push_back(rtStackAllocation);
dispatchGlobalsInfo->rtDispatchGlobals.push_back(dispatchGlobalsAllocation);
gpuAddressVector.push_back(dispatchGlobalsAllocation->getGpuAddress());
}
if (allocFailed) {
for (auto allocation : dispatchGlobalsInfo->rtStacks) {
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
size_t arrayAllocSize = sizeof(uint64_t) * deviceCount;
if (!allocFailed) {
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, arrayAllocSize,
AllocationType::BUFFER, true, getDeviceBitfield());
arrayAllocProps.flags.resource48Bit = true;
arrayAllocProps.flags.isUSMDeviceAllocation = true;
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
}
if (dispatchGlobalsArrayAllocation == nullptr) {
for (auto allocation : dispatchGlobalsInfo->rtDispatchGlobals) {
getMemoryManager()->freeGraphicsMemory(allocation);
}
getMemoryManager()->freeGraphicsMemory(dispatchGlobalsArrayAllocation);
delete dispatchGlobalsInfo;
return;
}
dispatchGlobalsInfo->rtDispatchGlobalsArray = dispatchGlobalsArrayAllocation;
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo.release();
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsArrayAllocation),
*this,
dispatchGlobalsArrayAllocation,
0,
gpuAddressVector.data(),
arrayAllocSize);
dispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = dispatchGlobalsArrayAllocation;
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo;
}
} // namespace NEO

View File

@@ -41,8 +41,10 @@ struct EngineGroupT {
using EngineGroupsT = std::vector<EngineGroupT>;
struct RTDispatchGlobalsInfo {
GraphicsAllocation *rtDispatchGlobalsArray = nullptr;
std::vector<GraphicsAllocation *> rtStacks; // per tile
RTDispatchGlobalsInfo(GraphicsAllocation *rtDispatchGlobalsArrayAllocation)
: rtDispatchGlobalsArrayAllocation(rtDispatchGlobalsArrayAllocation){};
std::vector<GraphicsAllocation *> rtDispatchGlobals; // per tile
GraphicsAllocation *rtDispatchGlobalsArrayAllocation; // above array as visible from device
};
class Device : public ReferenceTrackedObject<Device> {

View File

@@ -24,12 +24,10 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * KB;
static constexpr uint32_t maxBvhLevels = 8;
static size_t getDispatchGlobalSize() {
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize));
}
static size_t getRTStackSizePerTile(const Device &device, uint32_t tiles, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
return static_cast<size_t>(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device) / tiles) + extraBytesGlobal);
static size_t getDispatchGlobalSize(const Device &device, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize) +
getStackSizePerRay(maxBvhLevel, extraBytesLocal) * getNumRtStacks(device) +
extraBytesGlobal);
}
static size_t getTotalMemoryBackedFifoSize(const Device &device) {

View File

@@ -168,12 +168,12 @@ class MockDevice : public RootDevice {
for (unsigned int i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
if (rtDispatchGlobalsForceAllocation == true && rtDispatchGlobalsInfo != nullptr) {
for (unsigned int j = 0; j < rtDispatchGlobalsInfo->rtStacks.size(); j++) {
delete rtDispatchGlobalsInfo->rtStacks[j];
rtDispatchGlobalsInfo->rtStacks[j] = nullptr;
for (unsigned int j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
delete rtDispatchGlobalsInfo->rtDispatchGlobals[j];
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
}
delete rtDispatchGlobalsInfo->rtDispatchGlobalsArray;
rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr;
delete rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation;
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
delete rtDispatchGlobalsInfos[i];
rtDispatchGlobalsInfos[i] = nullptr;
}

View File

@@ -98,26 +98,6 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenRTDispatchGlobalsIsA
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
}
HWTEST2_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledAndRTStackAllocationFailsRTDispatchGlobalsIsNotAllocated, IsPVC) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.CreateMultipleSubDevices.set(2);
pDevice->deviceBitfield = 3;
pDevice->subdevices.push_back(new SubDevice(pDevice->executionEnvironment, 0, *pDevice));
pDevice->subdevices.push_back(new SubDevice(pDevice->executionEnvironment, 1, *pDevice));
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
otherMemoryManager = std::make_unique<NEO::MockMemoryManagerWithCapacity>(*pDevice->executionEnvironment);
static_cast<NEO::MockMemoryManagerWithCapacity &>(*otherMemoryManager).capacity = 25000000;
pDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
pDevice->initializeRayTracing(5);
EXPECT_EQ(nullptr, pDevice->getRTDispatchGlobals(3));
pDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
}
TEST_F(DeviceTest, givenDispatchGlobalsAllocationFailsThenRTDispatchGlobalsInfoIsNull) {
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(1, *pDevice->getExecutionEnvironment());
@@ -644,4 +624,4 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceC
auto device = deviceFactory.rootDevices[0];
auto csr = device->allEngines[device->defaultEngineIndex].commandStreamReceiver;
EXPECT_EQ(0u, csr->peekLatestSentTaskCount());
}
}