mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Revert "Allocate RTDispatchGlobals as unboxed array"
This reverts commit eaa4965ae8.
Signed-off-by: Jim Snow <jim.m.snow@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d653779098
commit
f976c7a313
@@ -663,13 +663,13 @@ void Device::finalizeRayTracing() {
|
||||
if (rtDispatchGlobalsInfo == nullptr) {
|
||||
continue;
|
||||
}
|
||||
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtStacks.size(); j++) {
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtStacks[j]);
|
||||
rtDispatchGlobalsInfo->rtStacks[j] = nullptr;
|
||||
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobals[j]);
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
|
||||
}
|
||||
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr;
|
||||
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation);
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
|
||||
|
||||
delete rtDispatchGlobalsInfos[i];
|
||||
rtDispatchGlobalsInfos[i] = nullptr;
|
||||
@@ -749,16 +749,11 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
|
||||
uint32_t extraBytesLocal = 0;
|
||||
uint32_t extraBytesGlobal = 0;
|
||||
uint32_t dispatchGlobalsStride = MemoryConstants::pageSize64k;
|
||||
UNRECOVERABLE_IF(RayTracingHelper::getDispatchGlobalSize() > dispatchGlobalsStride);
|
||||
|
||||
bool allocFailed = false;
|
||||
auto size = RayTracingHelper::getDispatchGlobalSize(*this, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
|
||||
|
||||
const auto deviceCount = HwHelper::getSubDevicesCount(executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getHardwareInfo());
|
||||
auto dispatchGlobalsSize = deviceCount * dispatchGlobalsStride;
|
||||
auto rtStackSize = RayTracingHelper::getRTStackSizePerTile(*this, deviceCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
|
||||
|
||||
std::unique_ptr<RTDispatchGlobalsInfo> dispatchGlobalsInfo = std::make_unique<RTDispatchGlobalsInfo>();
|
||||
auto dispatchGlobalsInfo = new RTDispatchGlobalsInfo(nullptr);
|
||||
if (dispatchGlobalsInfo == nullptr) {
|
||||
return;
|
||||
}
|
||||
@@ -766,38 +761,25 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
auto &hwInfo = getHardwareInfo();
|
||||
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
|
||||
|
||||
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, dispatchGlobalsSize,
|
||||
AllocationType::BUFFER, true, getDeviceBitfield());
|
||||
arrayAllocProps.flags.resource48Bit = true;
|
||||
arrayAllocProps.flags.isUSMDeviceAllocation = true;
|
||||
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
|
||||
if (dispatchGlobalsArrayAllocation == nullptr) {
|
||||
return;
|
||||
}
|
||||
std::vector<uint64_t> gpuAddressVector;
|
||||
bool allocFailed = false;
|
||||
|
||||
for (unsigned int tile = 0; tile < deviceCount; tile++) {
|
||||
DeviceBitfield deviceBitfield =
|
||||
(deviceCount == 1)
|
||||
? this->getDeviceBitfield()
|
||||
: subdevices[tile]->getDeviceBitfield();
|
||||
|
||||
AllocationProperties allocProps(getRootDeviceIndex(), true, rtStackSize, AllocationType::BUFFER, true, deviceBitfield);
|
||||
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::BUFFER, true, getDeviceBitfield());
|
||||
allocProps.flags.resource48Bit = true;
|
||||
allocProps.flags.isUSMDeviceAllocation = true;
|
||||
|
||||
auto rtStackAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
auto dispatchGlobalsAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
|
||||
if (rtStackAllocation == nullptr) {
|
||||
if (dispatchGlobalsAllocation == nullptr) {
|
||||
allocFailed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto dispatchGlobalsPtr = dispatchGlobalsAllocation->getGpuAddress();
|
||||
struct RTDispatchGlobals dispatchGlobals = {0};
|
||||
|
||||
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress();
|
||||
dispatchGlobals.rtMemBasePtr = size + dispatchGlobalsPtr;
|
||||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
dispatchGlobals.stackSizePerRay = 0;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
|
||||
@@ -806,27 +788,45 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
||||
dispatchGlobalsAsArray[7] = 1;
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsArrayAllocation),
|
||||
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsAllocation),
|
||||
*this,
|
||||
dispatchGlobalsArrayAllocation,
|
||||
tile * dispatchGlobalsStride,
|
||||
dispatchGlobalsAllocation,
|
||||
0,
|
||||
&dispatchGlobals,
|
||||
sizeof(RTDispatchGlobals));
|
||||
|
||||
dispatchGlobalsInfo->rtStacks.push_back(rtStackAllocation);
|
||||
dispatchGlobalsInfo->rtDispatchGlobals.push_back(dispatchGlobalsAllocation);
|
||||
gpuAddressVector.push_back(dispatchGlobalsAllocation->getGpuAddress());
|
||||
}
|
||||
|
||||
if (allocFailed) {
|
||||
for (auto allocation : dispatchGlobalsInfo->rtStacks) {
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
|
||||
size_t arrayAllocSize = sizeof(uint64_t) * deviceCount;
|
||||
|
||||
if (!allocFailed) {
|
||||
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, arrayAllocSize,
|
||||
AllocationType::BUFFER, true, getDeviceBitfield());
|
||||
arrayAllocProps.flags.resource48Bit = true;
|
||||
arrayAllocProps.flags.isUSMDeviceAllocation = true;
|
||||
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
}
|
||||
|
||||
if (dispatchGlobalsArrayAllocation == nullptr) {
|
||||
for (auto allocation : dispatchGlobalsInfo->rtDispatchGlobals) {
|
||||
getMemoryManager()->freeGraphicsMemory(allocation);
|
||||
}
|
||||
|
||||
getMemoryManager()->freeGraphicsMemory(dispatchGlobalsArrayAllocation);
|
||||
delete dispatchGlobalsInfo;
|
||||
return;
|
||||
}
|
||||
|
||||
dispatchGlobalsInfo->rtDispatchGlobalsArray = dispatchGlobalsArrayAllocation;
|
||||
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo.release();
|
||||
MemoryTransferHelper::transferMemoryToAllocation(hwInfoConfig.isBlitCopyRequiredForLocalMemory(this->getHardwareInfo(), *dispatchGlobalsArrayAllocation),
|
||||
*this,
|
||||
dispatchGlobalsArrayAllocation,
|
||||
0,
|
||||
gpuAddressVector.data(),
|
||||
arrayAllocSize);
|
||||
|
||||
dispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = dispatchGlobalsArrayAllocation;
|
||||
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -41,8 +41,10 @@ struct EngineGroupT {
|
||||
using EngineGroupsT = std::vector<EngineGroupT>;
|
||||
|
||||
struct RTDispatchGlobalsInfo {
|
||||
GraphicsAllocation *rtDispatchGlobalsArray = nullptr;
|
||||
std::vector<GraphicsAllocation *> rtStacks; // per tile
|
||||
RTDispatchGlobalsInfo(GraphicsAllocation *rtDispatchGlobalsArrayAllocation)
|
||||
: rtDispatchGlobalsArrayAllocation(rtDispatchGlobalsArrayAllocation){};
|
||||
std::vector<GraphicsAllocation *> rtDispatchGlobals; // per tile
|
||||
GraphicsAllocation *rtDispatchGlobalsArrayAllocation; // above array as visible from device
|
||||
};
|
||||
|
||||
class Device : public ReferenceTrackedObject<Device> {
|
||||
|
||||
@@ -24,12 +24,10 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
|
||||
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * KB;
|
||||
static constexpr uint32_t maxBvhLevels = 8;
|
||||
|
||||
static size_t getDispatchGlobalSize() {
|
||||
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getRTStackSizePerTile(const Device &device, uint32_t tiles, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
|
||||
return static_cast<size_t>(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device) / tiles) + extraBytesGlobal);
|
||||
static size_t getDispatchGlobalSize(const Device &device, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
|
||||
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize) +
|
||||
getStackSizePerRay(maxBvhLevel, extraBytesLocal) * getNumRtStacks(device) +
|
||||
extraBytesGlobal);
|
||||
}
|
||||
|
||||
static size_t getTotalMemoryBackedFifoSize(const Device &device) {
|
||||
|
||||
@@ -168,12 +168,12 @@ class MockDevice : public RootDevice {
|
||||
for (unsigned int i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
|
||||
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
|
||||
if (rtDispatchGlobalsForceAllocation == true && rtDispatchGlobalsInfo != nullptr) {
|
||||
for (unsigned int j = 0; j < rtDispatchGlobalsInfo->rtStacks.size(); j++) {
|
||||
delete rtDispatchGlobalsInfo->rtStacks[j];
|
||||
rtDispatchGlobalsInfo->rtStacks[j] = nullptr;
|
||||
for (unsigned int j = 0; j < rtDispatchGlobalsInfo->rtDispatchGlobals.size(); j++) {
|
||||
delete rtDispatchGlobalsInfo->rtDispatchGlobals[j];
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobals[j] = nullptr;
|
||||
}
|
||||
delete rtDispatchGlobalsInfo->rtDispatchGlobalsArray;
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr;
|
||||
delete rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation;
|
||||
rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr;
|
||||
delete rtDispatchGlobalsInfos[i];
|
||||
rtDispatchGlobalsInfos[i] = nullptr;
|
||||
}
|
||||
|
||||
@@ -98,26 +98,6 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenRTDispatchGlobalsIsA
|
||||
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledAndRTStackAllocationFailsRTDispatchGlobalsIsNotAllocated, IsPVC) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
pDevice->deviceBitfield = 3;
|
||||
|
||||
pDevice->subdevices.push_back(new SubDevice(pDevice->executionEnvironment, 0, *pDevice));
|
||||
pDevice->subdevices.push_back(new SubDevice(pDevice->executionEnvironment, 1, *pDevice));
|
||||
|
||||
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
|
||||
otherMemoryManager = std::make_unique<NEO::MockMemoryManagerWithCapacity>(*pDevice->executionEnvironment);
|
||||
static_cast<NEO::MockMemoryManagerWithCapacity &>(*otherMemoryManager).capacity = 25000000;
|
||||
pDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
|
||||
|
||||
pDevice->initializeRayTracing(5);
|
||||
EXPECT_EQ(nullptr, pDevice->getRTDispatchGlobals(3));
|
||||
|
||||
pDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
|
||||
}
|
||||
|
||||
TEST_F(DeviceTest, givenDispatchGlobalsAllocationFailsThenRTDispatchGlobalsInfoIsNull) {
|
||||
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
|
||||
otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(1, *pDevice->getExecutionEnvironment());
|
||||
@@ -644,4 +624,4 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenNonDebuggableOsContextWhenDeviceC
|
||||
auto device = deviceFactory.rootDevices[0];
|
||||
auto csr = device->allEngines[device->defaultEngineIndex].commandStreamReceiver;
|
||||
EXPECT_EQ(0u, csr->peekLatestSentTaskCount());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user