Add RTDispatchGlobals allocation for ray tracing

If a kernel has ray tracing calls, we allocate and initialize the
per-device RTDispatchGlobals if needed, and pass a pointer to it
to the running kernel via an implicit parameter.

Related-To: NEO-5384
Signed-off-by: Jim Snow <jim.m.snow@intel.com>
This commit is contained in:
Jim Snow
2021-07-28 04:31:52 +00:00
committed by Compute-Runtime-Automation
parent 7d5924cd98
commit 2dfb7df63b
17 changed files with 213 additions and 18 deletions

View File

@@ -13,6 +13,7 @@
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/local_work_size.h"
#include "shared/source/helpers/per_thread_data.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h"
@@ -902,8 +903,18 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
if (this->usesRayTracing()) {
neoDevice->initializeRayTracing();
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
neoDevice->initializeRayTracing(bvhLevels);
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobals == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
this->residencyContainer.push_back(rtDispatchGlobals);
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals,
static_cast<uintptr_t>(rtDispatchGlobals->getGpuAddressToPatch()));
}
return ZE_RESULT_SUCCESS;

View File

@@ -7,6 +7,7 @@
#include "shared/source/device_binary_format/patchtokens_decoder.h"
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/program/kernel_info_from_patchtokens.h"
@@ -573,8 +574,19 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
immDataVector->push_back(std::move(mockKernelImmutableData));
EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc));
neoDevice->setRTDispatchGlobalsForceAllocation();
auto result = kernel->initialize(&kernelDesc);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
EXPECT_NE(nullptr, rtDispatchGlobals);
size_t residencySize = kernel->getResidencyContainer().size();
EXPECT_NE(0u, residencySize);
EXPECT_EQ(kernel->getResidencyContainer()[residencySize - 1], rtDispatchGlobals);
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) {
@@ -611,6 +623,42 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
EXPECT_EQ(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) {
    // Kernel descriptor named "rt_test" with all three required work-group sizes zeroed.
    // NOTE(review): presumably KernelDescriptorRTCallsTrue reports ray tracing calls - confirm.
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto dim = 0u; dim < 3u; dim++) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }

    // Created up front, injected into the device just before kernel initialization.
    // NOTE(review): ownership is presumably taken over by injectMemoryManager() - confirm.
    NEO::MemoryManager *failingMemoryManager = new NEO::FailMemoryManager(0, *neoDevice->executionEnvironment);

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module only exposes its immutable-data vector as const; cast it away to register the mock.
    auto &immutableDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module.get()->getKernelImmutableDataVector());
    immutableDataVector.push_back(std::move(mockKernelImmutableData));

    neoDevice->injectMemoryManager(failingMemoryManager);

    // Force allocation of RTDispatchGlobals is not enabled here, so initialization must report failure.
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, kernel->initialize(&kernelDesc));
}
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) {

View File

@@ -808,11 +808,3 @@ TEST(ClDeviceHelperTest, givenZeroNumberOfTilesWhenPrepareDeviceEnvironmentsCoun
uint32_t devicesCount = HwHelper::getSubDevicesCount(&hwInfo);
EXPECT_EQ(devicesCount, 1u);
}
// Checks that the RT memory-backed buffer is absent before initializeRayTracing(),
// present after the first call, and still present after a second call.
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrMemoryBackedBufferIsCreated) {
EXPECT_EQ(nullptr, pDevice->getRTMemoryBackedBuffer());
pDevice->initializeRayTracing();
EXPECT_NE(nullptr, pDevice->getRTMemoryBackedBuffer());
// Second call: the buffer must remain available.
pDevice->initializeRayTracing();
EXPECT_NE(nullptr, pDevice->getRTMemoryBackedBuffer());
}

View File

@@ -15,6 +15,7 @@ set(NEO_CORE_DEVICE
${CMAKE_CURRENT_SOURCE_DIR}/sub_device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sub_device.h
${CMAKE_CURRENT_SOURCE_DIR}/device_get_device_name.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/device_extended_setup.cpp
)
set_property(GLOBAL PROPERTY NEO_CORE_DEVICE ${NEO_CORE_DEVICE})

View File

@@ -42,6 +42,10 @@ Device::~Device() {
}
}
finalizeRayTracing();
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
rtMemoryBackedBuffer = nullptr;
DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get());
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
rtMemoryBackedBuffer = nullptr;
@@ -587,11 +591,49 @@ EngineControl *Device::getInternalCopyEngine() {
return nullptr;
}
void Device::initializeRayTracing() {
// Returns an RTDispatchGlobals allocation usable for the requested number of BVH levels.
//
// Returns nullptr when maxBvhLevels has no slot in the rtDispatchGlobals table (including the
// case where the table is still empty). Otherwise the table is scanned from its highest slot
// down to maxBvhLevels and the first non-null allocation is returned. If none exists,
// allocateRTDispatchGlobals(maxBvhLevels) is invoked and the slot for maxBvhLevels is returned,
// which may still be nullptr if that call did not populate it.
GraphicsAllocation *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
    const size_t slotCount = rtDispatchGlobals.size();
    if (maxBvhLevels >= slotCount) {
        return nullptr;
    }

    // Walk downwards using a one-past index so the unsigned counter never has to go below zero.
    for (size_t onePast = slotCount; onePast > maxBvhLevels; --onePast) {
        GraphicsAllocation *candidate = rtDispatchGlobals[onePast - 1];
        if (candidate != nullptr) {
            return candidate;
        }
    }

    allocateRTDispatchGlobals(maxBvhLevels);
    return rtDispatchGlobals[maxBvhLevels];
}
// Prepares the device for ray tracing kernels.
//
// On first use, allocates the RT memory-backed buffer sized by
// RayTracingHelper::getTotalMemoryBackedFifoSize(). Then makes sure the rtDispatchGlobals table
// has a (null) slot for every level in [0, maxBvhLevels]; existing entries are left untouched and
// the table is never shrunk, so repeated calls are harmless.
void Device::initializeRayTracing(uint32_t maxBvhLevels) {
    if (rtMemoryBackedBuffer == nullptr) {
        auto fifoSize = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
        rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(
            {getRootDeviceIndex(), fifoSize, GraphicsAllocation::AllocationType::BUFFER, getDeviceBitfield()});
    }

    // Grow in one step instead of pushing nullptr entries one at a time.
    // Widen before adding 1 so the slot count cannot wrap in 32-bit arithmetic.
    const size_t requiredSlots = static_cast<size_t>(maxBvhLevels) + 1;
    if (rtDispatchGlobals.size() < requiredSlots) {
        rtDispatchGlobals.resize(requiredSlots, nullptr);
    }
}
void Device::finalizeRayTracing() {
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
rtMemoryBackedBuffer = nullptr;
for (size_t i = 0; i < rtDispatchGlobals.size(); i++) {
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobals[i]);
rtDispatchGlobals[i] = nullptr;
}
}
// Returns the OSTime instance held by this device's root device environment.
OSTime *Device::getOSTime() const { return getRootDeviceEnvironment().osTime.get(); };

View File

@@ -123,7 +123,9 @@ class Device : public ReferenceTrackedObject<Device> {
static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
std::unique_ptr<SyncBufferHandler> syncBufferHandler;
// Returns the current value of rtMemoryBackedBuffer (may be nullptr).
GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; }
void initializeRayTracing();
GraphicsAllocation *getRTDispatchGlobals(uint32_t maxBvhLevels);
// Ray tracing counts as initialized once rtMemoryBackedBuffer is non-null.
bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; }
void initializeRayTracing(uint32_t maxBvhLevels);
uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
const std::vector<SubDevice *> getSubDevices() const { return subdevices; }
@@ -159,6 +161,8 @@ class Device : public ReferenceTrackedObject<Device> {
virtual bool genericSubDevicesAllowed();
bool engineInstancedSubDevicesAllowed();
void setAsEngineInstanced();
MOCKABLE_VIRTUAL void allocateRTDispatchGlobals(uint32_t maxBvhLevels);
void finalizeRayTracing();
DeviceInfo deviceInfo = {};
@@ -185,7 +189,7 @@ class Device : public ReferenceTrackedObject<Device> {
uintptr_t specializedDevice = reinterpret_cast<uintptr_t>(nullptr);
GraphicsAllocation *rtMemoryBackedBuffer = nullptr;
GraphicsAllocation *rtDispatchGlobals = nullptr;
std::vector<GraphicsAllocation *> rtDispatchGlobals;
};
inline EngineControl &Device::getDefaultEngine() {

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/device/device.h"
namespace NEO {
// Empty implementation: performs no allocation and does not touch any device state.
// NOTE(review): presumably a placeholder for a branch-specific implementation - confirm.
void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
}
} // namespace NEO

View File

@@ -132,6 +132,7 @@ struct KernelFromPatchtokens {
const SPatchAllocateStatelessEventPoolSurface *allocateStatelessEventPoolSurface = nullptr;
const SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateStatelessDefaultDeviceQueueSurface = nullptr;
const SPatchAllocateSyncBuffer *allocateSyncBuffer = nullptr;
const void *allocateRTGlobalBuffer = nullptr;
const SPatchItemHeader *inlineVmeSamplerInfo = nullptr;
const SPatchGtpinFreeGRFInfo *gtpinFreeGrfInfo = nullptr;
const SPatchStateSIP *stateSip = nullptr;

View File

@@ -20,6 +20,7 @@ class RayTracingHelper : public NonCopyableOrMovableClass {
static constexpr uint32_t hitInfoSize = 64;
static constexpr uint32_t bvhStackSize = 96;
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * KB;
static constexpr uint32_t maxBvhLevels = 8;
static size_t getDispatchGlobalSize(const Device &device, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
return static_cast<size_t>(alignUp(getRtGlobalsSize(), MemoryConstants::cacheLineSize) +

View File

@@ -22,6 +22,7 @@ set(NEO_CORE_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}read_extended_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor_from_patchtokens_extended.cpp
)
set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL})

View File

@@ -249,6 +249,7 @@ struct KernelDescriptor {
ArgDescPointer deviceSideEnqueueDefaultQueueSurfaceAddress;
ArgDescPointer systemThreadSurfaceAddress;
ArgDescPointer syncBufferAddress;
ArgDescPointer rtDispatchGlobals;
CrossThreadDataOffset privateMemorySize = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset maxWorkGroupSize = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset simdSize = undefined<CrossThreadDataOffset>;

View File

@@ -482,7 +482,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::Ker
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface);
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface);
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateSyncBuffer);
populateKernelDescriptorRtDispatchGlobals(dst, src);
dst.payloadMappings.explicitArgs.resize(src.tokens.kernelArgs.size());
dst.explicitArgsExtendedMetadata.resize(src.tokens.kernelArgs.size());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,6 +16,7 @@ namespace PatchTokenBinary {
struct KernelFromPatchtokens;
}
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src);
void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes);
} // namespace NEO

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
namespace NEO {
// Empty implementation: leaves dst unmodified and does not read src.
// NOTE(review): presumably a placeholder for a branch-specific implementation - confirm.
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src) {}
} // namespace NEO

View File

@@ -12,6 +12,7 @@
#include "shared/source/device/sub_device.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/unit_test/fixtures/mock_aub_center_fixture.h"
namespace NEO {
@@ -158,10 +159,23 @@ class MockDevice : public RootDevice {
return isDebuggerActiveReturn;
}
// Test hook: when forced allocation is enabled, stores a freshly created MockGraphicsAllocation
// in the slot for maxBvhLevels; otherwise defers to Device::allocateRTDispatchGlobals().
// The slot is indexed directly, so the table must already contain an entry for maxBvhLevels.
void allocateRTDispatchGlobals(uint32_t maxBvhLevels) override {
    if (!rtDispatchGlobalsForceAllocation) {
        Device::allocateRTDispatchGlobals(maxBvhLevels);
        return;
    }
    rtDispatchGlobals[maxBvhLevels] = new MockGraphicsAllocation();
}
// Sets rtDispatchGlobalsForceAllocation to true. This only sets the flag; nothing is allocated here.
void setRTDispatchGlobalsForceAllocation() {
rtDispatchGlobalsForceAllocation = true;
}
static decltype(&createCommandStream) createCommandStreamReceiverFunc;
bool isDebuggerActiveParentCall = true;
bool isDebuggerActiveReturn = false;
bool rtDispatchGlobalsForceAllocation = false;
};
template <>

View File

@@ -5,6 +5,8 @@
*
*/
#include "shared/source/device/device.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_device.h"
@@ -14,7 +16,7 @@
using namespace NEO;
TEST(DeviceTest, whenBlitterOperationsSupportIsDisabledThenNoInternalCopyEngineIsReturned) {
TEST(DeviceBlitterTest, whenBlitterOperationsSupportIsDisabledThenNoInternalCopyEngineIsReturned) {
VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());
defaultHwInfo->capabilityTable.blitterOperationsSupported = false;
@@ -22,7 +24,7 @@ TEST(DeviceTest, whenBlitterOperationsSupportIsDisabledThenNoInternalCopyEngineI
EXPECT_EQ(nullptr, factory.rootDevices[0]->getInternalCopyEngine());
}
TEST(DeviceTest, givenBlitterOperationsDisabledWhenCreatingBlitterEngineThenAbort) {
TEST(DeviceBlitterTest, givenBlitterOperationsDisabledWhenCreatingBlitterEngineThenAbort) {
VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());
defaultHwInfo->capabilityTable.blitterOperationsSupported = false;
@@ -32,3 +34,48 @@ TEST(DeviceTest, givenBlitterOperationsDisabledWhenCreatingBlitterEngineThenAbor
EXPECT_THROW(factory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_BCS, EngineUsage::Internal}), std::runtime_error);
EXPECT_THROW(factory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_BCS, EngineUsage::LowPriority}), std::runtime_error);
}
using DeviceTest = Test<DeviceFixture>;
// Checks that initializeRayTracing(0) creates the RT memory-backed buffer on the first call,
// that rayTracingIsInitialized() tracks it, and that a second call keeps both in place.
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) {
// Before initialization: no buffer, not initialized.
EXPECT_EQ(nullptr, pDevice->getRTMemoryBackedBuffer());
EXPECT_EQ(false, pDevice->rayTracingIsInitialized());
pDevice->initializeRayTracing(0);
EXPECT_NE(nullptr, pDevice->getRTMemoryBackedBuffer());
EXPECT_EQ(true, pDevice->rayTracingIsInitialized());
// Repeating the call must leave the device initialized.
pDevice->initializeRayTracing(0);
EXPECT_NE(nullptr, pDevice->getRTMemoryBackedBuffer());
EXPECT_EQ(true, pDevice->rayTracingIsInitialized());
}
// Requesting a BVH level (100) above the level passed to initializeRayTracing (5) must yield nullptr.
TEST_F(DeviceTest, whenGetRTDispatchGlobalsIsCalledWithUnsupportedBVHLevelsThenNullptrIsReturned) {
pDevice->initializeRayTracing(5);
EXPECT_EQ(nullptr, pDevice->getRTDispatchGlobals(100));
}
// With forced allocation enabled, lookups for levels within the initialized range return
// a non-null allocation, both on first request and on repeated request.
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledWithMockAllocatorThenRTDispatchGlobalsIsAllocated) {
pDevice->setRTDispatchGlobalsForceAllocation();
pDevice->initializeRayTracing(5);
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
// Same level requested again.
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
// Highest initialized level.
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(5));
}
// Calling initializeRayTracing(5) a second time must not prevent getRTDispatchGlobals(5)
// from returning a non-null allocation.
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledMultipleTimesWithMockAllocatorThenInitializeRayTracingIsIdempotent) {
pDevice->setRTDispatchGlobalsForceAllocation();
pDevice->initializeRayTracing(5);
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(5));
pDevice->initializeRayTracing(5);
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(5));
}
// Without a prior initializeRayTracing() call, getRTDispatchGlobals must return nullptr.
TEST_F(DeviceTest, whenGetRTDispatchGlobalsIsCalledBeforeInitializationThenNullPtrIsReturned) {
EXPECT_EQ(nullptr, pDevice->getRTDispatchGlobals(1));
}
// Level 0 is only served after initializeRayTracing(): even with forced allocation enabled,
// the lookup returns nullptr beforehand and a non-null allocation afterwards.
TEST_F(DeviceTest, whenGetRTDispatchGlobalsIsCalledWithZeroSizeAndMockAllocatorThenDispatchGlobalsIsReturned) {
pDevice->setRTDispatchGlobalsForceAllocation();
EXPECT_EQ(nullptr, pDevice->getRTDispatchGlobals(0));
pDevice->initializeRayTracing(5);
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(0));
}

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020 Intel Corporation
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -11,3 +11,5 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_tests.cpp
)
add_subdirectories()