Move ISA at kernel creation time
Instead of moving the ISAs for all kernels in a module when the module is created, move each kernel's ISA when that kernel is created, to avoid unnecessary memory transfers. Related-To: LOCI-2009 Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
parent
2114a85ec7
commit
afffedebb2
|
@ -60,8 +60,11 @@ struct KernelImmutableData {
|
|||
|
||||
// Returns the stored device pointer (nullptr by default).
Device *getDevice() { return device; }
|
||||
|
||||
// Read-only view of the stored kernel info pointer (nullptr by default).
const NEO::KernelInfo *getKernelInfo() const { return this->kernelInfo; }
|
||||
|
||||
protected:
|
||||
Device *device = nullptr;
|
||||
NEO::KernelInfo *kernelInfo = nullptr;
|
||||
NEO::KernelDescriptor *kernelDescriptor = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
|
||||
|
||||
|
|
|
@ -98,6 +98,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
|
|||
NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel) {
|
||||
|
||||
UNRECOVERABLE_IF(kernelInfo == nullptr);
|
||||
this->kernelInfo = kernelInfo;
|
||||
this->kernelDescriptor = &kernelInfo->kernelDescriptor;
|
||||
|
||||
auto neoDevice = device->getNEODevice();
|
||||
|
@ -110,15 +111,6 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
|
|||
{neoDevice->getRootDeviceIndex(), kernelIsaSize, allocType, neoDevice->getDeviceBitfield()});
|
||||
UNRECOVERABLE_IF(allocation == nullptr);
|
||||
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (kernelInfo->heapInfo.pKernelHeap != nullptr) {
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *allocation),
|
||||
*neoDevice, allocation, 0, kernelInfo->heapInfo.pKernelHeap,
|
||||
static_cast<size_t>(kernelIsaSize));
|
||||
}
|
||||
|
||||
isaGraphicsAllocation.reset(allocation);
|
||||
if (device->getL0Debugger() && kernelInfo->kernelDescriptor.external.debugData.get()) {
|
||||
device->getL0Debugger()->registerElf(kernelInfo->kernelDescriptor.external.debugData.get(), allocation);
|
||||
|
@ -604,6 +596,19 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
|||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap != nullptr) {
|
||||
auto neoDevice = module->getDevice()->getNEODevice();
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto isaAllocation = this->kernelImmData->getIsaGraphicsAllocation();
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *isaAllocation),
|
||||
*neoDevice,
|
||||
isaAllocation,
|
||||
0,
|
||||
this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap,
|
||||
static_cast<size_t>(this->kernelImmData->getKernelInfo()->heapInfo.KernelHeapSize));
|
||||
}
|
||||
|
||||
for (const auto &argT : kernelImmData->getDescriptor().payloadMappings.explicitArgs) {
|
||||
switch (argT.type) {
|
||||
default:
|
||||
|
|
|
@ -8,29 +8,76 @@
|
|||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/file_io.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/helpers/test_files.h"
|
||||
#include "shared/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
|
||||
|
||||
#include "level_zero/core/source/module/module.h"
|
||||
#include "level_zero/core/source/module/module_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_context.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
struct MockImmutableMemoryManager : public NEO::MockMemoryManager {
|
||||
MockImmutableMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment)) {}
|
||||
bool copyMemoryToAllocation(NEO::GraphicsAllocation *graphicsAllocation,
|
||||
size_t destinationOffset,
|
||||
const void *memoryToCopy,
|
||||
size_t sizeToCopy) override {
|
||||
|
||||
copyMemoryToAllocationCalledTimes++;
|
||||
return true;
|
||||
}
|
||||
uint32_t copyMemoryToAllocationCalledTimes = 0;
|
||||
};
|
||||
|
||||
struct MockImmutableData : KernelImmutableData {
|
||||
using KernelImmutableData::kernelDescriptor;
|
||||
using KernelImmutableData::kernelInfo;
|
||||
MockImmutableData(uint32_t perHwThreadPrivateMemorySize) {
|
||||
mockKernelDescriptor = new NEO::KernelDescriptor;
|
||||
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
|
||||
kernelDescriptor = mockKernelDescriptor;
|
||||
return;
|
||||
|
||||
mockKernelInfo = new NEO::KernelInfo;
|
||||
mockKernelInfo->heapInfo.pKernelHeap = kernelHeap;
|
||||
mockKernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
|
||||
kernelInfo = mockKernelInfo;
|
||||
|
||||
if (getIsaGraphicsAllocation() != nullptr) {
|
||||
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*isaGraphicsAllocation);
|
||||
isaGraphicsAllocation.release();
|
||||
}
|
||||
isaGraphicsAllocation.reset(new NEO::MockGraphicsAllocation(0,
|
||||
NEO::GraphicsAllocation::AllocationType::KERNEL_ISA,
|
||||
reinterpret_cast<void *>(0x1234),
|
||||
0x1000,
|
||||
0,
|
||||
sizeof(uint32_t),
|
||||
MemoryPool::System4KBPages));
|
||||
}
|
||||
|
||||
void setDevice(L0::Device *inDevice) {
|
||||
device = inDevice;
|
||||
}
|
||||
|
||||
~MockImmutableData() override {
|
||||
delete mockKernelInfo;
|
||||
delete mockKernelDescriptor;
|
||||
}
|
||||
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
|
||||
char kernelHeap[MemoryConstants::pageSize] = {};
|
||||
NEO::KernelInfo *mockKernelInfo = nullptr;
|
||||
};
|
||||
|
||||
struct MockModule : public L0::ModuleImp {
|
||||
|
@ -39,6 +86,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
|||
L0::ModuleType type,
|
||||
uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) {
|
||||
mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize);
|
||||
mockKernelImmData->setDevice(device);
|
||||
}
|
||||
|
||||
~MockModule() {
|
||||
|
@ -69,6 +117,8 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
|||
|
||||
// Runs the base device fixture setup, then replaces the execution
// environment's memory manager with a MockImmutableMemoryManager. The raw
// pointer is kept in `memoryManager`; the object itself is handed to the
// execution environment's memoryManager via reset().
void SetUp() override {
    DeviceFixture::SetUp();

    auto *executionEnvironment = neoDevice->executionEnvironment;
    memoryManager = new MockImmutableMemoryManager(*executionEnvironment);
    executionEnvironment->memoryManager.reset(memoryManager);
}
|
||||
|
||||
void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) {
|
||||
|
@ -113,6 +163,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
|||
const std::string kernelName = "test";
|
||||
const uint32_t numKernelArguments = 6;
|
||||
std::unique_ptr<MockModule> module;
|
||||
MockImmutableMemoryManager *memoryManager;
|
||||
};
|
||||
|
||||
struct ModuleFixture : public DeviceFixture {
|
||||
|
|
|
@ -294,6 +294,51 @@ HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPr
|
|||
EXPECT_EQ(expectedSize, kernel->privateMemoryGraphicsAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
// Creating a kernel must trigger exactly one additional
// copyMemoryToAllocation() call on the (mock) memory manager.
HWTEST_F(KernelImmutableDataTests, givenCallToCreateKernelThenIsaIsCopied) {
    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    auto *mockMemoryManager =
        static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    // Snapshot the counter so that only copies made during kernel creation are measured.
    const uint32_t copiesBeforeCreate = mockMemoryManager->copyMemoryToAllocationCalledTimes;

    createKernel(kernel.get());

    const uint32_t copiesAfterCreate = mockMemoryManager->copyMemoryToAllocationCalledTimes;
    EXPECT_EQ(copiesBeforeCreate + 1u, copiesAfterCreate);
}
|
||||
|
||||
// With a null kernel heap pointer, creating a kernel must not add any
// copyMemoryToAllocation() call on the (mock) memory manager.
HWTEST_F(KernelImmutableDataTests, givenCallToCreateKernelWithNullKernelHeapThenIsaIsNotCopied) {
    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    auto *mockModule = static_cast<MockModule *>(module.get());
    auto *mockData = static_cast<MockImmutableData *>(mockModule->mockKernelImmData);

    // Temporarily clear the heap pointer; it is put back at the end of the test.
    auto &kernelHeapPointer = mockData->kernelInfo->heapInfo.pKernelHeap;
    auto previousKernelHeap = kernelHeapPointer;
    kernelHeapPointer = nullptr;

    auto *mockMemoryManager =
        static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    // Snapshot the counter so that only copies made during kernel creation are measured.
    const uint32_t copiesBeforeCreate = mockMemoryManager->copyMemoryToAllocationCalledTimes;

    createKernel(kernel.get());

    const uint32_t copiesAfterCreate = mockMemoryManager->copyMemoryToAllocationCalledTimes;
    EXPECT_EQ(copiesBeforeCreate, copiesAfterCreate);

    kernelHeapPointer = previousKernelHeap;
}
|
||||
|
||||
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenContainerHasOneExtraSpaceForAllocation) {
|
||||
std::string testFile;
|
||||
retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
|
||||
|
|
Loading…
Reference in New Issue