From 08655a315c3d19c8e2fe8c0c5f3f70a79d2ff8e9 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Sat, 9 Jan 2021 16:29:11 -0800 Subject: [PATCH] Revert "Initialize kernel private surface when kernel is created" This reverts commit be2a87fe98a1b1315e1e2f2317c9ca06ef251241. Signed-off-by: Jaime Arteaga --- level_zero/core/source/kernel/kernel.h | 4 + level_zero/core/source/kernel/kernel_imp.cpp | 52 ++++++----- level_zero/core/source/kernel/kernel_imp.h | 8 +- .../test/unit_tests/fixtures/module_fixture.h | 93 ------------------- .../core/test/unit_tests/mocks/mock_kernel.h | 1 + .../unit_tests/sources/kernel/test_kernel.cpp | 44 ++------- 6 files changed, 43 insertions(+), 159 deletions(-) diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index 5a2946d715..0c9b71fa9f 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -48,6 +48,9 @@ struct KernelImmutableData { uint32_t getIsaSize() const; NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); } + uint64_t getPrivateMemorySize() const; + NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const { return privateMemoryGraphicsAllocation.get(); } + const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); } uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; } @@ -64,6 +67,7 @@ struct KernelImmutableData { Device *device = nullptr; NEO::KernelDescriptor *kernelDescriptor = nullptr; std::unique_ptr isaGraphicsAllocation = nullptr; + std::unique_ptr privateMemoryGraphicsAllocation = nullptr; uint32_t crossThreadDataSize = 0; std::unique_ptr crossThreadDataTemplate = nullptr; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index f57a0bafe8..b05b308c7a 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -72,6 +72,10 @@ KernelImmutableData::~KernelImmutableData() { isaGraphicsAllocation.release(); } crossThreadDataTemplate.reset(); + if (nullptr != privateMemoryGraphicsAllocation) { + this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation); + privateMemoryGraphicsAllocation.release(); + } surfaceStateHeapTemplate.reset(); dynamicStateHeapTemplate.reset(); } @@ -160,6 +164,21 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device } ArrayRef surfaceStateHeapArrayRef = ArrayRef(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize()); + auto &kernelAttributes = kernelDescriptor->kernelAttributes; + + if (kernelAttributes.perHwThreadPrivateMemorySize != 0) { + auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, computeUnitsUsedForSratch); + + UNRECOVERABLE_IF(privateSurfaceSize == 0); + this->privateMemoryGraphicsAllocation.reset(memoryManager->allocateGraphicsMemoryWithProperties( + {neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()})); + + UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr); + patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, + static_cast(privateMemoryGraphicsAllocation->getGpuAddressToPatch()), + *privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice); + this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get()); + } if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { UNRECOVERABLE_IF(nullptr == globalConstBuffer); @@ -188,13 +207,17 @@ uint32_t KernelImmutableData::getIsaSize() const { return static_cast(isaGraphicsAllocation->getUnderlyingBufferSize()); } +uint64_t KernelImmutableData::getPrivateMemorySize() const { + uint64_t size = 0; + if (privateMemoryGraphicsAllocation != nullptr) { + size = privateMemoryGraphicsAllocation->getUnderlyingBufferSize(); + } + return size; +} + KernelImp::KernelImp(Module *module) : module(module) {} KernelImp::~KernelImp() { - if (nullptr != privateMemoryGraphicsAllocation) { - module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation); - } - if (perThreadDataForWholeThreadGroup != nullptr) { alignedFree(perThreadDataForWholeThreadGroup); } @@ -651,27 +674,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize(); } - auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes; - auto neoDevice = module->getDevice()->getNEODevice(); - if (kernelAttributes.perHwThreadPrivateMemorySize != 0) { - auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, - neoDevice->getDeviceInfo().computeUnitsUsedForScratch); - - UNRECOVERABLE_IF(privateSurfaceSize == 0); - this->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( - {neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}); - - UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr); - - ArrayRef crossThredDataArrayRef = ArrayRef(this->crossThreadData.get(), this->crossThreadDataSize); - ArrayRef surfaceStateHeapArrayRef = ArrayRef(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize); - - patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, - static_cast(privateMemoryGraphicsAllocation->getGpuAddressToPatch()), - *privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice); - this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation); - } - if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) { auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize; UNRECOVERABLE_IF(reqdSize[1] == 0); diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 463cfdc02a..370307fd61 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -124,17 +124,11 @@ struct KernelImp : Kernel { ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override; - NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() { - return privateMemoryGraphicsAllocation; - } - protected: KernelImp() = default; void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z); - NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr; - void createPrintfBuffer(); void setDebugSurface(); virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0; @@ -153,7 +147,7 @@ struct KernelImp : Kernel { uint32_t numThreadsPerThreadGroup = 1u; uint32_t threadExecutionMask = 0u; - std::unique_ptr crossThreadData = nullptr; + std::unique_ptr crossThreadData = 0; uint32_t crossThreadDataSize = 0; std::unique_ptr surfaceStateHeapData = nullptr; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index e7c5369010..f8536d7c5b 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -12,105 +12,12 @@ #include "shared/test/unit_test/helpers/test_files.h" #include "level_zero/core/source/module/module.h" -#include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { namespace ult { -struct ModuleImmutableDataFixture : public DeviceFixture { - struct MockImmutableData : KernelImmutableData { - MockImmutableData(uint32_t perHwThreadPrivateMemorySize) { - mockKernelDescriptor = new NEO::KernelDescriptor; - mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize; - kernelDescriptor = mockKernelDescriptor; - return; - } - ~MockImmutableData() override { - delete mockKernelDescriptor; - } - NEO::KernelDescriptor *mockKernelDescriptor = nullptr; - }; - - struct MockModule : public L0::ModuleImp { - MockModule(L0::Device *device, - L0::ModuleBuildLog *moduleBuildLog, - L0::ModuleType type, - uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) { - mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize); - } - - ~MockModule() { - delete mockKernelImmData; - } - - const KernelImmutableData *getKernelImmutableData(const char *functionName) const override { - return mockKernelImmData; - } - MockImmutableData *mockKernelImmData = nullptr; - }; - - class MockKernel : public WhiteBox { - public: - MockKernel(MockModule *mockModule) : WhiteBox(mockModule) { - } - void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override { - return; - } - void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override { - return; - } - ~MockKernel() override { - } - std::unique_ptr clone() const override { return nullptr; } - }; - - void SetUp() override { - DeviceFixture::SetUp(); - } - - void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) { - std::string testFile; - retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin"); - - size_t size = 0; - auto src = loadDataFromFile( - testFile.c_str(), - size); - - ASSERT_NE(0u, size); - ASSERT_NE(nullptr, src); - - ze_module_desc_t moduleDesc = {}; - moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; - moduleDesc.pInputModule = reinterpret_cast(src.get()); - moduleDesc.inputSize = size; - - ModuleBuildLog *moduleBuildLog = nullptr; - - module = std::make_unique(device, - moduleBuildLog, - ModuleType::User, - perHwThreadPrivateMemorySize); - } - - void createKernel(MockKernel *kernel) { - ze_kernel_desc_t desc = {}; - desc.pKernelName = kernelName.c_str(); - kernel->initialize(&desc); - } - - void TearDown() override { - DeviceFixture::TearDown(); - } - - const std::string binaryFilename = "test_kernel"; - const std::string kernelName = "test"; - const uint32_t numKernelArguments = 6; - std::unique_ptr module; -}; - struct ModuleFixture : public DeviceFixture { void SetUp() override { DeviceFixture::SetUp(); diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index 9293067775..525cad4a59 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -27,6 +27,7 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData { using ::L0::KernelImmutableData::isaGraphicsAllocation; using ::L0::KernelImmutableData::kernelDescriptor; using ::L0::KernelImmutableData::KernelImmutableData; + using ::L0::KernelImmutableData::privateMemoryGraphicsAllocation; using ::L0::KernelImmutableData::residencyContainer; WhiteBox() : ::L0::KernelImmutableData() {} diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index c97c099f4b..78314c0425 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -254,43 +254,19 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) { delete[] kernelNameRetrieved; } -class KernelImmutableDataTests : public ModuleImmutableDataFixture, public ::testing::Test { - public: - void SetUp() override { - ModuleImmutableDataFixture::SetUp(); - } +HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) { + uint32_t computeUnitsUsedForSratch = 0x300; - void TearDown() override { - ModuleImmutableDataFixture::TearDown(); - } -}; + KernelInfo kernelInfo; + auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes; + kernelAttributes.perHwThreadPrivateMemorySize = 0x100; + kernelAttributes.simdSize = 8; -HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) { - uint32_t perHwThreadPrivateMemorySizeRequested = 0u; - createModuleFromBinary(perHwThreadPrivateMemorySizeRequested); + KernelImmutableData kernelImmutableData(device); + kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false); - std::unique_ptr kernel; - kernel = std::make_unique(module.get()); - - createKernel(kernel.get()); - - EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation()); -} - -HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) { - uint32_t perHwThreadPrivateMemorySizeRequested = 32u; - createModuleFromBinary(perHwThreadPrivateMemorySizeRequested); - - std::unique_ptr kernel; - kernel = std::make_unique(module.get()); - - createKernel(kernel.get()); - - EXPECT_NE(nullptr, kernel->getPrivateMemoryGraphicsAllocation()); - - size_t expectedSize = perHwThreadPrivateMemorySizeRequested * - device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch; - EXPECT_EQ(expectedSize, kernel->getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize()); + size_t expectedSize = static_cast(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch; + EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize()); } HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {