Revert "Initialize kernel private surface when kernel is created"

This reverts commit be2a87fe98.

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga 2021-01-09 16:29:11 -08:00 committed by Compute-Runtime-Automation
parent 26b036ab97
commit 08655a315c
6 changed files with 43 additions and 159 deletions

View File

@ -48,6 +48,9 @@ struct KernelImmutableData {
uint32_t getIsaSize() const;
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
uint64_t getPrivateMemorySize() const;
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const { return privateMemoryGraphicsAllocation.get(); }
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; }
@ -64,6 +67,7 @@ struct KernelImmutableData {
Device *device = nullptr;
NEO::KernelDescriptor *kernelDescriptor = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> privateMemoryGraphicsAllocation = nullptr;
uint32_t crossThreadDataSize = 0;
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;

View File

@ -72,6 +72,10 @@ KernelImmutableData::~KernelImmutableData() {
isaGraphicsAllocation.release();
}
crossThreadDataTemplate.reset();
if (nullptr != privateMemoryGraphicsAllocation) {
this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation);
privateMemoryGraphicsAllocation.release();
}
surfaceStateHeapTemplate.reset();
dynamicStateHeapTemplate.reset();
}
@ -160,6 +164,21 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
}
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());
auto &kernelAttributes = kernelDescriptor->kernelAttributes;
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, computeUnitsUsedForSratch);
UNRECOVERABLE_IF(privateSurfaceSize == 0);
this->privateMemoryGraphicsAllocation.reset(memoryManager->allocateGraphicsMemoryWithProperties(
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}));
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
*privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get());
}
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
UNRECOVERABLE_IF(nullptr == globalConstBuffer);
@ -188,13 +207,17 @@ uint32_t KernelImmutableData::getIsaSize() const {
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
}
// Reports the size of the private-memory allocation held by this immutable
// data, as given by the allocation's getUnderlyingBufferSize(). Returns 0 when
// no private-memory allocation is present.
uint64_t KernelImmutableData::getPrivateMemorySize() const {
    // Guard clause: nothing allocated means there is nothing to measure.
    if (nullptr == privateMemoryGraphicsAllocation) {
        return 0;
    }
    const uint64_t allocationSize = privateMemoryGraphicsAllocation->getUnderlyingBufferSize();
    return allocationSize;
}
// Constructs a kernel associated with the given module. The constructor only
// stores the module pointer; it performs no other work.
KernelImp::KernelImp(Module *module) : module(module) {}
KernelImp::~KernelImp() {
if (nullptr != privateMemoryGraphicsAllocation) {
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation);
}
if (perThreadDataForWholeThreadGroup != nullptr) {
alignedFree(perThreadDataForWholeThreadGroup);
}
@ -651,27 +674,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
}
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
auto neoDevice = module->getDevice()->getNEODevice();
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize,
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
UNRECOVERABLE_IF(privateSurfaceSize == 0);
this->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()});
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
}
if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) {
auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize;
UNRECOVERABLE_IF(reqdSize[1] == 0);

View File

@ -124,17 +124,11 @@ struct KernelImp : Kernel {
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() {
return privateMemoryGraphicsAllocation;
}
protected:
KernelImp() = default;
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr;
void createPrintfBuffer();
void setDebugSurface();
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
@ -153,7 +147,7 @@ struct KernelImp : Kernel {
uint32_t numThreadsPerThreadGroup = 1u;
uint32_t threadExecutionMask = 0u;
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
std::unique_ptr<uint8_t[]> crossThreadData = 0;
uint32_t crossThreadDataSize = 0;
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;

View File

@ -12,105 +12,12 @@
#include "shared/test/unit_test/helpers/test_files.h"
#include "level_zero/core/source/module/module.h"
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
namespace L0 {
namespace ult {
// Test fixture built on DeviceFixture that supplies mock module, kernel and
// kernel-immutable-data types, plus helpers to create a mock module with a
// chosen per-HW-thread private memory size and to initialize a kernel.
struct ModuleImmutableDataFixture : public DeviceFixture {
// KernelImmutableData whose kernelDescriptor points at a descriptor allocated
// here, with perHwThreadPrivateMemorySize taken from the constructor argument.
struct MockImmutableData : KernelImmutableData {
MockImmutableData(uint32_t perHwThreadPrivateMemorySize) {
mockKernelDescriptor = new NEO::KernelDescriptor;
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
// Expose the mock descriptor through the base-class pointer.
kernelDescriptor = mockKernelDescriptor;
return;
}
// Frees the descriptor allocated in the constructor.
~MockImmutableData() override {
delete mockKernelDescriptor;
}
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
};
// ModuleImp that creates a single MockImmutableData in its constructor,
// deletes it in its destructor, and hands it out for every kernel lookup.
struct MockModule : public L0::ModuleImp {
MockModule(L0::Device *device,
L0::ModuleBuildLog *moduleBuildLog,
L0::ModuleType type,
uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) {
mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize);
}
~MockModule() {
delete mockKernelImmData;
}
// Returns the single mock immutable data; functionName is ignored.
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
return mockKernelImmData;
}
MockImmutableData *mockKernelImmData = nullptr;
};
// WhiteBox<KernelImp> with the overrides below stubbed out as no-ops.
class MockKernel : public WhiteBox<L0::KernelImp> {
public:
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
}
// No-op stub.
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
return;
}
// No-op stub.
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
return;
}
~MockKernel() override {
}
// Cloning is not supported by this mock; always returns nullptr.
std::unique_ptr<Kernel> clone() const override { return nullptr; }
};
// Delegates to DeviceFixture::SetUp().
void SetUp() override {
DeviceFixture::SetUp();
}
// Loads the test kernel binary, asserts that it was read, and creates the
// MockModule with the requested per-HW-thread private memory size.
void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) {
std::string testFile;
retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
size_t size = 0;
auto src = loadDataFromFile(
testFile.c_str(),
size);
ASSERT_NE(0u, size);
ASSERT_NE(nullptr, src);
// NOTE(review): moduleDesc is filled in but never passed to anything in this
// function; the MockModule below is constructed without it.
ze_module_desc_t moduleDesc = {};
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
moduleDesc.inputSize = size;
ModuleBuildLog *moduleBuildLog = nullptr;
module = std::make_unique<MockModule>(device,
moduleBuildLog,
ModuleType::User,
perHwThreadPrivateMemorySize);
}
// Initializes the given kernel with a descriptor naming kernelName. The value
// returned by initialize() is not checked here.
void createKernel(MockKernel *kernel) {
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
kernel->initialize(&desc);
}
// Delegates to DeviceFixture::TearDown().
void TearDown() override {
DeviceFixture::TearDown();
}
// Base name of the test binary; createModuleFromBinary appends "_" and ".bin".
const std::string binaryFilename = "test_kernel";
// Kernel name passed via ze_kernel_desc_t::pKernelName in createKernel.
const std::string kernelName = "test";
const uint32_t numKernelArguments = 6;
// Mock module created by createModuleFromBinary.
std::unique_ptr<MockModule> module;
};
struct ModuleFixture : public DeviceFixture {
void SetUp() override {
DeviceFixture::SetUp();

View File

@ -27,6 +27,7 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData {
using ::L0::KernelImmutableData::isaGraphicsAllocation;
using ::L0::KernelImmutableData::kernelDescriptor;
using ::L0::KernelImmutableData::KernelImmutableData;
using ::L0::KernelImmutableData::privateMemoryGraphicsAllocation;
using ::L0::KernelImmutableData::residencyContainer;
WhiteBox() : ::L0::KernelImmutableData() {}

View File

@ -254,43 +254,19 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
delete[] kernelNameRetrieved;
}
class KernelImmutableDataTests : public ModuleImmutableDataFixture, public ::testing::Test {
public:
void SetUp() override {
ModuleImmutableDataFixture::SetUp();
}
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
uint32_t computeUnitsUsedForSratch = 0x300;
void TearDown() override {
ModuleImmutableDataFixture::TearDown();
}
};
KernelInfo kernelInfo;
auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
kernelAttributes.perHwThreadPrivateMemorySize = 0x100;
kernelAttributes.simdSize = 8;
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
uint32_t perHwThreadPrivateMemorySizeRequested = 0u;
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
KernelImmutableData kernelImmutableData(device);
kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false);
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
createKernel(kernel.get());
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
}
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
createKernel(kernel.get());
EXPECT_NE(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
size_t expectedSize = perHwThreadPrivateMemorySizeRequested *
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
EXPECT_EQ(expectedSize, kernel->getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
size_t expectedSize = static_cast<size_t>(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch;
EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
}
HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {