Revert "Initialize kernel private surface when kernel is created"
This reverts commit be2a87fe98.
Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
parent
26b036ab97
commit
08655a315c
|
@ -48,6 +48,9 @@ struct KernelImmutableData {
|
|||
uint32_t getIsaSize() const;
|
||||
// Returns the raw pointer held by isaGraphicsAllocation without giving up ownership.
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const {
    return isaGraphicsAllocation.get();
}
|
||||
|
||||
uint64_t getPrivateMemorySize() const;
|
||||
// Returns the raw pointer held by privateMemoryGraphicsAllocation (nullptr when
// no private surface has been allocated); ownership stays with the unique_ptr.
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const {
    return privateMemoryGraphicsAllocation.get();
}
|
||||
|
||||
// Read-only access to the first byte of the cross-thread data template buffer.
const uint8_t *getCrossThreadDataTemplate() const {
    return crossThreadDataTemplate.get();
}
|
||||
|
||||
// Returns the stored surfaceStateHeapSize value.
uint32_t getSurfaceStateHeapSize() const {
    return surfaceStateHeapSize;
}
|
||||
|
@ -64,6 +67,7 @@ struct KernelImmutableData {
|
|||
Device *device = nullptr;
|
||||
NEO::KernelDescriptor *kernelDescriptor = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> privateMemoryGraphicsAllocation = nullptr;
|
||||
|
||||
uint32_t crossThreadDataSize = 0;
|
||||
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;
|
||||
|
|
|
@ -72,6 +72,10 @@ KernelImmutableData::~KernelImmutableData() {
|
|||
isaGraphicsAllocation.release();
|
||||
}
|
||||
crossThreadDataTemplate.reset();
|
||||
if (nullptr != privateMemoryGraphicsAllocation) {
|
||||
this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation);
|
||||
privateMemoryGraphicsAllocation.release();
|
||||
}
|
||||
surfaceStateHeapTemplate.reset();
|
||||
dynamicStateHeapTemplate.reset();
|
||||
}
|
||||
|
@ -160,6 +164,21 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
|
|||
}
|
||||
|
||||
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());
|
||||
auto &kernelAttributes = kernelDescriptor->kernelAttributes;
|
||||
|
||||
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
|
||||
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, computeUnitsUsedForSratch);
|
||||
|
||||
UNRECOVERABLE_IF(privateSurfaceSize == 0);
|
||||
this->privateMemoryGraphicsAllocation.reset(memoryManager->allocateGraphicsMemoryWithProperties(
|
||||
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}));
|
||||
|
||||
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
|
||||
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
||||
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
|
||||
*privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
|
||||
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get());
|
||||
}
|
||||
|
||||
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
|
||||
UNRECOVERABLE_IF(nullptr == globalConstBuffer);
|
||||
|
@ -188,13 +207,17 @@ uint32_t KernelImmutableData::getIsaSize() const {
|
|||
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
uint64_t KernelImmutableData::getPrivateMemorySize() const {
|
||||
uint64_t size = 0;
|
||||
if (privateMemoryGraphicsAllocation != nullptr) {
|
||||
size = privateMemoryGraphicsAllocation->getUnderlyingBufferSize();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
// Stores the given module pointer in the `module` member; performs no other work.
KernelImp::KernelImp(Module *module)
    : module(module) {
}
|
||||
|
||||
KernelImp::~KernelImp() {
|
||||
if (nullptr != privateMemoryGraphicsAllocation) {
|
||||
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation);
|
||||
}
|
||||
|
||||
if (perThreadDataForWholeThreadGroup != nullptr) {
|
||||
alignedFree(perThreadDataForWholeThreadGroup);
|
||||
}
|
||||
|
@ -651,27 +674,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
|||
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
|
||||
}
|
||||
|
||||
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
|
||||
auto neoDevice = module->getDevice()->getNEODevice();
|
||||
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
|
||||
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize,
|
||||
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
|
||||
|
||||
UNRECOVERABLE_IF(privateSurfaceSize == 0);
|
||||
this->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
||||
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()});
|
||||
|
||||
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
|
||||
|
||||
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
|
||||
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
|
||||
|
||||
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
||||
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
|
||||
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
|
||||
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
|
||||
}
|
||||
|
||||
if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) {
|
||||
auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize;
|
||||
UNRECOVERABLE_IF(reqdSize[1] == 0);
|
||||
|
|
|
@ -124,17 +124,11 @@ struct KernelImp : Kernel {
|
|||
|
||||
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
|
||||
|
||||
// Returns the kernel's private-memory allocation pointer exactly as stored
// (nullptr until an allocation has been assigned to the member).
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() { return this->privateMemoryGraphicsAllocation; }
|
||||
|
||||
protected:
|
||||
KernelImp() = default;
|
||||
|
||||
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
|
||||
|
||||
NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr;
|
||||
|
||||
void createPrintfBuffer();
|
||||
void setDebugSurface();
|
||||
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
|
||||
|
@ -153,7 +147,7 @@ struct KernelImp : Kernel {
|
|||
uint32_t numThreadsPerThreadGroup = 1u;
|
||||
uint32_t threadExecutionMask = 0u;
|
||||
|
||||
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
|
||||
std::unique_ptr<uint8_t[]> crossThreadData = 0;
|
||||
uint32_t crossThreadDataSize = 0;
|
||||
|
||||
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;
|
||||
|
|
|
@ -12,105 +12,12 @@
|
|||
#include "shared/test/unit_test/helpers/test_files.h"
|
||||
|
||||
#include "level_zero/core/source/module/module.h"
|
||||
#include "level_zero/core/source/module/module_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
struct MockImmutableData : KernelImmutableData {
|
||||
MockImmutableData(uint32_t perHwThreadPrivateMemorySize) {
|
||||
mockKernelDescriptor = new NEO::KernelDescriptor;
|
||||
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
|
||||
kernelDescriptor = mockKernelDescriptor;
|
||||
return;
|
||||
}
|
||||
~MockImmutableData() override {
|
||||
delete mockKernelDescriptor;
|
||||
}
|
||||
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
|
||||
};
|
||||
|
||||
struct MockModule : public L0::ModuleImp {
|
||||
MockModule(L0::Device *device,
|
||||
L0::ModuleBuildLog *moduleBuildLog,
|
||||
L0::ModuleType type,
|
||||
uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) {
|
||||
mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize);
|
||||
}
|
||||
|
||||
~MockModule() {
|
||||
delete mockKernelImmData;
|
||||
}
|
||||
|
||||
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
|
||||
return mockKernelImmData;
|
||||
}
|
||||
MockImmutableData *mockKernelImmData = nullptr;
|
||||
};
|
||||
|
||||
class MockKernel : public WhiteBox<L0::KernelImp> {
|
||||
public:
|
||||
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
|
||||
}
|
||||
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
|
||||
return;
|
||||
}
|
||||
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
|
||||
return;
|
||||
}
|
||||
~MockKernel() override {
|
||||
}
|
||||
std::unique_ptr<Kernel> clone() const override { return nullptr; }
|
||||
};
|
||||
|
||||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
}
|
||||
|
||||
void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) {
|
||||
std::string testFile;
|
||||
retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
|
||||
|
||||
size_t size = 0;
|
||||
auto src = loadDataFromFile(
|
||||
testFile.c_str(),
|
||||
size);
|
||||
|
||||
ASSERT_NE(0u, size);
|
||||
ASSERT_NE(nullptr, src);
|
||||
|
||||
ze_module_desc_t moduleDesc = {};
|
||||
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
|
||||
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
|
||||
moduleDesc.inputSize = size;
|
||||
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
|
||||
module = std::make_unique<MockModule>(device,
|
||||
moduleBuildLog,
|
||||
ModuleType::User,
|
||||
perHwThreadPrivateMemorySize);
|
||||
}
|
||||
|
||||
void createKernel(MockKernel *kernel) {
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
kernel->initialize(&desc);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
DeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
const std::string binaryFilename = "test_kernel";
|
||||
const std::string kernelName = "test";
|
||||
const uint32_t numKernelArguments = 6;
|
||||
std::unique_ptr<MockModule> module;
|
||||
};
|
||||
|
||||
struct ModuleFixture : public DeviceFixture {
|
||||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
|
|
|
@ -27,6 +27,7 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData {
|
|||
using ::L0::KernelImmutableData::isaGraphicsAllocation;
|
||||
using ::L0::KernelImmutableData::kernelDescriptor;
|
||||
using ::L0::KernelImmutableData::KernelImmutableData;
|
||||
using ::L0::KernelImmutableData::privateMemoryGraphicsAllocation;
|
||||
using ::L0::KernelImmutableData::residencyContainer;
|
||||
|
||||
WhiteBox() : ::L0::KernelImmutableData() {}
|
||||
|
|
|
@ -254,43 +254,19 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
|
|||
delete[] kernelNameRetrieved;
|
||||
}
|
||||
|
||||
class KernelImmutableDataTests : public ModuleImmutableDataFixture, public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ModuleImmutableDataFixture::SetUp();
|
||||
}
|
||||
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
|
||||
uint32_t computeUnitsUsedForSratch = 0x300;
|
||||
|
||||
void TearDown() override {
|
||||
ModuleImmutableDataFixture::TearDown();
|
||||
}
|
||||
};
|
||||
KernelInfo kernelInfo;
|
||||
auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
|
||||
kernelAttributes.perHwThreadPrivateMemorySize = 0x100;
|
||||
kernelAttributes.simdSize = 8;
|
||||
|
||||
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
|
||||
uint32_t perHwThreadPrivateMemorySizeRequested = 0u;
|
||||
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false);
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
createKernel(kernel.get());
|
||||
|
||||
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
|
||||
}
|
||||
|
||||
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
|
||||
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
|
||||
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
createKernel(kernel.get());
|
||||
|
||||
EXPECT_NE(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
|
||||
|
||||
size_t expectedSize = perHwThreadPrivateMemorySizeRequested *
|
||||
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
|
||||
EXPECT_EQ(expectedSize, kernel->getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
|
||||
size_t expectedSize = static_cast<size_t>(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch;
|
||||
EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
|
||||
|
|
Loading…
Reference in New Issue