Revert "Initialize kernel private surface when kernel is created"
This reverts commit be2a87fe98.
Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
parent
26b036ab97
commit
08655a315c
|
@ -48,6 +48,9 @@ struct KernelImmutableData {
|
||||||
uint32_t getIsaSize() const;
|
uint32_t getIsaSize() const;
|
||||||
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
|
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
|
||||||
|
|
||||||
|
uint64_t getPrivateMemorySize() const;
|
||||||
|
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const { return privateMemoryGraphicsAllocation.get(); }
|
||||||
|
|
||||||
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
|
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
|
||||||
|
|
||||||
uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; }
|
uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; }
|
||||||
|
@ -64,6 +67,7 @@ struct KernelImmutableData {
|
||||||
Device *device = nullptr;
|
Device *device = nullptr;
|
||||||
NEO::KernelDescriptor *kernelDescriptor = nullptr;
|
NEO::KernelDescriptor *kernelDescriptor = nullptr;
|
||||||
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
|
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
|
||||||
|
std::unique_ptr<NEO::GraphicsAllocation> privateMemoryGraphicsAllocation = nullptr;
|
||||||
|
|
||||||
uint32_t crossThreadDataSize = 0;
|
uint32_t crossThreadDataSize = 0;
|
||||||
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;
|
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;
|
||||||
|
|
|
@ -72,6 +72,10 @@ KernelImmutableData::~KernelImmutableData() {
|
||||||
isaGraphicsAllocation.release();
|
isaGraphicsAllocation.release();
|
||||||
}
|
}
|
||||||
crossThreadDataTemplate.reset();
|
crossThreadDataTemplate.reset();
|
||||||
|
if (nullptr != privateMemoryGraphicsAllocation) {
|
||||||
|
this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation);
|
||||||
|
privateMemoryGraphicsAllocation.release();
|
||||||
|
}
|
||||||
surfaceStateHeapTemplate.reset();
|
surfaceStateHeapTemplate.reset();
|
||||||
dynamicStateHeapTemplate.reset();
|
dynamicStateHeapTemplate.reset();
|
||||||
}
|
}
|
||||||
|
@ -160,6 +164,21 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());
|
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());
|
||||||
|
auto &kernelAttributes = kernelDescriptor->kernelAttributes;
|
||||||
|
|
||||||
|
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
|
||||||
|
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, computeUnitsUsedForSratch);
|
||||||
|
|
||||||
|
UNRECOVERABLE_IF(privateSurfaceSize == 0);
|
||||||
|
this->privateMemoryGraphicsAllocation.reset(memoryManager->allocateGraphicsMemoryWithProperties(
|
||||||
|
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}));
|
||||||
|
|
||||||
|
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
|
||||||
|
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
||||||
|
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
|
||||||
|
*privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
|
||||||
|
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get());
|
||||||
|
}
|
||||||
|
|
||||||
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
|
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
|
||||||
UNRECOVERABLE_IF(nullptr == globalConstBuffer);
|
UNRECOVERABLE_IF(nullptr == globalConstBuffer);
|
||||||
|
@ -188,13 +207,17 @@ uint32_t KernelImmutableData::getIsaSize() const {
|
||||||
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
|
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t KernelImmutableData::getPrivateMemorySize() const {
|
||||||
|
uint64_t size = 0;
|
||||||
|
if (privateMemoryGraphicsAllocation != nullptr) {
|
||||||
|
size = privateMemoryGraphicsAllocation->getUnderlyingBufferSize();
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
KernelImp::KernelImp(Module *module) : module(module) {}
|
KernelImp::KernelImp(Module *module) : module(module) {}
|
||||||
|
|
||||||
KernelImp::~KernelImp() {
|
KernelImp::~KernelImp() {
|
||||||
if (nullptr != privateMemoryGraphicsAllocation) {
|
|
||||||
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (perThreadDataForWholeThreadGroup != nullptr) {
|
if (perThreadDataForWholeThreadGroup != nullptr) {
|
||||||
alignedFree(perThreadDataForWholeThreadGroup);
|
alignedFree(perThreadDataForWholeThreadGroup);
|
||||||
}
|
}
|
||||||
|
@ -651,27 +674,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||||
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
|
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
|
|
||||||
auto neoDevice = module->getDevice()->getNEODevice();
|
|
||||||
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
|
|
||||||
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize,
|
|
||||||
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
|
|
||||||
|
|
||||||
UNRECOVERABLE_IF(privateSurfaceSize == 0);
|
|
||||||
this->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
|
||||||
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()});
|
|
||||||
|
|
||||||
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
|
|
||||||
|
|
||||||
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
|
|
||||||
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
|
|
||||||
|
|
||||||
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
|
||||||
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
|
|
||||||
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
|
|
||||||
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) {
|
if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) {
|
||||||
auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize;
|
auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize;
|
||||||
UNRECOVERABLE_IF(reqdSize[1] == 0);
|
UNRECOVERABLE_IF(reqdSize[1] == 0);
|
||||||
|
|
|
@ -124,17 +124,11 @@ struct KernelImp : Kernel {
|
||||||
|
|
||||||
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
|
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
|
||||||
|
|
||||||
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() {
|
|
||||||
return privateMemoryGraphicsAllocation;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
KernelImp() = default;
|
KernelImp() = default;
|
||||||
|
|
||||||
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
|
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
|
||||||
|
|
||||||
NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr;
|
|
||||||
|
|
||||||
void createPrintfBuffer();
|
void createPrintfBuffer();
|
||||||
void setDebugSurface();
|
void setDebugSurface();
|
||||||
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
|
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
|
||||||
|
@ -153,7 +147,7 @@ struct KernelImp : Kernel {
|
||||||
uint32_t numThreadsPerThreadGroup = 1u;
|
uint32_t numThreadsPerThreadGroup = 1u;
|
||||||
uint32_t threadExecutionMask = 0u;
|
uint32_t threadExecutionMask = 0u;
|
||||||
|
|
||||||
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
|
std::unique_ptr<uint8_t[]> crossThreadData = 0;
|
||||||
uint32_t crossThreadDataSize = 0;
|
uint32_t crossThreadDataSize = 0;
|
||||||
|
|
||||||
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;
|
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;
|
||||||
|
|
|
@ -12,105 +12,12 @@
|
||||||
#include "shared/test/unit_test/helpers/test_files.h"
|
#include "shared/test/unit_test/helpers/test_files.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/module/module.h"
|
#include "level_zero/core/source/module/module.h"
|
||||||
#include "level_zero/core/source/module/module_imp.h"
|
|
||||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||||
|
|
||||||
namespace L0 {
|
namespace L0 {
|
||||||
namespace ult {
|
namespace ult {
|
||||||
|
|
||||||
struct ModuleImmutableDataFixture : public DeviceFixture {
|
|
||||||
struct MockImmutableData : KernelImmutableData {
|
|
||||||
MockImmutableData(uint32_t perHwThreadPrivateMemorySize) {
|
|
||||||
mockKernelDescriptor = new NEO::KernelDescriptor;
|
|
||||||
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
|
|
||||||
kernelDescriptor = mockKernelDescriptor;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
~MockImmutableData() override {
|
|
||||||
delete mockKernelDescriptor;
|
|
||||||
}
|
|
||||||
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct MockModule : public L0::ModuleImp {
|
|
||||||
MockModule(L0::Device *device,
|
|
||||||
L0::ModuleBuildLog *moduleBuildLog,
|
|
||||||
L0::ModuleType type,
|
|
||||||
uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) {
|
|
||||||
mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize);
|
|
||||||
}
|
|
||||||
|
|
||||||
~MockModule() {
|
|
||||||
delete mockKernelImmData;
|
|
||||||
}
|
|
||||||
|
|
||||||
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
|
|
||||||
return mockKernelImmData;
|
|
||||||
}
|
|
||||||
MockImmutableData *mockKernelImmData = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
class MockKernel : public WhiteBox<L0::KernelImp> {
|
|
||||||
public:
|
|
||||||
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
|
|
||||||
}
|
|
||||||
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
~MockKernel() override {
|
|
||||||
}
|
|
||||||
std::unique_ptr<Kernel> clone() const override { return nullptr; }
|
|
||||||
};
|
|
||||||
|
|
||||||
void SetUp() override {
|
|
||||||
DeviceFixture::SetUp();
|
|
||||||
}
|
|
||||||
|
|
||||||
void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) {
|
|
||||||
std::string testFile;
|
|
||||||
retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
|
|
||||||
|
|
||||||
size_t size = 0;
|
|
||||||
auto src = loadDataFromFile(
|
|
||||||
testFile.c_str(),
|
|
||||||
size);
|
|
||||||
|
|
||||||
ASSERT_NE(0u, size);
|
|
||||||
ASSERT_NE(nullptr, src);
|
|
||||||
|
|
||||||
ze_module_desc_t moduleDesc = {};
|
|
||||||
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
|
|
||||||
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
|
|
||||||
moduleDesc.inputSize = size;
|
|
||||||
|
|
||||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
|
||||||
|
|
||||||
module = std::make_unique<MockModule>(device,
|
|
||||||
moduleBuildLog,
|
|
||||||
ModuleType::User,
|
|
||||||
perHwThreadPrivateMemorySize);
|
|
||||||
}
|
|
||||||
|
|
||||||
void createKernel(MockKernel *kernel) {
|
|
||||||
ze_kernel_desc_t desc = {};
|
|
||||||
desc.pKernelName = kernelName.c_str();
|
|
||||||
kernel->initialize(&desc);
|
|
||||||
}
|
|
||||||
|
|
||||||
void TearDown() override {
|
|
||||||
DeviceFixture::TearDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string binaryFilename = "test_kernel";
|
|
||||||
const std::string kernelName = "test";
|
|
||||||
const uint32_t numKernelArguments = 6;
|
|
||||||
std::unique_ptr<MockModule> module;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ModuleFixture : public DeviceFixture {
|
struct ModuleFixture : public DeviceFixture {
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
DeviceFixture::SetUp();
|
DeviceFixture::SetUp();
|
||||||
|
|
|
@ -27,6 +27,7 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData {
|
||||||
using ::L0::KernelImmutableData::isaGraphicsAllocation;
|
using ::L0::KernelImmutableData::isaGraphicsAllocation;
|
||||||
using ::L0::KernelImmutableData::kernelDescriptor;
|
using ::L0::KernelImmutableData::kernelDescriptor;
|
||||||
using ::L0::KernelImmutableData::KernelImmutableData;
|
using ::L0::KernelImmutableData::KernelImmutableData;
|
||||||
|
using ::L0::KernelImmutableData::privateMemoryGraphicsAllocation;
|
||||||
using ::L0::KernelImmutableData::residencyContainer;
|
using ::L0::KernelImmutableData::residencyContainer;
|
||||||
|
|
||||||
WhiteBox() : ::L0::KernelImmutableData() {}
|
WhiteBox() : ::L0::KernelImmutableData() {}
|
||||||
|
|
|
@ -254,43 +254,19 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
|
||||||
delete[] kernelNameRetrieved;
|
delete[] kernelNameRetrieved;
|
||||||
}
|
}
|
||||||
|
|
||||||
class KernelImmutableDataTests : public ModuleImmutableDataFixture, public ::testing::Test {
|
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
|
||||||
public:
|
uint32_t computeUnitsUsedForSratch = 0x300;
|
||||||
void SetUp() override {
|
|
||||||
ModuleImmutableDataFixture::SetUp();
|
|
||||||
}
|
|
||||||
|
|
||||||
void TearDown() override {
|
KernelInfo kernelInfo;
|
||||||
ModuleImmutableDataFixture::TearDown();
|
auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
|
||||||
}
|
kernelAttributes.perHwThreadPrivateMemorySize = 0x100;
|
||||||
};
|
kernelAttributes.simdSize = 8;
|
||||||
|
|
||||||
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
|
KernelImmutableData kernelImmutableData(device);
|
||||||
uint32_t perHwThreadPrivateMemorySizeRequested = 0u;
|
kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false);
|
||||||
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
|
|
||||||
|
|
||||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
size_t expectedSize = static_cast<size_t>(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch;
|
||||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
|
||||||
|
|
||||||
createKernel(kernel.get());
|
|
||||||
|
|
||||||
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
|
|
||||||
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
|
|
||||||
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
|
|
||||||
|
|
||||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
|
||||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
|
||||||
|
|
||||||
createKernel(kernel.get());
|
|
||||||
|
|
||||||
EXPECT_NE(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
|
|
||||||
|
|
||||||
size_t expectedSize = perHwThreadPrivateMemorySizeRequested *
|
|
||||||
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
|
|
||||||
EXPECT_EQ(expectedSize, kernel->getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
|
HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
|
||||||
|
|
Loading…
Reference in New Issue