diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 0e30c75385..f97ea83df9 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1104,6 +1104,20 @@ TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonIntern EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType()); } +TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) { + uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + KernelImmutableData kernelImmutableData(device); + kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false); + auto graphicsAllocation = kernelImmutableData.getIsaGraphicsAllocation(); + auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily); + size_t isaPadding = hwHelper.getPaddingForISAAllocation(); + EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo.heapInfo.KernelHeapSize + isaPadding); +} + TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) { uint32_t kernelHeap = 0; KernelInfo kernelInfo; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index a718bf59ae..62c72aca21 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -734,7 +734,11 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize(); bool status = false; - if (currentAllocationSize >= newKernelHeapSize) { + + const auto &hwInfo = clDevice.getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + size_t isaPadding = hwHelper.getPaddingForISAAllocation(); + if (currentAllocationSize >= newKernelHeapSize + isaPadding) { auto &hwInfo = clDevice.getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); status = MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *pKernelInfo->getGraphicsAllocation()), diff --git a/opencl/test/unit_test/fixtures/kernel_data_fixture.cpp b/opencl/test/unit_test/fixtures/kernel_data_fixture.cpp index 1c3eee71dd..d8cb2c5b13 100644 --- a/opencl/test/unit_test/fixtures/kernel_data_fixture.cpp +++ b/opencl/test/unit_test/fixtures/kernel_data_fixture.cpp @@ -116,7 +116,10 @@ void KernelDataTest::buildAndDecode() { if (kernelHeapSize) { auto kernelAllocation = pKernelInfo->getGraphicsAllocation(); UNRECOVERABLE_IF(kernelAllocation == nullptr); - EXPECT_EQ(kernelAllocation->getUnderlyingBufferSize(), kernelHeapSize); + auto &device = pContext->getDevice(0)->getDevice(); + auto &hwHelper = NEO::HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); + size_t isaPadding = hwHelper.getPaddingForISAAllocation(); + EXPECT_EQ(kernelAllocation->getUnderlyingBufferSize(), kernelHeapSize + isaPadding); auto kernelIsa = kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, pKernelInfo->heapInfo.pKernelHeap, kernelHeapSize)); } else { diff --git a/opencl/test/unit_test/helpers/hw_helper_default_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_default_tests.cpp index 9885360ae5..021acc8b12 100644 --- a/opencl/test/unit_test/helpers/hw_helper_default_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_default_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 7591fd1a77..1c9b63d098 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2468,7 +2468,25 @@ TEST(KernelInfoTest, GivenArgNameWhenGettingArgNumberByNameThenCorrectValueIsRet EXPECT_EQ(-1, info.getArgNumByName("arg1")); } -TEST(KernelTest, GivenNormalKernelWhenGettingInstructionHeapSizeForExecutionModelThenZeroIsReturned) { +TEST(KernelInfoTest, givenHwHelperWhenCreatingKernelAllocationThenCorrectPaddingIsAdded) { + + std::unique_ptr clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), mockRootDeviceIndex)); + std::unique_ptr context = std::make_unique(clDevice.get()); + + std::unique_ptr mockKernel = std::make_unique(*clDevice, context.get()); + uint32_t kernelHeap = 0; + mockKernel->kernelInfo.heapInfo.KernelHeapSize = 1; + mockKernel->kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + mockKernel->kernelInfo.createKernelAllocation(clDevice->getDevice(), false); + + auto graphicsAllocation = mockKernel->kernelInfo.getGraphicsAllocation(); + auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + size_t isaPadding = hwHelper.getPaddingForISAAllocation(); + EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), mockKernel->kernelInfo.heapInfo.KernelHeapSize + isaPadding); + clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation()); +} + +TEST(KernelTest, givenNormalKernelWhenGettingInstructionHeapSizeForExecutionModelThenZeroIsReturned) { auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp index d38ceb70d3..66b05e79ea 100644 --- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp +++ b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp @@ -27,7 +27,8 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); - EXPECT_EQ(initialHeapSize, firstAllocationSize); + size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); + EXPECT_EQ(firstAllocationSize, initialHeapSize + isaPadding); auto firstAllocationId = static_cast(firstAllocation)->id; @@ -38,8 +39,8 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); - EXPECT_NE(initialHeapSize, secondAllocationSize); - EXPECT_EQ(newHeapSize, secondAllocationSize); + EXPECT_NE(secondAllocationSize, initialHeapSize + isaPadding); + EXPECT_EQ(secondAllocationSize, newHeapSize + isaPadding); auto secondAllocationId = static_cast(secondAllocation)->id; EXPECT_NE(firstAllocationId, secondAllocationId); @@ -57,7 +58,8 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); - EXPECT_EQ(initialHeapSize, firstAllocationSize); + size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); + EXPECT_EQ(firstAllocationSize, initialHeapSize + isaPadding); auto firstAllocationId = static_cast(firstAllocation)->id; @@ -68,7 +70,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); - EXPECT_EQ(initialHeapSize, secondAllocationSize); + EXPECT_EQ(secondAllocationSize, initialHeapSize + isaPadding); auto secondAllocationId = static_cast(secondAllocation)->id; EXPECT_EQ(firstAllocationId, secondAllocationId); @@ -86,7 +88,8 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); - EXPECT_EQ(initialHeapSize, firstAllocationSize); + size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); + EXPECT_EQ(firstAllocationSize, initialHeapSize + isaPadding); auto firstAllocationId = static_cast(firstAllocation)->id; @@ -97,7 +100,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); - EXPECT_EQ(initialHeapSize, secondAllocationSize); + EXPECT_EQ(secondAllocationSize, initialHeapSize + isaPadding); auto secondAllocationId = static_cast(secondAllocation)->id; EXPECT_EQ(firstAllocationId, secondAllocationId); diff --git a/opencl/test/unit_test/program/kernel_info_tests.cpp b/opencl/test/unit_test/program/kernel_info_tests.cpp index e67484c273..0cdeee33fb 100644 --- a/opencl/test/unit_test/program/kernel_info_tests.cpp +++ b/opencl/test/unit_test/program/kernel_info_tests.cpp @@ -45,7 +45,8 @@ TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationThenCopyWholeKerne EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), heap, heapSize)); - EXPECT_EQ(heapSize, allocation->getUnderlyingBufferSize()); + size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); + EXPECT_EQ(allocation->getUnderlyingBufferSize(), heapSize + isaPadding); device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 27d97cd065..37a32860c9 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -579,7 +579,9 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphi auto graphicsAllocation = kernelInfo->getGraphicsAllocation(); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); - EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo->heapInfo.KernelHeapSize); + auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + size_t isaPadding = hwHelper.getPaddingForISAAllocation(); + EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo->heapInfo.KernelHeapSize + isaPadding); auto kernelIsa = graphicsAllocation->getUnderlyingBuffer(); EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 5072e2e0f5..a03e64c940 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -48,6 +48,7 @@ class HwHelper { virtual uint32_t getBindingTableStateAlignement() const = 0; virtual size_t getInterfaceDescriptorDataSize() const = 0; virtual size_t getMaxBarrierRegisterPerSlice() const = 0; + virtual size_t getPaddingForISAAllocation() const = 0; virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0; virtual uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) const = 0; virtual uint32_t getMaxNumSamplers() const = 0; @@ -197,6 +198,8 @@ class HwHelperHw : public HwHelper { size_t getMaxBarrierRegisterPerSlice() const override; + size_t getPaddingForISAAllocation() const override; + uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override; uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index cc1ae86267..7cd3e584aa 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -59,6 +59,11 @@ size_t HwHelperHw::getMaxBarrierRegisterPerSlice() const { return 32; } +template +size_t HwHelperHw::getPaddingForISAAllocation() const { + return 512; +} + template uint32_t HwHelperHw::getPitchAlignmentForImage(const HardwareInfo *hwInfo) const { return 4u; diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 86cf8e9759..2d9b3301b6 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -409,7 +409,12 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo } allocationData.hostPtr = hostPtr; - allocationData.size = properties.size; + if (properties.allocationType == GraphicsAllocation::AllocationType::KERNEL_ISA || + properties.allocationType == GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL) { + allocationData.size = properties.size + hwHelper.getPaddingForISAAllocation(); + } else { + allocationData.size = properties.size; + } allocationData.type = properties.allocationType; allocationData.storageInfo = storageInfo; allocationData.alignment = properties.alignment ? properties.alignment : MemoryConstants::preferredAlignment;