fix: one transfer per kernel ISA allocation(s) page

If several kernel heaps share the same page, use a temporary buffer to
collect all of them and transfer them to memory in one shot.
Previously, several transfers were performed (one per kernel) and, as
observed, they were at times not immediately effective.

Related-To: NEO-7788
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2023-10-03 23:09:30 +00:00
committed by Compute-Runtime-Automation
parent 9134a4e1bc
commit f553d9f76b
5 changed files with 112 additions and 49 deletions

View File

@@ -820,7 +820,23 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedThenIsaIs
mockMemoryManager->copyMemoryToAllocationCalledTimes);
}
TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking) {
// Value-parameterized fixture built on ModuleImmutableDataFixture. Each parameter is a
// std::pair that the test body unpacks as (kernel heap size, number of copies to the
// allocation expected during module initialization).
struct KernelIsaCopyingMomentTest : public ModuleImmutableDataFixture, public ::testing::TestWithParam<std::pair<uint32_t, size_t>> {
// gtest hook: forwards to the fixture's own setUp().
void SetUp() override {
ModuleImmutableDataFixture::setUp();
}
// gtest hook: forwards to the fixture's own tearDown().
void TearDown() override {
ModuleImmutableDataFixture::tearDown();
}
};
// Parameter sets for KernelIsaCopyingMomentTest:
// {kernel heap size, copies to the allocation expected at module initialization}.
std::pair<uint32_t, size_t> kernelIsaCopyingPairs[] = {
// Heap of size 1: one copy is expected at module initialization.
{1, 1},
// Heap larger than pageSize64k: no copy is expected at module initialization.
{static_cast<uint32_t>(MemoryConstants::pageSize64k + 1), 0}}; // pageSize64k is a common upper-bound for both system and local memory
// Runs every TEST_P of the fixture once per entry of kernelIsaCopyingPairs (no instantiation name prefix).
INSTANTIATE_TEST_CASE_P(, KernelIsaCopyingMomentTest, testing::ValuesIn(kernelIsaCopyingPairs));
TEST_P(KernelIsaCopyingMomentTest, givenInternalModuleWhenKernelIsCreatedThenIsaCopiedDuringLinkingOnlyIfCanFitInACommonParentPage) {
auto [testKernelHeapSize, numberOfCopiesToAllocationAtModuleInitialization] = GetParam();
auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip);
@@ -845,7 +861,7 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
kernelInfo->heapInfo.kernelHeapSize = 1;
kernelInfo->heapInfo.kernelHeapSize = testKernelHeapSize;
kernelInfo->heapInfo.pKernelHeap = &kernelHeap;
Mock<::L0::KernelImp> kernelMock;
@@ -864,15 +880,20 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
result = moduleMock->initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes;
size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes +
numberOfCopiesToAllocationAtModuleInitialization;
EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
for (auto &ki : moduleMock->kernelImmDatas) {
EXPECT_FALSE(ki->isIsaCopiedToAllocation());
bool isaExpectedToBeCopied = (numberOfCopiesToAllocationAtModuleInitialization != 0u);
EXPECT_EQ(isaExpectedToBeCopied, ki->isIsaCopiedToAllocation());
}
expectedPreviouscopyMemoryToAllocationCalledTimes++;
if (numberOfCopiesToAllocationAtModuleInitialization == 0) {
// For large builtin kernels copying is not optimized and is done at kernel initialization
expectedPreviouscopyMemoryToAllocationCalledTimes++;
}
ze_kernel_desc_t desc = {};
desc.pKernelName = "";

View File

@@ -222,7 +222,11 @@ HWTEST_F(ModuleTest, givenBlitterAvailableWhenCopyingPatchedSegmentsThenIsaIsTra
auto &productHelper = device.getProductHelper();
auto &rootDeviceEnvironment = device.getNEODevice()->getRootDeviceEnvironment();
if (productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *module->getKernelImmutableDataVector()[0]->getIsaGraphicsAllocation())) {
EXPECT_EQ(zebinData->numOfKernels, blitterCalled);
if (module->getKernelsIsaParentAllocation()) {
EXPECT_EQ(1u, blitterCalled);
} else {
EXPECT_EQ(zebinData->numOfKernels, blitterCalled);
}
} else {
EXPECT_EQ(0u, blitterCalled);
}
@@ -3788,7 +3792,13 @@ TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsRetu
// Mock translation unit whose createFromNativeBinary() always reports ZE_RESULT_SUCCESS.
class MyMockModuleTU : public MockModuleTU {
public:
using MockModuleTU::MockModuleTU;
ze_result_t createFromNativeBinary(const char *input, size_t inputSize) override { return ZE_RESULT_SUCCESS; }
// Ignores both arguments; points the first kernel info's heap at mockKernelHeap,
// sets its heap size, and returns success.
// NOTE(review): assumes programInfo.kernelInfos already holds at least one entry — confirm against the fixture.
ze_result_t createFromNativeBinary(const char *input, size_t inputSize) override {
programInfo.kernelInfos[0]->heapInfo.pKernelHeap = &mockKernelHeap;
programInfo.kernelInfos[0]->heapInfo.kernelHeapSize = 4; // presumably sizeof(mockKernelHeap) — TODO confirm
return ZE_RESULT_SUCCESS;
}
// Backing storage for the fake kernel heap; 0xDEAD is presumably an arbitrary placeholder value.
uint32_t mockKernelHeap = 0xDEAD;
};
const auto &compilerProductHelper = neoDevice->getRootDeviceEnvironment().getHelper<CompilerProductHelper>();