mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
fix: one transfer per kernel ISA allocation(s) page
If several kernel heaps share the same page, use a temporary buffer to collect all of them and transfer them to memory in one shot. Previously, a separate transfer was performed for each kernel and, as observed, those transfers did not always take effect immediately. Related-To: NEO-7788 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9134a4e1bc
commit
f553d9f76b
@@ -820,7 +820,23 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedThenIsaIs
|
||||
mockMemoryManager->copyMemoryToAllocationCalledTimes);
|
||||
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking) {
|
||||
struct KernelIsaCopyingMomentTest : public ModuleImmutableDataFixture, public ::testing::TestWithParam<std::pair<uint32_t, size_t>> {
|
||||
void SetUp() override {
|
||||
ModuleImmutableDataFixture::setUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ModuleImmutableDataFixture::tearDown();
|
||||
}
|
||||
};
|
||||
std::pair<uint32_t, size_t> kernelIsaCopyingPairs[] = {
|
||||
{1, 1},
|
||||
{static_cast<uint32_t>(MemoryConstants::pageSize64k + 1), 0}}; // pageSize64k is a common upper bound for both system and local memory
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(, KernelIsaCopyingMomentTest, testing::ValuesIn(kernelIsaCopyingPairs));
|
||||
|
||||
TEST_P(KernelIsaCopyingMomentTest, givenInternalModuleWhenKernelIsCreatedThenIsaCopiedDuringLinkingOnlyIfCanFitInACommonParentPage) {
|
||||
auto [testKernelHeapSize, numberOfCopiesToAllocationAtModuleInitialization] = GetParam();
|
||||
|
||||
auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip);
|
||||
@@ -845,7 +861,7 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
kernelInfo->heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo->heapInfo.kernelHeapSize = testKernelHeapSize;
|
||||
kernelInfo->heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
Mock<::L0::KernelImp> kernelMock;
|
||||
@@ -864,15 +880,20 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
|
||||
result = moduleMock->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes;
|
||||
size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes +
|
||||
numberOfCopiesToAllocationAtModuleInitialization;
|
||||
|
||||
EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
|
||||
|
||||
for (auto &ki : moduleMock->kernelImmDatas) {
|
||||
EXPECT_FALSE(ki->isIsaCopiedToAllocation());
|
||||
bool isaExpectedToBeCopied = (numberOfCopiesToAllocationAtModuleInitialization != 0u);
|
||||
EXPECT_EQ(isaExpectedToBeCopied, ki->isIsaCopiedToAllocation());
|
||||
}
|
||||
|
||||
expectedPreviouscopyMemoryToAllocationCalledTimes++;
|
||||
if (numberOfCopiesToAllocationAtModuleInitialization == 0) {
|
||||
// For large builtin kernels copying is not optimized and is done at kernel initialization
|
||||
expectedPreviouscopyMemoryToAllocationCalledTimes++;
|
||||
}
|
||||
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = "";
|
||||
|
||||
@@ -222,7 +222,11 @@ HWTEST_F(ModuleTest, givenBlitterAvailableWhenCopyingPatchedSegmentsThenIsaIsTra
|
||||
auto &productHelper = device.getProductHelper();
|
||||
auto &rootDeviceEnvironment = device.getNEODevice()->getRootDeviceEnvironment();
|
||||
if (productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *module->getKernelImmutableDataVector()[0]->getIsaGraphicsAllocation())) {
|
||||
EXPECT_EQ(zebinData->numOfKernels, blitterCalled);
|
||||
if (module->getKernelsIsaParentAllocation()) {
|
||||
EXPECT_EQ(1u, blitterCalled);
|
||||
} else {
|
||||
EXPECT_EQ(zebinData->numOfKernels, blitterCalled);
|
||||
}
|
||||
} else {
|
||||
EXPECT_EQ(0u, blitterCalled);
|
||||
}
|
||||
@@ -3788,7 +3792,13 @@ TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsRetu
|
||||
class MyMockModuleTU : public MockModuleTU {
|
||||
public:
|
||||
using MockModuleTU::MockModuleTU;
|
||||
ze_result_t createFromNativeBinary(const char *input, size_t inputSize) override { return ZE_RESULT_SUCCESS; }
|
||||
ze_result_t createFromNativeBinary(const char *input, size_t inputSize) override {
|
||||
programInfo.kernelInfos[0]->heapInfo.pKernelHeap = &mockKernelHeap;
|
||||
programInfo.kernelInfos[0]->heapInfo.kernelHeapSize = 4;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t mockKernelHeap = 0xDEAD;
|
||||
};
|
||||
|
||||
const auto &compilerProductHelper = neoDevice->getRootDeviceEnvironment().getHelper<CompilerProductHelper>();
|
||||
|
||||
Reference in New Issue
Block a user