diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 917040fa18..b56dca29bf 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -63,11 +63,11 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { } if (isGTPinInitialized) { auto pKernel = castToObjectOrAbort<Kernel>(kernel); - auto device = pKernel->getDevices()[0]; - auto rootDeviceIndex = device->getRootDeviceIndex(); + auto &device = pKernel->getDevices()[0]->getDevice(); + auto rootDeviceIndex = device.getRootDeviceIndex(); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); // Enlarge local copy of SSH by 1 SS - GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily; + GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily); if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) { // Kernel with no SSH or Kernel EM, not supported @@ -97,7 +97,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { instrument_params_out_t paramsOut = {0}; (*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, &paramsIn, &paramsOut); // Substitute ISA of created kernel with instrumented code - pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size); + pKernel->substituteKernelHeap(device, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size); pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id); } } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 67b477cc3f..87f01bef5e 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -794,7 +794,8 @@ size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const { return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize; } -void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, 
size_t newKernelHeapSize) { +void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize) { + auto rootDeviceIndex = device.getRootDeviceIndex(); KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex)); void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap); *pKernelHeap = newKernelHeap; @@ -810,7 +811,7 @@ void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, } else { memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation); pKernelInfo->kernelAllocation = nullptr; - status = pKernelInfo->createKernelAllocation(getDevice().getDevice(), isBuiltIn); + status = pKernelInfo->createKernelAllocation(device, isBuiltIn); } UNRECOVERABLE_IF(!status); } @@ -2300,21 +2301,21 @@ void Kernel::provideInitializationHints() { Context *context = program->getContextPtr(); if (context == nullptr || !context->isProvidingPerformanceHints()) return; - for (auto i = 0u; i < kernelDeviceInfos.size(); i++) { - if (!kernelInfos[i]) { - continue; - } - if (kernelDeviceInfos[i].privateSurfaceSize) { + + for (auto &pClDevice : getDevices()) { + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); + if (kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH, - kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[i].privateSurfaceSize); + kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize); } - const auto &patchInfo = kernelInfos[i]->patchInfo; + const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; if (patchInfo.mediavfestate) { auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace; - scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(i).getMaxSimdSize(); + scratchSize *= 
pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize(); if (scratchSize > 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, - kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); + kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); } } } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 3061944ddb..878b269834 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -170,7 +170,7 @@ class Kernel : public BaseObject<_cl_kernel> { void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); - void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize); + void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize); bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const; uint64_t getKernelId(uint32_t rootDeviceIndex) const; void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId); diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index 5c06ecc61b..ca156332df 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -707,22 +707,30 @@ TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerfor TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { - auto pDevice = castToObject<ClDevice>(devices[0]); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); auto size = zeroSized ? 
0 : 1024; - MockKernelWithInternals mockKernel(*pDevice, context); + MockKernelWithInternals mockKernel(context->getDevices(), context); SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = size; + uint32_t computeUnitsForScratch[] = {0x10, 0x20}; + + for (auto &pClDevice : context->getDevices()) { + auto &deviceInfo = const_cast<DeviceInfo &>(pClDevice->getSharedDeviceInfo()); + deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()]; + } + mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; - size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); mockKernel.mockKernel->initialize(); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], - mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), size); - EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); + for (auto &pClDevice : context->getDevices()) { + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); + auto expectedSize = size * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], + mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize); + EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); + } } TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) { diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 0c335f41d7..b4f7988864 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -2328,7 +2328,7 @@ 
TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { // Substitute new kernel code constexpr size_t newCodeSize = 64; uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4}; - pKernel->substituteKernelHeap(rootDeviceIndex, &newCode[0], newCodeSize); + pKernel->substituteKernelHeap(pDevice->getDevice(), &newCode[0], newCodeSize); // Verify that substitution went properly isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex); diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp index 92d58afccc..510dd1024e 100644 --- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp +++ b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp @@ -34,7 +34,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT const size_t newHeapSize = initialHeapSize + 1; char newHeap[newHeapSize]; - kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize); + kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); @@ -64,7 +64,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen const size_t newHeapSize = initialHeapSize; char newHeap[newHeapSize]; - kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize); + kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); @@ -93,7 +93,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT const size_t newHeapSize = initialHeapSize - 1; char newHeap[newHeapSize]; - 
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize); + kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); @@ -125,7 +125,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); - kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize); + kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 24c79f84e2..d800a10407 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -254,7 +254,7 @@ class MockKernel : public Kernel { //class below have enough internals to service Enqueue operation. 
class MockKernelWithInternals { public: - MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) { + MockKernelWithInternals(const ClDeviceVector &deviceVector, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) { memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon)); memset(&threadPayload, 0, sizeof(SPatchThreadPayload)); memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream)); @@ -284,15 +284,27 @@ class MockKernelWithInternals { context->incRefInternal(); mockContext = context; } - ClDeviceVector deviceVector; - deviceVector.push_back(&deviceArg); - kernelInfos.resize(deviceArg.getRootDeviceIndex() + 1); - kernelInfos[deviceArg.getRootDeviceIndex()] = &kernelInfo; + auto maxRootDeviceIndex = 0u; + + for (const auto &pClDevice : deviceVector) { + if (pClDevice->getRootDeviceIndex() > maxRootDeviceIndex) { + maxRootDeviceIndex = pClDevice->getRootDeviceIndex(); + } + } + + kernelInfos.resize(maxRootDeviceIndex + 1); + + for (const auto &pClDevice : deviceVector) { + kernelInfos[pClDevice->getRootDeviceIndex()] = &kernelInfo; + } mockProgram = new MockProgram(context, false, deviceVector); mockKernel = new MockKernel(mockProgram, kernelInfos); mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData)); - mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), deviceArg.getRootDeviceIndex()); + + for (const auto &pClDevice : deviceVector) { + mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), pClDevice->getRootDeviceIndex()); + } if (addDefaultArg) { defaultKernelArguments.resize(2); @@ -323,6 +335,9 @@ class MockKernelWithInternals { kernelInfo.kernelArgInfo[0].offsetHeap = 64; } } + + MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) : MockKernelWithInternals(toClDeviceVector(deviceArg), context, addDefaultArg, 
execEnv) { + } MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment execEnv) : MockKernelWithInternals(deviceArg, nullptr, false, execEnv) { mockKernel->initialize(); }