Correct device usage in kernel methods

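- Pass the Device to Kernel::substituteKernelHeap instead of a root device index, so the kernel allocation is recreated on that device rather than on the kernel's default device.
- In Kernel::provideInitializationHints, iterate over the kernel's devices and index per-device data by each device's root device index.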

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2020-12-17 09:54:45 +00:00
committed by Compute-Runtime-Automation
parent 26dc1de24d
commit ee0523ae23
7 changed files with 58 additions and 34 deletions

View File

@@ -63,11 +63,11 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
}
if (isGTPinInitialized) {
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
auto device = pKernel->getDevices()[0];
auto rootDeviceIndex = device->getRootDeviceIndex();
auto &device = pKernel->getDevices()[0]->getDevice();
auto rootDeviceIndex = device.getRootDeviceIndex();
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex);
// Enlarge local copy of SSH by 1 SS
GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily;
GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) {
// Kernel with no SSH or Kernel EM, not supported
@@ -97,7 +97,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
instrument_params_out_t paramsOut = {0};
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, &paramsIn, &paramsOut);
// Substitute ISA of created kernel with instrumented code
pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
pKernel->substituteKernelHeap(device, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id);
}
}

View File

@@ -794,7 +794,8 @@ size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize;
}
void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize) {
void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize) {
auto rootDeviceIndex = device.getRootDeviceIndex();
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap);
*pKernelHeap = newKernelHeap;
@@ -810,7 +811,7 @@ void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap,
} else {
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
pKernelInfo->kernelAllocation = nullptr;
status = pKernelInfo->createKernelAllocation(getDevice().getDevice(), isBuiltIn);
status = pKernelInfo->createKernelAllocation(device, isBuiltIn);
}
UNRECOVERABLE_IF(!status);
}
@@ -2300,21 +2301,21 @@ void Kernel::provideInitializationHints() {
Context *context = program->getContextPtr();
if (context == nullptr || !context->isProvidingPerformanceHints())
return;
for (auto i = 0u; i < kernelDeviceInfos.size(); i++) {
if (!kernelInfos[i]) {
continue;
}
if (kernelDeviceInfos[i].privateSurfaceSize) {
for (auto &pClDevice : getDevices()) {
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
if (kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH,
kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[i].privateSurfaceSize);
kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(),
kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize);
}
const auto &patchInfo = kernelInfos[i]->patchInfo;
const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo;
if (patchInfo.mediavfestate) {
auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace;
scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(i).getMaxSimdSize();
scratchSize *= pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize();
if (scratchSize > 0) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
}
}
}

View File

@@ -170,7 +170,7 @@ class Kernel : public BaseObject<_cl_kernel> {
void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize);
void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize);
bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const;
uint64_t getKernelId(uint32_t rootDeviceIndex) const;
void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId);