mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Correct device usage in kernel methods
Pass device to substituteKernelHeap; use proper device when iterating over devices. Related-To: NEO-5001. Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
26dc1de24d
commit
ee0523ae23
@@ -63,11 +63,11 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
||||
}
|
||||
if (isGTPinInitialized) {
|
||||
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||
auto device = pKernel->getDevices()[0];
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
auto &device = pKernel->getDevices()[0]->getDevice();
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex);
|
||||
// Enlarge local copy of SSH by 1 SS
|
||||
GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily;
|
||||
GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
|
||||
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
|
||||
if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) {
|
||||
// Kernel with no SSH or Kernel EM, not supported
|
||||
@@ -97,7 +97,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
||||
instrument_params_out_t paramsOut = {0};
|
||||
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, &paramsIn, &paramsOut);
|
||||
// Substitute ISA of created kernel with instrumented code
|
||||
pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
|
||||
pKernel->substituteKernelHeap(device, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
|
||||
pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -794,7 +794,8 @@ size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize;
|
||||
}
|
||||
|
||||
void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize) {
|
||||
void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize) {
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
|
||||
void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap);
|
||||
*pKernelHeap = newKernelHeap;
|
||||
@@ -810,7 +811,7 @@ void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap,
|
||||
} else {
|
||||
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
|
||||
pKernelInfo->kernelAllocation = nullptr;
|
||||
status = pKernelInfo->createKernelAllocation(getDevice().getDevice(), isBuiltIn);
|
||||
status = pKernelInfo->createKernelAllocation(device, isBuiltIn);
|
||||
}
|
||||
UNRECOVERABLE_IF(!status);
|
||||
}
|
||||
@@ -2300,21 +2301,21 @@ void Kernel::provideInitializationHints() {
|
||||
Context *context = program->getContextPtr();
|
||||
if (context == nullptr || !context->isProvidingPerformanceHints())
|
||||
return;
|
||||
for (auto i = 0u; i < kernelDeviceInfos.size(); i++) {
|
||||
if (!kernelInfos[i]) {
|
||||
continue;
|
||||
}
|
||||
if (kernelDeviceInfos[i].privateSurfaceSize) {
|
||||
|
||||
for (auto &pClDevice : getDevices()) {
|
||||
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
|
||||
if (kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH,
|
||||
kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[i].privateSurfaceSize);
|
||||
kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize);
|
||||
}
|
||||
const auto &patchInfo = kernelInfos[i]->patchInfo;
|
||||
const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo;
|
||||
if (patchInfo.mediavfestate) {
|
||||
auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace;
|
||||
scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(i).getMaxSimdSize();
|
||||
scratchSize *= pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize();
|
||||
if (scratchSize > 0) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
|
||||
kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
|
||||
kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,7 +170,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
|
||||
|
||||
void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize);
|
||||
void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize);
|
||||
bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const;
|
||||
uint64_t getKernelId(uint32_t rootDeviceIndex) const;
|
||||
void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId);
|
||||
|
||||
Reference in New Issue
Block a user