mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 17:13:29 +08:00
Store single KernelInfo in Kernel
remove root device index from Kernel's methods
Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ecceddcab6
commit
7098e9c5f2
@@ -536,15 +536,14 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList
|
||||
bool CommandQueue::setupDebugSurface(Kernel *kernel) {
|
||||
auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers(rootDeviceIndex));
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
|
||||
kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful);
|
||||
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers());
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()),
|
||||
kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful);
|
||||
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
|
||||
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch,
|
||||
addressToPatch, 0, debugSurface, 0, 0,
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
kernel->getTotalNumDevicesInContext());
|
||||
return true;
|
||||
}
|
||||
@@ -894,7 +893,7 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) {
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto kernelName = dispatchInfo.getKernel()->getKernelInfo(device->getRootDeviceIndex()).kernelDescriptor.kernelMetadata.kernelName;
|
||||
auto kernelName = dispatchInfo.getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
|
||||
getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,11 +67,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
} else {
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
|
||||
kernel->updateAuxTranslationRequired();
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation, rootDeviceIndex);
|
||||
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
|
||||
if (!kernelObjsForAuxTranslation.empty()) {
|
||||
@@ -86,13 +85,13 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) {
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
||||
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
||||
builder.setKernel(kernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
} else {
|
||||
auto builder = kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder;
|
||||
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
||||
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
|
||||
|
||||
if (multiDispatchInfo.size() == 0) {
|
||||
@@ -357,7 +356,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
if (blockQueue) {
|
||||
if (parentKernel) {
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, device->getRootDeviceIndex());
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
}
|
||||
|
||||
@@ -400,7 +399,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
printfHandler->prepareDispatch(multiDispatchInfo);
|
||||
}
|
||||
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(device->getRootDeviceIndex())) {
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
|
||||
auto &gws = multiDispatchInfo.begin()->getGWS();
|
||||
auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize();
|
||||
size_t workGroupsCount = (gws.x * gws.y * gws.z) /
|
||||
@@ -569,8 +568,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
bool &blocking) {
|
||||
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
auto rootDeviceIndex = devQueueHw->getDevice().getRootDeviceIndex();
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType());
|
||||
|
||||
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
|
||||
@@ -684,8 +682,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
printfHandler->makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(rootDeviceIndex)) {
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
|
||||
device->getDevice().syncBufferHandler->makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
@@ -722,7 +719,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
kernel->makeResident(getGpgpuCommandStreamReceiver());
|
||||
requiresCoherency |= kernel->requiresCoherency();
|
||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
|
||||
auxTranslationRequired |= kernel->isAuxTranslationRequired();
|
||||
@@ -730,11 +727,11 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
|
||||
if (kernel->requiresPerDssBackedBuffer(rootDeviceIndex)) {
|
||||
if (kernel->requiresPerDssBackedBuffer()) {
|
||||
usePerDssBackedBuffer = true;
|
||||
}
|
||||
|
||||
if (kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) {
|
||||
if (kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) {
|
||||
useGlobalAtomics = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,8 +37,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||
size_t enqueuedLocalWorkSize[3] = {0, 0, 0};
|
||||
|
||||
auto &kernel = *pKernel;
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex);
|
||||
const auto &kernelInfo = kernel.getKernelInfo();
|
||||
|
||||
if (kernel.isParentKernel && !this->context->getDefaultDeviceQueue()) {
|
||||
return CL_INVALID_OPERATION;
|
||||
@@ -109,7 +108,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||
Surface *surfaces[] = {&s};
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
if (kernel.hasPrintfOutput(rootDeviceIndex)) {
|
||||
if (kernel.hasPrintfOutput()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
}
|
||||
if (kernel.requiresCoherency()) {
|
||||
|
||||
@@ -102,7 +102,7 @@ class GpgpuWalkerHelper {
|
||||
bool disablePerfMode);
|
||||
|
||||
static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
|
||||
static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex);
|
||||
static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel);
|
||||
|
||||
static size_t setGpgpuWalkerThreadData(
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
@@ -200,7 +200,7 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
|
||||
|
||||
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
|
||||
if (heapType == IndirectHeap::SURFACE_STATE) {
|
||||
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, commandQueue.getDevice().getRootDeviceIndex());
|
||||
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
|
||||
{
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
|
||||
|
||||
@@ -172,7 +172,7 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) {
|
||||
size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
|
||||
@@ -68,8 +68,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed) {
|
||||
|
||||
auto rootDeviceIndex = devQueueHw.getDevice().getRootDeviceIndex();
|
||||
const auto &kernelInfo = scheduler.getKernelInfo(rootDeviceIndex);
|
||||
const auto &kernelInfo = scheduler.getKernelInfo();
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
@@ -117,8 +116,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd<GPGPU_WALKER>();
|
||||
GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker;
|
||||
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler, rootDeviceIndex);
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler, rootDeviceIndex);
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
@@ -126,7 +125,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
*ioh,
|
||||
*ssh,
|
||||
scheduler,
|
||||
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
|
||||
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
@@ -195,7 +194,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilin
|
||||
}
|
||||
size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters);
|
||||
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(pKernel);
|
||||
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(pKernel, commandQueue.getDevice().getRootDeviceIndex());
|
||||
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(pKernel);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
@@ -102,7 +102,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh),
|
||||
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0,
|
||||
mainKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
mainKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
mainKernel->getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
@@ -244,7 +244,6 @@ template <typename GfxFamily>
|
||||
void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo,
|
||||
bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) {
|
||||
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
|
||||
|
||||
if (blockedQueue) {
|
||||
size_t dshSize = 0;
|
||||
@@ -254,7 +253,7 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
|
||||
|
||||
if (parentKernel) {
|
||||
dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize();
|
||||
sshSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
|
||||
sshSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
iohEqualsDsh = true;
|
||||
colorCalcSize = static_cast<size_t>(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize);
|
||||
} else {
|
||||
|
||||
@@ -70,11 +70,10 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
Vec3<size_t> &numberOfWorkgroups,
|
||||
Vec3<size_t> &startOfWorkgroups) {
|
||||
|
||||
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
|
||||
auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel);
|
||||
WALKER_TYPE<GfxFamily> walkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
uint32_t dim = dispatchInfo.getDim();
|
||||
uint32_t simd = kernel.getKernelInfo(rootDeviceIndex).getMaxSimdSize();
|
||||
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
||||
|
||||
size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z};
|
||||
size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z};
|
||||
@@ -86,7 +85,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
}
|
||||
|
||||
auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel, rootDeviceIndex);
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
@@ -94,7 +93,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
ioh,
|
||||
ssh,
|
||||
kernel,
|
||||
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
|
||||
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
@@ -105,7 +104,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
true,
|
||||
commandQueue.getDevice());
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor,
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo().kernelDescriptor,
|
||||
globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
false, false, 0u);
|
||||
|
||||
@@ -416,11 +416,10 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
|
||||
if (kernel != nullptr) {
|
||||
auto &device = dispatchInfo.getClDevice();
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
const auto &hwInfo = device.getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto isSimulation = device.isSimulation();
|
||||
if (kernel->requiresLimitedWorkgroupSize(rootDeviceIndex) && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) {
|
||||
if (kernel->requiresLimitedWorkgroupSize() && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) {
|
||||
setSpecialWorkgroupSize(workGroupSize);
|
||||
} else if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
|
||||
WorkSizeInfo wsInfo(dispatchInfo);
|
||||
@@ -428,7 +427,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim());
|
||||
} else {
|
||||
auto maxWorkGroupSize = kernel->getMaxKernelWorkGroupSize();
|
||||
auto simd = kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize();
|
||||
auto simd = kernel->getKernelInfo().getMaxSimdSize();
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
if (dispatchInfo.getDim() == 1) {
|
||||
computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd);
|
||||
@@ -476,7 +475,7 @@ void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo)
|
||||
preferredWorkGroupSize[1] = lws.y;
|
||||
preferredWorkGroupSize[2] = lws.z;
|
||||
|
||||
const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(dispatchInfo.getClDevice().getRootDeviceIndex());
|
||||
const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo();
|
||||
if (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, NULL_LOCAL_WORKGROUP_SIZE, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]);
|
||||
|
||||
Reference in New Issue
Block a user