Pass root device index to get proper kernel info

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2020-12-08 12:11:40 +00:00 committed by Compute-Runtime-Automation
parent d3f3730989
commit 350ec9f16b
49 changed files with 239 additions and 217 deletions

View File

@ -3410,7 +3410,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
}
if ((pKernel->getExecutionType() != KernelExecutionType::Default) ||
pKernel->isUsingSyncBuffer()) {
pKernel->isUsingSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) {
retVal = CL_INVALID_KERNEL;
TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
return retVal;
@ -5818,7 +5818,9 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
return retVal;
}
auto &hardwareInfo = pKernel->getDevices()[0]->getHardwareInfo();
auto &device = pCommandQueue->getClDevice();
auto rootDeviceIndex = device.getRootDeviceIndex();
auto &hardwareInfo = device.getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(pCommandQueue->getGpgpuEngine().getEngineType(), hardwareInfo.platform.eProductFamily)) {
retVal = CL_INVALID_COMMAND_QUEUE;
@ -5842,13 +5844,13 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
}
}
if (pKernel->isUsingSyncBuffer()) {
if (pKernel->isUsingSyncBuffer(rootDeviceIndex)) {
if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
retVal = CL_INVALID_KERNEL;
return retVal;
}
pCommandQueue->getDevice().getSpecializedDevice<ClDevice>()->allocateSyncBufferHandler();
device.allocateSyncBufferHandler();
}
if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_KERNEL_INTEL, eventWaitList, event)) {

View File

@ -529,7 +529,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
auto rootDeviceIndex = device->getRootDeviceIndex();
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers());
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers(rootDeviceIndex));
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset);
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());

View File

@ -64,16 +64,16 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
if (DebugManager.flags.ForceDispatchScheduler.get()) {
forceDispatchScheduler(multiDispatchInfo);
} else {
auto rootDeviceIndex = device->getRootDeviceIndex();
if (kernel->isAuxTranslationRequired()) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
builtInLock.takeOwnership(builder);
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation);
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation, rootDeviceIndex);
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
if (!memObjsForAuxTranslation.empty()) {
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
}
}
auto rootDeviceIndex = device->getRootDeviceIndex();
if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) {
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
@ -373,7 +373,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
printfHandler->prepareDispatch(multiDispatchInfo);
}
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(device->getRootDeviceIndex())) {
auto &gws = multiDispatchInfo.begin()->getGWS();
auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize();
size_t workGroupsCount = (gws.x * gws.y * gws.z) /
@ -649,7 +649,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
printfHandler->makeResident(getGpgpuCommandStreamReceiver());
}
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
auto rootDeviceIndex = device->getRootDeviceIndex();
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(rootDeviceIndex)) {
device->syncBufferHandler->makeResident(getGpgpuCommandStreamReceiver());
}
@ -674,7 +675,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
auto specialPipelineSelectMode = false;
Kernel *kernel = nullptr;
bool usePerDssBackedBuffer = false;
auto rootDeviceIndex = device->getRootDeviceIndex();
for (auto &dispatchInfo : multiDispatchInfo) {
if (kernel != dispatchInfo.getKernel()) {

View File

@ -412,7 +412,7 @@ SchedulerKernel &Context::getSchedulerKernel() {
kernelInfos,
&retVal);
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize(clDevice->getRootDeviceIndex()) != 0);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
};

View File

@ -151,13 +151,13 @@ template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) {
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
void *pDSH = dynamicStateHeap.getCpuBase();
auto rootDeviceIndex = device->getRootDeviceIndex();
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
igilCmdQueue->m_controls.m_IDTstart = colorCalcStateSize + sizeof(INTERFACE_DESCRIPTOR_DATA) * (interfaceDescriptorEntries - 2);
// Parent's dsh is located after ColorCalcState and 2 ID tables
igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp((uint32_t)parentKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp(static_cast<uint32_t>(parentKernel->getDynamicStateHeapSize(rootDeviceIndex)), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
igilCmdQueue->m_controls.m_DynamicHeapSizeInBytes = (uint32_t)dshBuffer->getUnderlyingBufferSize();
igilCmdQueue->m_controls.m_CurrentDSHoffset = igilCmdQueue->m_controls.m_DynamicHeapStart;

View File

@ -73,7 +73,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
// Kernel with no SSH or Kernel EM, not supported
return;
}
if (pKernel->isKernelHeapSubstituted()) {
if (pKernel->isKernelHeapSubstituted(rootDeviceIndex)) {
// ISA for this kernel was already substituted
return;
}
@ -85,8 +85,8 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS;
paramsIn.simd = (GTPIN_SIMD_WIDTH)kernelInfo.getMaxSimdSize();
paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap();
paramsIn.orig_kernel_size = static_cast<uint32_t>(pKernel->getKernelHeapSize());
paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap(rootDeviceIndex);
paramsIn.orig_kernel_size = static_cast<uint32_t>(pKernel->getKernelHeapSize(rootDeviceIndex));
paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL;
paramsIn.buffer_desc.BTI = static_cast<uint32_t>(gtpinBTI);
paramsIn.igc_hash_id = kernelInfo.shaderHashCode;
@ -97,8 +97,8 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
instrument_params_out_t paramsOut = {0};
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, &paramsIn, &paramsOut);
// Substitute ISA of created kernel with instrumented code
pKernel->substituteKernelHeap(paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
pKernel->setKernelId(paramsOut.kernel_id);
pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id);
}
}
@ -106,14 +106,15 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
if (isGTPinInitialized) {
auto pCmdQ = reinterpret_cast<CommandQueue *>(pCmdQueue);
auto &device = pCmdQ->getDevice();
auto rootDeviceIndex = device.getRootDeviceIndex();
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(device.getRootDeviceIndex()) == 0) {
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(rootDeviceIndex) == 0) {
// Kernel with no SSH, not supported
return;
}
Context *pContext = &(pKernel->getContext());
cl_context context = (cl_context)pContext;
uint64_t kernelId = pKernel->getKernelId();
uint64_t kernelId = pKernel->getKernelId(rootDeviceIndex);
command_buffer_handle_t commandBuffer = (command_buffer_handle_t)((uintptr_t)(sequenceCount++));
uint32_t kernelOffset = 0;
resource_handle_t resource = 0;
@ -138,7 +139,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, device.getRootDeviceIndex());
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex);
cl_mem buffer = (cl_mem)resource;
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device);

View File

@ -19,11 +19,11 @@ bool DispatchInfo::usesStatelessPrintfSurface() const {
}
uint32_t DispatchInfo::getRequiredScratchSize() const {
return (kernel == nullptr) ? 0 : kernel->getScratchSize();
return (kernel == nullptr) ? 0 : kernel->getScratchSize(pClDevice->getRootDeviceIndex());
}
uint32_t DispatchInfo::getRequiredPrivateScratchSize() const {
return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize();
return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize(pClDevice->getRootDeviceIndex());
}
Kernel *MultiDispatchInfo::peekMainKernel() const {

View File

@ -52,7 +52,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(
totalSize += borderColorSize + additionalSizeRequiredDsh();
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo(rootDeviceIndex).isVmeWorkload));
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize(rootDeviceIndex) || kernel.getKernelInfo(rootDeviceIndex).isVmeWorkload));
return alignUp(totalSize, EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
}
@ -245,7 +245,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
uint32_t samplerCount = 0;
if (patchInfo.samplerStateArray) {
samplerCount = patchInfo.samplerStateArray->Count;
samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset, samplerCount, patchInfo.samplerStateArray->BorderColorOffset, kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper());
samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset,
samplerCount, patchInfo.samplerStateArray->BorderColorOffset,
kernel.getDynamicStateHeap(rootDeviceIndex), device.getBindlessHeapsHelper());
}
auto threadPayload = kernelInfo.patchInfo.threadPayload;

View File

@ -11,6 +11,6 @@ namespace NEO {
void Kernel::getAdditionalInfo(cl_kernel_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const {
}
void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const {
void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *&paramValue, size_t &paramValueSizeRet, uint32_t rootDeviceIndex) const {
}
} // namespace NEO

View File

@ -290,7 +290,7 @@ cl_int Kernel::initialize() {
}
if (patchInfo.pAllocateStatelessEventPoolSurface) {
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
@ -299,7 +299,7 @@ cl_int Kernel::initialize() {
if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
@ -442,16 +442,18 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
const _cl_context *ctxt;
cl_uint refCount = 0;
uint64_t nonCannonizedGpuAddress = 0llu;
auto defaultRootDeviceIndex = getDevices()[0]->getRootDeviceIndex();
auto &defaultKernelInfo = getKernelInfo(defaultRootDeviceIndex);
switch (paramName) {
case CL_KERNEL_FUNCTION_NAME:
pSrc = getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str();
srcSize = getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.length() + 1;
pSrc = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str();
srcSize = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelName.length() + 1;
break;
case CL_KERNEL_NUM_ARGS:
srcSize = sizeof(cl_uint);
numArgs = (cl_uint)getDefaultKernelInfo().kernelArgInfo.size();
numArgs = static_cast<cl_uint>(defaultKernelInfo.kernelArgInfo.size());
pSrc = &numArgs;
break;
@ -474,16 +476,16 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
break;
case CL_KERNEL_ATTRIBUTES:
pSrc = getDefaultKernelInfo().attributes.c_str();
srcSize = getDefaultKernelInfo().attributes.length() + 1;
pSrc = defaultKernelInfo.attributes.c_str();
srcSize = defaultKernelInfo.attributes.length() + 1;
break;
case CL_KERNEL_BINARY_PROGRAM_INTEL:
pSrc = getKernelHeap();
srcSize = getKernelHeapSize();
pSrc = getKernelHeap(defaultRootDeviceIndex);
srcSize = getKernelHeapSize(defaultRootDeviceIndex);
break;
case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL:
nonCannonizedGpuAddress = GmmHelper::decanonize(getDefaultKernelInfo().kernelAllocation->getGpuAddress());
nonCannonizedGpuAddress = GmmHelper::decanonize(defaultKernelInfo.kernelAllocation->getGpuAddress());
pSrc = &nonCannonizedGpuAddress;
srcSize = sizeof(nonCannonizedGpuAddress);
break;
@ -504,8 +506,9 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t
cl_int retVal;
const void *pSrc = nullptr;
size_t srcSize = GetInfo::invalidSourceSize;
auto numArgs = (cl_uint)getDefaultKernelInfo().kernelArgInfo.size();
const auto &argInfo = getDefaultKernelInfo().kernelArgInfo[argIndx];
auto &defaultKernelInfo = getDefaultKernelInfo();
auto numArgs = static_cast<cl_uint>(defaultKernelInfo.kernelArgInfo.size());
const auto &argInfo = defaultKernelInfo.kernelArgInfo[argIndx];
if (argIndx >= numArgs) {
retVal = CL_INVALID_ARG_INDEX;
@ -626,7 +629,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
pSrc = &privateMemSize;
break;
default:
getAdditionalWorkGroupInfo(paramName, pSrc, srcSize);
getAdditionalWorkGroupInfo(paramName, pSrc, srcSize, rootDeviceIndex);
break;
}
@ -742,16 +745,16 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
}
}
const void *Kernel::getKernelHeap() const {
return getDefaultKernelInfo().heapInfo.pKernelHeap;
const void *Kernel::getKernelHeap(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap;
}
size_t Kernel::getKernelHeapSize() const {
return getDefaultKernelInfo().heapInfo.KernelHeapSize;
size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize;
}
void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) {
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getDefaultKernelInfo());
void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize) {
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap);
*pKernelHeap = newKernelHeap;
auto &heapInfo = pKernelInfo->heapInfo;
@ -771,16 +774,16 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
UNRECOVERABLE_IF(!status);
}
bool Kernel::isKernelHeapSubstituted() const {
return getDefaultKernelInfo().isKernelHeapSubstituted;
bool Kernel::isKernelHeapSubstituted(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).isKernelHeapSubstituted;
}
uint64_t Kernel::getKernelId() const {
return getDefaultKernelInfo().kernelId;
uint64_t Kernel::getKernelId(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).kernelId;
}
void Kernel::setKernelId(uint64_t newKernelId) {
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getDefaultKernelInfo());
void Kernel::setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId) {
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
pKernelInfo->kernelId = newKernelId;
}
uint32_t Kernel::getStartOffset() const {
@ -794,12 +797,12 @@ void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const {
return kernelInfos[rootDeviceIndex]->usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr;
}
size_t Kernel::getDynamicStateHeapSize() const {
return getDefaultKernelInfo().heapInfo.DynamicStateHeapSize;
size_t Kernel::getDynamicStateHeapSize(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).heapInfo.DynamicStateHeapSize;
}
const void *Kernel::getDynamicStateHeap() const {
return getDefaultKernelInfo().heapInfo.pDsh;
const void *Kernel::getDynamicStateHeap(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).heapInfo.pDsh;
}
size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const {
@ -823,8 +826,9 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
cl_int retVal = CL_SUCCESS;
bool updateExposedKernel = true;
auto argWasUncacheable = false;
if (getDefaultKernelInfo().builtinDispatchBuilder != nullptr) {
updateExposedKernel = getDefaultKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal);
auto &defaultKernelInfo = getDefaultKernelInfo();
if (defaultKernelInfo.builtinDispatchBuilder != nullptr) {
updateExposedKernel = defaultKernelInfo.builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal);
}
if (updateExposedKernel) {
if (argIndex >= kernelArgHandlers.size()) {
@ -882,13 +886,14 @@ void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, Grap
cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
void *ptrToPatch = patchBufferOffset(getDefaultKernelInfo().kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex);
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex);
setArgImmediate(argIndex, sizeof(void *), &svmPtr);
storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags);
if (requiresSshForBuffers()) {
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
if (requiresSshForBuffers(rootDeviceIndex)) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
}
@ -905,7 +910,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc);
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
@ -918,8 +924,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
patchWithRequiredSize(patchLocation, patchSize, reinterpret_cast<uintptr_t>(svmPtr));
if (requiresSshForBuffers()) {
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
if (requiresSshForBuffers(rootDeviceIndex)) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
size_t allocSize = 0;
size_t offset = 0;
@ -1049,7 +1055,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
return 0;
}
auto executionEnvironment = getDefaultKernelInfo().patchInfo.executionEnvironment;
auto executionEnvironment = getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment;
auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
if (dssCount == 0) {
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
@ -1227,6 +1233,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
const void *argVal) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
auto crossThreadData = reinterpret_cast<uint32_t *>(getCrossThreadData(rootDeviceIndex));
auto &defaultKernelInfo = getDefaultKernelInfo();
storeKernelArg(argIndex, SLM_OBJ, nullptr, argVal, argSize);
@ -1234,7 +1241,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
// Extract our current slmOffset
auto slmOffset = *ptrOffset(crossThreadData,
getDefaultKernelInfo().kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset);
defaultKernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset);
// Add our size
slmOffset += static_cast<uint32_t>(argSize);
@ -1242,7 +1249,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
// Update all slm offsets after this argIndex
++argIndex;
while (argIndex < slmSizes.size()) {
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
const auto &kernelArgInfo = defaultKernelInfo.kernelArgInfo[argIndex];
auto slmAlignment = kernelArgInfo.slmAlignment;
// If an local argument, alignment should be non-zero
@ -1260,7 +1267,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
++argIndex;
}
slmTotalSize = getDefaultKernelInfo().workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
slmTotalSize = defaultKernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
return CL_SUCCESS;
}
@ -1272,9 +1279,9 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
if (argSize != sizeof(cl_mem *))
return CL_INVALID_ARG_SIZE;
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
auto clMem = reinterpret_cast<const cl_mem *>(argVal);
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex);
if (clMem && *clMem) {
@ -1323,7 +1330,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
forceNonAuxMode = true;
}
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, getDevice().getDevice());
}
@ -1348,7 +1355,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize);
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
}
@ -1365,7 +1372,6 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
return CL_INVALID_ARG_SIZE;
}
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
auto clMem = reinterpret_cast<const cl_mem *>(argVal);
if (clMem && *clMem) {
@ -1388,6 +1394,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
return CL_INVALID_MEM_OBJECT;
}
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex),
kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset);
@ -1397,7 +1404,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
auto graphicsAllocation = pipe->getGraphicsAllocation(rootDeviceIndex);
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState,
pipe->getSize(), pipe->getCpuAddress(), 0,
@ -1421,7 +1428,8 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
const void *argVal, uint32_t mipLevel) {
auto retVal = CL_INVALID_ARG_VALUE;
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
patchBufferOffset(getDefaultKernelInfo().kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex);
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex);
auto clMemObj = *(static_cast<const cl_mem *>(argVal));
auto pImage = castToObject<Image>(clMemObj);
@ -1430,7 +1438,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
if (pImage->peekSharingHandler()) {
usingSharedObjArgs = true;
}
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
DBG_LOG_INPUTS("setArgImage cl_mem", clMemObj);
@ -1441,7 +1449,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
// Sets SS structure
if (kernelArgInfo.isMediaImage) {
DEBUG_BREAK_IF(!getDefaultKernelInfo().isVmeWorkload);
DEBUG_BREAK_IF(!kernelInfo.isVmeWorkload);
pImage->setMediaImageArg(surfaceState, rootDeviceIndex);
} else {
pImage->setImageArg(surfaceState, kernelArgInfo.isMediaBlockImage, mipLevel, rootDeviceIndex);
@ -1487,7 +1495,7 @@ cl_int Kernel::setArgImmediate(uint32_t argIndex,
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
if (argVal) {
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() <= 0);
storeKernelArg(argIndex, NONE_OBJ, nullptr, nullptr, argSize);
@ -1540,11 +1548,11 @@ cl_int Kernel::setArgSampler(uint32_t argIndex,
}
if (pSampler && argSize == sizeof(cl_sampler *)) {
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize);
auto dsh = getDynamicStateHeap();
auto dsh = getDynamicStateHeap(rootDeviceIndex);
auto samplerState = ptrOffset(dsh, kernelArgInfo.offsetHeap);
pSampler->setArg(const_cast<void *>(samplerState));
@ -1583,7 +1591,7 @@ cl_int Kernel::setArgAccelerator(uint32_t argIndex,
if (pAccelerator) {
storeKernelArg(argIndex, ACCELERATOR_OBJ, clAcceleratorObj, argVal, argSize);
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VME) {
auto crossThreadData = getCrossThreadData(rootDeviceIndex);
@ -2267,7 +2275,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize,
static_cast<uintptr_t>(devQueue->getQueueBuffer()->getGpuAddressToPatch()));
}
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
@ -2289,7 +2297,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) {
static_cast<uintptr_t>(devQueue->getEventPoolBuffer()->getGpuAddressToPatch()));
}
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(),
@ -2311,8 +2319,8 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) {
}
}
bool Kernel::usesSyncBuffer() {
return (getDefaultKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr);
bool Kernel::usesSyncBuffer(uint32_t rootDeviceIndex) {
return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr);
}
void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
@ -2322,7 +2330,7 @@ void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation,
patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize,
ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset));
if (requiresSshForBuffers()) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
@ -2339,12 +2347,13 @@ bool Kernel::isPatched() const {
cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex,
size_t argSize,
const void *argValue) const {
if (getDefaultKernelInfo().kernelArgInfo[argIndex].isImage) {
auto &defaultKernelInfo = getDefaultKernelInfo();
if (defaultKernelInfo.kernelArgInfo[argIndex].isImage) {
cl_mem mem = *(static_cast<const cl_mem *>(argValue));
MemObj *pMemObj = nullptr;
WithCastToInternal(mem, &pMemObj);
if (pMemObj) {
auto accessQualifier = getDefaultKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier;
auto accessQualifier = defaultKernelInfo.kernelArgInfo[argIndex].metadata.accessQualifier;
cl_mem_flags flags = pMemObj->getFlags();
if ((accessQualifier == KernelArgMetadata::AccessReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
(accessQualifier == KernelArgMetadata::AccessWriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) {
@ -2372,11 +2381,16 @@ void Kernel::resolveArgs() {
}
}
}
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernelInfos.size(); rootDeviceIndex++) {
auto pKernelInfo = kernelInfos[rootDeviceIndex];
if (!pKernelInfo) {
continue;
}
if (canTransformImageTo2dArray) {
imageTransformer->transformImagesTo2dArray(getDefaultKernelInfo(), kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
imageTransformer->transformImagesTo2dArray(*pKernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
} else if (imageTransformer->didTransform()) {
imageTransformer->transformImagesTo3d(getDefaultKernelInfo(), kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
imageTransformer->transformImagesTo3d(*pKernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
}
}
}
@ -2385,10 +2399,11 @@ bool Kernel::canTransformImages() const {
return renderCoreFamily >= IGFX_GEN9_CORE && renderCoreFamily <= IGFX_GEN11LP_CORE;
}
void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation) {
void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation, uint32_t rootDeviceIndex) {
memObjsForAuxTranslation.reserve(getKernelArgsNumber());
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
if (BUFFER_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) {
if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
auto buffer = castToObject<Buffer>(getKernelArg(i));
if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
memObjsForAuxTranslation.insert(buffer);
@ -2396,7 +2411,7 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
auto &context = this->program->getContext();
if (context.isProvidingPerformanceHints()) {
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), i, getDefaultKernelInfo().kernelArgInfo.at(i).metadataExtended->argName.c_str());
kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str());
}
}
}

View File

@ -141,7 +141,7 @@ class Kernel : public BaseObject<_cl_kernel> {
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize,
void *paramValue, size_t *paramValueSizeRet) const;
void getAdditionalInfo(cl_kernel_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const;
void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const;
void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *&paramValue, size_t &paramValueSizeRet, uint32_t rootDeviceIndex) const;
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName,
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
@ -154,13 +154,13 @@ class Kernel : public BaseObject<_cl_kernel> {
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet) const;
const void *getKernelHeap() const;
const void *getKernelHeap(uint32_t rootDeviceIndex) const;
void *getSurfaceStateHeap(uint32_t rootDeviceIndex) const;
const void *getDynamicStateHeap() const;
const void *getDynamicStateHeap(uint32_t rootDeviceIndex) const;
size_t getKernelHeapSize() const;
size_t getKernelHeapSize(uint32_t rootDeviceIndex) const;
size_t getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const;
size_t getDynamicStateHeapSize() const;
size_t getDynamicStateHeapSize(uint32_t rootDeviceIndex) const;
size_t getNumberOfBindingTableStates() const;
size_t getBindingTableOffset() const {
return localBindingTableOffset;
@ -168,10 +168,10 @@ class Kernel : public BaseObject<_cl_kernel> {
void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize);
bool isKernelHeapSubstituted() const;
uint64_t getKernelId() const;
void setKernelId(uint64_t newKernelId);
void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize);
bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const;
uint64_t getKernelId(uint32_t rootDeviceIndex) const;
void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId);
uint32_t getStartOffset() const;
void setStartOffset(uint32_t offset);
@ -183,8 +183,8 @@ class Kernel : public BaseObject<_cl_kernel> {
return getDefaultKernelInfo().kernelArgInfo.size();
}
bool requiresSshForBuffers() const {
return getDefaultKernelInfo().requiresSshForBuffers;
bool requiresSshForBuffers(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).requiresSshForBuffers;
}
const KernelInfo &getKernelInfo(uint32_t rootDeviceIndex) const {
@ -200,12 +200,12 @@ class Kernel : public BaseObject<_cl_kernel> {
Program *getProgram() const { return program; }
uint32_t getScratchSize() {
return getDefaultKernelInfo().patchInfo.mediavfestate ? getDefaultKernelInfo().patchInfo.mediavfestate->PerThreadScratchSpace : 0;
uint32_t getScratchSize(uint32_t rootDeviceIndex) {
return getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate ? getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate->PerThreadScratchSpace : 0;
}
uint32_t getPrivateScratchSize() {
return getDefaultKernelInfo().patchInfo.mediaVfeStateSlot1 ? getDefaultKernelInfo().patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0;
uint32_t getPrivateScratchSize(uint32_t rootDeviceIndex) {
return getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1 ? getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0;
}
void createReflectionSurface();
@ -215,7 +215,7 @@ class Kernel : public BaseObject<_cl_kernel> {
void patchDefaultDeviceQueue(DeviceQueue *devQueue);
void patchEventPool(DeviceQueue *devQueue);
void patchBlocksSimdSize(uint32_t rootDeviceIndex);
bool usesSyncBuffer();
bool usesSyncBuffer(uint32_t rootDeviceIndex);
void patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset);
void patchBindlessSurfaceStateOffsets(const Device &device, const size_t sshOffset);
@ -352,26 +352,26 @@ class Kernel : public BaseObject<_cl_kernel> {
KernelExecutionType getExecutionType() const {
return executionType;
}
bool isUsingSyncBuffer() const {
return (getDefaultKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr);
bool isUsingSyncBuffer(uint32_t rootDeviceIndex) const {
return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr);
}
bool checkIfIsParentKernelAndBlocksUsesPrintf();
bool is32Bit() const {
return getDefaultKernelInfo().gpuPointerSize == 4;
bool is32Bit(uint32_t rootDeviceIndex) const {
return getKernelInfo(rootDeviceIndex).gpuPointerSize == 4;
}
int32_t getDebugSurfaceBti() const {
if (getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface) {
return getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface->BTI;
int32_t getDebugSurfaceBti(uint32_t rootDeviceIndex) const {
if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) {
return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->BTI;
}
return -1;
}
size_t getPerThreadSystemThreadSurfaceSize() const {
if (getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface) {
return getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize;
size_t getPerThreadSystemThreadSurfaceSize(uint32_t rootDeviceIndex) const {
if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) {
return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize;
}
return 0;
}
@ -381,7 +381,7 @@ class Kernel : public BaseObject<_cl_kernel> {
return usingImagesOnly;
}
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation);
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation, uint32_t rootDeviceIndex);
MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const;

View File

@ -143,13 +143,12 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
this->numThreadsPerSubSlice = static_cast<uint32_t>(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) *
device.getSharedDeviceInfo().numThreadsPerEU;
this->localMemSize = static_cast<uint32_t>(device.getSharedDeviceInfo().localMemSize);
setIfUseImg(dispatchInfo.getKernel());
setIfUseImg(kernelInfo);
setMinWorkGroupSize();
}
void WorkSizeInfo::setIfUseImg(Kernel *pKernel) {
auto ParamsCount = pKernel->getKernelArgsNumber();
for (auto i = 0u; i < ParamsCount; i++) {
if (pKernel->getDefaultKernelInfo().kernelArgInfo[i].isImage) {
void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
for (auto i = 0u; i < kernelInfo.kernelArgInfo.size(); i++) {
if (kernelInfo.kernelArgInfo[i].isImage) {
imgUsed = true;
yTiledSurfaces = true;
}

View File

@ -84,7 +84,7 @@ struct WorkSizeInfo {
WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
WorkSizeInfo(const DispatchInfo &dispatchInfo);
void setIfUseImg(Kernel *pKernel);
void setIfUseImg(const KernelInfo &kernelInfo);
void setMinWorkGroupSize();
void checkRatio(const size_t workItems[3]);
};

View File

@ -61,7 +61,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset);
patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset);
void *addressToPatch = printfSurface->getUnderlyingBuffer();
@ -75,8 +75,9 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
}
void PrintfHandler::printEnqueueOutput() {
auto rootDeviceIndex = device.getRootDeviceIndex();
PrintFormatter printFormatter(reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer()), static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize()),
kernel->is32Bit(), kernel->getKernelInfo(device.getRootDeviceIndex()).patchInfo.stringDataMap);
kernel->is32Bit(rootDeviceIndex), kernel->getKernelInfo(rootDeviceIndex).patchInfo.stringDataMap);
printFormatter.printKernelOutput();
}
} // namespace NEO

View File

@ -34,8 +34,9 @@ class SchedulerKernel : public Kernel {
}
size_t getCurbeSize() {
size_t crossTrheadDataSize = getDefaultKernelInfo().patchInfo.dataParameterStream ? getDefaultKernelInfo().patchInfo.dataParameterStream->DataParameterStreamSize : 0;
size_t dshSize = getDefaultKernelInfo().heapInfo.DynamicStateHeapSize;
auto &defaultKernelInfo = getDefaultKernelInfo();
size_t crossTrheadDataSize = defaultKernelInfo.patchInfo.dataParameterStream ? defaultKernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0;
size_t dshSize = defaultKernelInfo.heapInfo.DynamicStateHeapSize;
crossTrheadDataSize = alignUp(crossTrheadDataSize, 64);
dshSize = alignUp(dshSize, 64);

View File

@ -137,7 +137,7 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenEx
SPatchAllocateSyncBuffer patchAllocateSyncBuffer;
pProgram->mockKernelInfo.patchInfo.pAllocateSyncBuffer = &patchAllocateSyncBuffer;
EXPECT_TRUE(pKernel->isUsingSyncBuffer());
EXPECT_TRUE(pKernel->isUsingSyncBuffer(testedRootDeviceIndex));
retVal = clEnqueueNDRangeKernel(
pCommandQueue,

View File

@ -156,7 +156,7 @@ TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettin
mediaVFEstate.PerThreadScratchSpace = 1024; //whatever greater than 0
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
cl_ulong scratchSpaceSize = static_cast<cl_ulong>(mockKernel.mockKernel->getScratchSize());
cl_ulong scratchSpaceSize = static_cast<cl_ulong>(mockKernel.mockKernel->getScratchSize(testedRootDeviceIndex));
EXPECT_EQ(scratchSpaceSize, 1024u);
retVal = clGetKernelWorkGroupInfo(

View File

@ -109,8 +109,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBHelloWorld, simple) {
EXPECT_EQ(0u, addrIDD % alignmentIDD);
// Check kernel start pointer matches hard-coded kernel.
auto pExpectedISA = pKernel->getKernelHeap();
auto expectedSize = pKernel->getKernelHeapSize();
auto pExpectedISA = pKernel->getKernelHeap(rootDeviceIndex);
auto expectedSize = pKernel->getKernelHeapSize(rootDeviceIndex);
auto pSBA = reinterpret_cast<STATE_BASE_ADDRESS *>(cmdStateBaseAddress);
ASSERT_NE(nullptr, pSBA);
@ -268,8 +268,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBSimpleArg, simple) {
EXPECT_EQ(0u, addrIDD % alignmentIDD);
// Check kernel start pointer matches hard-coded kernel.
auto pExpectedISA = pKernel->getKernelHeap();
auto expectedSize = pKernel->getKernelHeapSize();
auto pExpectedISA = pKernel->getKernelHeap(rootDeviceIndex);
auto expectedSize = pKernel->getKernelHeapSize(rootDeviceIndex);
auto pSBA = reinterpret_cast<STATE_BASE_ADDRESS *>(cmdStateBaseAddress);
ASSERT_NE(nullptr, pSBA);

View File

@ -183,7 +183,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DTh
EXPECT_NE(dshBefore, pDSH->getUsed());
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}

View File

@ -202,9 +202,9 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) {
auto kernel = multiDispatchInfo.begin()->getKernel();
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}
@ -329,7 +329,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentZeroMatchesSourceAd
ASSERT_NE(nullptr, kernel);
// Determine where the argument is
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
EXPECT_EQ(reinterpret_cast<void *>(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument);
}
@ -357,7 +357,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentOneMatchesDestinati
ASSERT_NE(nullptr, kernel);
// Determine where the argument is
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
EXPECT_EQ(reinterpret_cast<void *>(dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument);
}

View File

@ -85,7 +85,7 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenIndirectDataG
auto sshBefore = pSSH->getUsed();
EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage);
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
EXPECT_NE(sshBefore, pSSH->getUsed());
}

View File

@ -85,7 +85,7 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenIndirectDataGetsAdded) {
auto sshBefore = pSSH->getUsed();
EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage);
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
EXPECT_NE(sshBefore, pSSH->getUsed());
}

View File

@ -84,7 +84,7 @@ HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenIndirectDataG
auto sshBefore = pSSH->getUsed();
enqueueCopyImageToBuffer<FamilyType>();
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
EXPECT_NE(sshBefore, pSSH->getUsed());
}

View File

@ -115,9 +115,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded)
auto kernel = multiDispatchInfo.begin()->getKernel();
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
@ -302,7 +302,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentZeroShouldMatch
ASSERT_NE(nullptr, kernel);
// Determine where the argument is
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
EXPECT_EQ((void *)((uintptr_t)buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument);
@ -335,7 +335,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchP
ASSERT_NE(nullptr, kernel);
// Determine where the argument is
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 2u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 2u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
EXPECT_NE(nullptr, *pArgument);
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);

View File

@ -92,7 +92,7 @@ HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenIndirectDataGetsAdded) {
auto sshBefore = pSSH->getUsed();
EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image);
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
EXPECT_NE(sshBefore, pSSH->getUsed());
}

View File

@ -415,9 +415,9 @@ HWTEST_F(EnqueueKernelTest, addsIndirectData) {
auto sshBefore = pSSH->getUsed();
callOneWorkItemNDRKernel();
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
if (pKernel->requiresSshForBuffers(rootDeviceIndex) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}

View File

@ -196,7 +196,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenIndi
EXPECT_NE(dshBefore, pDSH->getUsed());
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}

View File

@ -156,9 +156,9 @@ HWTEST_F(EnqueueReadBufferTypeTest, addsIndirectData) {
auto kernel = multiDispatchInfo.begin()->getKernel();
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}

View File

@ -95,7 +95,7 @@ HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenIndirectDataIsAdded) {
auto sshBefore = pSSH->getUsed();
EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking);
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
EXPECT_NE(sshBefore, pSSH->getUsed());
}

View File

@ -170,7 +170,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenInd
EXPECT_NE(dshBefore, pDSH->getUsed());
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}

View File

@ -154,9 +154,9 @@ HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) {
auto kernel = multiDispatchInfo.begin()->getKernel();
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
if (kernel->requiresSshForBuffers()) {
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
EXPECT_NE(sshBefore, pSSH->getUsed());
}
}

View File

@ -97,7 +97,7 @@ HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenIndirectDataIsAdded) {
auto sshBefore = pSSH->getUsed();
EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking);
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
EXPECT_NE(iohBefore, pIOH->getUsed());
EXPECT_NE(sshBefore, pSSH->getUsed());
}

View File

@ -452,7 +452,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
testing::internal::CaptureStdout();
MemObjsForAuxTranslation memObjects;
mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects);
mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects, rootDeviceIndex);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION],
mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo.at(0).metadataExtended->argName.c_str());

View File

@ -603,9 +603,9 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(rootDeviceIndex), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(rootDeviceIndex), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
alignedFree(ssh->getCpuBase());

View File

@ -170,7 +170,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
pKernel->getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
UnitTestHelper<FamilyType>::getDefaultSshUsage();
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
if ((pKernel->requiresSshForBuffers(rootDeviceIndex)) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
EXPECT_EQ(expectedSizeSSH, sshUsed);
}

View File

@ -763,7 +763,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa
Kernel *pKernel1 = (Kernel *)kernel1;
const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId1 = pKernel1->getKernelId();
uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1);
constexpr size_t n = 256;
@ -797,7 +797,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa
Kernel *pKernel2 = (Kernel *)kernel2;
const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId2 = pKernel2->getKernelId();
uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2);
auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
@ -911,7 +911,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT
Kernel *pKernel1 = (Kernel *)kernel1;
const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId1 = pKernel1->getKernelId();
uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1);
cl_uint workDim = 1;
@ -951,7 +951,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT
Kernel *pKernel2 = (Kernel *)kernel2;
const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId2 = pKernel2->getKernelId();
uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2);
auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
@ -1267,7 +1267,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG
Kernel *pKernel = (Kernel *)kernel;
const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId = pKernel->getKernelId();
uint64_t gtpinKernelId = pKernel->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId);
constexpr size_t n = 256;
@ -1380,7 +1380,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs
Kernel *pKernel = (Kernel *)kernel;
const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId = pKernel->getKernelId();
uint64_t gtpinKernelId = pKernel->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId);
constexpr size_t n = 256;
@ -1504,7 +1504,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice
Kernel *pKernel1 = (Kernel *)kernel1;
const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId1 = pKernel1->getKernelId();
uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1);
constexpr size_t n = 256;
@ -1542,7 +1542,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice
Kernel *pKernel2 = (Kernel *)kernel2;
const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex);
uint64_t gtpinKernelId2 = pKernel2->getKernelId();
uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex);
EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2);
auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
@ -2325,18 +2325,18 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) {
Kernel *pKernel = castToObject<Kernel>(kernel);
ASSERT_NE(nullptr, pKernel);
bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted();
bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex);
EXPECT_FALSE(isKernelCodeSubstituted);
// Substitute new kernel code
constexpr size_t newCodeSize = 64;
uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4};
pKernel->substituteKernelHeap(&newCode[0], newCodeSize);
pKernel->substituteKernelHeap(rootDeviceIndex, &newCode[0], newCodeSize);
// Verify that substitution went properly
isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted();
isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex);
EXPECT_TRUE(isKernelCodeSubstituted);
uint8_t *pBin2 = reinterpret_cast<uint8_t *>(const_cast<void *>(pKernel->getKernelHeap()));
uint8_t *pBin2 = reinterpret_cast<uint8_t *>(const_cast<void *>(pKernel->getKernelHeap(rootDeviceIndex)));
EXPECT_EQ(pBin2, &newCode[0]);
auto kernelIsa = pKernel->getKernelInfo(rootDeviceIndex).kernelAllocation->getUnderlyingBuffer();

View File

@ -21,7 +21,7 @@ template <typename GfxFamily>
struct UnitTestHelper {
static bool isL3ConfigProgrammable();
static bool evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel);
static bool evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel, uint32_t rootDeviceIndex);
static bool isPageTableManagerSupported(const HardwareInfo &hwInfo);

View File

@ -13,7 +13,7 @@ bool UnitTestHelper<GfxFamily>::isL3ConfigProgrammable() {
};
template <typename GfxFamily>
bool UnitTestHelper<GfxFamily>::evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel) {
bool UnitTestHelper<GfxFamily>::evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel, uint32_t rootDeviceIndex) {
if (sizeBeforeEnqueue != sizeAfterEnqueue) {
return true;
}

View File

@ -22,7 +22,7 @@ TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetDebugSurfaceBtiIsCal
program.enableKernelDebug();
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockDebugKernel>(device->getDevice(), &program));
EXPECT_EQ(0, kernel->getDebugSurfaceBti());
EXPECT_EQ(0, kernel->getDebugSurfaceBti(device->getRootDeviceIndex()));
}
TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenCorrectValueIsReturned) {
@ -31,7 +31,7 @@ TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThrea
program.enableKernelDebug();
std::unique_ptr<MockDebugKernel> kernel(MockKernel::create<MockDebugKernel>(device->getDevice(), &program));
EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize());
EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize(device->getRootDeviceIndex()));
}
TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenQueryingIsKernelDebugEnabledThenTrueIsReturned) {
@ -60,7 +60,7 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetDebugSurfaceBtiIsCalledT
program.enableKernelDebug();
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockKernel>(device->getDevice(), &program));
EXPECT_EQ(-1, kernel->getDebugSurfaceBti());
EXPECT_EQ(-1, kernel->getDebugSurfaceBti(device->getRootDeviceIndex()));
}
TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenZeroIsReturned) {
@ -69,5 +69,5 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSur
program.enableKernelDebug();
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockKernel>(device->getDevice(), &program));
EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize());
EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize(device->getRootDeviceIndex()));
}

View File

@ -97,8 +97,8 @@ TEST(KernelTest, WhenKernelIsCreatedThenCorrectMembersAreMemObjects) {
}
TEST_F(KernelTests, WhenKernelIsCreatedThenKernelHeapIsCorrect) {
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap, pKernel->getKernelHeap());
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize, pKernel->getKernelHeapSize());
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap, pKernel->getKernelHeap(rootDeviceIndex));
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize, pKernel->getKernelHeapSize(rootDeviceIndex));
}
TEST_F(KernelTests, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) {
@ -167,7 +167,7 @@ TEST_F(KernelTests, GivenKernelBinaryProgramIntelWhenGettingInfoThenKernelBinary
size_t paramValueSize = 0;
char *paramValue = nullptr;
size_t paramValueSizeRet = 0;
const char *pKernelData = reinterpret_cast<const char *>(pKernel->getKernelHeap());
const char *pKernelData = reinterpret_cast<const char *>(pKernel->getKernelHeap(rootDeviceIndex));
EXPECT_NE(nullptr, pKernelData);
// get size of kernel binary
@ -2670,7 +2670,7 @@ TEST(KernelTest, givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) {
MockProgram program(&context, false, toClDeviceVector(*device));
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex)));
EXPECT_TRUE(kernel->is32Bit());
EXPECT_TRUE(kernel->is32Bit(rootDeviceIndex));
}
TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
@ -2683,7 +2683,7 @@ TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
MockProgram program(&context, false, toClDeviceVector(*device));
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex)));
EXPECT_FALSE(kernel->is32Bit());
EXPECT_FALSE(kernel->is32Bit(rootDeviceIndex));
}
TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) {
@ -2978,7 +2978,7 @@ TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivat
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstateSlot1;
EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize());
EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex()));
}
TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) {
@ -2987,7 +2987,7 @@ TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateSc
MockKernelWithInternals mockKernel(*device);
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = nullptr;
EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize());
EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex()));
}
TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) {

View File

@ -21,7 +21,7 @@ using namespace NEO;
class KernelTransformableTest : public ::testing::Test {
public:
void SetUp() override {
rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
context = std::make_unique<MockContext>(deviceFactory.rootDevices[rootDeviceIndex]);
pKernelInfo = std::make_unique<KernelInfo>();
KernelArgPatchInfo kernelArgPatchInfo;
@ -45,7 +45,7 @@ class KernelTransformableTest : public ::testing::Test {
pKernelInfo->kernelArgInfo[3].isImage = true;
pKernelInfo->argumentsToPatchNum = 4;
program = std::make_unique<MockProgram>(toClDeviceVector(*context.getDevice(0)));
program = std::make_unique<MockProgram>(context.get(), false, toClDeviceVector(*context->getDevice(0)));
pKernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
@ -66,7 +66,8 @@ class KernelTransformableTest : public ::testing::Test {
const int secondImageOffset = 0x40;
cl_int retVal = CL_SUCCESS;
MockContext context;
UltClDeviceFactory deviceFactory{2, 0};
std::unique_ptr<MockContext> context;
std::unique_ptr<MockProgram> program;
std::unique_ptr<Sampler> sampler;
std::unique_ptr<KernelInfo> pKernelInfo;
@ -75,14 +76,14 @@ class KernelTransformableTest : public ::testing::Test {
std::unique_ptr<Image> image;
SKernelBinaryHeaderCommon kernelHeader;
char surfaceStateHeap[0x80];
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
const uint32_t rootDeviceIndex = 1;
};
HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreNotTransformed) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
sampler.reset(createTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -110,7 +111,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
sampler.reset(createTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -137,7 +138,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
sampler.reset(createTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -169,7 +170,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTran
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
sampler.reset(createTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -196,7 +197,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithImages2dAndTwoTransformableSamp
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image2dHelper<>::create(&context));
image.reset(Image2dHelper<>::create(context.get()));
sampler.reset(createTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -223,7 +224,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
sampler.reset(createTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -255,7 +256,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenRes
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
sampler.reset(createNonTransformableSampler());
cl_mem clImage = image.get();
cl_sampler clSampler = sampler.get();
@ -287,7 +288,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImag
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
image.reset(Image3dHelper<>::create(&context));
image.reset(Image3dHelper<>::create(context.get()));
cl_mem clImage = image.get();
pKernelInfo->kernelArgInfo[0].isSampler = false;

View File

@ -34,7 +34,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT
const size_t newHeapSize = initialHeapSize + 1;
char newHeap[newHeapSize];
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
EXPECT_NE(nullptr, secondAllocation);
auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
@ -64,7 +64,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen
const size_t newHeapSize = initialHeapSize;
char newHeap[newHeapSize];
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
EXPECT_NE(nullptr, secondAllocation);
auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
@ -93,7 +93,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT
const size_t newHeapSize = initialHeapSize - 1;
char newHeap[newHeapSize];
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
EXPECT_NE(nullptr, secondAllocation);
auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
@ -125,7 +125,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe
EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty());
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty());

View File

@ -122,7 +122,7 @@ HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorre
ASSERT_EQ(CL_SUCCESS, retVal);
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
ptrOffset(pKernel->getDynamicStateHeap(),
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_EQ(static_cast<cl_bool>(CL_TRUE), static_cast<cl_bool>(!samplerState->getNonNormalizedCoordinateEnable()));
EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTcxAddressControlMode());
@ -426,7 +426,7 @@ HWTEST_P(NormalizedTest, WhenSettingKernelArgSamplerThenCoordsAreCorrect) {
ASSERT_EQ(CL_SUCCESS, retVal);
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
ptrOffset(pKernel->getDynamicStateHeap(),
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_EQ(normalizedCoordinates, static_cast<cl_bool>(!samplerState->getNonNormalizedCoordinateEnable()));
@ -477,7 +477,7 @@ HWTEST_P(AddressingModeTest, WhenSettingKernelArgSamplerThenModesAreCorrect) {
ASSERT_EQ(CL_SUCCESS, retVal);
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
ptrOffset(pKernel->getDynamicStateHeap(),
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
auto expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR;
@ -557,7 +557,7 @@ HWTEST_F(SamplerSetArgTest, GivenMipmapsWhenSettingKernelArgSamplerThenLodAreCor
ASSERT_EQ(CL_SUCCESS, retVal);
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
ptrOffset(pKernel->getDynamicStateHeap(),
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_EQ(FamilyType::SAMPLER_STATE::MIP_MODE_FILTER_LINEAR, samplerState->getMipModeFilter());
@ -588,7 +588,7 @@ HWTEST_P(FilterModeTest, WhenSettingKernelArgSamplerThenFiltersAreCorrect) {
retVal);
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
ptrOffset(pKernel->getDynamicStateHeap(),
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
sampler->setArg(const_cast<SAMPLER_STATE *>(samplerState));

View File

@ -131,7 +131,7 @@ struct HardwareParse {
}
template <typename FamilyType>
const void *getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
const void *getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
template <typename CmdType>
CmdType *getCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {

View File

@ -94,7 +94,7 @@ void HardwareParse::findHardwareCommands() {
}
template <typename FamilyType>
const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh) {
const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex) {
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
@ -115,7 +115,7 @@ const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uin
offsetCrossThreadData);
// Determine where the argument is
auto &patchInfo = kernel.getDefaultKernelInfo().patchInfo;
auto &patchInfo = kernel.getKernelInfo(rootDeviceIndex).patchInfo;
for (auto &arg : patchInfo.statelessGlobalMemObjKernelArgs) {
if (arg->ArgumentNumber == indexArg) {
return ptrOffset(pCrossThreadData, arg->DataParamOffset);

View File

@ -85,5 +85,5 @@ template struct CmdParse<GenGfxFamily>;
namespace NEO {
template void HardwareParse::findHardwareCommands<ICLFamily>();
template void HardwareParse::findHardwareCommands<ICLFamily>(IndirectHeap *);
template const void *HardwareParse::getStatelessArgumentPointer<ICLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
template const void *HardwareParse::getStatelessArgumentPointer<ICLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
} // namespace NEO

View File

@ -99,5 +99,5 @@ template struct CmdParse<GenGfxFamily>;
namespace NEO {
template void HardwareParse::findHardwareCommands<TGLLPFamily>();
template void HardwareParse::findHardwareCommands<TGLLPFamily>(IndirectHeap *);
template const void *HardwareParse::getStatelessArgumentPointer<TGLLPFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
template const void *HardwareParse::getStatelessArgumentPointer<TGLLPFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
} // namespace NEO

View File

@ -67,5 +67,5 @@ template struct CmdParse<GenGfxFamily>;
namespace NEO {
template void HardwareParse::findHardwareCommands<BDWFamily>();
template void HardwareParse::findHardwareCommands<BDWFamily>(IndirectHeap *);
template const void *HardwareParse::getStatelessArgumentPointer<BDWFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
template const void *HardwareParse::getStatelessArgumentPointer<BDWFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
} // namespace NEO

View File

@ -85,5 +85,5 @@ template struct CmdParse<GenGfxFamily>;
namespace NEO {
template void HardwareParse::findHardwareCommands<SKLFamily>();
template void HardwareParse::findHardwareCommands<SKLFamily>(IndirectHeap *);
template const void *HardwareParse::getStatelessArgumentPointer<SKLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
template const void *HardwareParse::getStatelessArgumentPointer<SKLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
} // namespace NEO