Pass root device index to get proper kernel info
Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
d3f3730989
commit
350ec9f16b
|
@ -3410,7 +3410,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
|
|||
}
|
||||
|
||||
if ((pKernel->getExecutionType() != KernelExecutionType::Default) ||
|
||||
pKernel->isUsingSyncBuffer()) {
|
||||
pKernel->isUsingSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) {
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
|
||||
return retVal;
|
||||
|
@ -5818,7 +5818,9 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
|
|||
return retVal;
|
||||
}
|
||||
|
||||
auto &hardwareInfo = pKernel->getDevices()[0]->getHardwareInfo();
|
||||
auto &device = pCommandQueue->getClDevice();
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
auto &hardwareInfo = device.getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
if (!hwHelper.isCooperativeDispatchSupported(pCommandQueue->getGpgpuEngine().getEngineType(), hardwareInfo.platform.eProductFamily)) {
|
||||
retVal = CL_INVALID_COMMAND_QUEUE;
|
||||
|
@ -5842,13 +5844,13 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
|
|||
}
|
||||
}
|
||||
|
||||
if (pKernel->isUsingSyncBuffer()) {
|
||||
if (pKernel->isUsingSyncBuffer(rootDeviceIndex)) {
|
||||
if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
pCommandQueue->getDevice().getSpecializedDevice<ClDevice>()->allocateSyncBufferHandler();
|
||||
device.allocateSyncBufferHandler();
|
||||
}
|
||||
|
||||
if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_KERNEL_INTEL, eventWaitList, event)) {
|
||||
|
|
|
@ -529,7 +529,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
|
|||
auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers());
|
||||
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers(rootDeviceIndex));
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
|
||||
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset);
|
||||
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
|
||||
|
|
|
@ -64,16 +64,16 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
|||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
} else {
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
|
||||
builtInLock.takeOwnership(builder);
|
||||
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation);
|
||||
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation, rootDeviceIndex);
|
||||
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
|
||||
if (!memObjsForAuxTranslation.empty()) {
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
}
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
||||
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
||||
|
@ -373,7 +373,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
|||
printfHandler->prepareDispatch(multiDispatchInfo);
|
||||
}
|
||||
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(device->getRootDeviceIndex())) {
|
||||
auto &gws = multiDispatchInfo.begin()->getGWS();
|
||||
auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize();
|
||||
size_t workGroupsCount = (gws.x * gws.y * gws.z) /
|
||||
|
@ -649,7 +649,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
printfHandler->makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(rootDeviceIndex)) {
|
||||
device->syncBufferHandler->makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
|
@ -674,7 +675,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
auto specialPipelineSelectMode = false;
|
||||
Kernel *kernel = nullptr;
|
||||
bool usePerDssBackedBuffer = false;
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
|
|
|
@ -412,7 +412,7 @@ SchedulerKernel &Context::getSchedulerKernel() {
|
|||
kernelInfos,
|
||||
&retVal);
|
||||
|
||||
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
|
||||
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize(clDevice->getRootDeviceIndex()) != 0);
|
||||
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
};
|
||||
|
|
|
@ -151,13 +151,13 @@ template <typename GfxFamily>
|
|||
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) {
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
void *pDSH = dynamicStateHeap.getCpuBase();
|
||||
|
||||
auto rootDeviceIndex = device->getRootDeviceIndex();
|
||||
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
igilCmdQueue->m_controls.m_IDTstart = colorCalcStateSize + sizeof(INTERFACE_DESCRIPTOR_DATA) * (interfaceDescriptorEntries - 2);
|
||||
|
||||
// Parent's dsh is located after ColorCalcState and 2 ID tables
|
||||
igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp((uint32_t)parentKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp(static_cast<uint32_t>(parentKernel->getDynamicStateHeapSize(rootDeviceIndex)), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
igilCmdQueue->m_controls.m_DynamicHeapSizeInBytes = (uint32_t)dshBuffer->getUnderlyingBufferSize();
|
||||
|
||||
igilCmdQueue->m_controls.m_CurrentDSHoffset = igilCmdQueue->m_controls.m_DynamicHeapStart;
|
||||
|
|
|
@ -73,7 +73,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
|||
// Kernel with no SSH or Kernel EM, not supported
|
||||
return;
|
||||
}
|
||||
if (pKernel->isKernelHeapSubstituted()) {
|
||||
if (pKernel->isKernelHeapSubstituted(rootDeviceIndex)) {
|
||||
// ISA for this kernel was already substituted
|
||||
return;
|
||||
}
|
||||
|
@ -85,8 +85,8 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
|||
|
||||
paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS;
|
||||
paramsIn.simd = (GTPIN_SIMD_WIDTH)kernelInfo.getMaxSimdSize();
|
||||
paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap();
|
||||
paramsIn.orig_kernel_size = static_cast<uint32_t>(pKernel->getKernelHeapSize());
|
||||
paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap(rootDeviceIndex);
|
||||
paramsIn.orig_kernel_size = static_cast<uint32_t>(pKernel->getKernelHeapSize(rootDeviceIndex));
|
||||
paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL;
|
||||
paramsIn.buffer_desc.BTI = static_cast<uint32_t>(gtpinBTI);
|
||||
paramsIn.igc_hash_id = kernelInfo.shaderHashCode;
|
||||
|
@ -97,8 +97,8 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
|||
instrument_params_out_t paramsOut = {0};
|
||||
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, ¶msIn, ¶msOut);
|
||||
// Substitute ISA of created kernel with instrumented code
|
||||
pKernel->substituteKernelHeap(paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
|
||||
pKernel->setKernelId(paramsOut.kernel_id);
|
||||
pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
|
||||
pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -106,14 +106,15 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
|||
if (isGTPinInitialized) {
|
||||
auto pCmdQ = reinterpret_cast<CommandQueue *>(pCmdQueue);
|
||||
auto &device = pCmdQ->getDevice();
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(device.getRootDeviceIndex()) == 0) {
|
||||
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(rootDeviceIndex) == 0) {
|
||||
// Kernel with no SSH, not supported
|
||||
return;
|
||||
}
|
||||
Context *pContext = &(pKernel->getContext());
|
||||
cl_context context = (cl_context)pContext;
|
||||
uint64_t kernelId = pKernel->getKernelId();
|
||||
uint64_t kernelId = pKernel->getKernelId(rootDeviceIndex);
|
||||
command_buffer_handle_t commandBuffer = (command_buffer_handle_t)((uintptr_t)(sequenceCount++));
|
||||
uint32_t kernelOffset = 0;
|
||||
resource_handle_t resource = 0;
|
||||
|
@ -138,7 +139,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
|||
GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
|
||||
GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily);
|
||||
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
|
||||
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, device.getRootDeviceIndex());
|
||||
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex);
|
||||
cl_mem buffer = (cl_mem)resource;
|
||||
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
|
||||
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device);
|
||||
|
|
|
@ -19,11 +19,11 @@ bool DispatchInfo::usesStatelessPrintfSurface() const {
|
|||
}
|
||||
|
||||
uint32_t DispatchInfo::getRequiredScratchSize() const {
|
||||
return (kernel == nullptr) ? 0 : kernel->getScratchSize();
|
||||
return (kernel == nullptr) ? 0 : kernel->getScratchSize(pClDevice->getRootDeviceIndex());
|
||||
}
|
||||
|
||||
uint32_t DispatchInfo::getRequiredPrivateScratchSize() const {
|
||||
return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize();
|
||||
return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize(pClDevice->getRootDeviceIndex());
|
||||
}
|
||||
|
||||
Kernel *MultiDispatchInfo::peekMainKernel() const {
|
||||
|
|
|
@ -52,7 +52,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(
|
|||
|
||||
totalSize += borderColorSize + additionalSizeRequiredDsh();
|
||||
|
||||
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo(rootDeviceIndex).isVmeWorkload));
|
||||
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize(rootDeviceIndex) || kernel.getKernelInfo(rootDeviceIndex).isVmeWorkload));
|
||||
|
||||
return alignUp(totalSize, EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
|
||||
}
|
||||
|
@ -245,7 +245,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|||
uint32_t samplerCount = 0;
|
||||
if (patchInfo.samplerStateArray) {
|
||||
samplerCount = patchInfo.samplerStateArray->Count;
|
||||
samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset, samplerCount, patchInfo.samplerStateArray->BorderColorOffset, kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper());
|
||||
samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset,
|
||||
samplerCount, patchInfo.samplerStateArray->BorderColorOffset,
|
||||
kernel.getDynamicStateHeap(rootDeviceIndex), device.getBindlessHeapsHelper());
|
||||
}
|
||||
|
||||
auto threadPayload = kernelInfo.patchInfo.threadPayload;
|
||||
|
|
|
@ -11,6 +11,6 @@ namespace NEO {
|
|||
void Kernel::getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const {
|
||||
}
|
||||
|
||||
void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const {
|
||||
void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet, uint32_t rootDeviceIndex) const {
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -290,7 +290,7 @@ cl_int Kernel::initialize() {
|
|||
}
|
||||
|
||||
if (patchInfo.pAllocateStatelessEventPoolSurface) {
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
|
||||
|
@ -299,7 +299,7 @@ cl_int Kernel::initialize() {
|
|||
|
||||
if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
|
||||
|
@ -442,16 +442,18 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
|
|||
const _cl_context *ctxt;
|
||||
cl_uint refCount = 0;
|
||||
uint64_t nonCannonizedGpuAddress = 0llu;
|
||||
auto defaultRootDeviceIndex = getDevices()[0]->getRootDeviceIndex();
|
||||
auto &defaultKernelInfo = getKernelInfo(defaultRootDeviceIndex);
|
||||
|
||||
switch (paramName) {
|
||||
case CL_KERNEL_FUNCTION_NAME:
|
||||
pSrc = getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str();
|
||||
srcSize = getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.length() + 1;
|
||||
pSrc = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str();
|
||||
srcSize = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelName.length() + 1;
|
||||
break;
|
||||
|
||||
case CL_KERNEL_NUM_ARGS:
|
||||
srcSize = sizeof(cl_uint);
|
||||
numArgs = (cl_uint)getDefaultKernelInfo().kernelArgInfo.size();
|
||||
numArgs = static_cast<cl_uint>(defaultKernelInfo.kernelArgInfo.size());
|
||||
pSrc = &numArgs;
|
||||
break;
|
||||
|
||||
|
@ -474,16 +476,16 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
|
|||
break;
|
||||
|
||||
case CL_KERNEL_ATTRIBUTES:
|
||||
pSrc = getDefaultKernelInfo().attributes.c_str();
|
||||
srcSize = getDefaultKernelInfo().attributes.length() + 1;
|
||||
pSrc = defaultKernelInfo.attributes.c_str();
|
||||
srcSize = defaultKernelInfo.attributes.length() + 1;
|
||||
break;
|
||||
|
||||
case CL_KERNEL_BINARY_PROGRAM_INTEL:
|
||||
pSrc = getKernelHeap();
|
||||
srcSize = getKernelHeapSize();
|
||||
pSrc = getKernelHeap(defaultRootDeviceIndex);
|
||||
srcSize = getKernelHeapSize(defaultRootDeviceIndex);
|
||||
break;
|
||||
case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL:
|
||||
nonCannonizedGpuAddress = GmmHelper::decanonize(getDefaultKernelInfo().kernelAllocation->getGpuAddress());
|
||||
nonCannonizedGpuAddress = GmmHelper::decanonize(defaultKernelInfo.kernelAllocation->getGpuAddress());
|
||||
pSrc = &nonCannonizedGpuAddress;
|
||||
srcSize = sizeof(nonCannonizedGpuAddress);
|
||||
break;
|
||||
|
@ -504,8 +506,9 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t
|
|||
cl_int retVal;
|
||||
const void *pSrc = nullptr;
|
||||
size_t srcSize = GetInfo::invalidSourceSize;
|
||||
auto numArgs = (cl_uint)getDefaultKernelInfo().kernelArgInfo.size();
|
||||
const auto &argInfo = getDefaultKernelInfo().kernelArgInfo[argIndx];
|
||||
auto &defaultKernelInfo = getDefaultKernelInfo();
|
||||
auto numArgs = static_cast<cl_uint>(defaultKernelInfo.kernelArgInfo.size());
|
||||
const auto &argInfo = defaultKernelInfo.kernelArgInfo[argIndx];
|
||||
|
||||
if (argIndx >= numArgs) {
|
||||
retVal = CL_INVALID_ARG_INDEX;
|
||||
|
@ -626,7 +629,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
|
|||
pSrc = &privateMemSize;
|
||||
break;
|
||||
default:
|
||||
getAdditionalWorkGroupInfo(paramName, pSrc, srcSize);
|
||||
getAdditionalWorkGroupInfo(paramName, pSrc, srcSize, rootDeviceIndex);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -742,16 +745,16 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
|
|||
}
|
||||
}
|
||||
|
||||
const void *Kernel::getKernelHeap() const {
|
||||
return getDefaultKernelInfo().heapInfo.pKernelHeap;
|
||||
const void *Kernel::getKernelHeap(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap;
|
||||
}
|
||||
|
||||
size_t Kernel::getKernelHeapSize() const {
|
||||
return getDefaultKernelInfo().heapInfo.KernelHeapSize;
|
||||
size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize;
|
||||
}
|
||||
|
||||
void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) {
|
||||
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getDefaultKernelInfo());
|
||||
void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize) {
|
||||
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
|
||||
void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap);
|
||||
*pKernelHeap = newKernelHeap;
|
||||
auto &heapInfo = pKernelInfo->heapInfo;
|
||||
|
@ -771,16 +774,16 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
|
|||
UNRECOVERABLE_IF(!status);
|
||||
}
|
||||
|
||||
bool Kernel::isKernelHeapSubstituted() const {
|
||||
return getDefaultKernelInfo().isKernelHeapSubstituted;
|
||||
bool Kernel::isKernelHeapSubstituted(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).isKernelHeapSubstituted;
|
||||
}
|
||||
|
||||
uint64_t Kernel::getKernelId() const {
|
||||
return getDefaultKernelInfo().kernelId;
|
||||
uint64_t Kernel::getKernelId(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).kernelId;
|
||||
}
|
||||
|
||||
void Kernel::setKernelId(uint64_t newKernelId) {
|
||||
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getDefaultKernelInfo());
|
||||
void Kernel::setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId) {
|
||||
KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
|
||||
pKernelInfo->kernelId = newKernelId;
|
||||
}
|
||||
uint32_t Kernel::getStartOffset() const {
|
||||
|
@ -794,12 +797,12 @@ void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const {
|
|||
return kernelInfos[rootDeviceIndex]->usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr;
|
||||
}
|
||||
|
||||
size_t Kernel::getDynamicStateHeapSize() const {
|
||||
return getDefaultKernelInfo().heapInfo.DynamicStateHeapSize;
|
||||
size_t Kernel::getDynamicStateHeapSize(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).heapInfo.DynamicStateHeapSize;
|
||||
}
|
||||
|
||||
const void *Kernel::getDynamicStateHeap() const {
|
||||
return getDefaultKernelInfo().heapInfo.pDsh;
|
||||
const void *Kernel::getDynamicStateHeap(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).heapInfo.pDsh;
|
||||
}
|
||||
|
||||
size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const {
|
||||
|
@ -823,8 +826,9 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
|
|||
cl_int retVal = CL_SUCCESS;
|
||||
bool updateExposedKernel = true;
|
||||
auto argWasUncacheable = false;
|
||||
if (getDefaultKernelInfo().builtinDispatchBuilder != nullptr) {
|
||||
updateExposedKernel = getDefaultKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal);
|
||||
auto &defaultKernelInfo = getDefaultKernelInfo();
|
||||
if (defaultKernelInfo.builtinDispatchBuilder != nullptr) {
|
||||
updateExposedKernel = defaultKernelInfo.builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal);
|
||||
}
|
||||
if (updateExposedKernel) {
|
||||
if (argIndex >= kernelArgHandlers.size()) {
|
||||
|
@ -882,13 +886,14 @@ void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, Grap
|
|||
|
||||
cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) {
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
void *ptrToPatch = patchBufferOffset(getDefaultKernelInfo().kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex);
|
||||
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
|
||||
void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex);
|
||||
setArgImmediate(argIndex, sizeof(void *), &svmPtr);
|
||||
|
||||
storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags);
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
|
||||
}
|
||||
|
@ -905,7 +910,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
|||
DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc);
|
||||
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
|
||||
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
|
||||
|
||||
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
|
||||
|
||||
|
@ -918,8 +924,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
|||
|
||||
patchWithRequiredSize(patchLocation, patchSize, reinterpret_cast<uintptr_t>(svmPtr));
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
size_t allocSize = 0;
|
||||
size_t offset = 0;
|
||||
|
@ -1049,7 +1055,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
|
|||
return 0;
|
||||
}
|
||||
|
||||
auto executionEnvironment = getDefaultKernelInfo().patchInfo.executionEnvironment;
|
||||
auto executionEnvironment = getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment;
|
||||
auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
|
@ -1227,6 +1233,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
|||
const void *argVal) {
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
auto crossThreadData = reinterpret_cast<uint32_t *>(getCrossThreadData(rootDeviceIndex));
|
||||
auto &defaultKernelInfo = getDefaultKernelInfo();
|
||||
|
||||
storeKernelArg(argIndex, SLM_OBJ, nullptr, argVal, argSize);
|
||||
|
||||
|
@ -1234,7 +1241,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
|||
|
||||
// Extract our current slmOffset
|
||||
auto slmOffset = *ptrOffset(crossThreadData,
|
||||
getDefaultKernelInfo().kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
defaultKernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
|
||||
// Add our size
|
||||
slmOffset += static_cast<uint32_t>(argSize);
|
||||
|
@ -1242,7 +1249,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
|||
// Update all slm offsets after this argIndex
|
||||
++argIndex;
|
||||
while (argIndex < slmSizes.size()) {
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
const auto &kernelArgInfo = defaultKernelInfo.kernelArgInfo[argIndex];
|
||||
auto slmAlignment = kernelArgInfo.slmAlignment;
|
||||
|
||||
// If an local argument, alignment should be non-zero
|
||||
|
@ -1260,7 +1267,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
|||
++argIndex;
|
||||
}
|
||||
|
||||
slmTotalSize = getDefaultKernelInfo().workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
|
||||
slmTotalSize = defaultKernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
@ -1272,9 +1279,9 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
if (argSize != sizeof(cl_mem *))
|
||||
return CL_INVALID_ARG_SIZE;
|
||||
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
auto clMem = reinterpret_cast<const cl_mem *>(argVal);
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
|
||||
patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex);
|
||||
|
||||
if (clMem && *clMem) {
|
||||
|
@ -1323,7 +1330,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
forceNonAuxMode = true;
|
||||
}
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, getDevice().getDevice());
|
||||
}
|
||||
|
@ -1348,7 +1355,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
|
||||
storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize);
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
|
||||
}
|
||||
|
@ -1365,7 +1372,6 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
|
|||
return CL_INVALID_ARG_SIZE;
|
||||
}
|
||||
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
auto clMem = reinterpret_cast<const cl_mem *>(argVal);
|
||||
|
||||
if (clMem && *clMem) {
|
||||
|
@ -1388,6 +1394,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
|
|||
return CL_INVALID_MEM_OBJECT;
|
||||
}
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex),
|
||||
kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
|
||||
|
@ -1397,7 +1404,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
|
|||
|
||||
auto graphicsAllocation = pipe->getGraphicsAllocation(rootDeviceIndex);
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState,
|
||||
pipe->getSize(), pipe->getCpuAddress(), 0,
|
||||
|
@ -1421,7 +1428,8 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
|
|||
const void *argVal, uint32_t mipLevel) {
|
||||
auto retVal = CL_INVALID_ARG_VALUE;
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
patchBufferOffset(getDefaultKernelInfo().kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex);
|
||||
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
|
||||
patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex);
|
||||
|
||||
auto clMemObj = *(static_cast<const cl_mem *>(argVal));
|
||||
auto pImage = castToObject<Image>(clMemObj);
|
||||
|
@ -1430,7 +1438,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
|
|||
if (pImage->peekSharingHandler()) {
|
||||
usingSharedObjArgs = true;
|
||||
}
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
|
||||
|
||||
DBG_LOG_INPUTS("setArgImage cl_mem", clMemObj);
|
||||
|
||||
|
@ -1441,7 +1449,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
|
|||
|
||||
// Sets SS structure
|
||||
if (kernelArgInfo.isMediaImage) {
|
||||
DEBUG_BREAK_IF(!getDefaultKernelInfo().isVmeWorkload);
|
||||
DEBUG_BREAK_IF(!kernelInfo.isVmeWorkload);
|
||||
pImage->setMediaImageArg(surfaceState, rootDeviceIndex);
|
||||
} else {
|
||||
pImage->setImageArg(surfaceState, kernelArgInfo.isMediaBlockImage, mipLevel, rootDeviceIndex);
|
||||
|
@ -1487,7 +1495,7 @@ cl_int Kernel::setArgImmediate(uint32_t argIndex,
|
|||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
|
||||
if (argVal) {
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
|
||||
DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() <= 0);
|
||||
|
||||
storeKernelArg(argIndex, NONE_OBJ, nullptr, nullptr, argSize);
|
||||
|
@ -1540,11 +1548,11 @@ cl_int Kernel::setArgSampler(uint32_t argIndex,
|
|||
}
|
||||
|
||||
if (pSampler && argSize == sizeof(cl_sampler *)) {
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
|
||||
|
||||
storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize);
|
||||
|
||||
auto dsh = getDynamicStateHeap();
|
||||
auto dsh = getDynamicStateHeap(rootDeviceIndex);
|
||||
auto samplerState = ptrOffset(dsh, kernelArgInfo.offsetHeap);
|
||||
|
||||
pSampler->setArg(const_cast<void *>(samplerState));
|
||||
|
@ -1583,7 +1591,7 @@ cl_int Kernel::setArgAccelerator(uint32_t argIndex,
|
|||
if (pAccelerator) {
|
||||
storeKernelArg(argIndex, ACCELERATOR_OBJ, clAcceleratorObj, argVal, argSize);
|
||||
|
||||
const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex];
|
||||
const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex];
|
||||
|
||||
if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VME) {
|
||||
auto crossThreadData = getCrossThreadData(rootDeviceIndex);
|
||||
|
@ -2267,7 +2275,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
|
|||
patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize,
|
||||
static_cast<uintptr_t>(devQueue->getQueueBuffer()->getGpuAddressToPatch()));
|
||||
}
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
|
||||
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
|
||||
|
@ -2289,7 +2297,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) {
|
|||
static_cast<uintptr_t>(devQueue->getEventPoolBuffer()->getGpuAddressToPatch()));
|
||||
}
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
|
||||
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(),
|
||||
|
@ -2311,8 +2319,8 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) {
|
|||
}
|
||||
}
|
||||
|
||||
bool Kernel::usesSyncBuffer() {
|
||||
return (getDefaultKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr);
|
||||
bool Kernel::usesSyncBuffer(uint32_t rootDeviceIndex) {
|
||||
return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr);
|
||||
}
|
||||
|
||||
void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
|
||||
|
@ -2322,7 +2330,7 @@ void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation,
|
|||
patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize,
|
||||
ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset));
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
|
||||
auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
|
||||
|
@ -2339,12 +2347,13 @@ bool Kernel::isPatched() const {
|
|||
cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex,
|
||||
size_t argSize,
|
||||
const void *argValue) const {
|
||||
if (getDefaultKernelInfo().kernelArgInfo[argIndex].isImage) {
|
||||
auto &defaultKernelInfo = getDefaultKernelInfo();
|
||||
if (defaultKernelInfo.kernelArgInfo[argIndex].isImage) {
|
||||
cl_mem mem = *(static_cast<const cl_mem *>(argValue));
|
||||
MemObj *pMemObj = nullptr;
|
||||
WithCastToInternal(mem, &pMemObj);
|
||||
if (pMemObj) {
|
||||
auto accessQualifier = getDefaultKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier;
|
||||
auto accessQualifier = defaultKernelInfo.kernelArgInfo[argIndex].metadata.accessQualifier;
|
||||
cl_mem_flags flags = pMemObj->getFlags();
|
||||
if ((accessQualifier == KernelArgMetadata::AccessReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
|
||||
(accessQualifier == KernelArgMetadata::AccessWriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) {
|
||||
|
@ -2372,11 +2381,16 @@ void Kernel::resolveArgs() {
|
|||
}
|
||||
}
|
||||
}
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernelInfos.size(); rootDeviceIndex++) {
|
||||
auto pKernelInfo = kernelInfos[rootDeviceIndex];
|
||||
if (!pKernelInfo) {
|
||||
continue;
|
||||
}
|
||||
if (canTransformImageTo2dArray) {
|
||||
imageTransformer->transformImagesTo2dArray(getDefaultKernelInfo(), kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
|
||||
imageTransformer->transformImagesTo2dArray(*pKernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
|
||||
} else if (imageTransformer->didTransform()) {
|
||||
imageTransformer->transformImagesTo3d(getDefaultKernelInfo(), kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
|
||||
imageTransformer->transformImagesTo3d(*pKernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2385,10 +2399,11 @@ bool Kernel::canTransformImages() const {
|
|||
return renderCoreFamily >= IGFX_GEN9_CORE && renderCoreFamily <= IGFX_GEN11LP_CORE;
|
||||
}
|
||||
|
||||
void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation) {
|
||||
void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation, uint32_t rootDeviceIndex) {
|
||||
memObjsForAuxTranslation.reserve(getKernelArgsNumber());
|
||||
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
|
||||
for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
|
||||
if (BUFFER_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) {
|
||||
if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
|
||||
auto buffer = castToObject<Buffer>(getKernelArg(i));
|
||||
if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
|
||||
memObjsForAuxTranslation.insert(buffer);
|
||||
|
@ -2396,7 +2411,7 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
|
|||
auto &context = this->program->getContext();
|
||||
if (context.isProvidingPerformanceHints()) {
|
||||
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), i, getDefaultKernelInfo().kernelArgInfo.at(i).metadataExtended->argName.c_str());
|
||||
kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,7 +141,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize,
|
||||
void *paramValue, size_t *paramValueSizeRet) const;
|
||||
void getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const;
|
||||
void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const;
|
||||
void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet, uint32_t rootDeviceIndex) const;
|
||||
|
||||
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName,
|
||||
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
|
||||
|
@ -154,13 +154,13 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet) const;
|
||||
|
||||
const void *getKernelHeap() const;
|
||||
const void *getKernelHeap(uint32_t rootDeviceIndex) const;
|
||||
void *getSurfaceStateHeap(uint32_t rootDeviceIndex) const;
|
||||
const void *getDynamicStateHeap() const;
|
||||
const void *getDynamicStateHeap(uint32_t rootDeviceIndex) const;
|
||||
|
||||
size_t getKernelHeapSize() const;
|
||||
size_t getKernelHeapSize(uint32_t rootDeviceIndex) const;
|
||||
size_t getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const;
|
||||
size_t getDynamicStateHeapSize() const;
|
||||
size_t getDynamicStateHeapSize(uint32_t rootDeviceIndex) const;
|
||||
size_t getNumberOfBindingTableStates() const;
|
||||
size_t getBindingTableOffset() const {
|
||||
return localBindingTableOffset;
|
||||
|
@ -168,10 +168,10 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
|
||||
void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
|
||||
|
||||
void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize);
|
||||
bool isKernelHeapSubstituted() const;
|
||||
uint64_t getKernelId() const;
|
||||
void setKernelId(uint64_t newKernelId);
|
||||
void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize);
|
||||
bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const;
|
||||
uint64_t getKernelId(uint32_t rootDeviceIndex) const;
|
||||
void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId);
|
||||
uint32_t getStartOffset() const;
|
||||
void setStartOffset(uint32_t offset);
|
||||
|
||||
|
@ -183,8 +183,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
return getDefaultKernelInfo().kernelArgInfo.size();
|
||||
}
|
||||
|
||||
bool requiresSshForBuffers() const {
|
||||
return getDefaultKernelInfo().requiresSshForBuffers;
|
||||
bool requiresSshForBuffers(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).requiresSshForBuffers;
|
||||
}
|
||||
|
||||
const KernelInfo &getKernelInfo(uint32_t rootDeviceIndex) const {
|
||||
|
@ -200,12 +200,12 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
|
||||
Program *getProgram() const { return program; }
|
||||
|
||||
uint32_t getScratchSize() {
|
||||
return getDefaultKernelInfo().patchInfo.mediavfestate ? getDefaultKernelInfo().patchInfo.mediavfestate->PerThreadScratchSpace : 0;
|
||||
uint32_t getScratchSize(uint32_t rootDeviceIndex) {
|
||||
return getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate ? getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate->PerThreadScratchSpace : 0;
|
||||
}
|
||||
|
||||
uint32_t getPrivateScratchSize() {
|
||||
return getDefaultKernelInfo().patchInfo.mediaVfeStateSlot1 ? getDefaultKernelInfo().patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0;
|
||||
uint32_t getPrivateScratchSize(uint32_t rootDeviceIndex) {
|
||||
return getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1 ? getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0;
|
||||
}
|
||||
|
||||
void createReflectionSurface();
|
||||
|
@ -215,7 +215,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
void patchDefaultDeviceQueue(DeviceQueue *devQueue);
|
||||
void patchEventPool(DeviceQueue *devQueue);
|
||||
void patchBlocksSimdSize(uint32_t rootDeviceIndex);
|
||||
bool usesSyncBuffer();
|
||||
bool usesSyncBuffer(uint32_t rootDeviceIndex);
|
||||
void patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset);
|
||||
void patchBindlessSurfaceStateOffsets(const Device &device, const size_t sshOffset);
|
||||
|
||||
|
@ -352,26 +352,26 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
KernelExecutionType getExecutionType() const {
|
||||
return executionType;
|
||||
}
|
||||
bool isUsingSyncBuffer() const {
|
||||
return (getDefaultKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr);
|
||||
bool isUsingSyncBuffer(uint32_t rootDeviceIndex) const {
|
||||
return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr);
|
||||
}
|
||||
|
||||
bool checkIfIsParentKernelAndBlocksUsesPrintf();
|
||||
|
||||
bool is32Bit() const {
|
||||
return getDefaultKernelInfo().gpuPointerSize == 4;
|
||||
bool is32Bit(uint32_t rootDeviceIndex) const {
|
||||
return getKernelInfo(rootDeviceIndex).gpuPointerSize == 4;
|
||||
}
|
||||
|
||||
int32_t getDebugSurfaceBti() const {
|
||||
if (getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface) {
|
||||
return getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface->BTI;
|
||||
int32_t getDebugSurfaceBti(uint32_t rootDeviceIndex) const {
|
||||
if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) {
|
||||
return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->BTI;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t getPerThreadSystemThreadSurfaceSize() const {
|
||||
if (getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface) {
|
||||
return getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize;
|
||||
size_t getPerThreadSystemThreadSurfaceSize(uint32_t rootDeviceIndex) const {
|
||||
if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) {
|
||||
return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -381,7 +381,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
return usingImagesOnly;
|
||||
}
|
||||
|
||||
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation);
|
||||
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation, uint32_t rootDeviceIndex);
|
||||
|
||||
MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const;
|
||||
|
||||
|
|
|
@ -143,13 +143,12 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
|
|||
this->numThreadsPerSubSlice = static_cast<uint32_t>(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) *
|
||||
device.getSharedDeviceInfo().numThreadsPerEU;
|
||||
this->localMemSize = static_cast<uint32_t>(device.getSharedDeviceInfo().localMemSize);
|
||||
setIfUseImg(dispatchInfo.getKernel());
|
||||
setIfUseImg(kernelInfo);
|
||||
setMinWorkGroupSize();
|
||||
}
|
||||
void WorkSizeInfo::setIfUseImg(Kernel *pKernel) {
|
||||
auto ParamsCount = pKernel->getKernelArgsNumber();
|
||||
for (auto i = 0u; i < ParamsCount; i++) {
|
||||
if (pKernel->getDefaultKernelInfo().kernelArgInfo[i].isImage) {
|
||||
void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
|
||||
for (auto i = 0u; i < kernelInfo.kernelArgInfo.size(); i++) {
|
||||
if (kernelInfo.kernelArgInfo[i].isImage) {
|
||||
imgUsed = true;
|
||||
yTiledSurfaces = true;
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ struct WorkSizeInfo {
|
|||
|
||||
WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
|
||||
WorkSizeInfo(const DispatchInfo &dispatchInfo);
|
||||
void setIfUseImg(Kernel *pKernel);
|
||||
void setIfUseImg(const KernelInfo &kernelInfo);
|
||||
void setMinWorkGroupSize();
|
||||
void checkRatio(const size_t workItems[3]);
|
||||
};
|
||||
|
|
|
@ -61,7 +61,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
|
|||
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset);
|
||||
|
||||
patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
|
||||
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset);
|
||||
void *addressToPatch = printfSurface->getUnderlyingBuffer();
|
||||
|
@ -75,8 +75,9 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
|||
}
|
||||
|
||||
void PrintfHandler::printEnqueueOutput() {
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
PrintFormatter printFormatter(reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer()), static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize()),
|
||||
kernel->is32Bit(), kernel->getKernelInfo(device.getRootDeviceIndex()).patchInfo.stringDataMap);
|
||||
kernel->is32Bit(rootDeviceIndex), kernel->getKernelInfo(rootDeviceIndex).patchInfo.stringDataMap);
|
||||
printFormatter.printKernelOutput();
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -34,8 +34,9 @@ class SchedulerKernel : public Kernel {
|
|||
}
|
||||
|
||||
size_t getCurbeSize() {
|
||||
size_t crossTrheadDataSize = getDefaultKernelInfo().patchInfo.dataParameterStream ? getDefaultKernelInfo().patchInfo.dataParameterStream->DataParameterStreamSize : 0;
|
||||
size_t dshSize = getDefaultKernelInfo().heapInfo.DynamicStateHeapSize;
|
||||
auto &defaultKernelInfo = getDefaultKernelInfo();
|
||||
size_t crossTrheadDataSize = defaultKernelInfo.patchInfo.dataParameterStream ? defaultKernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0;
|
||||
size_t dshSize = defaultKernelInfo.heapInfo.DynamicStateHeapSize;
|
||||
|
||||
crossTrheadDataSize = alignUp(crossTrheadDataSize, 64);
|
||||
dshSize = alignUp(dshSize, 64);
|
||||
|
|
|
@ -137,7 +137,7 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenEx
|
|||
SPatchAllocateSyncBuffer patchAllocateSyncBuffer;
|
||||
pProgram->mockKernelInfo.patchInfo.pAllocateSyncBuffer = &patchAllocateSyncBuffer;
|
||||
|
||||
EXPECT_TRUE(pKernel->isUsingSyncBuffer());
|
||||
EXPECT_TRUE(pKernel->isUsingSyncBuffer(testedRootDeviceIndex));
|
||||
|
||||
retVal = clEnqueueNDRangeKernel(
|
||||
pCommandQueue,
|
||||
|
|
|
@ -156,7 +156,7 @@ TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettin
|
|||
mediaVFEstate.PerThreadScratchSpace = 1024; //whatever greater than 0
|
||||
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
|
||||
|
||||
cl_ulong scratchSpaceSize = static_cast<cl_ulong>(mockKernel.mockKernel->getScratchSize());
|
||||
cl_ulong scratchSpaceSize = static_cast<cl_ulong>(mockKernel.mockKernel->getScratchSize(testedRootDeviceIndex));
|
||||
EXPECT_EQ(scratchSpaceSize, 1024u);
|
||||
|
||||
retVal = clGetKernelWorkGroupInfo(
|
||||
|
|
|
@ -109,8 +109,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBHelloWorld, simple) {
|
|||
EXPECT_EQ(0u, addrIDD % alignmentIDD);
|
||||
|
||||
// Check kernel start pointer matches hard-coded kernel.
|
||||
auto pExpectedISA = pKernel->getKernelHeap();
|
||||
auto expectedSize = pKernel->getKernelHeapSize();
|
||||
auto pExpectedISA = pKernel->getKernelHeap(rootDeviceIndex);
|
||||
auto expectedSize = pKernel->getKernelHeapSize(rootDeviceIndex);
|
||||
|
||||
auto pSBA = reinterpret_cast<STATE_BASE_ADDRESS *>(cmdStateBaseAddress);
|
||||
ASSERT_NE(nullptr, pSBA);
|
||||
|
@ -268,8 +268,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBSimpleArg, simple) {
|
|||
EXPECT_EQ(0u, addrIDD % alignmentIDD);
|
||||
|
||||
// Check kernel start pointer matches hard-coded kernel.
|
||||
auto pExpectedISA = pKernel->getKernelHeap();
|
||||
auto expectedSize = pKernel->getKernelHeapSize();
|
||||
auto pExpectedISA = pKernel->getKernelHeap(rootDeviceIndex);
|
||||
auto expectedSize = pKernel->getKernelHeapSize(rootDeviceIndex);
|
||||
|
||||
auto pSBA = reinterpret_cast<STATE_BASE_ADDRESS *>(cmdStateBaseAddress);
|
||||
ASSERT_NE(nullptr, pSBA);
|
||||
|
|
|
@ -183,7 +183,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DTh
|
|||
|
||||
EXPECT_NE(dshBefore, pDSH->getUsed());
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -202,9 +202,9 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) {
|
|||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
@ -329,7 +329,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentZeroMatchesSourceAd
|
|||
ASSERT_NE(nullptr, kernel);
|
||||
|
||||
// Determine where the argument is
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<void *>(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument);
|
||||
}
|
||||
|
@ -357,7 +357,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentOneMatchesDestinati
|
|||
ASSERT_NE(nullptr, kernel);
|
||||
|
||||
// Determine where the argument is
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<void *>(dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument);
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenIndirectDataG
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenIndirectDataGetsAdded) {
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenIndirectDataG
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
enqueueCopyImageToBuffer<FamilyType>();
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
|
|
@ -115,9 +115,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded)
|
|||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
||||
|
@ -302,7 +302,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentZeroShouldMatch
|
|||
ASSERT_NE(nullptr, kernel);
|
||||
|
||||
// Determine where the argument is
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
|
||||
|
||||
EXPECT_EQ((void *)((uintptr_t)buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument);
|
||||
|
||||
|
@ -335,7 +335,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchP
|
|||
ASSERT_NE(nullptr, kernel);
|
||||
|
||||
// Determine where the argument is
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 2u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
|
||||
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 2u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0), rootDeviceIndex);
|
||||
EXPECT_NE(nullptr, *pArgument);
|
||||
|
||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
|
|
|
@ -92,7 +92,7 @@ HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenIndirectDataGetsAdded) {
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
|
|
@ -415,9 +415,9 @@ HWTEST_F(EnqueueKernelTest, addsIndirectData) {
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
callOneWorkItemNDRKernel();
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
if (pKernel->requiresSshForBuffers(rootDeviceIndex) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -196,7 +196,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenIndi
|
|||
|
||||
EXPECT_NE(dshBefore, pDSH->getUsed());
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -156,9 +156,9 @@ HWTEST_F(EnqueueReadBufferTypeTest, addsIndirectData) {
|
|||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,7 +95,7 @@ HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenIndirectDataIsAdded) {
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
|
|
@ -170,7 +170,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenInd
|
|||
|
||||
EXPECT_NE(dshBefore, pDSH->getUsed());
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -154,9 +154,9 @@ HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) {
|
|||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->requiresSshForBuffers(rootDeviceIndex)) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,7 +97,7 @@ HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenIndirectDataIsAdded) {
|
|||
auto sshBefore = pSSH->getUsed();
|
||||
|
||||
EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr));
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
|
|
@ -452,7 +452,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
|
|||
|
||||
testing::internal::CaptureStdout();
|
||||
MemObjsForAuxTranslation memObjects;
|
||||
mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects);
|
||||
mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects, rootDeviceIndex);
|
||||
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION],
|
||||
mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo.at(0).metadataExtended->argName.c_str());
|
||||
|
|
|
@ -603,9 +603,9 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT
|
|||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(rootDeviceIndex), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
|
||||
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(rootDeviceIndex), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
|
|
|
@ -170,7 +170,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
|
|||
pKernel->getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
|
||||
UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
|
||||
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
if ((pKernel->requiresSshForBuffers(rootDeviceIndex)) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
EXPECT_EQ(expectedSizeSSH, sshUsed);
|
||||
}
|
||||
|
||||
|
|
|
@ -763,7 +763,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa
|
|||
|
||||
Kernel *pKernel1 = (Kernel *)kernel1;
|
||||
const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId1 = pKernel1->getKernelId();
|
||||
uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1);
|
||||
|
||||
constexpr size_t n = 256;
|
||||
|
@ -797,7 +797,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa
|
|||
|
||||
Kernel *pKernel2 = (Kernel *)kernel2;
|
||||
const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId2 = pKernel2->getKernelId();
|
||||
uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2);
|
||||
|
||||
auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
|
||||
|
@ -911,7 +911,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT
|
|||
|
||||
Kernel *pKernel1 = (Kernel *)kernel1;
|
||||
const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId1 = pKernel1->getKernelId();
|
||||
uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1);
|
||||
|
||||
cl_uint workDim = 1;
|
||||
|
@ -951,7 +951,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT
|
|||
|
||||
Kernel *pKernel2 = (Kernel *)kernel2;
|
||||
const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId2 = pKernel2->getKernelId();
|
||||
uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2);
|
||||
|
||||
auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
|
||||
|
@ -1267,7 +1267,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG
|
|||
|
||||
Kernel *pKernel = (Kernel *)kernel;
|
||||
const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId = pKernel->getKernelId();
|
||||
uint64_t gtpinKernelId = pKernel->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId);
|
||||
|
||||
constexpr size_t n = 256;
|
||||
|
@ -1380,7 +1380,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs
|
|||
|
||||
Kernel *pKernel = (Kernel *)kernel;
|
||||
const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId = pKernel->getKernelId();
|
||||
uint64_t gtpinKernelId = pKernel->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId);
|
||||
|
||||
constexpr size_t n = 256;
|
||||
|
@ -1504,7 +1504,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice
|
|||
|
||||
Kernel *pKernel1 = (Kernel *)kernel1;
|
||||
const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId1 = pKernel1->getKernelId();
|
||||
uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1);
|
||||
|
||||
constexpr size_t n = 256;
|
||||
|
@ -1542,7 +1542,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice
|
|||
|
||||
Kernel *pKernel2 = (Kernel *)kernel2;
|
||||
const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex);
|
||||
uint64_t gtpinKernelId2 = pKernel2->getKernelId();
|
||||
uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex);
|
||||
EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2);
|
||||
|
||||
auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
|
||||
|
@ -2325,18 +2325,18 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) {
|
|||
Kernel *pKernel = castToObject<Kernel>(kernel);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted();
|
||||
bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex);
|
||||
EXPECT_FALSE(isKernelCodeSubstituted);
|
||||
|
||||
// Substitute new kernel code
|
||||
constexpr size_t newCodeSize = 64;
|
||||
uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4};
|
||||
pKernel->substituteKernelHeap(&newCode[0], newCodeSize);
|
||||
pKernel->substituteKernelHeap(rootDeviceIndex, &newCode[0], newCodeSize);
|
||||
|
||||
// Verify that substitution went properly
|
||||
isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted();
|
||||
isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex);
|
||||
EXPECT_TRUE(isKernelCodeSubstituted);
|
||||
uint8_t *pBin2 = reinterpret_cast<uint8_t *>(const_cast<void *>(pKernel->getKernelHeap()));
|
||||
uint8_t *pBin2 = reinterpret_cast<uint8_t *>(const_cast<void *>(pKernel->getKernelHeap(rootDeviceIndex)));
|
||||
EXPECT_EQ(pBin2, &newCode[0]);
|
||||
|
||||
auto kernelIsa = pKernel->getKernelInfo(rootDeviceIndex).kernelAllocation->getUnderlyingBuffer();
|
||||
|
|
|
@ -21,7 +21,7 @@ template <typename GfxFamily>
|
|||
struct UnitTestHelper {
|
||||
static bool isL3ConfigProgrammable();
|
||||
|
||||
static bool evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel);
|
||||
static bool evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel, uint32_t rootDeviceIndex);
|
||||
|
||||
static bool isPageTableManagerSupported(const HardwareInfo &hwInfo);
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ bool UnitTestHelper<GfxFamily>::isL3ConfigProgrammable() {
|
|||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool UnitTestHelper<GfxFamily>::evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel) {
|
||||
bool UnitTestHelper<GfxFamily>::evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel, uint32_t rootDeviceIndex) {
|
||||
if (sizeBeforeEnqueue != sizeAfterEnqueue) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetDebugSurfaceBtiIsCal
|
|||
program.enableKernelDebug();
|
||||
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockDebugKernel>(device->getDevice(), &program));
|
||||
|
||||
EXPECT_EQ(0, kernel->getDebugSurfaceBti());
|
||||
EXPECT_EQ(0, kernel->getDebugSurfaceBti(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenCorrectValueIsReturned) {
|
||||
|
@ -31,7 +31,7 @@ TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThrea
|
|||
program.enableKernelDebug();
|
||||
std::unique_ptr<MockDebugKernel> kernel(MockKernel::create<MockDebugKernel>(device->getDevice(), &program));
|
||||
|
||||
EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize());
|
||||
EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenQueryingIsKernelDebugEnabledThenTrueIsReturned) {
|
||||
|
@ -60,7 +60,7 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetDebugSurfaceBtiIsCalledT
|
|||
program.enableKernelDebug();
|
||||
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockKernel>(device->getDevice(), &program));
|
||||
|
||||
EXPECT_EQ(-1, kernel->getDebugSurfaceBti());
|
||||
EXPECT_EQ(-1, kernel->getDebugSurfaceBti(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenZeroIsReturned) {
|
||||
|
@ -69,5 +69,5 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSur
|
|||
program.enableKernelDebug();
|
||||
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockKernel>(device->getDevice(), &program));
|
||||
|
||||
EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize());
|
||||
EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
|
|
@ -97,8 +97,8 @@ TEST(KernelTest, WhenKernelIsCreatedThenCorrectMembersAreMemObjects) {
|
|||
}
|
||||
|
||||
TEST_F(KernelTests, WhenKernelIsCreatedThenKernelHeapIsCorrect) {
|
||||
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap, pKernel->getKernelHeap());
|
||||
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize, pKernel->getKernelHeapSize());
|
||||
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap, pKernel->getKernelHeap(rootDeviceIndex));
|
||||
EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize, pKernel->getKernelHeapSize(rootDeviceIndex));
|
||||
}
|
||||
|
||||
TEST_F(KernelTests, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) {
|
||||
|
@ -167,7 +167,7 @@ TEST_F(KernelTests, GivenKernelBinaryProgramIntelWhenGettingInfoThenKernelBinary
|
|||
size_t paramValueSize = 0;
|
||||
char *paramValue = nullptr;
|
||||
size_t paramValueSizeRet = 0;
|
||||
const char *pKernelData = reinterpret_cast<const char *>(pKernel->getKernelHeap());
|
||||
const char *pKernelData = reinterpret_cast<const char *>(pKernel->getKernelHeap(rootDeviceIndex));
|
||||
EXPECT_NE(nullptr, pKernelData);
|
||||
|
||||
// get size of kernel binary
|
||||
|
@ -2670,7 +2670,7 @@ TEST(KernelTest, givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) {
|
|||
MockProgram program(&context, false, toClDeviceVector(*device));
|
||||
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex)));
|
||||
|
||||
EXPECT_TRUE(kernel->is32Bit());
|
||||
EXPECT_TRUE(kernel->is32Bit(rootDeviceIndex));
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
|
||||
|
@ -2683,7 +2683,7 @@ TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
|
|||
MockProgram program(&context, false, toClDeviceVector(*device));
|
||||
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex)));
|
||||
|
||||
EXPECT_FALSE(kernel->is32Bit());
|
||||
EXPECT_FALSE(kernel->is32Bit(rootDeviceIndex));
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) {
|
||||
|
@ -2978,7 +2978,7 @@ TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivat
|
|||
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
|
||||
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstateSlot1;
|
||||
|
||||
EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize());
|
||||
EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) {
|
||||
|
@ -2987,7 +2987,7 @@ TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateSc
|
|||
MockKernelWithInternals mockKernel(*device);
|
||||
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = nullptr;
|
||||
|
||||
EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize());
|
||||
EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) {
|
||||
|
|
|
@ -21,7 +21,7 @@ using namespace NEO;
|
|||
class KernelTransformableTest : public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
|
||||
context = std::make_unique<MockContext>(deviceFactory.rootDevices[rootDeviceIndex]);
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
|
@ -45,7 +45,7 @@ class KernelTransformableTest : public ::testing::Test {
|
|||
pKernelInfo->kernelArgInfo[3].isImage = true;
|
||||
pKernelInfo->argumentsToPatchNum = 4;
|
||||
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*context.getDevice(0)));
|
||||
program = std::make_unique<MockProgram>(context.get(), false, toClDeviceVector(*context->getDevice(0)));
|
||||
pKernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
|
@ -66,7 +66,8 @@ class KernelTransformableTest : public ::testing::Test {
|
|||
const int secondImageOffset = 0x40;
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
MockContext context;
|
||||
UltClDeviceFactory deviceFactory{2, 0};
|
||||
std::unique_ptr<MockContext> context;
|
||||
std::unique_ptr<MockProgram> program;
|
||||
std::unique_ptr<Sampler> sampler;
|
||||
std::unique_ptr<KernelInfo> pKernelInfo;
|
||||
|
@ -75,14 +76,14 @@ class KernelTransformableTest : public ::testing::Test {
|
|||
std::unique_ptr<Image> image;
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
char surfaceStateHeap[0x80];
|
||||
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
|
||||
const uint32_t rootDeviceIndex = 1;
|
||||
};
|
||||
|
||||
HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreNotTransformed) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
sampler.reset(createTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -110,7 +111,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
sampler.reset(createTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -137,7 +138,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
sampler.reset(createTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -169,7 +170,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTran
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
sampler.reset(createTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -196,7 +197,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithImages2dAndTwoTransformableSamp
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image2dHelper<>::create(&context));
|
||||
image.reset(Image2dHelper<>::create(context.get()));
|
||||
sampler.reset(createTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -223,7 +224,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
sampler.reset(createTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -255,7 +256,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenRes
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
sampler.reset(createNonTransformableSampler());
|
||||
cl_mem clImage = image.get();
|
||||
cl_sampler clSampler = sampler.get();
|
||||
|
@ -287,7 +288,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImag
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
image.reset(Image3dHelper<>::create(&context));
|
||||
image.reset(Image3dHelper<>::create(context.get()));
|
||||
cl_mem clImage = image.get();
|
||||
|
||||
pKernelInfo->kernelArgInfo[0].isSampler = false;
|
||||
|
|
|
@ -34,7 +34,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT
|
|||
const size_t newHeapSize = initialHeapSize + 1;
|
||||
char newHeap[newHeapSize];
|
||||
|
||||
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
|
||||
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
|
||||
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
|
||||
EXPECT_NE(nullptr, secondAllocation);
|
||||
auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
|
||||
|
@ -64,7 +64,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen
|
|||
const size_t newHeapSize = initialHeapSize;
|
||||
char newHeap[newHeapSize];
|
||||
|
||||
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
|
||||
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
|
||||
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
|
||||
EXPECT_NE(nullptr, secondAllocation);
|
||||
auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
|
||||
|
@ -93,7 +93,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT
|
|||
const size_t newHeapSize = initialHeapSize - 1;
|
||||
char newHeap[newHeapSize];
|
||||
|
||||
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
|
||||
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
|
||||
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
|
||||
EXPECT_NE(nullptr, secondAllocation);
|
||||
auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
|
||||
|
@ -125,7 +125,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe
|
|||
|
||||
EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty());
|
||||
|
||||
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
|
||||
kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
|
||||
auto secondAllocation = kernel.kernelInfo.kernelAllocation;
|
||||
|
||||
EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty());
|
||||
|
|
|
@ -122,7 +122,7 @@ HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorre
|
|||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
|
||||
ptrOffset(pKernel->getDynamicStateHeap(),
|
||||
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->kernelArgInfo[0].offsetHeap));
|
||||
EXPECT_EQ(static_cast<cl_bool>(CL_TRUE), static_cast<cl_bool>(!samplerState->getNonNormalizedCoordinateEnable()));
|
||||
EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTcxAddressControlMode());
|
||||
|
@ -426,7 +426,7 @@ HWTEST_P(NormalizedTest, WhenSettingKernelArgSamplerThenCoordsAreCorrect) {
|
|||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
|
||||
ptrOffset(pKernel->getDynamicStateHeap(),
|
||||
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->kernelArgInfo[0].offsetHeap));
|
||||
|
||||
EXPECT_EQ(normalizedCoordinates, static_cast<cl_bool>(!samplerState->getNonNormalizedCoordinateEnable()));
|
||||
|
@ -477,7 +477,7 @@ HWTEST_P(AddressingModeTest, WhenSettingKernelArgSamplerThenModesAreCorrect) {
|
|||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
|
||||
ptrOffset(pKernel->getDynamicStateHeap(),
|
||||
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->kernelArgInfo[0].offsetHeap));
|
||||
|
||||
auto expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR;
|
||||
|
@ -557,7 +557,7 @@ HWTEST_F(SamplerSetArgTest, GivenMipmapsWhenSettingKernelArgSamplerThenLodAreCor
|
|||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
|
||||
ptrOffset(pKernel->getDynamicStateHeap(),
|
||||
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->kernelArgInfo[0].offsetHeap));
|
||||
|
||||
EXPECT_EQ(FamilyType::SAMPLER_STATE::MIP_MODE_FILTER_LINEAR, samplerState->getMipModeFilter());
|
||||
|
@ -588,7 +588,7 @@ HWTEST_P(FilterModeTest, WhenSettingKernelArgSamplerThenFiltersAreCorrect) {
|
|||
retVal);
|
||||
|
||||
auto samplerState = reinterpret_cast<const SAMPLER_STATE *>(
|
||||
ptrOffset(pKernel->getDynamicStateHeap(),
|
||||
ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->kernelArgInfo[0].offsetHeap));
|
||||
|
||||
sampler->setArg(const_cast<SAMPLER_STATE *>(samplerState));
|
||||
|
|
|
@ -131,7 +131,7 @@ struct HardwareParse {
|
|||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
const void *getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
|
||||
const void *getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
|
||||
template <typename CmdType>
|
||||
CmdType *getCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
|
|
|
@ -94,7 +94,7 @@ void HardwareParse::findHardwareCommands() {
|
|||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh) {
|
||||
const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex) {
|
||||
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
|
||||
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
|
||||
|
||||
|
@ -115,7 +115,7 @@ const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uin
|
|||
offsetCrossThreadData);
|
||||
|
||||
// Determine where the argument is
|
||||
auto &patchInfo = kernel.getDefaultKernelInfo().patchInfo;
|
||||
auto &patchInfo = kernel.getKernelInfo(rootDeviceIndex).patchInfo;
|
||||
for (auto &arg : patchInfo.statelessGlobalMemObjKernelArgs) {
|
||||
if (arg->ArgumentNumber == indexArg) {
|
||||
return ptrOffset(pCrossThreadData, arg->DataParamOffset);
|
||||
|
|
|
@ -85,5 +85,5 @@ template struct CmdParse<GenGfxFamily>;
|
|||
namespace NEO {
|
||||
template void HardwareParse::findHardwareCommands<ICLFamily>();
|
||||
template void HardwareParse::findHardwareCommands<ICLFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<ICLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<ICLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -99,5 +99,5 @@ template struct CmdParse<GenGfxFamily>;
|
|||
namespace NEO {
|
||||
template void HardwareParse::findHardwareCommands<TGLLPFamily>();
|
||||
template void HardwareParse::findHardwareCommands<TGLLPFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<TGLLPFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<TGLLPFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -67,5 +67,5 @@ template struct CmdParse<GenGfxFamily>;
|
|||
namespace NEO {
|
||||
template void HardwareParse::findHardwareCommands<BDWFamily>();
|
||||
template void HardwareParse::findHardwareCommands<BDWFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<BDWFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<BDWFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -85,5 +85,5 @@ template struct CmdParse<GenGfxFamily>;
|
|||
namespace NEO {
|
||||
template void HardwareParse::findHardwareCommands<SKLFamily>();
|
||||
template void HardwareParse::findHardwareCommands<SKLFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<SKLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<SKLFamily>(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
} // namespace NEO
|
||||
|
|
Loading…
Reference in New Issue