Add resolve capability for compressed USM device allocations

Related-To: NEO-5107

Signed-off-by: Slawomir Milczarek <slawomir.milczarek@intel.com>
This commit is contained in:
Slawomir Milczarek
2020-12-22 00:03:25 +00:00
committed by Compute-Runtime-Automation
parent d7ff26cc5a
commit 55f3c8f134
27 changed files with 636 additions and 215 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,9 +8,12 @@
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
#include "opencl/source/mem_obj/buffer.h"
#include "pipe_control_args.h"
@@ -24,13 +27,12 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
template <typename GfxFamily>
bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
size_t kernelInstanceNumber = 0;
size_t numMemObjectsToTranslate = multiDispatchInfo.getMemObjsForAuxTranslation()->size();
resizeKernelInstances(numMemObjectsToTranslate);
size_t numKernelObjectsToTranslate = multiDispatchInfo.getKernelObjsForAuxTranslation()->size();
resizeKernelInstances(numKernelObjectsToTranslate);
multiDispatchInfo.setBuiltinOpParams(operationParams);
for (auto &memObj : *multiDispatchInfo.getMemObjsForAuxTranslation()) {
for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) {
DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::NoSplit> builder(clDevice);
size_t allocationSize = alignUp(memObj->getSize(), 512);
UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1);
@@ -38,7 +40,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
// Before Kernel
registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchInitCommands, true);
}
if (kernelInstanceNumber == numMemObjectsToTranslate - 1) {
if (kernelInstanceNumber == numKernelObjectsToTranslate - 1) {
// After Kernel
registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchEpilogueCommands, false);
}
@@ -50,8 +52,20 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get());
}
builder.setArg(0, memObj);
builder.setArg(1, memObj);
size_t allocationSize = 0;
if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) {
auto buffer = static_cast<Buffer *>(kernelObj.object);
builder.setArg(0, buffer);
builder.setArg(1, buffer);
allocationSize = alignUp(buffer->getSize(), 512);
} else {
DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
auto svmAlloc = static_cast<GraphicsAllocation *>(kernelObj.object);
auto svmPtr = reinterpret_cast<void *>(svmAlloc->getGpuAddressToPatch());
builder.setArgSvmAlloc(0, svmPtr, svmAlloc);
builder.setArgSvmAlloc(1, svmPtr, svmAlloc);
allocationSize = alignUp(svmAlloc->getUnderlyingBufferSize(), 512);
}
size_t xGws = allocationSize / 16;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -534,7 +534,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset);
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device->getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
return true;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
const cl_event *eventWaitList,
cl_event *event) {
BuiltInOwnershipWrapper builtInLock;
MemObjsForAuxTranslation memObjsForAuxTranslation;
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo(kernel);
if (DebugManager.flags.ForceDispatchScheduler.get()) {
@@ -69,9 +69,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
if (kernel->isAuxTranslationRequired()) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
builtInLock.takeOwnership(builder);
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation, rootDeviceIndex);
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
if (!memObjsForAuxTranslation.empty()) {
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation, rootDeviceIndex);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
if (!kernelObjsForAuxTranslation.empty()) {
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
}
}
@@ -89,7 +89,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
}
}
if (kernel->isAuxTranslationRequired()) {
if (!memObjsForAuxTranslation.empty()) {
if (!kernelObjsForAuxTranslation.empty()) {
UNRECOVERABLE_IF(kernel->isParentKernel);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
}
@@ -479,23 +479,31 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const Multi
const EventsRequest &eventsRequest, bool queueBlocked) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
auto numBuffers = multiDispatchInfo.getMemObjsForAuxTranslation()->size();
blitPropertiesContainer.resize(numBuffers * 2);
auto numKernelObjs = multiDispatchInfo.getKernelObjsForAuxTranslation()->size();
blitPropertiesContainer.resize(numKernelObjs * 2);
auto bufferIndex = 0;
for (auto &buffer : *multiDispatchInfo.getMemObjsForAuxTranslation()) {
for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) {
GraphicsAllocation *allocation = nullptr;
if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) {
auto buffer = static_cast<Buffer *>(kernelObj.object);
allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
} else {
DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
allocation = static_cast<GraphicsAllocation *>(kernelObj.object);
}
{
// Aux to NonAux
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
buffer->getGraphicsAllocation(rootDeviceIndex), getGpgpuCommandStreamReceiver().getClearColorAllocation());
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(
AuxTranslationDirection::AuxToNonAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation());
auto auxToNonAuxNode = nodesAllocator->getTag();
timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
}
{
// NonAux to Aux
blitPropertiesContainer[bufferIndex + numBuffers] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux,
buffer->getGraphicsAllocation(rootDeviceIndex), getGpgpuCommandStreamReceiver().getClearColorAllocation());
blitPropertiesContainer[bufferIndex + numKernelObjs] = BlitProperties::constructPropertiesForAuxTranslation(
AuxTranslationDirection::NonAuxToAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation());
auto nonAuxToAuxNode = nodesAllocator->getTag();
timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -194,9 +194,9 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
for (auto &dispatchInfo : multiDispatchInfo) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
size_t memObjAuxCount = multiDispatchInfo.getMemObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getMemObjsForAuxTranslation()->size() : 0;
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
}
if (parentKernel) {
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -101,7 +101,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh),
sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
}
auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -17,8 +17,8 @@ template <typename Family>
bool ClHwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) {
return (HwHelperHw<Family>::getAuxTranslationMode() == AuxTranslationMode::Blit) &&
hwInfo.capabilityTable.blitterOperationsSupported &&
multiDispatchInfo.getMemObjsForAuxTranslation() &&
(multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0);
multiDispatchInfo.getKernelObjsForAuxTranslation() &&
(multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0);
}
template <typename GfxFamily>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -13,6 +13,7 @@
#include "shared/source/utilities/stackvec.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
#include "opencl/source/mem_obj/mem_obj.h"
#include <algorithm>
@@ -197,19 +198,19 @@ struct MultiDispatchInfo {
return builtinOpParams;
}
void setMemObjsForAuxTranslation(const MemObjsForAuxTranslation &memObjsForAuxTranslation) {
this->memObjsForAuxTranslation = &memObjsForAuxTranslation;
void setKernelObjsForAuxTranslation(const KernelObjsForAuxTranslation &kernelObjsForAuxTranslation) {
this->kernelObjsForAuxTranslation = &kernelObjsForAuxTranslation;
}
const MemObjsForAuxTranslation *getMemObjsForAuxTranslation() const {
return memObjsForAuxTranslation;
const KernelObjsForAuxTranslation *getKernelObjsForAuxTranslation() const {
return kernelObjsForAuxTranslation;
}
protected:
BuiltinOpParams builtinOpParams = {};
StackVec<DispatchInfo, 9> dispatchInfos;
StackVec<MemObj *, 2> redescribedSurfaces;
const MemObjsForAuxTranslation *memObjsForAuxTranslation = nullptr;
const KernelObjsForAuxTranslation *kernelObjsForAuxTranslation = nullptr;
Kernel *mainKernel = nullptr;
};
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -13,7 +13,6 @@
#include "opencl/source/api/cl_types.h"
#include <array>
#include <unordered_set>
namespace NEO {
class MemObj;
@@ -34,7 +33,6 @@ struct EventsRequest {
using MemObjSizeArray = std::array<size_t, 3>;
using MemObjOffsetArray = std::array<size_t, 3>;
using MemObjsForAuxTranslation = std::unordered_set<MemObj *>;
struct TransferProperties {
TransferProperties() = delete;

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2020 Intel Corporation
# Copyright (C) 2018-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -14,6 +14,7 @@ set(RUNTIME_SRCS_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_objects_for_aux_translation.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_extra.cpp
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL})

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -158,7 +158,7 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
auto surfaceState = ptrOffset(ssh, sshOffset);
void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, surfaceState, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
}
}
@@ -339,7 +339,7 @@ cl_int Kernel::initialize() {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
}
}
@@ -348,7 +348,7 @@ cl_int Kernel::initialize() {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
}
}
@@ -937,7 +937,7 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
if (requiresSshForBuffers(rootDeviceIndex)) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
}
if (!kernelArguments[argIndex].isPatched) {
patchedArgumentsNum++;
@@ -966,6 +966,21 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
patchWithRequiredSize(patchLocation, patchSize, reinterpret_cast<uintptr_t>(svmPtr));
bool disableL3 = false;
bool forceNonAuxMode = false;
bool isAuxTranslationKernel = (AuxTranslationDirection::None != auxTranslationDirection);
if (isAuxTranslationKernel) {
if (((AuxTranslationDirection::AuxToNonAux == auxTranslationDirection) && argIndex == 1) ||
((AuxTranslationDirection::NonAuxToAux == auxTranslationDirection) && argIndex == 0)) {
forceNonAuxMode = true;
}
disableL3 = (argIndex == 0);
} else if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED &&
!kernelArgInfo.pureStatefulBufferAccess) {
forceNonAuxMode = true;
}
if (requiresSshForBuffers(rootDeviceIndex)) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
@@ -976,7 +991,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch());
allocSize -= offset;
}
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, allocSize, ptrToPatch, offset, svmAlloc, 0, 0);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0);
}
if (!kernelArguments[argIndex].isPatched) {
@@ -1503,7 +1518,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
}
isArgSet.set(rootDeviceIndex);
}
@@ -1554,7 +1569,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState,
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false,
pipe->getSize(), pipe->getCpuAddress(), 0,
graphicsAllocation, 0, 0);
}
@@ -2432,7 +2447,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
}
}
@@ -2454,7 +2469,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) {
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(),
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(),
(void *)devQueue->getEventPoolBuffer()->getGpuAddress(), 0, devQueue->getEventPoolBuffer(), 0, 0);
}
}
@@ -2489,7 +2504,7 @@ void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation,
patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, surfaceState, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0);
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0);
}
}
@@ -2553,15 +2568,25 @@ bool Kernel::canTransformImages() const {
return renderCoreFamily >= IGFX_GEN9_CORE && renderCoreFamily <= IGFX_GEN11LP_CORE;
}
void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation, uint32_t rootDeviceIndex) {
memObjsForAuxTranslation.reserve(getKernelArgsNumber());
void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation, uint32_t rootDeviceIndex) {
kernelObjsForAuxTranslation.reserve(getKernelArgsNumber());
auto &kernelInfo = getKernelInfo(rootDeviceIndex);
for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
auto buffer = castToObject<Buffer>(getKernelArg(i));
if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
memObjsForAuxTranslation.insert(buffer);
kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer});
auto &context = this->program->getContext();
if (context.isProvidingPerformanceHints()) {
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str());
}
}
}
if (SVM_ALLOC_OBJ == getKernelArguments().at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
auto svmAlloc = reinterpret_cast<GraphicsAllocation *>(const_cast<void *>(getKernelArg(i)));
if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, svmAlloc});
auto &context = this->program->getContext();
if (context.isProvidingPerformanceHints()) {
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
@@ -2580,6 +2605,12 @@ bool Kernel::hasDirectStatelessAccessToHostMemory() const {
return true;
}
}
if (SVM_ALLOC_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) {
auto svmAlloc = reinterpret_cast<const GraphicsAllocation *>(getKernelArg(i));
if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
return true;
}
}
}
return false;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,7 @@
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/properties_helper.h"
#include "opencl/source/kernel/kernel_execution_type.h"
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
#include "opencl/source/program/kernel_info.h"
#include "opencl/source/program/program.h"
@@ -357,7 +358,7 @@ class Kernel : public BaseObject<_cl_kernel> {
return usingImagesOnly;
}
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation, uint32_t rootDeviceIndex);
void fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation, uint32_t rootDeviceIndex);
MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const;
@@ -635,4 +636,5 @@ class Kernel : public BaseObject<_cl_kernel> {
std::unordered_map<KernelConfig, KernelSubmissionData, KernelConfigHash> kernelSubmissionMap;
bool singleSubdevicePreferedInCurrentEnqueue = false;
};
} // namespace NEO

View File

@@ -0,0 +1,38 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <unordered_set>
namespace NEO {

// One kernel argument selected for aux translation: a type tag plus an untyped
// pointer to the object. Users of this type cast `object` back according to
// `type` (MEM_OBJ entries are treated as Buffer, GFX_ALLOC entries as
// GraphicsAllocation).
struct KernelObjForAuxTranslation {
    // Discriminates what `object` points to.
    enum class Type {
        MEM_OBJ,
        GFX_ALLOC
    };

    Type type;
    void *object;

    // Intentionally not explicit: callers build entries from a braced
    // {type, pointer} pair when inserting into / searching the set.
    KernelObjForAuxTranslation(Type objType, void *objPtr) : type(objType), object(objPtr) {}

    // Two entries are equal when they refer to the same object; the type tag
    // does not take part in the comparison.
    bool operator==(const KernelObjForAuxTranslation &other) const {
        return object == other.object;
    }
};

// Hash functor matching operator== above: derived from the pointer value only.
struct KernelObjForAuxTranslationHash {
    std::size_t operator()(const KernelObjForAuxTranslation &entry) const {
        const auto address = reinterpret_cast<size_t>(entry.object);
        return address;
    }
};

// Set of unique (by pointer) kernel objects that require aux translation.
using KernelObjsForAuxTranslation = std::unordered_set<KernelObjForAuxTranslation, KernelObjForAuxTranslationHash>;

} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -739,6 +739,8 @@ bool Buffer::isCompressed(uint32_t rootDeviceIndex) const {
void Buffer::setSurfaceState(const Device *device,
void *surfaceState,
bool forceNonAuxMode,
bool disableL3,
size_t svmSize,
void *svmPtr,
size_t offset,
@@ -750,7 +752,7 @@ void Buffer::setSurfaceState(const Device *device,
multiGraphicsAllocation.addAllocation(gfxAlloc);
}
auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
buffer->setArgStateful(surfaceState, false, false, false, false, *device);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device);
delete buffer;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -120,6 +120,8 @@ class Buffer : public MemObj {
static void setSurfaceState(const Device *device,
void *surfaceState,
bool forceNonAuxMode,
bool disableL3,
size_t svmSize,
void *svmPtr,
size_t offset,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -66,7 +66,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset);
void *addressToPatch = printfSurface->getUnderlyingBuffer();
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device.getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0);
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0);
}
}

View File

@@ -122,6 +122,18 @@ struct VmeBuiltInTests : BuiltInTests {
}
};
// Value-parameterized variant of BuiltInTests: the test parameter is the kind
// of kernel object (KernelObjForAuxTranslation::Type) the test should use.
struct AuxBuiltInTests : BuiltInTests, ::testing::WithParamInterface<KernelObjForAuxTranslation::Type> {
    // Parameter of the currently running test instance; assigned in SetUp().
    KernelObjForAuxTranslation::Type kernelObjType;

    void SetUp() override {
        // Run the base fixture setup first, then cache the test parameter.
        BuiltInTests::SetUp();
        this->kernelObjType = this->GetParam();
    }
};
// Instantiate AuxBuiltInTests (with an empty instantiation prefix) once for
// each kernel object type: MEM_OBJ and GFX_ALLOC.
INSTANTIATE_TEST_CASE_P(,
AuxBuiltInTests,
testing::ValuesIn({KernelObjForAuxTranslation::Type::MEM_OBJ, KernelObjForAuxTranslation::Type::GFX_ALLOC}));
TEST_F(BuiltInTests, WhenBuildingListOfBuiltinsThenBuiltinsHaveBeenGenerated) {
for (auto supportsImages : ::testing::Bool()) {
allBuiltIns.clear();
@@ -223,24 +235,24 @@ TEST_F(BuiltInTests, GivenCopyBufferToBufferWhenDispatchInfoIsCreatedThenParamsA
delete dstPtr;
}
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
HWTEST_P(AuxBuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
std::vector<Kernel *> builtinKernels;
MockBuffer mockBuffer[3];
mockBuffer[0].getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setSize(0x1000);
mockBuffer[1].getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setSize(0x20000);
mockBuffer[2].getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setSize(0x30000);
std::vector<MockKernelObjForAuxTranslation> mockKernelObjForAuxTranslation;
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x1000));
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x20000));
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x30000));
BuiltinOpParams builtinOpsParams;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
for (auto &buffer : mockBuffer) {
memObjsForAuxTranslation.insert(&buffer);
for (auto &kernelObj : mockKernelObjForAuxTranslation) {
kernelObjsForAuxTranslation.insert(kernelObj);
}
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
@@ -249,18 +261,37 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTrans
for (auto &dispatchInfo : multiDispatchInfo) {
auto kernel = dispatchInfo.getKernel();
builtinKernels.push_back(kernel);
MemObj *buffer = *memObjsForAuxTranslation.find(castToObject<Buffer>(kernel->getKernelArguments().at(0).object));
EXPECT_NE(nullptr, buffer);
memObjsForAuxTranslation.erase(buffer);
cl_mem clMem = buffer;
EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object);
EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object);
if (kernelObjType == KernelObjForAuxTranslation::Type::MEM_OBJ) {
auto buffer = castToObject<Buffer>(kernel->getKernelArguments().at(0).object);
auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer});
EXPECT_NE(nullptr, kernelObj.object);
EXPECT_EQ(KernelObjForAuxTranslation::Type::MEM_OBJ, kernelObj.type);
kernelObjsForAuxTranslation.erase(kernelObj);
EXPECT_EQ(1u, dispatchInfo.getDim());
size_t xGws = alignUp(buffer->getSize(), 512) / 16;
Vec3<size_t> gws = {xGws, 1, 1};
EXPECT_EQ(gws, dispatchInfo.getGWS());
cl_mem clMem = buffer;
EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object);
EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object);
EXPECT_EQ(1u, dispatchInfo.getDim());
size_t xGws = alignUp(buffer->getSize(), 512) / 16;
Vec3<size_t> gws = {xGws, 1, 1};
EXPECT_EQ(gws, dispatchInfo.getGWS());
} else {
auto gfxAllocation = static_cast<GraphicsAllocation *>(kernel->getKernelArguments().at(0).object);
auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation});
EXPECT_NE(nullptr, kernelObj.object);
EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
kernelObjsForAuxTranslation.erase(kernelObj);
EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(0).object);
EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(1).object);
EXPECT_EQ(1u, dispatchInfo.getDim());
size_t xGws = alignUp(gfxAllocation->getUnderlyingBufferSize(), 512) / 16;
Vec3<size_t> gws = {xGws, 1, 1};
EXPECT_EQ(gws, dispatchInfo.getGWS());
}
}
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams));
// always pick different kernel
@@ -270,24 +301,24 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTrans
EXPECT_NE(builtinKernels[1], builtinKernels[2]);
}
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
HWTEST_P(AuxBuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
std::vector<Kernel *> builtinKernels;
MockBuffer mockBuffer[3];
mockBuffer[0].getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setSize(0x1000);
mockBuffer[1].getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setSize(0x20000);
mockBuffer[2].getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setSize(0x30000);
std::vector<MockKernelObjForAuxTranslation> mockKernelObjForAuxTranslation;
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x1000));
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x20000));
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x30000));
BuiltinOpParams builtinOpsParams;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
for (auto &buffer : mockBuffer) {
memObjsForAuxTranslation.insert(&buffer);
for (auto &kernelObj : mockKernelObjForAuxTranslation) {
kernelObjsForAuxTranslation.insert(kernelObj);
}
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
@@ -296,18 +327,37 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslat
for (auto &dispatchInfo : multiDispatchInfo) {
auto kernel = dispatchInfo.getKernel();
builtinKernels.push_back(kernel);
MemObj *buffer = *memObjsForAuxTranslation.find(castToObject<Buffer>(kernel->getKernelArguments().at(1).object));
EXPECT_NE(nullptr, buffer);
memObjsForAuxTranslation.erase(buffer);
cl_mem clMem = buffer;
EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object);
EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object);
if (kernelObjType == KernelObjForAuxTranslation::Type::MEM_OBJ) {
auto buffer = castToObject<Buffer>(kernel->getKernelArguments().at(0).object);
auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer});
EXPECT_NE(nullptr, kernelObj.object);
EXPECT_EQ(KernelObjForAuxTranslation::Type::MEM_OBJ, kernelObj.type);
kernelObjsForAuxTranslation.erase(kernelObj);
EXPECT_EQ(1u, dispatchInfo.getDim());
size_t xGws = alignUp(buffer->getSize(), 4) / 4;
Vec3<size_t> gws = {xGws, 1, 1};
EXPECT_EQ(gws, dispatchInfo.getGWS());
cl_mem clMem = buffer;
EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object);
EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object);
EXPECT_EQ(1u, dispatchInfo.getDim());
size_t xGws = alignUp(buffer->getSize(), 4) / 4;
Vec3<size_t> gws = {xGws, 1, 1};
EXPECT_EQ(gws, dispatchInfo.getGWS());
} else {
auto gfxAllocation = static_cast<GraphicsAllocation *>(kernel->getKernelArguments().at(0).object);
auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation});
EXPECT_NE(nullptr, kernelObj.object);
EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
kernelObjsForAuxTranslation.erase(kernelObj);
EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(0).object);
EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(1).object);
EXPECT_EQ(1u, dispatchInfo.getDim());
size_t xGws = alignUp(gfxAllocation->getUnderlyingBufferSize(), 512) / 16;
Vec3<size_t> gws = {xGws, 1, 1};
EXPECT_EQ(gws, dispatchInfo.getGWS());
}
}
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams));
// always pick different kernel
@@ -317,20 +367,23 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslat
EXPECT_NE(builtinKernels[1], builtinKernels[2]);
}
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) {
HWTEST_P(AuxBuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) {
BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
MockBuffer mockBuffer[3];
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
std::vector<MockKernelObjForAuxTranslation> mockKernelObjForAuxTranslation;
for (int i = 0; i < 3; i++) {
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType));
}
std::vector<Kernel *> builtinKernels;
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
BuiltinOpParams builtinOpsParams;
for (auto &buffer : mockBuffer) {
memObjsForAuxTranslation.insert(&buffer);
for (auto &kernelObj : mockKernelObjForAuxTranslation) {
kernelObjsForAuxTranslation.insert(kernelObj);
}
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
@@ -352,18 +405,18 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPic
EXPECT_NE(builtinKernels[2], builtinKernels[5]);
}
HWTEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
HWTEST_P(AuxBuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
MockBuffer mockBuffer;
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
MockKernelObjForAuxTranslation mockKernelObjForAuxTranslation(kernelObjType);
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
BuiltinOpParams builtinOpsParams;
memObjsForAuxTranslation.insert(&mockBuffer);
kernelObjsForAuxTranslation.insert(mockKernelObjForAuxTranslation);
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None;
EXPECT_THROW(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams), std::exception);
@@ -388,21 +441,24 @@ TEST_F(BuiltInTests, whenAuxBuiltInIsConstructedThenResizeKernelInstancedTo5) {
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
}
HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) {
HWTEST_P(AuxBuiltInTests, givenMoreKernelObjectsForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) {
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice);
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size());
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
MemObjsForAuxTranslation memObjsForAuxTranslation;
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
BuiltinOpParams builtinOpsParams;
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
MockBuffer mockBuffer[7];
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
std::vector<MockKernelObjForAuxTranslation> mockKernelObjForAuxTranslation;
for (int i = 0; i < 7; i++) {
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType));
}
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
for (auto &buffer : mockBuffer) {
memObjsForAuxTranslation.insert(&buffer);
for (auto &kernelObj : mockKernelObjForAuxTranslation) {
kernelObjsForAuxTranslation.insert(kernelObj);
}
EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
@@ -426,7 +482,7 @@ TEST_F(BuiltInTests, givenkAuxBuiltInWhenResizeIsCalledThenCloneAllNewInstancesF
}
}
HWTEST_F(BuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalledThenLockOnBuiltin) {
HWTEST_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalledThenLockOnBuiltin) {
BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto mockAuxBuiltInOp = new MockAuxBuilInOp(*pBuiltIns, *pClDevice);
pBuiltIns->BuiltinOpsBuilders[static_cast<uint32_t>(EBuiltInOps::AuxTranslation)].first.reset(mockAuxBuiltInOp);
@@ -438,15 +494,21 @@ HWTEST_F(BuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalledThe
MockKernelWithInternals mockKernel(*pClDevice, pContext);
MockCommandQueueHw<FamilyType> cmdQ(pContext, pClDevice, nullptr);
size_t gws[3] = {1, 0, 0};
MockBuffer buffer;
cl_mem clMem = &buffer;
buffer.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
mockKernel.kernelInfo.kernelArgInfo.resize(1);
mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1);
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
mockKernel.mockKernel->initialize();
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
MockKernelObjForAuxTranslation mockKernelObjForAuxTranslation(kernelObjType);
if (kernelObjType == KernelObjForAuxTranslation::Type::MEM_OBJ) {
cl_mem clMem = mockKernelObjForAuxTranslation.mockBuffer.get();
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
} else {
auto gfxAllocation = mockKernelObjForAuxTranslation.mockGraphicsAllocation.get();
auto ptr = reinterpret_cast<void *>(gfxAllocation->getGpuAddressToPatch());
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation);
}
mockKernel.mockKernel->auxTranslationRequired = false;
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
@@ -459,7 +521,7 @@ HWTEST_F(BuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalledThe
EXPECT_EQ(1u, mockBuiltinKernel->releaseOwnershipCalls);
}
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKernelArgsThenSetValidMocs) {
HWCMDTEST_P(IGFX_GEN8_CORE, AuxBuiltInTests, givenAuxTranslationKernelWhenSettingKernelArgsThenSetValidMocs) {
if (this->pDevice->areSharedSystemAllocationsAllowed()) {
GTEST_SKIP();
}
@@ -468,8 +530,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice);
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
BuiltinOpParams builtinOpParamsToAux;
builtinOpParamsToAux.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
@@ -477,9 +539,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe
BuiltinOpParams builtinOpParamsToNonAux;
builtinOpParamsToNonAux.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal));
memObjsForAuxTranslation.insert(buffer.get());
std::unique_ptr<Buffer> buffer = nullptr;
std::unique_ptr<GraphicsAllocation> gfxAllocation = nullptr;
if (kernelObjType == MockKernelObjForAuxTranslation::Type::MEM_OBJ) {
cl_int retVal = CL_SUCCESS;
buffer.reset(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal));
kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer.get()});
} else {
gfxAllocation.reset(new MockGraphicsAllocation(nullptr, MemoryConstants::pageSize));
kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation.get()});
}
mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpParamsToAux);
mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpParamsToNonAux);
@@ -515,7 +585,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe
}
}
HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode) {
HWTEST_P(AuxBuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode) {
if (this->pDevice->areSharedSystemAllocationsAllowed()) {
GTEST_SKIP();
}
@@ -525,20 +595,32 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice);
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
BuiltinOpParams builtinOpParams;
builtinOpParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal));
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false);
gmm->isRenderCompressed = true;
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setDefaultGmm(gmm);
std::unique_ptr<Buffer> buffer = nullptr;
std::unique_ptr<GraphicsAllocation> gfxAllocation = nullptr;
memObjsForAuxTranslation.insert(buffer.get());
auto gmm = std::unique_ptr<Gmm>(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false));
gmm->isRenderCompressed = true;
if (kernelObjType == MockKernelObjForAuxTranslation::Type::MEM_OBJ) {
cl_int retVal = CL_SUCCESS;
buffer.reset(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal));
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setDefaultGmm(gmm.release());
kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer.get()});
} else {
gfxAllocation.reset(new MockGraphicsAllocation(nullptr, MemoryConstants::pageSize));
gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
gfxAllocation->setDefaultGmm(gmm.get());
kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation.get()});
}
mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpParams);
@@ -561,7 +643,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet
}
}
HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode) {
HWTEST_P(AuxBuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode) {
if (this->pDevice->areSharedSystemAllocationsAllowed()) {
GTEST_SKIP();
}
@@ -571,19 +653,21 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice);
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
BuiltinOpParams builtinOpParams;
builtinOpParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal));
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false);
MockKernelObjForAuxTranslation mockKernelObjForAuxTranslation(kernelObjType);
auto gmm = std::unique_ptr<Gmm>(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false));
gmm->isRenderCompressed = true;
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setDefaultGmm(gmm);
memObjsForAuxTranslation.insert(buffer.get());
if (kernelObjType == MockKernelObjForAuxTranslation::Type::MEM_OBJ) {
mockKernelObjForAuxTranslation.mockBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setDefaultGmm(gmm.get());
} else {
mockKernelObjForAuxTranslation.mockGraphicsAllocation->setDefaultGmm(gmm.get());
}
kernelObjsForAuxTranslation.insert(mockKernelObjForAuxTranslation);
mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpParams);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -130,6 +130,28 @@ struct BlitEnqueueTests : public ::testing::Test {
}
}
// Binds every allocation in `allocs` to the mock kernel as an SVM-alloc argument.
// Argument slots are (re)configured as non-stateful buffers before the kernel is
// initialized, and the kernel is expected to report that aux translation is required.
template <size_t N>
void setMockKernelArgs(std::array<GraphicsAllocation *, N> allocs) {
    auto &argInfos = mockKernel->kernelInfo.kernelArgInfo;

    // Grow the argument table only when it cannot hold all allocations.
    if (argInfos.size() < N) {
        argInfos.resize(N);
    }

    for (size_t slot = 0; slot < N; ++slot) {
        auto &argInfo = argInfos.at(slot);
        argInfo.kernelArgPatchInfoVector.resize(1);
        argInfo.isBuffer = true;
        argInfo.pureStatefulBufferAccess = false;
    }

    mockKernel->mockKernel->initialize();
    EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);

    // Arguments are set only after initialize(), one per allocation, in array order.
    uint32_t argIndex = 0;
    for (GraphicsAllocation *allocation : allocs) {
        auto gpuPtr = reinterpret_cast<void *>(allocation->getGpuAddressToPatch());
        mockKernel->mockKernel->setArgSvmAlloc(argIndex, gpuPtr, allocation);
        ++argIndex;
    }
}
ReleaseableObjectPtr<Buffer> createBuffer(size_t size, bool compressed) {
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal));
if (compressed) {
@@ -140,6 +162,16 @@ struct BlitEnqueueTests : public ::testing::Test {
return buffer;
}
std::unique_ptr<GraphicsAllocation> createGfxAllocation(size_t size, bool compressed) {
auto alloc = std::unique_ptr<GraphicsAllocation>(new MockGraphicsAllocation(nullptr, size));
if (compressed) {
alloc->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
} else {
alloc->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
}
return alloc;
}
template <typename Family>
GenCmdList getCmdList(LinearStream &linearStream, size_t offset) {
HardwareParse hwParser;
@@ -602,9 +634,9 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenDispatchi
auto buffer1 = createBuffer(1, false);
auto buffer2 = createBuffer(1, true);
MemObjsForAuxTranslation memObjects;
memObjects.insert(buffer0.get());
memObjects.insert(buffer2.get());
KernelObjsForAuxTranslation kernelObjects;
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer0.get()});
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer2.get()});
size_t numBuffersToEstimate = 2;
size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
@@ -620,11 +652,11 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenDispatchi
EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split
EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(dependencySize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(dependencySize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
}
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredCacheFlushWhenDispatchingThenEstimateCmdBufferSize) {
@@ -640,9 +672,9 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredC
auto buffer1 = createBuffer(1, false);
auto buffer2 = createBuffer(1, true);
MemObjsForAuxTranslation memObjects;
memObjects.insert(buffer0.get());
memObjects.insert(buffer2.get());
KernelObjsForAuxTranslation kernelObjects;
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer0.get()});
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer2.get()});
size_t numBuffersToEstimate = 2;
size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
@@ -660,11 +692,11 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredC
EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split
EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(dependencySize + cacheFlushSize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
EXPECT_EQ(dependencySize + cacheFlushSize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired()));
}
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBarrier) {
@@ -841,6 +873,20 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenEnqueueIsCal
EXPECT_TRUE(ultCsr->recordedDispatchFlags.implicitFlush);
}
// Enqueues a kernel whose only argument is a compressed graphics allocation and checks
// that the GPGPU command stream receiver records exactly one task with implicit flush.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationOnGfxAllocationWhenEnqueueIsCalledThenDoImplicitFlushOnGpgpuCsr) {
    auto compressedAllocation = createGfxAllocation(1, true);
    std::array<GraphicsAllocation *, 1> kernelArgAllocations{{compressedAllocation.get()}};
    setMockKernelArgs(kernelArgAllocations);

    auto gpgpuUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);

    // Nothing has been submitted before the enqueue.
    EXPECT_EQ(0u, gpgpuUltCsr->taskCount);

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(1u, gpgpuUltCsr->taskCount);
    EXPECT_TRUE(gpgpuUltCsr->recordedDispatchFlags.implicitFlush);
}
using BlitEnqueueWithNoTimestampPacketTests = BlitEnqueueTests<0>;
HWTEST_TEMPLATED_F(BlitEnqueueWithNoTimestampPacketTests, givenNoTimestampPacketsWritewhenEnqueueingBlitOperationThenEnginesAreSynchronized) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -108,6 +108,18 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
DebugManagerStateRestore dbgRestore;
};
// Value-parameterized variant of DispatchWalkerTest. The test parameter selects which
// kind of kernel object (MEM_OBJ or GFX_ALLOC) the aux-translation tests operate on.
struct DispatchWalkerTestForAuxTranslation : DispatchWalkerTest, public ::testing::WithParamInterface<KernelObjForAuxTranslation::Type> {
    // Runs the base fixture setup, then caches the test parameter for use in test bodies.
    void SetUp() override {
        DispatchWalkerTest::SetUp();
        kernelObjType = GetParam();
    }

    // Default-initialized so the member never holds an indeterminate value before
    // SetUp() overwrites it with the actual test parameter.
    KernelObjForAuxTranslation::Type kernelObjType{KernelObjForAuxTranslation::Type::MEM_OBJ};
};
// Run every DispatchWalkerTestForAuxTranslation test once per kernel object type.
INSTANTIATE_TEST_CASE_P(,
                        DispatchWalkerTestForAuxTranslation,
                        testing::Values(KernelObjForAuxTranslation::Type::MEM_OBJ,
                                        KernelObjForAuxTranslation::Type::GFX_ALLOC));
HWTEST_F(DispatchWalkerTest, WhenGettingComputeDimensionsThenCorrectNumberOfDimensionsIsReturned) {
const size_t workItems1D[] = {100, 1, 1};
EXPECT_EQ(1u, computeDimensions(workItems1D));
@@ -1270,7 +1282,7 @@ TEST(DispatchWalker, WhenCalculatingDispatchDimensionsThenCorrectValuesAreReturn
}
}
HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) {
HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) {
BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
@@ -1281,13 +1293,14 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh
auto &cmdStream = pCmdQ->getCS(0);
void *buffer = cmdStream.getCpuBase();
kernel.auxTranslationRequired = true;
MockBuffer mockBuffer[2];
MockKernelObjForAuxTranslation mockKernelObj1(kernelObjType);
MockKernelObjForAuxTranslation mockKernelObj2(kernelObjType);
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
memObjsForAuxTranslation.insert(&mockBuffer[0]);
memObjsForAuxTranslation.insert(&mockBuffer[1]);
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
kernelObjsForAuxTranslation.insert(mockKernelObj1);
kernelObjsForAuxTranslation.insert(mockKernelObj2);
BuiltinOpParams builtinOpsParams;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
@@ -1323,7 +1336,7 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh
EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable());
}
HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) {
HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) {
BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
@@ -1334,13 +1347,14 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredTh
auto &cmdStream = pCmdQ->getCS(0);
void *buffer = cmdStream.getCpuBase();
kernel.auxTranslationRequired = true;
MockBuffer mockBuffer[2];
MockKernelObjForAuxTranslation mockKernelObj1(kernelObjType);
MockKernelObjForAuxTranslation mockKernelObj2(kernelObjType);
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
memObjsForAuxTranslation.insert(&mockBuffer[0]);
memObjsForAuxTranslation.insert(&mockBuffer[1]);
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
kernelObjsForAuxTranslation.insert(mockKernelObj1);
kernelObjsForAuxTranslation.insert(mockKernelObj2);
BuiltinOpParams builtinOpsParams;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -782,7 +782,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
lastKernel = dispatchInfo.getKernel();
dispatchInfos.emplace_back(dispatchInfo);
}
dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), *multiDispatchInfo.getMemObjsForAuxTranslation(),
dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), *multiDispatchInfo.getKernelObjsForAuxTranslation(),
auxTranslationDirection);
}
@@ -793,7 +793,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
std::vector<AuxTranslationDirection> auxTranslationDirections;
std::vector<DispatchInfo> dispatchInfos;
std::vector<std::tuple<Kernel *, size_t, MemObjsForAuxTranslation, AuxTranslationDirection>> dispatchAuxTranslationInputs;
std::vector<std::tuple<Kernel *, size_t, KernelObjsForAuxTranslation, AuxTranslationDirection>> dispatchAuxTranslationInputs;
uint32_t waitCalled = 0;
};
@@ -865,11 +865,11 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
EXPECT_EQ(1u, std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel
EXPECT_EQ(1u, std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel
EXPECT_EQ(1u, std::get<KernelObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel
EXPECT_EQ(1u, std::get<KernelObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
EXPECT_EQ(&buffer2, (*std::get<KernelObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin()).object);
EXPECT_EQ(&buffer2, (*std::get<KernelObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin()).object);
auto cmdStream = cmdQ.commandStream;
auto sizeUsed = cmdStream->getUsed();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -430,7 +430,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenConte
context->release();
}
TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithBuffersForAuxTranslationThenContextProvidesProperHint) {
TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnMemObjectThenContextProvidesProperHint) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.PrintDriverDiagnostics.set(1);
@@ -451,8 +451,8 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
testing::internal::CaptureStdout();
MemObjsForAuxTranslation memObjects;
mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects, rootDeviceIndex);
KernelObjsForAuxTranslation kernelObjects;
mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION],
mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo.at(0).metadataExtended->argName.c_str());
@@ -462,6 +462,122 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
EXPECT_TRUE(containsHint(expectedHint, userData));
}
// A BUFFER_COMPRESSED graphics allocation is bound as an SVM argument whose
// pureStatefulBufferAccess is false, with PrintDriverDiagnostics enabled.
// Collecting kernel objects for aux translation must then print something and
// deliver the KERNEL_ARGUMENT_AUX_TRANSLATION hint for argument "arg0".
TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnGfxAllocationThenContextProvidesProperHint) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.PrintDriverDiagnostics.set(1);

    auto pDevice = castToObject<ClDevice>(devices[0]);
    auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    MockKernelWithInternals mockKernel(*pDevice, context);

    // Backing storage for the mocked compressed allocation.
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    // Describe a single kernel argument that is not accessed purely statefully.
    mockKernel.kernelInfo.kernelArgInfo.resize(1);
    auto &argInfo = mockKernel.kernelInfo.kernelArgInfo[0];
    argInfo.metadataExtended = std::make_unique<ArgTypeMetadataExtended>();
    argInfo.metadataExtended->argName = "arg0";
    argInfo.kernelArgPatchInfoVector.resize(1);
    argInfo.pureStatefulBufferAccess = false;

    auto &kernel = *mockKernel.mockKernel;
    kernel.initialize();
    kernel.auxTranslationRequired = true;
    kernel.setArgSvmAlloc(0, ptr, &gfxAllocation);

    testing::internal::CaptureStdout();
    KernelObjsForAuxTranslation kernelObjects;
    kernel.fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex);

    // Build the hint text the context is expected to have received.
    const auto &queriedKernelInfo = kernel.getKernelInfo(rootDeviceIndex);
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION],
             queriedKernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, queriedKernelInfo.kernelArgInfo.at(0).metadataExtended->argName.c_str());

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_NE(0u, output.size());
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
// Same setup as the compressed-allocation hint test, but the argument is marked
// pureStatefulBufferAccess = true. Collecting kernel objects for aux translation
// must then print nothing to stdout.
TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKernelObjectWithGraphicsAllocationAccessedStatefullyOnlyThenDontReportAnyHint) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.PrintDriverDiagnostics.set(1);

    auto pDevice = castToObject<ClDevice>(devices[0]);
    auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    MockKernelWithInternals mockKernel(*pDevice, context);

    // Backing storage for the mocked compressed allocation.
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    // Single kernel argument, accessed purely statefully.
    mockKernel.kernelInfo.kernelArgInfo.resize(1);
    auto &argInfo = mockKernel.kernelInfo.kernelArgInfo[0];
    argInfo.metadataExtended = std::make_unique<ArgTypeMetadataExtended>();
    argInfo.metadataExtended->argName = "arg0";
    argInfo.kernelArgPatchInfoVector.resize(1);
    argInfo.pureStatefulBufferAccess = true;

    auto &kernel = *mockKernel.mockKernel;
    kernel.initialize();
    kernel.auxTranslationRequired = true;
    kernel.setArgSvmAlloc(0, ptr, &gfxAllocation);

    testing::internal::CaptureStdout();
    KernelObjsForAuxTranslation kernelObjects;
    kernel.fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex);

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_EQ(0u, output.size());
}
// The PrintDriverDiagnostics flag is left untouched and the kernel uses a
// MockContext created locally with null properties instead of the fixture's
// context. Collecting kernel objects for aux translation on a compressed
// allocation must then print nothing to stdout.
TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCallFillWithKernelObjsForAuxTranslationOnGfxAllocationThenDontReportAnyHint) {
    auto pDevice = castToObject<ClDevice>(devices[0]);
    cl_device_id clDevice = pDevice;
    // Locally created context; released manually at the end of the test.
    auto localContext = Context::create<MockContext>(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal);
    auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    MockKernelWithInternals mockKernel(*pDevice, localContext);

    // Backing storage for the mocked compressed allocation.
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    // Single kernel argument that is not accessed purely statefully.
    mockKernel.kernelInfo.kernelArgInfo.resize(1);
    auto &argInfo = mockKernel.kernelInfo.kernelArgInfo[0];
    argInfo.metadataExtended = std::make_unique<ArgTypeMetadataExtended>();
    argInfo.metadataExtended->argName = "arg0";
    argInfo.kernelArgPatchInfoVector.resize(1);
    argInfo.pureStatefulBufferAccess = false;

    auto &kernel = *mockKernel.mockKernel;
    kernel.initialize();
    kernel.auxTranslationRequired = true;
    kernel.setArgSvmAlloc(0, ptr, &gfxAllocation);

    testing::internal::CaptureStdout();
    KernelObjsForAuxTranslation kernelObjects;
    kernel.fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex);

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_EQ(0u, output.size());

    localContext->release();
}
// The SVM argument is set with a null pointer and a null graphics allocation.
// Collecting kernel objects for aux translation must then print nothing to stdout.
TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNullGfxAllocationThenDontReportAnyHint) {
    auto pDevice = castToObject<ClDevice>(devices[0]);
    auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    MockKernelWithInternals mockKernel(*pDevice, context);

    // Single kernel argument that is not accessed purely statefully.
    mockKernel.kernelInfo.kernelArgInfo.resize(1);
    auto &argInfo = mockKernel.kernelInfo.kernelArgInfo[0];
    argInfo.metadataExtended = std::make_unique<ArgTypeMetadataExtended>();
    argInfo.metadataExtended->argName = "arg0";
    argInfo.kernelArgPatchInfoVector.resize(1);
    argInfo.pureStatefulBufferAccess = false;

    auto &kernel = *mockKernel.mockKernel;
    kernel.initialize();
    kernel.setArgSvmAlloc(0, nullptr, nullptr);

    testing::internal::CaptureStdout();
    KernelObjsForAuxTranslation kernelObjects;
    kernel.fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex);

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_EQ(0u, output.size());
}
TEST_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) {
cl_int retVal;
HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();

View File

@@ -736,7 +736,7 @@ HWTEST_F(HwHelperTest, whenQueryingMaxNumSamplersThenReturnSixteen) {
HWTEST_F(HwHelperTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) {
DebugManagerStateRestore restore;
MockBuffer buffer;
MemObjsForAuxTranslation memObjects;
KernelObjsForAuxTranslation kernelObjects;
MultiDispatchInfo multiDispatchInfo;
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = true;
@@ -745,10 +745,10 @@ HWTEST_F(HwHelperTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenChec
EXPECT_FALSE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
multiDispatchInfo.setMemObjsForAuxTranslation(memObjects);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjects);
EXPECT_FALSE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
memObjects.insert(&buffer);
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, &buffer});
EXPECT_TRUE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
hwInfo.capabilityTable.blitterOperationsSupported = false;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -385,7 +385,39 @@ TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessT
}
}
TEST_F(KernelArgBufferTest, givenInvalidMemObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
// With a BUFFER_COMPRESSED graphics allocation bound as an SVM argument, the kernel
// must report no direct stateless access to host memory, whether or not the
// argument is flagged as purely stateful.
TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    for (auto statefulOnly : {false, true}) {
        pKernelInfo->kernelArgInfo[0].pureStatefulBufferAccess = statefulOnly;

        auto setArgStatus = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
        EXPECT_EQ(CL_SUCCESS, setArgStatus);

        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
// With a BUFFER_HOST_MEMORY graphics allocation bound as an SVM argument, direct
// stateless access to host memory is expected exactly when the argument is NOT
// flagged as purely stateful.
TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    for (auto statefulOnly : {false, true}) {
        pKernelInfo->kernelArgInfo[0].pureStatefulBufferAccess = statefulOnly;

        auto setArgStatus = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
        EXPECT_EQ(CL_SUCCESS, setArgStatus);

        EXPECT_EQ(!statefulOnly, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
KernelInfo kernelInfo;
MockKernel emptyKernel(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0));
EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory());
@@ -395,6 +427,9 @@ TEST_F(KernelArgBufferTest, givenInvalidMemObjWhenHasDirectStatelessAccessToHost
pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ;
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
pKernel->kernelArguments.at(0).type = Kernel::SVM_ALLOC_OBJ;
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
}
TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -270,8 +270,8 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
{
void *addressToPatch = svmAlloc.getUnderlyingBuffer();
size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize();
Buffer::setSurfaceState(pDevice, &expectedSurfaceState, sizeToPatch,
addressToPatch, 0, &svmAlloc, 0, 0);
Buffer::setSurfaceState(pDevice, &expectedSurfaceState, false, false,
sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0);
}
// verify ssh was properly patched
@@ -426,7 +426,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
EXPECT_EQ(0U, *expectedOffsetPatchPtr);
}
Buffer::setSurfaceState(device.get(), &expectedSurfaceState, svmAlloc.getUnderlyingBufferSize(),
Buffer::setSurfaceState(device.get(), &expectedSurfaceState, false, false, svmAlloc.getUnderlyingBufferSize(),
svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0);
// verify ssh was properly patched

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1210,7 +1210,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenBufferSetSurfaceThatMemo
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1229,7 +1229,7 @@ HWTEST_F(BufferSetSurfaceTests, givenDebugVariableToDisableCachingForStatefulBuf
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1249,7 +1249,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsUnalignedToC
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, size, offsetedPtr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1268,7 +1268,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTo
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, offsetedSize, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1287,7 +1287,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCach
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1306,7 +1306,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTh
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, offsetedSize, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0);
auto width = surfaceState.getWidth();
EXPECT_EQ(alignUp(width, 4), width);
@@ -1324,7 +1324,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceWhenOffsetIsSpecifiedForSvm
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, offset, &svmAlloc, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0);
auto baseAddress = surfaceState.getSurfaceBaseAddress();
EXPECT_EQ(svmAlloc.getGpuAddress() + offset, baseAddress);
@@ -1340,7 +1340,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNotNullThenB
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
auto surfType = surfaceState.getSurfaceType();
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType);
@@ -1353,7 +1353,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNullThenNull
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
auto surfType = surfaceState.getSurfaceType();
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfType);
@@ -1587,7 +1587,7 @@ HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgramm
uintptr_t ptr = 0xfffff000;
void *svmPtr = reinterpret_cast<void *>(ptr);
Buffer::setSurfaceState(device.get(), &surfaceState, 5, svmPtr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0);
EXPECT_EQ(castToUint64(svmPtr), surfaceState.getSurfaceBaseAddress());
SURFACE_STATE_BUFFER_LENGTH length = {};
@@ -1604,7 +1604,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedWhenSurfaceStateIsBei
MockContext context;
void *svmPtr = reinterpret_cast<void *>(0x1005);
Buffer::setSurfaceState(device.get(), &surfaceState, 5, svmPtr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0);
EXPECT_EQ(0u, surfaceState.getMemoryObjectControlState());
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -59,7 +59,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
}
}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
Buffer::setSurfaceState(this->device.get(), memory, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0);
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0);
}
GraphicsAllocation *externalAlloc = nullptr;
};
@@ -80,7 +80,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer {
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) {
}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
Buffer::setSurfaceState(this->device.get(), memory, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
}
};
@@ -100,7 +100,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer {
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) {
}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
Buffer::setSurfaceState(this->device.get(), memory, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
}
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,12 +10,15 @@
#include "shared/source/device/device.h"
#include "shared/source/helpers/string.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/test/unit_test/mocks/mock_graphics_allocation.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/program/block_kernel_manager.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
@@ -23,6 +26,32 @@
namespace NEO {
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv);
// Test helper: a KernelObjForAuxTranslation that owns the mock object it points to.
// For Type::MEM_OBJ it creates a MockBuffer; for any other type it creates a
// MockGraphicsAllocation(nullptr, 0x100) and triggers DEBUG_BREAK_IF when the type
// is not GFX_ALLOC. `object` is set to the created mock in both cases.
struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation {
    MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) {
        if (type != KernelObjForAuxTranslation::Type::MEM_OBJ) {
            DEBUG_BREAK_IF(type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
            mockGraphicsAllocation = std::make_unique<MockGraphicsAllocation>(nullptr, 0x100);
            this->object = mockGraphicsAllocation.get();
            return;
        }
        mockBuffer = std::make_unique<MockBuffer>();
        this->object = mockBuffer.get();
    }

    // Builds the mock as above, then overrides the size of the underlying allocation:
    // the buffer's graphics allocation at index 0 for MEM_OBJ, the mock allocation otherwise.
    MockKernelObjForAuxTranslation(Type type, size_t size) : MockKernelObjForAuxTranslation(type) {
        if (type != KernelObjForAuxTranslation::Type::MEM_OBJ) {
            DEBUG_BREAK_IF(type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
            mockGraphicsAllocation->setSize(size);
            return;
        }
        mockBuffer->getGraphicsAllocation(0)->setSize(size);
    }

    // Exactly one of these is populated by the constructors, depending on the type.
    std::unique_ptr<MockBuffer> mockBuffer = nullptr;
    std::unique_ptr<MockGraphicsAllocation> mockGraphicsAllocation = nullptr;
};
////////////////////////////////////////////////////////////////////////////////
// Kernel - Core implementation
////////////////////////////////////////////////////////////////////////////////