Initial Blit aux translation support

Change-Id: I67fb71be57cff28a3736d5ffb9e1c39b2498feb8
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-11-09 19:02:25 +01:00
committed by sys_ocldev
parent 533cf92d12
commit 46b5513028
17 changed files with 605 additions and 11 deletions

2
Jenkinsfile vendored
View File

@@ -1,5 +1,5 @@
#!groovy
dependenciesRevision='e3bce757f3edc77263cc431a1dceb1b2cd0701dc-1335'
strategy='EQUAL'
allowedCD=259
allowedCD=261
allowedF=7

View File

@@ -180,6 +180,8 @@ class HwHelperHw : public HwHelper {
static AuxTranslationMode getAuxTranslationMode();
static bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);
protected:
HwHelperHw() = default;
};

View File

@@ -14,6 +14,7 @@
#include "runtime/execution_environment/execution_environment.h"
#include "runtime/gmm_helper/gmm.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/os_interface/os_interface.h"
@@ -162,6 +163,13 @@ AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode() {
return AuxTranslationMode::Builtin;
}
// Returns true only when the aux translation mode is Blit and the dispatch
// carries a non-empty set of memory objects to translate.
template <typename Family>
bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo) {
    if (HwHelperHw<Family>::getAuxTranslationMode() != AuxTranslationMode::Blit) {
        return false;
    }

    // The set is optional: a dispatch may carry no aux-translation objects at all.
    auto memObjs = multiDispatchInfo.getMemObjsForAuxTranslation();
    if (!memObjs) {
        return false;
    }
    return memObjs->size() > 0;
}
template <typename Family>
typename Family::PIPE_CONTROL *PipeControlHelper<Family>::obtainPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
POST_SYNC_OPERATION operation,

View File

@@ -62,12 +62,16 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
protected:
using RegisteredMethodDispatcherT = RegisteredMethodDispatcher<DispatchInfo::DispatchCommandMethodT,
DispatchInfo::EstimateCommandsMethodT>;
template <typename GfxFamily, bool dcFlush>
static void dispatchPipeControl(LinearStream &linearStream) {
static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *) {
PipeControlHelper<GfxFamily>::addPipeControl(linearStream, dcFlush);
}
// Size-estimation callback with the size_t(const MemObjsForAuxTranslation *) shape.
// The mem-objects argument is ignored; the result is always the size reported by
// PipeControlHelper for a single pipe control.
template <typename GfxFamily>
static size_t getSizeForSinglePipeControl(const MemObjsForAuxTranslation *) {
    const size_t singlePipeControlSize = PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl();
    return singlePipeControlSize;
}
template <typename GfxFamily>
void registerPipeControlProgramming(RegisteredMethodDispatcherT &dispatcher, bool dcFlush) const {
if (dcFlush) {
@@ -75,7 +79,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
} else {
dispatcher.registerMethod(this->dispatchPipeControl<GfxFamily, false>);
}
dispatcher.registerCommandsSizeEstimationMethod(PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl);
dispatcher.registerCommandsSizeEstimationMethod(this->getSizeForSinglePipeControl<GfxFamily>);
}
void resizeKernelInstances(size_t size) const;

View File

@@ -334,6 +334,7 @@ class CommandQueueHw : public CommandQueue {
size_t commandStreamStart,
bool &blocking,
const MultiDispatchInfo &multiDispatchInfo,
const EnqueueProperties &enqueueProperties,
TimestampPacketDependencies &timestampPacketDependencies,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
@@ -391,6 +392,7 @@ class CommandQueueHw : public CommandQueue {
const cl_event *eventWaitList, cl_event *event);
MOCKABLE_VIRTUAL void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection);
void setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo);
MOCKABLE_VIRTUAL bool forceStateless(size_t size);
@@ -420,6 +422,10 @@ class CommandQueueHw : public CommandQueue {
return commandStream;
}
void processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer,
TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest,
bool queueBlocked);
private:
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;

View File

@@ -122,4 +122,19 @@ bool CommandQueueHw<Family>::forceStateless(size_t size) {
return size >= 4ull * MemoryConstants::gigaByte;
}
// Hooks blit aux translation synchronization into the dispatch list:
//  - the first dispatch gets init commands that program the AuxToNonAux dependencies,
//  - the last dispatch gets epilogue commands that program the NonAuxToAux dependencies.
// Both hooks also register the matching command-size estimation method.
template <typename Family>
void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {
    auto &firstDispatch = *multiDispatchInfo.begin();
    firstDispatch.dispatchInitCommands.registerMethod(
        TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::AuxToNonAux>);
    firstDispatch.dispatchInitCommands.registerCommandsSizeEstimationMethod(
        TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family>);

    auto &lastDispatch = *multiDispatchInfo.rbegin();
    lastDispatch.dispatchEpilogueCommands.registerMethod(
        TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::NonAuxToAux>);
    lastDispatch.dispatchEpilogueCommands.registerCommandsSizeEstimationMethod(
        TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family>);
}
} // namespace NEO

View File

@@ -91,6 +91,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
}
}
if (HwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(multiDispatchInfo)) {
setupBlitAuxTranslation(multiDispatchInfo);
}
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
}
@@ -219,8 +223,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
auto commandStreamStart = commandStream.getUsed();
if (HwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(multiDispatchInfo)) {
processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies,
eventsRequest, blockQueue);
}
if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
}
bool flushDependenciesForNonKernelCommand = false;
@@ -273,6 +283,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
commandStreamStart,
blocking,
multiDispatchInfo,
enqueueProperties,
timestampPacketDependencies,
eventsRequest,
eventBuilder,
@@ -461,6 +472,51 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
return blitProperties;
}
// Fills blitPropertiesContainer with two blits per memory object that needs aux translation.
// Layout: slots [0, numBuffers) hold the AuxToNonAux blits, slots [numBuffers, 2 * numBuffers)
// hold the NonAuxToAux blits. Every blit gets a freshly obtained timestamp packet node as its
// output, and that node is also recorded in timestampPacketDependencies.
// When the queue is not blocked, the first blit of each group additionally gets its dependencies:
// barrier + events before AuxToNonAux, this queue's timestamp packet container before NonAuxToAux.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo,
                                                                     BlitPropertiesContainer &blitPropertiesContainer,
                                                                     TimestampPacketDependencies &timestampPacketDependencies,
                                                                     const EventsRequest &eventsRequest, bool queueBlocked) {
    auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    auto &memObjs = *multiDispatchInfo.getMemObjsForAuxTranslation();
    const auto numBuffers = memObjs.size();
    blitPropertiesContainer.resize(numBuffers * 2);

    size_t auxToNonAuxSlot = 0;
    for (auto &buffer : memObjs) {
        const auto nonAuxToAuxSlot = auxToNonAuxSlot + numBuffers;

        // Aux to NonAux
        auto &auxToNonAuxBlit = blitPropertiesContainer[auxToNonAuxSlot];
        auxToNonAuxBlit = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
                                                                               buffer->getGraphicsAllocation());
        auto auxToNonAuxNode = nodesAllocator->getTag();
        timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
        auxToNonAuxBlit.outputTimestampPacket = auxToNonAuxNode;

        // NonAux to Aux
        auto &nonAuxToAuxBlit = blitPropertiesContainer[nonAuxToAuxSlot];
        nonAuxToAuxBlit = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux,
                                                                               buffer->getGraphicsAllocation());
        auto nonAuxToAuxNode = nodesAllocator->getTag();
        timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
        nonAuxToAuxBlit.outputTimestampPacket = nonAuxToAuxNode;

        ++auxToNonAuxSlot;
    }

    if (queueBlocked) {
        return;
    }

    getGpgpuCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
    timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());

    // wait for barrier and events before AuxToNonAux
    auto &firstAuxToNonAuxDependencies = blitPropertiesContainer[0].csrDependencies;
    firstAuxToNonAuxDependencies.push_back(&timestampPacketDependencies.barrierNodes);
    firstAuxToNonAuxDependencies.fillFromEventsRequest(eventsRequest, *getBcsCommandStreamReceiver(),
                                                       CsrDependencies::DependenciesType::All);

    // wait for NDR before NonAuxToAux
    blitPropertiesContainer[numBuffers].csrDependencies.push_back(this->timestampPacketContainer.get());
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
size_t numSurfaces,
@@ -578,6 +634,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
size_t commandStreamStart,
bool &blocking,
const MultiDispatchInfo &multiDispatchInfo,
const EnqueueProperties &enqueueProperties,
TimestampPacketDependencies &timestampPacketDependencies,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
@@ -669,7 +726,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
DispatchFlags dispatchFlags(
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
{}, //pipelineSelectArgs
this->flushStamp->getStampReference(), //flushStampReference
getThrottle(), //throttle
@@ -715,6 +772,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
gtpinNotifyPreFlushTask(this);
}
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false);
}
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast<int>(dispatchFlags.preemptionMode));
CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask(
commandStream,

View File

@@ -199,8 +199,8 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
for (auto &dispatchInfo : multiDispatchInfo) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize();
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize();
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
}
if (parentKernel) {
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());

View File

@@ -82,7 +82,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
size_t currentDispatchIndex = 0;
for (auto &dispatchInfo : multiDispatchInfo) {
dispatchInfo.dispatchInitCommands(*commandStream);
dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies);
bool isMainKernel = (dispatchInfo.getKernel() == mainKernel);
dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel,
@@ -90,7 +90,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh);
currentDispatchIndex++;
dispatchInfo.dispatchEpilogueCommands(*commandStream);
dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies);
}
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
uint64_t postSyncAddress = 0;

View File

@@ -20,12 +20,13 @@
namespace NEO {
class Kernel;
struct TimestampPacketDependencies;
class DispatchInfo {
public:
using DispatchCommandMethodT = void(LinearStream &commandStream);
using EstimateCommandsMethodT = size_t(void);
using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies);
using EstimateCommandsMethodT = size_t(const MemObjsForAuxTranslation *);
DispatchInfo() = default;
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)

View File

@@ -30,6 +30,7 @@ struct EnqueueProperties {
if (hasKernels) {
operation = Operation::GpuKernel;
this->blitPropertiesContainer = blitPropertiesContainer;
return;
}

View File

@@ -94,6 +94,8 @@ class TimestampPacketContainer : public NonCopyableClass {
// Bundle of timestamp packet node containers passed along a single enqueue.
struct TimestampPacketDependencies : public NonCopyableClass {
// NOTE(review): usage not visible in this view; presumably nodes of the previous enqueue - confirm.
TimestampPacketContainer previousEnqueueNodes;
// Barrier node(s); for blit aux translation the first AuxToNonAux blit waits on this container.
TimestampPacketContainer barrierNodes;
// Output nodes of the AuxToNonAux blits (one per translated buffer).
TimestampPacketContainer auxToNonAuxNodes;
// Output nodes of the NonAuxToAux blits (one per translated buffer).
TimestampPacketContainer nonAuxToAuxNodes;
};
struct TimestampPacketHelper {
@@ -121,6 +123,23 @@ struct TimestampPacketHelper {
}
}
// Calls programSemaphoreWithImplicitDependency once for every node recorded for the given
// translation direction: auxToNonAuxNodes for AuxToNonAux, nonAuxToAuxNodes otherwise.
// timestampPacketDependencies is dereferenced unconditionally, so it must not be null.
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
                                                                    const TimestampPacketDependencies *timestampPacketDependencies) {
    constexpr bool isAuxToNonAux = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux);
    const TimestampPacketContainer &nodesToWaitFor = isAuxToNonAux
                                                         ? timestampPacketDependencies->auxToNonAuxNodes
                                                         : timestampPacketDependencies->nonAuxToAuxNodes;

    for (auto &node : nodesToWaitFor.peekNodes()) {
        TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
    }
}
// Command stream size needed for the aux translation dependencies of one direction:
// the per-node dependency size multiplied by the number of memory objects to translate.
// memObjsForAuxTranslation is dereferenced unconditionally, so it must not be null.
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(const MemObjsForAuxTranslation *memObjsForAuxTranslation) {
    const size_t sizePerNode = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
    const size_t nodeCount = memObjsForAuxTranslation->size();
    return nodeCount * sizePerNode;
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependency() {
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
@@ -136,4 +155,5 @@ struct TimestampPacketHelper {
return totalNodesCount * getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
}
};
} // namespace NEO

View File

@@ -7,6 +7,7 @@
set(IGDRCL_SRCS_tests_command_queue
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests.cpp

View File

@@ -0,0 +1,441 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/unit_tests/helpers/debug_manager_state_restore.h"
#include "core/unit_tests/utilities/base_object_utils.h"
#include "test.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_device.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_timestamp_container.h"
using namespace NEO;
struct BlitAuxTranslationTests : public ::testing::Test {
// MockContext that owns its own OS context (created for aub_stream::ENGINE_BCS) and command
// stream receiver, and routes blit operations to that receiver.
class BcsMockContext : public MockContext {
public:
// Creates the BCS OS context and a command stream receiver, binds the receiver to the
// context and initializes its tag allocation.
BcsMockContext(Device *device) : MockContext(device) {
bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false));
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex()));
bcsCsr->setupContext(*bcsOsContext);
bcsCsr->initializeTagAllocation();
}
// Always returns the receiver owned by this context; memObj is not inspected.
CommandStreamReceiver *getCommandStreamReceiverForBlitOperation(MemObj &memObj) const override {
return bcsCsr.get();
}
// Builds HostPtrToBuffer blit properties for `size` bytes from hostPtr into `memory`,
// submits them through bcsCsr and always reports Success.
BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const override {
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, memory, 0, nullptr,
hostPtr, 0, 0, size);
BlitPropertiesContainer container;
container.push_back(blitProperties);
// NOTE(review): second argument presumably requests a blocking blit - confirm against blitBuffer.
bcsCsr->blitBuffer(container, true);
return BlitOperationResult::Success;
}
std::unique_ptr<OsContext> bcsOsContext;
std::unique_ptr<CommandStreamReceiver> bcsCsr;
};
// Per-test setup. Skips the test on 32-bit builds or when the HW helper reports that aux
// resolves are not required. Otherwise sets the debug flags, creates a device with blitter
// operations forced on, and builds the context, command queue and a mock kernel that
// requires aux translation.
template <typename FamilyType>
void SetUpT() {
auto &hwHelper = HwHelper::get(platformDevices[0]->platform.eRenderCoreFamily);
if (is32bit || !hwHelper.requiresAuxResolves()) {
GTEST_SKIP();
}
// Debug flags are set before the device is created.
DebugManager.flags.EnableTimestampPacket.set(1);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
// NOTE(review): 1 presumably maps to AuxTranslationMode::Blit - confirm against the enum.
DebugManager.flags.ForceAuxTranslationMode.set(1);
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto &capabilityTable = device->getExecutionEnvironment()->getMutableHardwareInfo()->capabilityTable;
// If the platform did not report blitter support, give the low-priority engine a BCS OS context.
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
capabilityTable.blitterOperationsSupported = true;
if (createBcsEngine) {
auto &engine = device->getEngine(HwHelperHw<FamilyType>::lowPriorityEngineType, true);
bcsOsContext.reset(OsContext::create(nullptr, 1, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false));
engine.osContext = bcsOsContext.get();
engine.commandStreamReceiver->setupContext(*bcsOsContext);
}
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
commandQueue.reset(mockCmdQueue);
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
mockKernel->mockKernel->auxTranslationRequired = true;
auto mockProgram = mockKernel->mockProgram;
mockProgram->setAllowNonUniform(true);
// Cache the receivers used by the queue so tests can parse their command streams.
gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver;
bcsCsr = mockCmdQueue->bcsEngine->commandStreamReceiver;
}
// Intentionally empty: fixture members are released by their owning smart pointers.
template <typename FamilyType>
void TearDownT() {}
// Binds the given buffers as arguments 0..N-1 of the mock kernel. The kernel-arg info table
// is grown when it is too small, and every used argument is marked as not purely stateful.
template <size_t N>
void setMockKernelArgs(std::array<Buffer *, N> buffers) {
    auto &argInfos = mockKernel->kernelInfo.kernelArgInfo;
    if (argInfos.size() < buffers.size()) {
        argInfos.resize(buffers.size());
    }

    mockKernel->mockKernel->initialize();

    for (uint32_t argIndex = 0; argIndex < buffers.size(); argIndex++) {
        cl_mem clMem = buffers[argIndex];

        auto &argInfo = argInfos.at(argIndex);
        argInfo.kernelArgPatchInfoVector.resize(1);
        argInfo.pureStatefulBufferAccess = false;

        mockKernel->mockKernel->setArgBuffer(argIndex, sizeof(cl_mem *), &clMem);
    }
}
// Creates a read-write buffer of the given size in the fixture's context and overrides the
// allocation type of its graphics allocation: BUFFER_COMPRESSED when `compressed`, BUFFER otherwise.
ReleaseableObjectPtr<Buffer> createBuffer(size_t size, bool compressed) {
    auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal));

    const auto allocationType = compressed ? GraphicsAllocation::AllocationType::BUFFER_COMPRESSED
                                           : GraphicsAllocation::AllocationType::BUFFER;
    buffer->getGraphicsAllocation()->setAllocationType(allocationType);

    return buffer;
}
// Parses the given linear stream with HardwareParse and returns a copy of the parsed command list.
template <typename Family>
GenCmdList getCmdList(LinearStream &linearStream) {
    HardwareParse streamParser;
    streamParser.parseCommands<Family>(linearStream);

    GenCmdList parsedCommands = streamParser.cmdList;
    return parsedCommands;
}
// Searches [itorStart, itorEnd) for the first PIPE_CONTROL that both writes immediate data as its
// post-sync operation and has the command streamer stall enabled. Returns an iterator to it.
// If no such command exists, records a test failure and returns itorEnd.
template <typename Family>
GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
    using PIPE_CONTROL = typename Family::PIPE_CONTROL;

    GenCmdList::iterator searchFrom = itorStart;
    while (true) {
        GenCmdList::iterator candidate = find<PIPE_CONTROL *>(searchFrom, itorEnd);
        if (itorEnd == candidate) {
            EXPECT_TRUE(false);
            return itorEnd;
        }

        PIPE_CONTROL *candidateCmd = genCmdCast<PIPE_CONTROL *>(*candidate);
        const bool writesImmediateData =
            candidateCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA;
        if (writesImmediateData && candidateCmd->getCommandStreamerStallEnable()) {
            return candidate;
        }

        // Not the one we want: continue right after this pipe control.
        searchFrom = candidate;
        ++searchFrom;
    }
}
// Finds the first command of type Command in [itorStart, itorEnd) and expects that it exists.
// Returns the iterator produced by find (itorEnd when nothing was found, after recording
// a non-fatal test failure).
template <typename Command>
GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
auto commandItor = find<Command *>(itorStart, itorEnd);
EXPECT_TRUE(commandItor != itorEnd);
return commandItor;
}
// Interprets the command at semaphoreItor as MI_SEMAPHORE_WAIT and expects its semaphore
// graphics address to equal expectedAddress.
template <typename Family>
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
    auto semaphoreCmd = genCmdCast<typename Family::MI_SEMAPHORE_WAIT *>(*semaphoreItor);

    EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
DebugManagerStateRestore restore;
std::unique_ptr<OsContext> bcsOsContext;
std::unique_ptr<MockDevice> device;
std::unique_ptr<BcsMockContext> bcsMockContext;
std::unique_ptr<CommandQueue> commandQueue;
std::unique_ptr<MockKernelWithInternals> mockKernel;
CommandStreamReceiver *bcsCsr = nullptr;
CommandStreamReceiver *gpgpuCsr = nullptr;
size_t gws[3] = {63, 0, 0};
size_t lws[3] = {16, 0, 0};
uint32_t hostPtr = 0;
cl_int retVal = CL_SUCCESS;
};
// Enqueues a kernel with three buffer arguments (two compressed, one not) and verifies that:
//  - the queue's BCS task count grows by exactly one,
//  - the GPGPU side contains barrier, semaphores, walkers, semaphores and a final pipe control in that order,
//  - the BCS side contains semaphore, AuxToNonAux blits, semaphore, NonAuxToAux blits and a final MI_FLUSH_DW.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingCommandBufferThenEnsureCorrectOrder) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
auto buffer0 = createBuffer(1, true);
auto buffer1 = createBuffer(1, false);
auto buffer2 = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 3>{{buffer0.get(), buffer1.get(), buffer2.get()}});
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
auto initialBcsTaskCount = mockCmdQ->bcsTaskCount;
mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
EXPECT_EQ(mockCmdQ->bcsTaskCount, initialBcsTaskCount + 1);
// Gpgpu command buffer
{
auto cmdListCsr = getCmdList<FamilyType>(gpgpuCsr->getCS(0));
auto cmdListQueue = getCmdList<FamilyType>(commandQueue->getCS(0));
// Barrier
expectPipeControl<FamilyType>(cmdListCsr.begin(), cmdListCsr.end());
// Aux to NonAux
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdListQueue.begin(), cmdListQueue.end());
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
// Walker
cmdFound = expectCommand<WALKER_TYPE>(++cmdFound, cmdListQueue.end());
cmdFound = expectCommand<WALKER_TYPE>(++cmdFound, cmdListQueue.end());
// NonAux to Aux
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
// task count
expectPipeControl<FamilyType>(++cmdFound, cmdListQueue.end());
}
// BCS command buffer
{
auto cmdList = getCmdList<FamilyType>(bcsCsr->getCS(0));
// Barrier
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdList.begin(), cmdList.end());
// Aux to NonAux
cmdFound = expectCommand<XY_COPY_BLT>(++cmdFound, cmdList.end());
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdList.end());
cmdFound = expectCommand<XY_COPY_BLT>(++cmdFound, cmdList.end());
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdList.end());
// wait for NDR
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdList.end());
// NonAux to Aux
cmdFound = expectCommand<XY_COPY_BLT>(++cmdFound, cmdList.end());
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdList.end());
cmdFound = expectCommand<XY_COPY_BLT>(++cmdFound, cmdList.end());
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdList.end());
// taskCount
expectCommand<MI_FLUSH_DW>(++cmdFound, cmdList.end());
}
}
// Verifies that the first semaphore in the BCS command stream waits on the address written by
// the stalling pipe control found in the GPGPU CSR command stream (the barrier).
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBarrier) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto buffer = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 1>{{buffer.get()}});
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
auto cmdListCsr = getCmdList<FamilyType>(gpgpuCsr->getCS(0));
auto pipeControl = expectPipeControl<FamilyType>(cmdListCsr.begin(), cmdListCsr.end());
auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControl);
// Rebuild the 64-bit post-sync address from its low and high parts.
uint64_t low = pipeControlCmd->getAddress();
uint64_t high = pipeControlCmd->getAddressHigh();
uint64_t barrierGpuAddress = (high << 32) | low;
auto cmdList = getCmdList<FamilyType>(bcsCsr->getCS(0));
auto semaphore = expectCommand<MI_SEMAPHORE_WAIT>(cmdList.begin(), cmdList.end());
verifySemaphore<FamilyType>(semaphore, barrierGpuAddress);
}
// With two compressed buffers, collects the MI_FLUSH_DW destination addresses from the BCS
// command stream (two for AuxToNonAux, two for NonAuxToAux) and verifies that the semaphores in
// the queue's command stream wait on those addresses: the first pair before the walker, the
// second pair after it.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) {
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto buffer0 = createBuffer(1, true);
auto buffer1 = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 2>{{buffer0.get(), buffer1.get()}});
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
uint64_t auxToNonAuxOutputAddress[2] = {};
uint64_t nonAuxToAuxOutputAddress[2] = {};
{
auto cmdListBcs = getCmdList<FamilyType>(bcsCsr->getCS(0));
auto cmdFound = expectCommand<XY_COPY_BLT>(cmdListBcs.begin(), cmdListBcs.end());
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdListBcs.end());
auto miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress();
// The search for the next MI_FLUSH_DW steps over the blit command in between.
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdListBcs.end());
miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress();
cmdFound = expectCommand<XY_COPY_BLT>(++cmdFound, cmdListBcs.end());
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdListBcs.end());
miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
nonAuxToAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress();
cmdFound = expectCommand<MI_FLUSH_DW>(++cmdFound, cmdListBcs.end());
miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress();
}
{
auto cmdListQueue = getCmdList<FamilyType>(commandQueue->getCS(0));
// Aux to NonAux
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdListQueue.begin(), cmdListQueue.end());
verifySemaphore<FamilyType>(cmdFound, auxToNonAuxOutputAddress[0]);
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
verifySemaphore<FamilyType>(cmdFound, auxToNonAuxOutputAddress[1]);
// Walker
cmdFound = expectCommand<WALKER_TYPE>(++cmdFound, cmdListQueue.end());
// NonAux to Aux
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
verifySemaphore<FamilyType>(cmdFound, nonAuxToAuxOutputAddress[0]);
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
verifySemaphore<FamilyType>(cmdFound, nonAuxToAuxOutputAddress[1]);
}
}
// Verifies that, in the BCS command stream, the semaphore following the first blit waits on the
// contextEnd field of the first timestamp packet node held by the command queue (the kernel's node).
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeKernel) {
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto buffer = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 1>{{buffer.get()}});
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0];
auto kernelNodeAddress = kernelNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto cmdList = getCmdList<FamilyType>(bcsCsr->getCS(0));
// Aux to nonAux
auto cmdFound = expectCommand<XY_COPY_BLT>(cmdList.begin(), cmdList.end());
// semaphore before NonAux to Aux
auto semaphore = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdList.end());
verifySemaphore<FamilyType>(semaphore, kernelNodeAddress);
}
// Enqueues a kernel with one wait-list event that carries a timestamp packet node and verifies
// that the second semaphore in the BCS command stream waits on that node's contextEnd field,
// and that two blit commands follow it.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeEvents) {
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto buffer = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 1>{{buffer.get()}});
auto event = make_releaseable<Event>(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0);
MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1);
auto eventDependency = eventDependencyContainer.getNode(0);
event->addTimestampPacketNodes(eventDependencyContainer);
cl_event clEvent[] = {event.get()};
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, clEvent, nullptr);
auto eventDependencyAddress = eventDependency->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto cmdList = getCmdList<FamilyType>(bcsCsr->getCS(0));
// Barrier
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdList.begin(), cmdList.end());
// Event
auto semaphore = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdList.end());
verifySemaphore<FamilyType>(semaphore, eventDependencyAddress);
cmdFound = expectCommand<XY_COPY_BLT>(++semaphore, cmdList.end());
expectCommand<XY_COPY_BLT>(++cmdFound, cmdList.end());
}
// Enqueues a kernel with an output event and two compressed buffers (plus one uncompressed) and
// verifies that the event holds three timestamp packet nodes, and that the two semaphores after
// the walker wait on the contextEnd fields of event nodes [1] and [2].
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenOutEventWhenDispatchingThenAssignNonAuxNodes) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto buffer0 = createBuffer(1, true);
auto buffer1 = createBuffer(1, false);
auto buffer2 = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 3>{{buffer0.get(), buffer1.get(), buffer2.get()}});
cl_event clEvent;
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent);
auto event = castToObject<Event>(clEvent);
auto &eventNodes = event->getTimestampPacketNodes()->peekNodes();
EXPECT_EQ(3u, eventNodes.size());
auto cmdListQueue = getCmdList<FamilyType>(commandQueue->getCS(0));
auto cmdFound = expectCommand<WALKER_TYPE>(cmdListQueue.begin(), cmdListQueue.end());
// NonAux to Aux
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
auto eventNodeAddress = eventNodes[1]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
verifySemaphore<FamilyType>(cmdFound, eventNodeAddress);
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(++cmdFound, cmdListQueue.end());
eventNodeAddress = eventNodes[2]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
verifySemaphore<FamilyType>(cmdFound, eventNodeAddress);
// The test owns the returned event and must release it.
clReleaseEvent(clEvent);
}
// Verifies the size estimation hooks after a kernel enqueue that is split into more than one
// dispatch: only the first dispatch's init commands and the last dispatch's epilogue commands
// report the dependency size for the two compressed buffers; the other two hooks report zero.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenDispatchingThenEstimateCmdBufferSize) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto buffer0 = createBuffer(1, true);
auto buffer1 = createBuffer(1, false);
auto buffer2 = createBuffer(1, true);
// Only the compressed buffers are expected to take part in aux translation.
MemObjsForAuxTranslation memObjects;
memObjects.insert(buffer0.get());
memObjects.insert(buffer2.get());
size_t numBuffersToEstimate = 2;
size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>();
setMockKernelArgs(std::array<Buffer *, 3>{{buffer0.get(), buffer1.get(), buffer2.get()}});
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
mockCmdQ->storeMultiDispatchInfo = true;
mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo;
DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin();
DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin());
EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split
EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(&memObjects));
EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(&memObjects));
EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(&memObjects));
EXPECT_EQ(dependencySize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(&memObjects));
}

View File

@@ -673,6 +673,8 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
class MyCmdQ : public CommandQueueHw<FamilyType> {
public:
using CommandQueueHw<FamilyType>::commandStream;
using CommandQueueHw<FamilyType>::gpgpuEngine;
using CommandQueueHw<FamilyType>::bcsEngine;
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslationBuiltin(multiDispatchInfo, auxTranslationDirection);
@@ -807,9 +809,12 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueue
HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhenDispatchingKernelWithRequiredAuxTranslationThenDontDispatch) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
MockKernelWithInternals mockKernel(*pDevice, context);
MyCmdQ<FamilyType> cmdQ(context, pDevice);
cmdQ.bcsEngine = cmdQ.gpgpuEngine;
size_t gws[3] = {1, 0, 0};
MockBuffer buffer;
cl_mem clMem = &buffer;

View File

@@ -14,12 +14,14 @@
#include "runtime/gmm_helper/gmm.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/gmm_helper/resource_info.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include "runtime/helpers/options.h"
#include "runtime/mem_obj/image.h"
#include "runtime/os_interface/os_interface.h"
#include "unit_tests/helpers/unit_test_helper.h"
#include "unit_tests/helpers/variable_backup.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_context.h"
#include <chrono>
@@ -681,6 +683,26 @@ TEST_F(HwHelperTest, givenVariousCachesRequestProperMOCSIndexesAreBeingReturned)
}
}
// Walks HwHelperHw::isBlitAuxTranslationRequired through its conditions: with the Blit
// mode forced it must report false while no mem-objects container is attached or while
// the attached container is empty, true once the container holds an object, and false
// again after the forced mode is switched to Builtin.
HWTEST_F(HwHelperTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) {
    DebugManagerStateRestore dbgRestore;
    MockBuffer mockBuffer;
    MemObjsForAuxTranslation auxMemObjs;
    MultiDispatchInfo dispatchInfos;

    const auto blitMode = static_cast<int32_t>(AuxTranslationMode::Blit);
    const auto builtinMode = static_cast<int32_t>(AuxTranslationMode::Builtin);
    const auto blitRequired = [&dispatchInfos]() {
        return HwHelperHw<FamilyType>::isBlitAuxTranslationRequired(dispatchInfos);
    };

    DebugManager.flags.ForceAuxTranslationMode.set(blitMode);

    // Nothing attached to the dispatch info yet.
    EXPECT_FALSE(blitRequired());

    // Container attached but still empty.
    dispatchInfos.setMemObjsForAuxTranslation(auxMemObjs);
    EXPECT_FALSE(blitRequired());

    // The object is inserted after the container was attached; the expectation below
    // relies on the dispatch info observing that later insert.
    auxMemObjs.insert(&mockBuffer);
    EXPECT_TRUE(blitRequired());

    // Same objects, but a non-Blit mode.
    DebugManager.flags.ForceAuxTranslationMode.set(builtinMode);
    EXPECT_FALSE(blitRequired());
}
HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidValue) {
bool tilingSupported = UnitTestHelper<FamilyType>::tiledImagesSupported;

View File

@@ -75,6 +75,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
public:
using BaseClass::bcsEngine;
using BaseClass::bcsTaskCount;
using BaseClass::commandStream;
using BaseClass::gpgpuEngine;
using BaseClass::multiEngineQueue;
@@ -130,8 +131,12 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
// Hook override that records what was enqueued: the command type, the builtin-op params
// and the kernel of every DispatchInfo. When storeMultiDispatchInfo is set, each
// DispatchInfo is additionally pushed into storedMultiDispatchInfo.
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
    lastCommandType = commandType;
    kernelParams = dispatchInfo.peekBuiltinOpParams();

    for (auto &singleDispatch : dispatchInfo) {
        lastEnqueuedKernels.push_back(singleDispatch.getKernel());

        if (!storeMultiDispatchInfo) {
            continue;
        }
        storedMultiDispatchInfo.push(singleDispatch);
    }
}
@@ -144,9 +149,11 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
// Command type passed to the most recent enqueueHandlerHook call.
unsigned int lastCommandType;
// Kernels of all DispatchInfos seen by enqueueHandlerHook, appended on every call.
std::vector<Kernel *> lastEnqueuedKernels;
// DispatchInfos pushed by enqueueHandlerHook while storeMultiDispatchInfo is true.
MultiDispatchInfo storedMultiDispatchInfo;
size_t EnqueueWriteImageCounter = 0;
size_t EnqueueWriteBufferCounter = 0;
bool blockingWriteBuffer = false;
// Opt-in switch for filling storedMultiDispatchInfo; disabled by default.
bool storeMultiDispatchInfo = false;
bool notifyEnqueueReadBufferCalled = false;
bool notifyEnqueueReadImageCalled = false;
bool cpuDataTransferHandlerCalled = false;