Add pipe control before and after buffer translation
Change-Id: I4ee32c410e1ac2bcdb3ceae203cd461de79146a5
This commit is contained in:
parent
c9e667d601
commit
4b1871bf0e
|
@ -557,6 +557,9 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
|
||||||
|
|
||||||
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||||
AuxTranslationDirection auxTranslationDirection) {
|
AuxTranslationDirection auxTranslationDirection) {
|
||||||
|
if (!multiDispatchInfo.empty()) {
|
||||||
|
multiDispatchInfo.rbegin()->setPipeControlRequired(true);
|
||||||
|
}
|
||||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
|
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
|
||||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dispatchParams;
|
BuiltinDispatchInfoBuilder::BuiltinOpParams dispatchParams;
|
||||||
|
|
||||||
|
@ -564,6 +567,8 @@ void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo,
|
||||||
dispatchParams.auxTranslationDirection = auxTranslationDirection;
|
dispatchParams.auxTranslationDirection = auxTranslationDirection;
|
||||||
|
|
||||||
builder.buildDispatchInfos(multiDispatchInfo, dispatchParams);
|
builder.buildDispatchInfos(multiDispatchInfo, dispatchParams);
|
||||||
|
|
||||||
|
multiDispatchInfo.rbegin()->setPipeControlRequired(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes) {
|
void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes) {
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "runtime/helpers/base_object.h"
|
#include "runtime/helpers/base_object.h"
|
||||||
#include "runtime/helpers/engine_control.h"
|
#include "runtime/helpers/engine_control.h"
|
||||||
#include "runtime/helpers/task_information.h"
|
#include "runtime/helpers/task_information.h"
|
||||||
|
#include "runtime/helpers/dispatch_info.h"
|
||||||
#include "instrumentation.h"
|
#include "instrumentation.h"
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
@ -27,6 +28,7 @@ class Kernel;
|
||||||
class MemObj;
|
class MemObj;
|
||||||
class PerformanceCounters;
|
class PerformanceCounters;
|
||||||
struct CompletionStamp;
|
struct CompletionStamp;
|
||||||
|
struct MultiDispatchInfo;
|
||||||
|
|
||||||
enum class QueuePriority {
|
enum class QueuePriority {
|
||||||
LOW,
|
LOW,
|
||||||
|
|
|
@ -63,7 +63,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
|
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
|
||||||
builtInLock.takeOwnership(builder, this->context);
|
builtInLock.takeOwnership(builder, this->context);
|
||||||
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation);
|
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation);
|
||||||
dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, AuxTranslationDirection::AuxToNonAux);
|
if (!memObjsForAuxTranslation.empty()) {
|
||||||
|
dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, AuxTranslationDirection::AuxToNonAux);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||||
|
@ -85,7 +87,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||||
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, AuxTranslationDirection::NonAuxToAux);
|
if (!memObjsForAuxTranslation.empty()) {
|
||||||
|
dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, AuxTranslationDirection::NonAuxToAux); // after the kernel: translate back, mirroring the AuxToNonAux pass issued before it
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -387,8 +387,14 @@ template <typename GfxFamily>
|
||||||
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, cl_uint numEventsInWaitList, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, cl_uint numEventsInWaitList, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
||||||
size_t expectedSizeCS = 0;
|
size_t expectedSizeCS = 0;
|
||||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||||
|
if (multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->isAuxTranslationRequired()) {
|
||||||
|
expectedSizeCS += sizeof(PIPE_CONTROL);
|
||||||
|
}
|
||||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
||||||
|
if (dispatchInfo.isPipeControlRequired()) {
|
||||||
|
expectedSizeCS += sizeof(PIPE_CONTROL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (parentKernel) {
|
if (parentKernel) {
|
||||||
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());
|
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());
|
||||||
|
|
|
@ -30,6 +30,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
LinearStream *commandStream = nullptr;
|
LinearStream *commandStream = nullptr;
|
||||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||||
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
||||||
|
auto mainKernel = multiDispatchInfo.peekMainKernel();
|
||||||
|
|
||||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||||
// Compute local workgroup sizes
|
// Compute local workgroup sizes
|
||||||
|
@ -109,10 +110,16 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
|
|
||||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||||
|
|
||||||
|
if (mainKernel && mainKernel->isAuxTranslationRequired()) { // null-guard matches the peekMainKernel() check used when sizing the command stream
|
||||||
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||||
|
auto pPipeControlCmd = static_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||||
|
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||||
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
|
}
|
||||||
|
|
||||||
size_t currentDispatchIndex = 0;
|
size_t currentDispatchIndex = 0;
|
||||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||||
auto &kernel = *dispatchInfo.getKernel();
|
auto &kernel = *dispatchInfo.getKernel();
|
||||||
|
|
||||||
DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3));
|
DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3));
|
||||||
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3));
|
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3));
|
||||||
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2));
|
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2));
|
||||||
|
@ -152,7 +159,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
*kernel.globalWorkSizeY = static_cast<uint32_t>(gws.y);
|
*kernel.globalWorkSizeY = static_cast<uint32_t>(gws.y);
|
||||||
*kernel.globalWorkSizeZ = static_cast<uint32_t>(gws.z);
|
*kernel.globalWorkSizeZ = static_cast<uint32_t>(gws.z);
|
||||||
|
|
||||||
if ((&kernel == multiDispatchInfo.peekMainKernel()) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) {
|
if ((&kernel == mainKernel) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) {
|
||||||
*kernel.localWorkSizeX = static_cast<uint32_t>(lws.x);
|
*kernel.localWorkSizeX = static_cast<uint32_t>(lws.x);
|
||||||
*kernel.localWorkSizeY = static_cast<uint32_t>(lws.y);
|
*kernel.localWorkSizeY = static_cast<uint32_t>(lws.y);
|
||||||
*kernel.localWorkSizeZ = static_cast<uint32_t>(lws.z);
|
*kernel.localWorkSizeZ = static_cast<uint32_t>(lws.z);
|
||||||
|
@ -166,7 +173,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
*kernel.enqueuedLocalWorkSizeY = static_cast<uint32_t>(elws.y);
|
*kernel.enqueuedLocalWorkSizeY = static_cast<uint32_t>(elws.y);
|
||||||
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
|
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
|
||||||
|
|
||||||
if (&kernel == multiDispatchInfo.peekMainKernel()) {
|
if (&kernel == mainKernel) {
|
||||||
*kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x);
|
*kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x);
|
||||||
*kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y);
|
*kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y);
|
||||||
*kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z);
|
*kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z);
|
||||||
|
@ -231,6 +238,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
GpgpuWalkerHelper<GfxFamily>::adjustWalkerData(commandStream, walkerCmd, kernel, dispatchInfo);
|
GpgpuWalkerHelper<GfxFamily>::adjustWalkerData(commandStream, walkerCmd, kernel, dispatchInfo);
|
||||||
|
|
||||||
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
||||||
|
if (dispatchInfo.isPipeControlRequired()) {
|
||||||
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||||
|
auto pPipeControlCmd = static_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||||
|
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||||
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
|
}
|
||||||
currentDispatchIndex++;
|
currentDispatchIndex++;
|
||||||
}
|
}
|
||||||
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||||
|
|
|
@ -25,6 +25,8 @@ class DispatchInfo {
|
||||||
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(0, 0, 0), lws(0, 0, 0), twgs(0, 0, 0), nwgs(0, 0, 0), swgs(0, 0, 0) {}
|
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(0, 0, 0), lws(0, 0, 0), twgs(0, 0, 0), nwgs(0, 0, 0), swgs(0, 0, 0) {}
|
||||||
DispatchInfo(Kernel *k, uint32_t d, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
|
DispatchInfo(Kernel *k, uint32_t d, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
|
||||||
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
|
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
|
||||||
|
bool isPipeControlRequired() const { return pipeControlRequired; }
|
||||||
|
void setPipeControlRequired(bool blocking) { this->pipeControlRequired = blocking; }
|
||||||
bool usesSlm() const;
|
bool usesSlm() const;
|
||||||
bool usesStatelessPrintfSurface() const;
|
bool usesStatelessPrintfSurface() const;
|
||||||
uint32_t getRequiredScratchSize() const;
|
uint32_t getRequiredScratchSize() const;
|
||||||
|
@ -50,6 +52,7 @@ class DispatchInfo {
|
||||||
void setStartOfWorkgroups(const Vec3<size_t> &swgs) { this->swgs = swgs; }
|
void setStartOfWorkgroups(const Vec3<size_t> &swgs) { this->swgs = swgs; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
bool pipeControlRequired = false;
|
||||||
Kernel *kernel = nullptr;
|
Kernel *kernel = nullptr;
|
||||||
uint32_t dim = 0;
|
uint32_t dim = 0;
|
||||||
|
|
||||||
|
@ -106,14 +109,38 @@ struct MultiDispatchInfo {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DispatchInfo *begin() {
|
||||||
|
return dispatchInfos.begin();
|
||||||
|
}
|
||||||
|
|
||||||
const DispatchInfo *begin() const {
|
const DispatchInfo *begin() const {
|
||||||
return dispatchInfos.begin();
|
return dispatchInfos.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::reverse_iterator<DispatchInfo *> rbegin() {
|
||||||
|
return dispatchInfos.rbegin();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse_iterator<const DispatchInfo *> crbegin() const {
|
||||||
|
return dispatchInfos.crbegin();
|
||||||
|
}
|
||||||
|
|
||||||
|
DispatchInfo *end() {
|
||||||
|
return dispatchInfos.end();
|
||||||
|
}
|
||||||
|
|
||||||
const DispatchInfo *end() const {
|
const DispatchInfo *end() const {
|
||||||
return dispatchInfos.end();
|
return dispatchInfos.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::reverse_iterator<DispatchInfo *> rend() {
|
||||||
|
return dispatchInfos.rend();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse_iterator<const DispatchInfo *> crend() const {
|
||||||
|
return dispatchInfos.crend();
|
||||||
|
}
|
||||||
|
|
||||||
void push(const DispatchInfo &dispatchInfo) {
|
void push(const DispatchInfo &dispatchInfo) {
|
||||||
dispatchInfos.push_back(dispatchInfo);
|
dispatchInfos.push_back(dispatchInfo);
|
||||||
}
|
}
|
||||||
|
|
|
@ -372,7 +372,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||||
return usingImagesOnly;
|
return usingImagesOnly;
|
||||||
}
|
}
|
||||||
|
|
||||||
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &buffersForAuxTranslation);
|
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation);
|
||||||
|
|
||||||
bool requiresCacheFlushCommand() const;
|
bool requiresCacheFlushCommand() const;
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,8 @@ class StackVec {
|
||||||
public:
|
public:
|
||||||
using iterator = DataType *;
|
using iterator = DataType *;
|
||||||
using const_iterator = const DataType *;
|
using const_iterator = const DataType *;
|
||||||
|
using reverse_iterator = std::reverse_iterator<iterator>;
|
||||||
|
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||||
|
|
||||||
static const size_t onStackCaps = OnStackCapacity;
|
static const size_t onStackCaps = OnStackCapacity;
|
||||||
|
|
||||||
|
@ -181,6 +183,14 @@ class StackVec {
|
||||||
return onStackMem;
|
return onStackMem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reverse_iterator rbegin() {
|
||||||
|
return reverse_iterator(end());
|
||||||
|
}
|
||||||
|
|
||||||
|
const_reverse_iterator crbegin() const {
|
||||||
|
return const_reverse_iterator(end());
|
||||||
|
}
|
||||||
|
|
||||||
const_iterator begin() const {
|
const_iterator begin() const {
|
||||||
if (dynamicMem) {
|
if (dynamicMem) {
|
||||||
return dynamicMem->data();
|
return dynamicMem->data();
|
||||||
|
@ -197,6 +207,14 @@ class StackVec {
|
||||||
return onStackMem + onStackSize;
|
return onStackMem + onStackSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reverse_iterator rend() {
|
||||||
|
return reverse_iterator(begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
const_reverse_iterator crend() const {
|
||||||
|
return const_reverse_iterator(begin());
|
||||||
|
}
|
||||||
|
|
||||||
const_iterator end() const {
|
const_iterator end() const {
|
||||||
if (dynamicMem) {
|
if (dynamicMem) {
|
||||||
return dynamicMem->data() + dynamicMem->size();
|
return dynamicMem->data() + dynamicMem->size();
|
||||||
|
|
|
@ -1108,3 +1108,38 @@ HWTEST_F(DispatchWalkerTest, WhenCallingDefaultWaMethodsThenExpectNothing) {
|
||||||
size_t actualSize = GpgpuWalkerHelper<GENX>::getSizeForWADisableLSQCROPERFforOCL(&kernel);
|
size_t actualSize = GpgpuWalkerHelper<GENX>::getSizeForWADisableLSQCROPERFforOCL(&kernel);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationWithoutParentKernelThenPipeControlAdded) {
|
||||||
|
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||||
|
kernelInfo.workloadInfo.workDimOffset = 0;
|
||||||
|
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||||
|
|
||||||
|
auto &cmdStream = pCmdQ->getCS(0);
|
||||||
|
void *buffer = cmdStream.getCpuBase();
|
||||||
|
kernel.auxTranslationRequired = true;
|
||||||
|
|
||||||
|
MockMultiDispatchInfo multiDispatchInfo(&kernel);
|
||||||
|
DispatchInfo di1(&kernel, 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0));
|
||||||
|
di1.setPipeControlRequired(true);
|
||||||
|
multiDispatchInfo.push(di1);
|
||||||
|
|
||||||
|
HardwareInterface<FamilyType>::dispatchWalker(
|
||||||
|
*pCmdQ,
|
||||||
|
multiDispatchInfo,
|
||||||
|
0,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
pDevice->getPreemptionMode(),
|
||||||
|
false);
|
||||||
|
|
||||||
|
auto sizeUsed = cmdStream.getUsed();
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed));
|
||||||
|
|
||||||
|
auto itorCmd = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_NE(cmdList.end(), itorCmd);
|
||||||
|
}
|
||||||
|
|
|
@ -665,6 +665,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
|
||||||
Kernel *lastKernel = nullptr;
|
Kernel *lastKernel = nullptr;
|
||||||
for (const auto &dispatchInfo : multiDispatchInfo) {
|
for (const auto &dispatchInfo : multiDispatchInfo) {
|
||||||
lastKernel = dispatchInfo.getKernel();
|
lastKernel = dispatchInfo.getKernel();
|
||||||
|
dispatchInfos.emplace_back(dispatchInfo);
|
||||||
}
|
}
|
||||||
dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), memObjsForAuxTranslation, auxTranslationDirection);
|
dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), memObjsForAuxTranslation, auxTranslationDirection);
|
||||||
}
|
}
|
||||||
|
@ -674,31 +675,20 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
|
||||||
CommandQueueHw<FamilyType>::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
|
CommandQueueHw<FamilyType>::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<DispatchInfo> dispatchInfos;
|
||||||
std::vector<std::tuple<Kernel *, size_t, MemObjsForAuxTranslation, AuxTranslationDirection>> dispatchAuxTranslationInputs;
|
std::vector<std::tuple<Kernel *, size_t, MemObjsForAuxTranslation, AuxTranslationDirection>> dispatchAuxTranslationInputs;
|
||||||
uint32_t waitCalled = 0;
|
uint32_t waitCalled = 0;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
|
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationAndWithoutArgumentsWhenEnqueuedThenNoGuardKernelWithAuxTranslations) {
|
||||||
MockKernelWithInternals mockKernel(*pDevice, context);
|
MockKernelWithInternals mockKernel(*pDevice, context);
|
||||||
MyCmdQ<FamilyType> cmdQ(context, pDevice);
|
MyCmdQ<FamilyType> cmdQ(context, pDevice);
|
||||||
size_t gws[3] = {1, 0, 0};
|
size_t gws[3] = {1, 0, 0};
|
||||||
|
|
||||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
|
EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size());
|
||||||
|
|
||||||
// before kernel
|
|
||||||
EXPECT_EQ(0u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(0)));
|
|
||||||
EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, std::get<AuxTranslationDirection>(cmdQ.dispatchAuxTranslationInputs.at(0)));
|
|
||||||
|
|
||||||
// after kernel
|
|
||||||
EXPECT_EQ(1u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(1)));
|
|
||||||
EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, std::get<AuxTranslationDirection>(cmdQ.dispatchAuxTranslationInputs.at(1)));
|
|
||||||
|
|
||||||
mockKernel.mockKernel->auxTranslationRequired = false;
|
|
||||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
|
||||||
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); // not changed
|
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
|
HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
|
||||||
|
@ -738,11 +728,20 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
|
||||||
|
|
||||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
|
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
|
||||||
|
|
||||||
EXPECT_EQ(1u, std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel
|
EXPECT_EQ(1u, std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel
|
||||||
EXPECT_EQ(1u, std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel
|
EXPECT_EQ(1u, std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel
|
||||||
|
|
||||||
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
|
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
|
||||||
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
|
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
|
||||||
|
uint32_t pipeControlCount = 0;
|
||||||
|
for (const auto &dispatchInfo : cmdQ.dispatchInfos) {
|
||||||
|
if (dispatchInfo.isPipeControlRequired()) {
|
||||||
|
++pipeControlCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(4u, pipeControlCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenDispatchAuxTranslationBuiltin) {
|
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenDispatchAuxTranslationBuiltin) {
|
||||||
|
@ -821,6 +820,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelWhenAuxTrans
|
||||||
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, buffer0.getGraphicsAllocation()->getAllocationType());
|
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, buffer0.getGraphicsAllocation()->getAllocationType());
|
||||||
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, buffer1.getGraphicsAllocation()->getAllocationType());
|
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, buffer1.getGraphicsAllocation()->getAllocationType());
|
||||||
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, buffer2.getGraphicsAllocation()->getAllocationType());
|
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, buffer2.getGraphicsAllocation()->getAllocationType());
|
||||||
|
uint32_t pipeControlCount = 0;
|
||||||
|
for (const auto &dispatchInfo : cmdQ.dispatchInfos) {
|
||||||
|
if (dispatchInfo.isPipeControlRequired()) {
|
||||||
|
++pipeControlCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(1u, pipeControlCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -307,4 +307,21 @@ TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryParen
|
||||||
EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
|
EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
|
||||||
EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel());
|
EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MultiDispatchInfo multiDispatchInfo;
|
||||||
|
multiDispatchInfo.push(parentKernelDispatchInfo);
|
||||||
|
multiDispatchInfo.push(baseDispatchInfo);
|
||||||
|
multiDispatchInfo.push(builtInDispatchInfo);
|
||||||
|
|
||||||
|
std::reverse_iterator<DispatchInfo *> rend = multiDispatchInfo.rend();
|
||||||
|
std::reverse_iterator<const DispatchInfo *> crend = multiDispatchInfo.crend();
|
||||||
|
std::reverse_iterator<DispatchInfo *> rbegin = multiDispatchInfo.rbegin();
|
||||||
|
std::reverse_iterator<const DispatchInfo *> crbegin = multiDispatchInfo.crbegin();
|
||||||
|
|
||||||
|
EXPECT_EQ(rbegin.base(), multiDispatchInfo.end());
|
||||||
|
EXPECT_EQ(crbegin.base(), multiDispatchInfo.end());
|
||||||
|
EXPECT_EQ(rend.base(), multiDispatchInfo.begin());
|
||||||
|
EXPECT_EQ(crend.base(), multiDispatchInfo.begin());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1429,6 +1429,17 @@ TEST(StackVec, Clear) {
|
||||||
ASSERT_EQ(0U, v2.size());
|
ASSERT_EQ(0U, v2.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(StackVec, ReverseBeginningFunctions) {
|
||||||
|
using VecType = StackVec<int, 1>;
|
||||||
|
VecType v;
|
||||||
|
v.push_back(5);
|
||||||
|
|
||||||
|
ASSERT_EQ(v.begin(), v.rend().base());
|
||||||
|
ASSERT_EQ(v.end(), v.rbegin().base());
|
||||||
|
ASSERT_EQ(v.begin(), v.crend().base());
|
||||||
|
ASSERT_EQ(v.end(), v.crbegin().base());
|
||||||
|
}
|
||||||
|
|
||||||
TEST(StackVec, ConstMemberFunctions) {
|
TEST(StackVec, ConstMemberFunctions) {
|
||||||
using VecType = StackVec<int, 3>;
|
using VecType = StackVec<int, 3>;
|
||||||
VecType v;
|
VecType v;
|
||||||
|
|
Loading…
Reference in New Issue