feature(ocl) use tags to synchronize multi root device events
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
fecb52ac49
commit
16bc84e27d
|
@ -391,7 +391,8 @@ class CommandQueueHw : public CommandQueue {
|
|||
EventsRequest &eventsRequest,
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
CommandStreamReceiver *bcsCsr);
|
||||
CommandStreamReceiver *bcsCsr,
|
||||
TagNodeBase *multiRootDeviceSyncNode);
|
||||
|
||||
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
|
@ -422,7 +423,7 @@ class CommandQueueHw : public CommandQueue {
|
|||
TimestampPacketDependencies &timestampPacketDependencies,
|
||||
const EventsRequest &eventsRequest,
|
||||
LinearStream *commandStream,
|
||||
uint32_t commandType, bool queueBlocked);
|
||||
uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync);
|
||||
void submitCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream,
|
||||
|
@ -433,6 +434,8 @@ class CommandQueueHw : public CommandQueue {
|
|||
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override;
|
||||
|
||||
MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const;
|
||||
void processSignalMultiRootDeviceNode(LinearStream *commandStream,
|
||||
TagNodeBase *node);
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
|
||||
|
@ -473,7 +476,7 @@ class CommandQueueHw : public CommandQueue {
|
|||
blockedCommandsData = std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
|
||||
} else {
|
||||
commandStream = &getCommandStream<GfxFamily, commandType>(*this, csrDependencies, profilingRequired, perfCountersRequired,
|
||||
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0);
|
||||
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0, eventsRequest.outEvent);
|
||||
}
|
||||
return commandStream;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -45,7 +45,7 @@ bool CommandQueueHw<Family>::isCacheFlushCommand(uint32_t commandType) const {
|
|||
}
|
||||
|
||||
template <>
|
||||
LinearStream &getCommandStream<Family, CL_COMMAND_RESOURCE_BARRIER>(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) {
|
||||
LinearStream &getCommandStream<Family, CL_COMMAND_RESOURCE_BARRIER>(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) {
|
||||
size_t expectedSizeCS = 0;
|
||||
[[maybe_unused]] bool usePostSync = false;
|
||||
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
|
|
|
@ -177,7 +177,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
|
||||
if (this->context->getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver);
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, computeCommandStreamReceiver);
|
||||
}
|
||||
|
||||
const bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo);
|
||||
|
@ -226,7 +226,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
}
|
||||
|
||||
if (this->context->getRootDeviceIndices().size() > 1) {
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, csrDeps);
|
||||
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStream, csrDeps);
|
||||
}
|
||||
|
||||
if (enqueueWithBlitAuxTranslation) {
|
||||
|
@ -280,6 +280,17 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
} else if (isMarkerWithPostSyncWrite) {
|
||||
processDispatchForMarker(*this, &commandStream, eventsRequest, csrDeps);
|
||||
}
|
||||
TagNodeBase *multiRootEventSyncStamp = nullptr;
|
||||
if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) {
|
||||
multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode();
|
||||
if (!blockQueue) {
|
||||
this->getGpgpuCommandStreamReceiver().makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation());
|
||||
}
|
||||
processSignalMultiRootDeviceNode(&commandStream, multiRootEventSyncStamp);
|
||||
if (CL_COMMAND_MARKER == commandType) {
|
||||
flushDependenciesForNonKernelCommand = true;
|
||||
}
|
||||
}
|
||||
|
||||
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
|
||||
const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType),
|
||||
|
@ -382,7 +393,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
eventsRequest,
|
||||
eventBuilder,
|
||||
std::move(printfHandler),
|
||||
nullptr);
|
||||
nullptr,
|
||||
multiRootEventSyncStamp);
|
||||
}
|
||||
|
||||
if (deferredTimestampPackets.get()) {
|
||||
|
@ -497,7 +509,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
|
|||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TimestampPacketDependencies &timestampPacketDependencies,
|
||||
const EventsRequest &eventsRequest, LinearStream *commandStream,
|
||||
uint32_t commandType, bool queueBlocked) {
|
||||
uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync) {
|
||||
auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType);
|
||||
|
||||
auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver,
|
||||
|
@ -510,7 +522,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
|
|||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes);
|
||||
}
|
||||
|
||||
blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync;
|
||||
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNode;
|
||||
|
||||
|
@ -616,7 +628,20 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarker(CommandQueue &commandQu
|
|||
HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
getGpgpuCommandStreamReceiver().makeResident(*hwTimeStamps->getBaseGraphicsAllocation());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processSignalMultiRootDeviceNode(LinearStream *commandStream,
|
||||
TagNodeBase *node) {
|
||||
const auto &hwInfo = getDevice().getHardwareInfo();
|
||||
PipeControlArgs args;
|
||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
|
||||
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandStream,
|
||||
PostSyncMode::ImmediateData,
|
||||
node->getGpuAddress() + node->getContextEndOffset(),
|
||||
std::numeric_limits<uint64_t>::max(),
|
||||
hwInfo,
|
||||
args);
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(CommandQueue &commandQueue,
|
||||
LinearStream *commandStream,
|
||||
|
@ -901,7 +926,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
|||
EventsRequest &eventsRequest,
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
CommandStreamReceiver *bcsCsr) {
|
||||
CommandStreamReceiver *bcsCsr,
|
||||
TagNodeBase *multiRootDeviceSyncNode) {
|
||||
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
|
@ -972,7 +998,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
|||
std::move(printfHandler),
|
||||
preemptionMode,
|
||||
multiDispatchInfo.peekMainKernel(),
|
||||
(uint32_t)multiDispatchInfo.size());
|
||||
(uint32_t)multiDispatchInfo.size(),
|
||||
multiRootDeviceSyncNode);
|
||||
}
|
||||
if (storeTimestampPackets) {
|
||||
command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies));
|
||||
|
@ -1274,10 +1301,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
}
|
||||
|
||||
TimestampPacketDependencies timestampPacketDependencies;
|
||||
TagNodeBase *multiRootEventSyncStamp = nullptr;
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
CsrDependencies csrDeps;
|
||||
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
if (this->context->getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, bcsCsr);
|
||||
}
|
||||
auto allocator = bcsCsr.getTimestampPacketAllocator();
|
||||
|
||||
if (!blockQueue) {
|
||||
|
@ -1304,6 +1335,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
}
|
||||
if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) {
|
||||
multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode();
|
||||
bcsCsr.makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation());
|
||||
}
|
||||
|
||||
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
|
||||
|
||||
|
@ -1320,7 +1355,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
}
|
||||
|
||||
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, gpgpuCommandStream, cmdType, blockQueue));
|
||||
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp));
|
||||
|
||||
if (!blockQueue) {
|
||||
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
|
||||
|
@ -1347,7 +1382,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
||||
|
||||
if (blockQueue) {
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr);
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
|
|
|
@ -88,7 +88,7 @@ class GpgpuWalkerHelper {
|
|||
template <typename GfxFamily>
|
||||
struct EnqueueOperation {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList);
|
||||
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent);
|
||||
static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo);
|
||||
static size_t getSizeRequiredForTimestampPacketWrite();
|
||||
static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue);
|
||||
|
@ -101,8 +101,8 @@ struct EnqueueOperation {
|
|||
template <typename GfxFamily, uint32_t eventType>
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace,
|
||||
bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo,
|
||||
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) {
|
||||
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList);
|
||||
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) {
|
||||
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList, outEvent);
|
||||
return commandQueue.getCS(expectedSizeCS);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -165,7 +165,7 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(cons
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist) {
|
||||
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist, cl_event *outEvent) {
|
||||
size_t expectedSizeCS = 0;
|
||||
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
|
||||
auto &gfxCoreHelper = commandQueue.getDevice().getGfxCoreHelper();
|
||||
|
@ -218,8 +218,14 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
|||
if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
|
||||
expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(csrDeps);
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(csrDeps);
|
||||
if (outEvent) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*outEvent);
|
||||
if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) {
|
||||
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(hwInfo, false);
|
||||
}
|
||||
}
|
||||
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
|
||||
|
||||
return expectedSizeCS;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -31,6 +31,7 @@ struct HardwareInterfaceWalkerArgs {
|
|||
size_t localWorkSizes[3] = {};
|
||||
TagNodeBase *hwTimeStamps = nullptr;
|
||||
TagNodeBase *hwPerfCounter = nullptr;
|
||||
TagNodeBase *multiRootDeviceEventStamp = nullptr;
|
||||
TimestampPacketDependencies *timestampPacketDependencies = nullptr;
|
||||
TimestampPacketContainer *currentTimestampPacketNodes = nullptr;
|
||||
const Vec3<size_t> *numberOfWorkgroups = nullptr;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -133,6 +133,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||
walkerArgs.currentTimestampPacketNodes);
|
||||
|
||||
walkerArgs.currentDispatchIndex = 0;
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.dispatchInitCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
walkerArgs.isMainKernel = (dispatchInfo.getKernel() == mainKernel);
|
||||
|
|
|
@ -49,7 +49,9 @@ Context::Context(
|
|||
|
||||
Context::~Context() {
|
||||
gtpinNotifyContextDestroy((cl_context)this);
|
||||
|
||||
if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) {
|
||||
multiRootDeviceTimestampPacketAllocator.reset();
|
||||
}
|
||||
if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) {
|
||||
smallBufferPoolAllocator.releaseSmallBufferPool();
|
||||
}
|
||||
|
@ -558,5 +560,15 @@ void Context::BufferPoolAllocator::releaseSmallBufferPool() {
|
|||
delete this->mainStorage;
|
||||
this->mainStorage = nullptr;
|
||||
}
|
||||
TagAllocatorBase *Context::getMultiRootDeviceTimestampPacketAllocator() {
|
||||
return multiRootDeviceTimestampPacketAllocator.get();
|
||||
}
|
||||
void Context::setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator) {
|
||||
multiRootDeviceTimestampPacketAllocator = std::move(allocator);
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> Context::obtainOwnershipForMultiRootDeviceAllocator() {
|
||||
return std::unique_lock<std::mutex>(multiRootDeviceAllocatorMtx);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -37,6 +37,7 @@ class SharingFunctions;
|
|||
class SVMAllocsManager;
|
||||
class Program;
|
||||
class Platform;
|
||||
class TagAllocatorBase;
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_context> {
|
||||
|
@ -223,6 +224,9 @@ class Context : public BaseObject<_cl_context> {
|
|||
BufferPoolAllocator &getBufferPoolAllocator() {
|
||||
return this->smallBufferPoolAllocator;
|
||||
}
|
||||
TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator();
|
||||
std::unique_lock<std::mutex> obtainOwnershipForMultiRootDeviceAllocator();
|
||||
void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator);
|
||||
|
||||
protected:
|
||||
struct BuiltInKernel {
|
||||
|
@ -263,6 +267,8 @@ class Context : public BaseObject<_cl_context> {
|
|||
uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();
|
||||
cl_bool preferD3dSharedResources = 0u;
|
||||
ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT;
|
||||
std::unique_ptr<TagAllocatorBase> multiRootDeviceTimestampPacketAllocator;
|
||||
std::mutex multiRootDeviceAllocatorMtx;
|
||||
|
||||
bool interopUserSync = false;
|
||||
bool resolvesRequiredInKernels = false;
|
||||
|
|
|
@ -131,6 +131,9 @@ Event::~Event() {
|
|||
if (timeStampNode != nullptr) {
|
||||
timeStampNode->returnTag();
|
||||
}
|
||||
if (multiRootTimeStampSyncNode != nullptr) {
|
||||
multiRootTimeStampSyncNode->returnTag();
|
||||
}
|
||||
if (perfCounterNode != nullptr) {
|
||||
cmdQueue->getPerfCounters()->deleteQuery(perfCounterNode->getQueryHandleRef());
|
||||
perfCounterNode->getQueryHandleRef() = {};
|
||||
|
@ -883,7 +886,6 @@ TagNodeBase *Event::getHwTimeStampNode() {
|
|||
}
|
||||
|
||||
TagNodeBase *Event::getHwPerfCounterNode() {
|
||||
|
||||
if (!perfCounterNode && cmdQueue->getPerfCounters()) {
|
||||
const uint32_t gpuReportSize = HwPerfCounter::getSize(*(cmdQueue->getPerfCounters()));
|
||||
perfCounterNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag();
|
||||
|
@ -891,11 +893,27 @@ TagNodeBase *Event::getHwPerfCounterNode() {
|
|||
return perfCounterNode;
|
||||
}
|
||||
|
||||
TagNodeBase *Event::getMultiRootTimestampSyncNode() {
|
||||
auto lock = getContext()->obtainOwnershipForMultiRootDeviceAllocator();
|
||||
if (getContext()->getMultiRootDeviceTimestampPacketAllocator() == nullptr) {
|
||||
auto allocator = cmdQueue->getGpgpuCommandStreamReceiver().createMultiRootDeviceTimestampPacketAllocator(getContext()->getRootDeviceIndices());
|
||||
getContext()->setMultiRootDeviceTimestampPacketAllocator(allocator);
|
||||
}
|
||||
lock.unlock();
|
||||
if (multiRootDeviceTimestampPacketContainer.get() == nullptr) {
|
||||
multiRootDeviceTimestampPacketContainer = std::make_unique<TimestampPacketContainer>();
|
||||
}
|
||||
multiRootTimeStampSyncNode = getContext()->getMultiRootDeviceTimestampPacketAllocator()->getTag();
|
||||
multiRootDeviceTimestampPacketContainer->add(multiRootTimeStampSyncNode);
|
||||
return multiRootTimeStampSyncNode;
|
||||
}
|
||||
|
||||
void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) {
|
||||
timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer);
|
||||
}
|
||||
|
||||
TimestampPacketContainer *Event::getTimestampPacketNodes() const { return timestampPacketContainer.get(); }
|
||||
TimestampPacketContainer *Event::getMultiRootDeviceTimestampPacketNodes() const { return multiRootDeviceTimestampPacketContainer.get(); }
|
||||
|
||||
bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
|
||||
bool userEventsDependencies = false;
|
||||
|
|
|
@ -115,6 +115,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
|||
|
||||
void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer);
|
||||
TimestampPacketContainer *getTimestampPacketNodes() const;
|
||||
TimestampPacketContainer *getMultiRootDeviceTimestampPacketNodes() const;
|
||||
|
||||
bool isPerfCountersEnabled() const {
|
||||
return perfCountersEnabled;
|
||||
|
@ -129,6 +130,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
|||
}
|
||||
|
||||
TagNodeBase *getHwPerfCounterNode();
|
||||
TagNodeBase *getMultiRootTimestampSyncNode();
|
||||
|
||||
std::unique_ptr<FlushStampTracker> flushStamp;
|
||||
std::atomic<TaskCountType> taskLevel;
|
||||
|
@ -384,8 +386,10 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
|||
bool perfCountersEnabled;
|
||||
TagNodeBase *timeStampNode = nullptr;
|
||||
TagNodeBase *perfCounterNode = nullptr;
|
||||
TagNodeBase *multiRootTimeStampSyncNode = nullptr;
|
||||
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
||||
// number of events this event depends on
|
||||
std::unique_ptr<TimestampPacketContainer> multiRootDeviceTimestampPacketContainer;
|
||||
std::atomic<int> parentCount;
|
||||
// event parents
|
||||
std::vector<Event *> parentEvents;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -26,7 +26,6 @@ namespace NEO {
|
|||
void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) {
|
||||
auto csrOwnership = dependentCsr.obtainUniqueOwnership();
|
||||
dependentCsr.updateTagFromWait();
|
||||
csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
|
||||
}
|
||||
|
||||
void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const {
|
||||
|
@ -60,6 +59,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
|
|||
if (productHelper.isDcFlushAllowed()) {
|
||||
if (!dependentCsr.isLatestTaskCountFlushed()) {
|
||||
flushDependentCsr(dependentCsr, csrDeps);
|
||||
// csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
|
||||
currentCsr.makeResident(*dependentCsr.getTagAllocation());
|
||||
}
|
||||
}
|
||||
|
@ -68,23 +68,22 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
|
|||
}
|
||||
}
|
||||
|
||||
void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
|
||||
void EventsRequest::fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
|
||||
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
|
||||
if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
|
||||
auto timestampPacketContainer = event->getMultiRootDeviceTimestampPacketNodes();
|
||||
if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) {
|
||||
continue;
|
||||
}
|
||||
auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver();
|
||||
if (!dependentCsr.isLatestTaskCountFlushed()) {
|
||||
flushDependentCsr(dependentCsr, csrDeps);
|
||||
} else {
|
||||
csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
|
||||
}
|
||||
|
||||
auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
|
||||
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
|
||||
csrDeps.multiRootTimeStampSyncContainer.push_back(timestampPacketContainer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ struct EventsRequest {
|
|||
: numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {}
|
||||
|
||||
void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const;
|
||||
void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const;
|
||||
void fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const;
|
||||
void setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const;
|
||||
|
||||
cl_uint numEventsInWaitList;
|
||||
|
|
|
@ -118,10 +118,11 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate
|
|||
|
||||
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
|
||||
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
|
||||
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount,
|
||||
TagNodeBase *multiRootDeviceSyncNode)
|
||||
: Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
|
||||
commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
|
||||
kernelCount(kernelCount), preemptionMode(preemptionMode) {
|
||||
kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) {
|
||||
UNRECOVERABLE_IF(nullptr == this->kernel);
|
||||
kernel->incRefInternal();
|
||||
}
|
||||
|
@ -163,6 +164,9 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
|||
printfHandler->makeResident(commandStreamReceiver);
|
||||
}
|
||||
makeTimestampPacketsResident(commandStreamReceiver);
|
||||
if (multiRootDeviceSyncNode != nullptr) {
|
||||
commandStreamReceiver.makeResident(*multiRootDeviceSyncNode->getBaseGraphicsAllocation());
|
||||
}
|
||||
|
||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||
CsrDependencies csrDeps;
|
||||
|
@ -214,7 +218,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
|||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
}
|
||||
|
||||
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||
|
@ -307,7 +311,7 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
|
|||
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr);
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr);
|
||||
}
|
||||
|
||||
const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
||||
|
@ -389,7 +393,7 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
|
|||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
}
|
||||
|
||||
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||
|
|
|
@ -127,7 +127,7 @@ class CommandComputeKernel : public Command {
|
|||
public:
|
||||
CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
|
||||
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);
|
||||
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, TagNodeBase *multiRootDeviceSyncNode);
|
||||
|
||||
~CommandComputeKernel() override;
|
||||
|
||||
|
@ -146,6 +146,7 @@ class CommandComputeKernel : public Command {
|
|||
Kernel *kernel;
|
||||
uint32_t kernelCount;
|
||||
PreemptionMode preemptionMode;
|
||||
TagNodeBase *multiRootDeviceSyncNode;
|
||||
};
|
||||
|
||||
class CommandWithoutKernel : public Command {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -19,6 +19,7 @@
|
|||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/image_fixture.h"
|
||||
#include "opencl/test/unit_test/helpers/cl_hw_parse.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
|
@ -193,6 +194,7 @@ class MockCommandStreamReceiverWithFailingFlushBatchedSubmission : public MockCo
|
|||
// Test double for CommandQueueHw<GfxFamily> whose GPGPU command stream receiver
// is whatever the test stores in `csr`.
template <typename GfxFamily>
|
||||
struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw<GfxFamily> {
|
||||
// Inherit every CommandQueueHw constructor unchanged.
using CommandQueueHw<GfxFamily>::CommandQueueHw;
|
||||
// Re-declare the base member in this struct's (public) section so tests can reach it.
using CommandQueueHw<GfxFamily>::timestampPacketContainer;
|
||||
// Not owned and not initialized here: the inherited constructors leave it indeterminate,
// so it must be assigned before getGpgpuCommandStreamReceiver() is called.
MockCommandStreamReceiverWithFailingFlushBatchedSubmission *csr;
|
||||
// Returns the injected receiver; dereferences `csr` without a null check.
CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; }
|
||||
};
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
|
@ -970,4 +971,4 @@ HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsP
|
|||
EXPECT_EQ(builder.paramsToUse.elws.x, dispatchInfo->getEnqueuedWorkgroupSize().x);
|
||||
EXPECT_EQ(builder.paramsToUse.offset.x, dispatchInfo->getOffset().x);
|
||||
EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel());
|
||||
}
|
||||
}
|
|
@ -31,6 +31,7 @@
|
|||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -557,11 +557,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp
|
|||
MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector<Kernel *>({kernel1.mockKernel, kernel2.mockKernel}));
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
size_t sizeWithDisabled = cmdQ.requestedCmdStreamSize;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
size_t sizeWithEnabled = cmdQ.requestedCmdStreamSize;
|
||||
|
||||
size_t additionalSize = 0u;
|
||||
|
@ -669,7 +669,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocal
|
|||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
|
||||
false, *cmdQ.get(), multiDispatchInfo, false, false);
|
||||
false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr);
|
||||
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
|
||||
expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize);
|
||||
EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS);
|
||||
|
@ -738,7 +738,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
|
|||
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
|
||||
false, *cmdQ.get(), multiDispatchInfo, false, false);
|
||||
false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr);
|
||||
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
|
||||
expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize);
|
||||
EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS);
|
||||
|
|
|
@ -234,7 +234,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg
|
|||
Surface *surfaces[] = {nullptr};
|
||||
mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
|
||||
blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest,
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr);
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr, nullptr);
|
||||
EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue);
|
||||
}
|
||||
|
||||
|
@ -267,7 +267,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
|
|||
Surface *surfaces[] = {nullptr};
|
||||
mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
|
||||
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation());
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr);
|
||||
EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue);
|
||||
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation);
|
||||
EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation);
|
||||
|
@ -351,7 +351,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutK
|
|||
|
||||
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
|
||||
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
|
||||
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr);
|
||||
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties);
|
||||
|
@ -395,7 +395,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenA
|
|||
mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, bcsCsr);
|
||||
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
|
||||
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
|
||||
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr);
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties);
|
||||
|
||||
|
@ -441,7 +441,7 @@ HWTEST_F(DispatchFlagsTests, givenMockKernelWhenSettingAdditionalKernelExecInfoT
|
|||
std::vector<Surface *> v;
|
||||
|
||||
pKernel->setAdditionalKernelExecInfo(123u);
|
||||
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1));
|
||||
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr));
|
||||
cmd->submit(1u, false);
|
||||
EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, 123u);
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ HWTEST2_F(DispatchFlagsTests, whenSubmittingKernelWithAdditionalKernelExecInfoTh
|
|||
std::vector<Surface *> v;
|
||||
|
||||
pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::DisableOverdispatch);
|
||||
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1));
|
||||
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr));
|
||||
cmd->submit(1u, false);
|
||||
EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::DisableOverdispatch);
|
||||
|
||||
|
|
|
@ -2003,10 +2003,10 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
|
|||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
|
||||
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
|
||||
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
|
||||
}
|
||||
|
|
|
@ -1014,8 +1014,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW
|
|||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false);
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr);
|
||||
|
||||
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size + MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(false), extendedCommandStreamSize);
|
||||
}
|
||||
|
@ -1033,8 +1033,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMulti
|
|||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false);
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr);
|
||||
|
||||
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size + ImplicitScalingDispatch<FamilyType>::getBarrierSize(csr.peekHwInfo(), false, false), extendedCommandStreamSize);
|
||||
}
|
||||
|
@ -1047,8 +1047,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWait
|
|||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true);
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true, nullptr);
|
||||
|
||||
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size, extendedCommandStreamSize);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -23,6 +23,7 @@
|
|||
#include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/image_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
|
@ -96,7 +97,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferCo
|
|||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
@ -149,7 +150,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferCo
|
|||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
@ -203,7 +204,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndComm
|
|||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
@ -258,7 +259,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferBlockingThenThenHeapsAndCom
|
|||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
@ -313,7 +314,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndComm
|
|||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
@ -365,7 +366,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand
|
|||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
|
@ -380,6 +381,68 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand
|
|||
EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH);
|
||||
}
|
||||
|
||||
// Checks that the estimated CL_COMMAND_MARKER command-stream size is the same with and
// without an event pointer as the last argument of getTotalSizeRequiredCS(), when that
// event belongs to a context built from a single device.
// NOTE(review): pCmdQ is not declared in this test — presumably provided by the
// GetSizeRequiredBufferTest fixture; verify.
HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForSingleDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) {
|
||||
// presumably {number of root devices, number of sub-devices} — TODO confirm against UltClDeviceFactory
UltClDeviceFactory deviceFactory{1, 0};
|
||||
// NOTE(review): the flag is set after deviceFactory is constructed, and no restore of the
// debug flag is visible in this test — confirm the fixture resets it.
DebugManager.flags.EnableMultiRootDeviceContexts.set(true);
|
||||
|
||||
cl_device_id devices[] = {deviceFactory.rootDevices[0]};
|
||||
|
||||
// Context containing only the single device listed above.
MockContext pContext(ClDeviceVector(devices, 1));
|
||||
MockKernelWithInternals mockKernel(*pContext.getDevices()[0]);
|
||||
DispatchInfo dispatchInfo;
|
||||
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
|
||||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
// Event created in pContext; its handle is what gets passed to the second size query.
auto event = std::make_unique<MockEvent<Event>>(&pContext, nullptr, 0, 0, 0);
|
||||
cl_event clEvent = event.get();
|
||||
// The two queries differ only in the last argument: nullptr vs. &clEvent.
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent);
|
||||
|
||||
// No additional space is expected for the event in the single-device case.
EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize);
|
||||
}
|
||||
|
||||
// Checks that the estimated CL_COMMAND_MARKER command-stream size is the same with and
// without an event pointer as the last argument of getTotalSizeRequiredCS(), when that
// event is a UserEvent created in a context built from two devices.
// NOTE(review): pCmdQ is not declared in this test — presumably provided by the
// GetSizeRequiredBufferTest fixture; verify.
HWTEST_F(GetSizeRequiredBufferTest, GivenUserEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) {
|
||||
// presumably {number of root devices, number of sub-devices} — TODO confirm against UltClDeviceFactory
UltClDeviceFactory deviceFactory{2, 0};
|
||||
// NOTE(review): the flag is set after deviceFactory is constructed, and no restore of the
// debug flag is visible in this test — confirm the fixture resets it.
DebugManager.flags.EnableMultiRootDeviceContexts.set(true);
|
||||
|
||||
cl_device_id devices[] = {deviceFactory.rootDevices[0],
|
||||
deviceFactory.rootDevices[1]};
|
||||
|
||||
// Context spanning both devices from the factory.
MockContext pContext(ClDeviceVector(devices, 2));
|
||||
MockKernelWithInternals mockKernel(*pContext.getDevices()[0]);
|
||||
DispatchInfo dispatchInfo;
|
||||
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
|
||||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
// A UserEvent (rather than a MockEvent<Event>) is used as the event handle here.
auto userEvent1 = std::make_unique<UserEvent>(&pContext);
|
||||
cl_event clEvent = userEvent1.get();
|
||||
// The two queries differ only in the last argument: nullptr vs. &clEvent.
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent);
|
||||
|
||||
// No additional space is expected when the event is a user event.
EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize);
|
||||
}
|
||||
|
||||
// Checks that passing an event pointer as the last argument of getTotalSizeRequiredCS()
// increases the estimated CL_COMMAND_MARKER command-stream size by exactly
// getSizeForBarrierWithPostSyncOperation(), when the event belongs to a context built
// from two devices.
// NOTE(review): pCmdQ is not declared in this test — presumably provided by the
// GetSizeRequiredBufferTest fixture; verify.
HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsAdded) {
|
||||
// presumably {number of root devices, number of sub-devices} — TODO confirm against UltClDeviceFactory
UltClDeviceFactory deviceFactory{2, 0};
|
||||
// NOTE(review): the flag is set after deviceFactory is constructed, and no restore of the
// debug flag is visible in this test — confirm the fixture resets it.
DebugManager.flags.EnableMultiRootDeviceContexts.set(true);
|
||||
|
||||
cl_device_id devices[] = {deviceFactory.rootDevices[0],
|
||||
deviceFactory.rootDevices[1]};
|
||||
|
||||
// Context spanning both devices from the factory.
MockContext pContext(ClDeviceVector(devices, 2));
|
||||
MockKernelWithInternals mockKernel(*pContext.getDevices()[0]);
|
||||
DispatchInfo dispatchInfo;
|
||||
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
|
||||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
// Event created in the two-device context; its handle is passed to the second size query.
auto event = std::make_unique<MockEvent<Event>>(&pContext, nullptr, 0, 0, 0);
|
||||
cl_event clEvent = event.get();
|
||||
// The two queries differ only in the last argument: nullptr vs. &clEvent.
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent);
|
||||
|
||||
// The difference must equal the barrier-with-post-sync size computed for the first device's hardware info.
EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getHardwareInfo(), false), extendedCommandStreamSize);
|
||||
}
|
||||
|
||||
HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned) {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
pCmdQ->getClDevice());
|
||||
|
|
|
@ -1903,7 +1903,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke
|
|||
blockedCommandsData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
std::vector<Surface *> surfaces;
|
||||
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1));
|
||||
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr));
|
||||
event->submitCommand(false);
|
||||
|
||||
EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired);
|
||||
|
@ -1948,7 +1948,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenInitializeBc
|
|||
auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
|
||||
std::vector<Surface *> surfaces;
|
||||
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1));
|
||||
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr));
|
||||
event->submitCommand(false);
|
||||
EXPECT_FALSE(pCmdQ->isCsrLocked);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
|
@ -14,6 +15,7 @@
|
|||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
@ -45,12 +47,18 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
|
|||
MockGraphicsAllocation svmAlloc(svmPtr, svmSize);
|
||||
|
||||
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
|
||||
auto node1 = event1.getMultiRootTimestampSyncNode();
|
||||
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
|
||||
auto node3 = event3.getMultiRootTimestampSyncNode();
|
||||
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
|
||||
auto node4 = event4.getMultiRootTimestampSyncNode();
|
||||
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
|
||||
auto node5 = event5.getMultiRootTimestampSyncNode();
|
||||
UserEvent userEvent1(&pCmdQ1->getContext());
|
||||
userEvent1.getMultiRootTimestampSyncNode();
|
||||
UserEvent userEvent2(&pCmdQ2->getContext());
|
||||
userEvent2.getMultiRootTimestampSyncNode();
|
||||
|
||||
userEvent1.setStatus(CL_COMPLETE);
|
||||
userEvent2.setStatus(CL_COMPLETE);
|
||||
|
@ -87,12 +95,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
|
|||
EXPECT_EQ(2u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -115,12 +123,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
|
|||
EXPECT_EQ(2u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
alignedFree(svmPtr);
|
||||
}
|
||||
|
@ -147,17 +155,24 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
|||
cl_device_id devices[] = {device1, device2, device3};
|
||||
|
||||
auto context = std::make_unique<MockContext>(ClDeviceVector(devices, 3), false);
|
||||
|
||||
auto mockTagAllocator = std::make_unique<MockTagAllocator<>>(context->getRootDeviceIndices(), device1->getExecutionEnvironment()->memoryManager.get(), 10u);
|
||||
std::unique_ptr<TagAllocatorBase> uniquePtr(mockTagAllocator.release());
|
||||
context->setMultiRootDeviceTimestampPacketAllocator(uniquePtr);
|
||||
auto pCmdQ1 = context->getSpecialQueue(1u);
|
||||
auto pCmdQ2 = context->getSpecialQueue(2u);
|
||||
auto pCmdQ3 = context->getSpecialQueue(3u);
|
||||
|
||||
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
|
||||
auto node1 = event1.getMultiRootTimestampSyncNode();
|
||||
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
|
||||
auto node3 = event3.getMultiRootTimestampSyncNode();
|
||||
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
|
||||
auto node4 = event4.getMultiRootTimestampSyncNode();
|
||||
Event event5(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21);
|
||||
auto node5 = event5.getMultiRootTimestampSyncNode();
|
||||
Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
|
||||
auto node6 = event6.getMultiRootTimestampSyncNode();
|
||||
UserEvent userEvent1(&pCmdQ1->getContext());
|
||||
UserEvent userEvent2(&pCmdQ2->getContext());
|
||||
|
||||
|
@ -190,16 +205,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
|||
EXPECT_EQ(3u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
|
||||
EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node6->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -215,16 +230,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
|||
EXPECT_EQ(3u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
|
||||
EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -249,8 +264,8 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
|||
EXPECT_EQ(1u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -286,11 +301,16 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
|
|||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
|
||||
event1.getMultiRootTimestampSyncNode();
|
||||
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
|
||||
event3.getMultiRootTimestampSyncNode();
|
||||
Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
|
||||
event4.getMultiRootTimestampSyncNode();
|
||||
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
|
||||
event5.getMultiRootTimestampSyncNode();
|
||||
Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
|
||||
event6.getMultiRootTimestampSyncNode();
|
||||
UserEvent userEvent1(&pCmdQ1->getContext());
|
||||
UserEvent userEvent2(&pCmdQ2->getContext());
|
||||
|
||||
|
@ -316,10 +336,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
|
|||
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver());
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver());
|
||||
|
||||
EXPECT_EQ(0u, csrDeps.taskCountContainer.size());
|
||||
EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
|
||||
EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size());
|
||||
EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<FamilyType>(csrDeps));
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -342,10 +362,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
|
|||
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver());
|
||||
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver());
|
||||
|
||||
EXPECT_EQ(3u, csrDeps.taskCountContainer.size());
|
||||
EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
|
||||
EXPECT_EQ(3u, csrDeps.multiRootTimeStampSyncContainer.size());
|
||||
EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<FamilyType>(csrDeps));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -376,6 +396,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
|
||||
cl_event outputEvent2{};
|
||||
|
||||
auto currentCsUsedCmdq1 = pCmdQ1->getCS(0).getUsed();
|
||||
pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
|
||||
1,
|
||||
&outputEvent1,
|
||||
|
@ -399,14 +420,12 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
nullptr);
|
||||
{
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0), currentCsUsedCmdq1);
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
EXPECT_EQ(0u, semaphores.size());
|
||||
}
|
||||
userEvent1.setStatus(CL_COMPLETE);
|
||||
event1->release();
|
||||
event2->release();
|
||||
pCmdQ1->finish();
|
||||
pCmdQ2->finish();
|
||||
{
|
||||
|
@ -417,7 +436,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
EXPECT_EQ(1u, semaphores.size());
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
{
|
||||
HardwareParse csHwParser;
|
||||
|
@ -426,9 +445,11 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
|
||||
EXPECT_EQ(1u, semaphores.size());
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
event1->release();
|
||||
event2->release();
|
||||
buffer->release();
|
||||
}
|
||||
|
||||
|
@ -458,14 +479,14 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
char hostPtr[MemoryConstants::pageSize]{};
|
||||
|
||||
cl_event outputEvent2{};
|
||||
|
||||
auto currentCsUsed = pCmdQ1->getCS(0).getUsed();
|
||||
pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
|
||||
1,
|
||||
&outputEvent1,
|
||||
&outputEvent2);
|
||||
{
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0), currentCsUsed);
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
EXPECT_EQ(0u, semaphores.size());
|
||||
|
@ -482,7 +503,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
nullptr);
|
||||
{
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0), currentCsUsed);
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
EXPECT_EQ(0u, semaphores.size());
|
||||
|
@ -590,9 +611,6 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
EXPECT_EQ(0u, semaphores.size());
|
||||
}
|
||||
userEvent1.setStatus(CL_COMPLETE);
|
||||
event1->release();
|
||||
event2->release();
|
||||
event3->release();
|
||||
pCmdQ1->finish();
|
||||
pCmdQ2->finish();
|
||||
|
||||
|
@ -604,7 +622,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
EXPECT_EQ(1u, semaphores.size());
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
auto node = event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0);
|
||||
EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
{
|
||||
HardwareParse csHwParser;
|
||||
|
@ -620,8 +639,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
|
||||
EXPECT_EQ(2u, semaphores.size());
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(0u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
auto node = event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0);
|
||||
EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
{
|
||||
HardwareParse csHwParser;
|
||||
|
@ -630,6 +650,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
|||
|
||||
EXPECT_LE(1u, semaphores.size());
|
||||
}
|
||||
event1->release();
|
||||
event2->release();
|
||||
event3->release();
|
||||
buffer->release();
|
||||
pCmdQ1->release();
|
||||
pCmdQ2->release();
|
||||
|
@ -961,3 +984,73 @@ HWTEST_F(BcsCrossDeviceMigrationTests, givenBufferWithMultiStorageWhenEnqueueRea
|
|||
|
||||
EXPECT_EQ(buffer.get(), cmdQueue->migrateMultiGraphicsAllocationsReceivedOperationParams.srcMemObj);
|
||||
}
|
||||
|
||||
// Events here are created without ever requesting a multi-root sync node
// (getMultiRootTimestampSyncNode() is never called on them). The test expects
// that filling root-device dependencies for pCmdQ2's CSR then leaves
// CsrDependencies::multiRootTimeStampSyncContainer empty.
HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) {
    // Mix of events bound to pCmdQ1, pCmdQ2 and to no queue at all.
    Event firstQueueEventA(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    Event queuelessEvent(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
    Event firstQueueEventB(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
    Event firstQueueEventC(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
    Event secondQueueEventA(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    Event secondQueueEventB(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
    UserEvent firstContextUserEvent(&pCmdQ1->getContext());
    UserEvent secondContextUserEvent(&pCmdQ2->getContext());

    firstContextUserEvent.setStatus(CL_COMPLETE);
    secondContextUserEvent.setStatus(CL_COMPLETE);

    // Only the first user event is part of the wait list; the second one is
    // created and completed but not waited on.
    cl_event waitList[] = {
        &firstQueueEventA,
        &queuelessEvent,
        &firstQueueEventB,
        &firstQueueEventC,
        &secondQueueEventA,
        &secondQueueEventB,
        &firstContextUserEvent,
    };
    const cl_uint waitListCount = static_cast<cl_uint>(sizeof(waitList) / sizeof(waitList[0]));

    EventsRequest request(waitListCount, waitList, nullptr);
    CsrDependencies dependencies;
    auto &secondQueueCsr = pCmdQ2->getGpgpuCommandStreamReceiver();
    request.fillCsrDependenciesForRootDevices(dependencies, secondQueueCsr);

    EXPECT_EQ(0u, dependencies.multiRootTimeStampSyncContainer.size());
}
|
||||
// Every queue-bound event gets a freshly constructed TimestampPacketContainer
// installed as its multi-root container, with no node added to it by this
// test; the queueless event gets none. The test expects that filling
// root-device dependencies for pCmdQ2's CSR leaves
// CsrDependencies::multiRootTimeStampSyncContainer empty.
HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeContainersThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) {
    MockEvent<Event> firstQueueEventA(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    firstQueueEventA.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer());

    // No queue and no multi-root container for this one.
    MockEvent<Event> queuelessEvent(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);

    MockEvent<Event> firstQueueEventB(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
    firstQueueEventB.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer());

    MockEvent<Event> secondQueueEventA(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    secondQueueEventA.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer());

    MockEvent<Event> secondQueueEventB(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    secondQueueEventB.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer());

    MockEvent<Event> secondQueueEventC(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
    secondQueueEventC.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer());

    UserEvent completedUserEvent(&pCmdQ1->getContext());
    completedUserEvent.setStatus(CL_COMPLETE);

    cl_event waitList[] = {
        &firstQueueEventA,
        &queuelessEvent,
        &firstQueueEventB,
        &secondQueueEventA,
        &secondQueueEventB,
        &secondQueueEventC,
        &completedUserEvent,
    };
    const cl_uint waitListCount = static_cast<cl_uint>(sizeof(waitList) / sizeof(waitList[0]));

    EventsRequest request(waitListCount, waitList, nullptr);
    CsrDependencies dependencies;
    auto &secondQueueCsr = pCmdQ2->getGpgpuCommandStreamReceiver();
    request.fillCsrDependenciesForRootDevices(dependencies, secondQueueCsr);

    EXPECT_EQ(0u, dependencies.multiRootTimeStampSyncContainer.size());
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/logical_state_helper.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
|
|
|
@ -1791,4 +1791,4 @@ HWTEST_F(BcsTests, givenHostPtrToImageWhenBlitBufferIsCalledThenBlitCmdIsFound)
|
|||
hwParser.parseCommands<FamilyType>(csr.commandStream, 0);
|
||||
auto cmdIterator = find<typename FamilyType::XY_BLOCK_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), cmdIterator);
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -79,7 +79,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
|
|||
public:
|
||||
using CommandComputeKernel::eventsWaitlist;
|
||||
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
|
||||
};
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
|
@ -129,7 +129,7 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
|
|||
public:
|
||||
using CommandComputeKernel::eventsWaitlist;
|
||||
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
|
||||
};
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
|
|
|
@ -485,7 +485,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandFalseWhenSubmittingCommandsThenRefA
|
|||
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
v.push_back(bufferSurf);
|
||||
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
|
||||
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr);
|
||||
event.setCommand(std::unique_ptr<Command>(cmd));
|
||||
|
||||
auto taskLevelBefore = csr.peekTaskLevel();
|
||||
|
@ -528,7 +528,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandTrueWhenSubmittingCommandsThenRefAp
|
|||
NullSurface *surface = new NullSurface;
|
||||
v.push_back(surface);
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
|
||||
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr);
|
||||
event.setCommand(std::unique_ptr<Command>(cmd));
|
||||
|
||||
auto taskLevelBefore = csr.peekTaskLevel();
|
||||
|
@ -579,7 +579,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
|
|||
|
||||
std::vector<Surface *> v;
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
|
||||
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr);
|
||||
event.setCommand(std::unique_ptr<Command>(cmd));
|
||||
|
||||
event.submitCommand(false);
|
||||
|
@ -631,7 +631,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnCmdQueueWaitFunctionAndBlockedKernelWit
|
|||
|
||||
std::vector<Surface *> v;
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
|
||||
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr);
|
||||
event.setCommand(std::unique_ptr<Command>(cmd));
|
||||
|
||||
event.submitCommand(false);
|
||||
|
@ -680,7 +680,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnPrintingEnqueueOutputAndBlockedKernelWi
|
|||
|
||||
std::vector<Surface *> v;
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
|
||||
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr);
|
||||
event.setCommand(std::unique_ptr<Command>(cmd));
|
||||
|
||||
event.submitCommand(false);
|
||||
|
@ -1169,7 +1169,7 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) {
|
|||
public:
|
||||
using CommandComputeKernel::eventsWaitlist;
|
||||
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
|
||||
};
|
||||
class MockEvent : public Event {
|
||||
public:
|
||||
|
@ -1750,7 +1750,7 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl
|
|||
blockedCommandsData->setHeaps(dsh, ioh, ssh);
|
||||
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
|
||||
std::vector<Surface *> v;
|
||||
auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
|
||||
auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr);
|
||||
event->setCommand(std::unique_ptr<Command>(cmd));
|
||||
|
||||
FlushStamp expectedFlushStamp = 0;
|
||||
|
@ -1893,3 +1893,35 @@ TEST(EventTimestampTest, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabled
|
|||
EXPECT_TRUE(event.isWaitForTimestampsEnabled());
|
||||
}
|
||||
}
|
||||
// When the context already holds a multi-root timestamp packet allocator,
// requesting a multi-root sync node from an event must leave the context
// pointing at that same allocator.
TEST(MultiRootEvent, givenContextWithMultiRootTagAllocatorWhenEventGetsTagThenNewAllocatorIsNotCreated) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext context{};
    MockCommandQueue queue(&context, clDevice.get(), 0, false);

    // Create the allocator through the queue's CSR and remember its address
    // before handing it over to the context.
    auto &csr = queue.getGpgpuCommandStreamReceiver();
    auto tagAllocator = csr.createMultiRootDeviceTimestampPacketAllocator(context.getRootDeviceIndices());
    auto expectedAllocator = tagAllocator.get();
    context.setMultiRootDeviceTimestampPacketAllocator(tagAllocator);

    MockEvent<Event> event{&queue, CL_COMMAND_MARKER, 0, 0};
    event.getMultiRootTimestampSyncNode();

    EXPECT_EQ(expectedAllocator, context.getMultiRootDeviceTimestampPacketAllocator());
}
|
||||
// When the context has no multi-root timestamp packet allocator yet,
// requesting a multi-root sync node from an event is expected to leave the
// context with a non-null allocator.
TEST(MultiRootEvent, givenContextWithoutMultiRootTagAllocatorWhenEventGetsTagThenNewAllocatorIsCreated) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext context{};
    MockCommandQueue queue(&context, clDevice.get(), 0, false);

    // Precondition: nothing has installed an allocator on the context.
    EXPECT_EQ(context.getMultiRootDeviceTimestampPacketAllocator(), nullptr);

    MockEvent<Event> event{&queue, CL_COMMAND_MARKER, 0, 0};
    event.getMultiRootTimestampSyncNode();

    EXPECT_NE(context.getMultiRootDeviceTimestampPacketAllocator(), nullptr);
}
|
||||
// The first request for a multi-root sync node gives the event a node
// container; a second request must keep that same container rather than
// replacing it.
TEST(MultiRootEvent, givenEventWithTagWhenEventGetsNewTagThenNewTagContainerIsNotCreated) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext context{};
    MockCommandQueue queue(&context, clDevice.get(), 0, false);
    MockEvent<Event> event{&queue, CL_COMMAND_MARKER, 0, 0};

    // A fresh event has no multi-root node container.
    EXPECT_EQ(event.getMultiRootDeviceTimestampPacketNodes(), nullptr);

    event.getMultiRootTimestampSyncNode();
    auto containerAfterFirstRequest = event.getMultiRootDeviceTimestampPacketNodes();
    EXPECT_NE(containerAfterFirstRequest, nullptr);

    // Second request: the container pointer must be unchanged.
    event.getMultiRootTimestampSyncNode();
    EXPECT_EQ(containerAfterFirstRequest, event.getMultiRootDeviceTimestampPacketNodes());
}
|
|
@ -2435,7 +2435,7 @@ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed
|
|||
|
||||
gtpinNotifyKernelSubmit(kernel.mockMultiDeviceKernel, mockCmdQ.get());
|
||||
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr));
|
||||
CompletionStamp stamp = command->submit(20, false);
|
||||
|
||||
ASSERT_EQ(1u, kernelExecQueue.size());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -154,7 +154,7 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL
|
|||
public:
|
||||
using CommandComputeKernel::eventsWaitlist;
|
||||
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
|
||||
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
|
||||
};
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
|
@ -291,7 +291,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD
|
|||
for (auto &surface : surfaces) {
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
}
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr));
|
||||
command->submit(20, false);
|
||||
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode);
|
||||
|
@ -339,7 +339,7 @@ HWTEST_F(DispatchFlagsTests, givenClCommandCopyImageWhenSubmitThenFlushTextureCa
|
|||
for (auto &surface : surfaces) {
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
}
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1));
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1, nullptr));
|
||||
command->submit(20, false);
|
||||
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode);
|
||||
|
@ -425,7 +425,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD
|
|||
bool flushDC = false;
|
||||
bool slmUsed = false;
|
||||
bool ndRangeKernel = false;
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
|
||||
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr));
|
||||
command->submit(20, false);
|
||||
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -34,11 +34,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
|
|||
auto mockCmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL);
|
||||
|
@ -52,7 +52,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat
|
|||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false,
|
||||
false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
|
@ -82,7 +82,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat
|
|||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(
|
||||
csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
|
@ -143,7 +143,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
|
|||
auto mockCmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
|
@ -172,7 +172,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
|
|||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -212,7 +212,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd
|
|||
{
|
||||
EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));
|
||||
|
||||
initialSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false);
|
||||
initialSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -226,7 +226,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd
|
|||
ultCsr.multiOsContextCapable = false;
|
||||
EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));
|
||||
|
||||
sizeWithCacheFlush = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false);
|
||||
sizeWithCacheFlush = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
}
|
||||
|
||||
EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush);
|
||||
|
|
|
@ -504,6 +504,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
|
|||
using BaseClass::CommandStreamReceiver;
|
||||
|
||||
TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; }
|
||||
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr<TagAllocatorBase>(nullptr); }
|
||||
|
||||
SubmissionStatus flushTagUpdate() override { return SubmissionStatus::SUCCESS; };
|
||||
void updateTagFromWait() override{};
|
||||
|
|
|
@ -766,11 +766,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati
|
|||
auto &hwInfo = cmdQ->getDevice().getHardwareInfo();
|
||||
|
||||
auto readBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo, false, false);
|
||||
true, *cmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto writeBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo, false, false);
|
||||
true, *cmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto copyBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo, false, false);
|
||||
true, *cmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
|
||||
|
||||
if (cmdQ->isCacheFlushForBcsRequired()) {
|
||||
|
|
|
@ -249,6 +249,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
|||
using BaseClass::latestSentEnqueueType;
|
||||
using BaseClass::obtainCommandStream;
|
||||
using BaseClass::obtainNewTimestampPacketNodes;
|
||||
using BaseClass::processDispatchForKernels;
|
||||
using BaseClass::requiresCacheFlushAfterWalker;
|
||||
using BaseClass::throttle;
|
||||
using BaseClass::timestampPacketContainer;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -39,6 +39,7 @@ struct MockEvent : public BaseEventType {
|
|||
using Event::calculateSubmitTimestampData;
|
||||
using Event::isWaitForTimestampsEnabled;
|
||||
using Event::magic;
|
||||
using Event::multiRootDeviceTimestampPacketContainer;
|
||||
using Event::queueTimeStamp;
|
||||
using Event::submitTimeStamp;
|
||||
using Event::timestampPacketContainer;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -71,13 +71,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor
|
|||
|
||||
MultiDispatchInfo multiDispatchInfo(&kernel);
|
||||
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), true, false, false,
|
||||
multiDispatchInfo, nullptr, 0, false, false);
|
||||
multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, CsrDependencies(), true, false, false,
|
||||
multiDispatchInfo, nullptr, 0, false, false);
|
||||
multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
|
@ -93,13 +93,13 @@ HWTEST_F(ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithNoKerne
|
|||
MultiDispatchInfo multiDispatchInfo(nullptr);
|
||||
auto &commandStreamMigrateMemObjects = getCommandStream<FamilyType, CL_COMMAND_MIGRATE_MEM_OBJECTS>(*pCmdQ, CsrDependencies(),
|
||||
true, false, false,
|
||||
multiDispatchInfo, nullptr, 0, false, false);
|
||||
multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamMarker = getCommandStream<FamilyType, CL_COMMAND_MARKER>(*pCmdQ, CsrDependencies(), true,
|
||||
false, false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize);
|
||||
|
@ -121,9 +121,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor
|
|||
multiDispatchInfo.push(dispatchInfo);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, CsrDependencies(), true, false, false,
|
||||
multiDispatchInfo, nullptr, 0, false, false);
|
||||
multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_TASK, CsrDependencies(), true, false,
|
||||
false, *pCmdQ, multiDispatchInfo, false, false);
|
||||
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
}
|
||||
|
@ -741,13 +741,13 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount
|
|||
MultiDispatchInfo multiDispatchInfo(nullptr);
|
||||
auto &commandStreamMigrateMemObjects = getCommandStream<FamilyType, CL_COMMAND_MIGRATE_MEM_OBJECTS>(*pCmdQ, CsrDependencies(),
|
||||
true, true, false, multiDispatchInfo,
|
||||
nullptr, 0, false, false);
|
||||
nullptr, 0, false, false, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamMarker = getCommandStream<FamilyType, CL_COMMAND_MARKER>(*pCmdQ, CsrDependencies(), true, true, false,
|
||||
multiDispatchInfo, nullptr, 0, false, false);
|
||||
multiDispatchInfo, nullptr, 0, false, false, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize);
|
||||
|
|
|
@ -243,10 +243,14 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent
|
|||
MockGraphicsAllocation svmAlloc(svmPtr, svmSize);
|
||||
|
||||
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
|
||||
auto node1 = event1.getMultiRootTimestampSyncNode();
|
||||
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
|
||||
auto node3 = event3.getMultiRootTimestampSyncNode();
|
||||
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
|
||||
auto node4 = event4.getMultiRootTimestampSyncNode();
|
||||
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
|
||||
auto node5 = event5.getMultiRootTimestampSyncNode();
|
||||
UserEvent userEvent1(&pCmdQ1->getContext());
|
||||
UserEvent userEvent2(&pCmdQ2->getContext());
|
||||
|
||||
|
@ -285,12 +289,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent
|
|||
EXPECT_EQ(3u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -313,12 +317,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent
|
|||
EXPECT_EQ(3u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
alignedFree(svmPtr);
|
||||
}
|
||||
|
|
|
@ -221,6 +221,7 @@ class CommandStreamReceiver {
|
|||
TagAllocatorBase *getEventTsAllocator();
|
||||
TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize);
|
||||
virtual TagAllocatorBase *getTimestampPacketAllocator() = 0;
|
||||
virtual std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) = 0;
|
||||
|
||||
virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
|
||||
|
||||
|
|
|
@ -130,6 +130,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
GraphicsAllocation *getClearColorAllocation() override;
|
||||
|
||||
TagAllocatorBase *getTimestampPacketAllocator() override;
|
||||
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override;
|
||||
|
||||
void postInitFlagsSetup() override;
|
||||
void programActivePartitionConfig(LinearStream &csr);
|
||||
|
|
|
@ -406,7 +406,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
|
||||
programActivePartitionConfigFlushTask(commandStreamCSR);
|
||||
programEngineModeCommands(commandStreamCSR, dispatchFlags);
|
||||
|
@ -980,7 +980,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
|||
}
|
||||
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
|
||||
size += EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(kernelArgsBufferAllocation, logicalStateHelper.get());
|
||||
|
||||
|
@ -1196,7 +1196,7 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
|
|||
|
||||
for (auto &blitProperties : blitPropertiesContainer) {
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
|
||||
BlitCommandsHelper<GfxFamily>::encodeWa(commandStream, blitProperties, latestSentBcsWaValue);
|
||||
|
||||
|
@ -1229,6 +1229,12 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
|
|||
if (blitProperties.clearColorAllocation) {
|
||||
makeResident(*blitProperties.clearColorAllocation);
|
||||
}
|
||||
if (blitProperties.multiRootDeviceEventSync != nullptr) {
|
||||
MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, blitProperties.multiRootDeviceEventSync->getGpuAddress() + blitProperties.multiRootDeviceEventSync->getContextEndOffset(), std::numeric_limits<uint64_t>::max(), args, hwInfo);
|
||||
}
|
||||
}
|
||||
|
||||
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
|
||||
|
@ -1245,7 +1251,6 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
|
|||
|
||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekHwInfo());
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(),
|
||||
DebugPauseState::waitingForUserEndConfirmation,
|
||||
|
@ -1522,6 +1527,11 @@ TagAllocatorBase *CommandStreamReceiverHw<GfxFamily>::getTimestampPacketAllocato
|
|||
return timestampPacketAllocator.get();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
std::unique_ptr<TagAllocatorBase> CommandStreamReceiverHw<GfxFamily>::createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) {
|
||||
auto &gfxCoreHelper = getGfxCoreHelper();
|
||||
return gfxCoreHelper.createTimestampPacketAllocator(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), getType(), osContext->getDeviceBitfield());
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup() {
|
||||
useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -22,7 +22,7 @@ class CsrDependencies {
|
|||
All
|
||||
};
|
||||
|
||||
StackVec<std::pair<TaskCountType, uint64_t>, 32> taskCountContainer;
|
||||
StackVec<TimestampPacketContainer *, 32> multiRootTimeStampSyncContainer;
|
||||
StackVec<TimestampPacketContainer *, 32> timestampPacketContainer;
|
||||
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -50,6 +50,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
|
|||
BlitterConstants::BlitDirection::HostPtrToImage == blitDirection) {
|
||||
return {
|
||||
nullptr, // outputTimestampPacket
|
||||
nullptr, // multiRootDeviceEventSync
|
||||
blitDirection, // blitDirection
|
||||
{}, // csrDependencies
|
||||
AuxTranslationDirection::None, // auxTranslationDirection
|
||||
|
@ -73,6 +74,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
|
|||
} else {
|
||||
return {
|
||||
nullptr, // outputTimestampPacket
|
||||
nullptr, // multiRootDeviceEventSync
|
||||
blitDirection, // blitDirection
|
||||
{}, // csrDependencies
|
||||
AuxTranslationDirection::None, // auxTranslationDirection
|
||||
|
@ -104,6 +106,7 @@ BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *ds
|
|||
|
||||
return {
|
||||
nullptr, // outputTimestampPacket
|
||||
nullptr, // multiRootDeviceEventSync
|
||||
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
|
||||
{}, // csrDependencies
|
||||
AuxTranslationDirection::None, // auxTranslationDirection
|
||||
|
@ -128,6 +131,7 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati
|
|||
auto allocationSize = allocation->getUnderlyingBufferSize();
|
||||
return {
|
||||
nullptr, // outputTimestampPacket
|
||||
nullptr, // multiRootDeviceEventSync
|
||||
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
|
||||
{}, // csrDependencies
|
||||
auxTranslationDirection, // auxTranslationDirection
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -61,6 +61,7 @@ struct BlitProperties {
|
|||
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
|
||||
|
||||
TagNodeBase *outputTimestampPacket = nullptr;
|
||||
TagNodeBase *multiRootDeviceEventSync = nullptr;
|
||||
BlitterConstants::BlitDirection blitDirection = BlitterConstants::BlitDirection::BufferToHostPtr;
|
||||
CsrDependencies csrDependencies;
|
||||
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
||||
|
|
|
@ -127,7 +127,7 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandSize(const Vec3<size_t>
|
|||
|
||||
sizePerBlit += estimatePostBlitCommandSize();
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(csrDependencies) +
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(csrDependencies) +
|
||||
(sizePerBlit * nBlits) +
|
||||
timestampCmdSize +
|
||||
estimatePreBlitCommandSize();
|
||||
|
@ -143,6 +143,9 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropert
|
|||
auto isImage = blitProperties.isImageOperation();
|
||||
size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandSize(blitProperties.copySize, blitProperties.csrDependencies, updateTimestampPacket,
|
||||
profilingEnabled, isImage, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
|
||||
if (blitProperties.multiRootDeviceEventSync != nullptr) {
|
||||
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
|
||||
}
|
||||
}
|
||||
size += BlitCommandsHelper<GfxFamily>::getWaCmdsSize(blitPropertiesContainer);
|
||||
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo());
|
||||
|
|
|
@ -144,17 +144,11 @@ struct TimestampPacketHelper {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programCsrDependenciesForForTaskCountContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
|
||||
auto &taskCountContainer = csrDependencies.taskCountContainer;
|
||||
|
||||
for (auto &[taskCountPreviousRootDevice, tagAddressPreviousRootDevice] : taskCountContainer) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream,
|
||||
static_cast<uint64_t>(tagAddressPreviousRootDevice),
|
||||
static_cast<uint32_t>(taskCountPreviousRootDevice),
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
// Programs a semaphore (via programSemaphore) for every node of every container stored in
// csrDependencies.multiRootTimeStampSyncContainer, appending the commands to cmdStream.
static void programCsrDependenciesForForMultiRootDeviceSyncContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
    const auto &syncContainers = csrDependencies.multiRootTimeStampSyncContainer;
    for (auto *syncContainer : syncContainers) {
        for (auto &syncNode : syncContainer->peekNodes()) {
            TimestampPacketHelper::programSemaphore<GfxFamily>(cmdStream, *syncNode);
        }
    }
}
|
||||
|
||||
|
@ -217,8 +211,8 @@ struct TimestampPacketHelper {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForTaskCountContainer(const CsrDependencies &csrDependencies) {
|
||||
return csrDependencies.taskCountContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
static size_t getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(const CsrDependencies &csrDependencies) {
|
||||
return csrDependencies.multiRootTimeStampSyncContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
||||
|
||||
#include <optional>
|
||||
|
@ -94,6 +95,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
|||
};
|
||||
|
||||
TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; }
|
||||
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr<TagAllocatorBase>(nullptr); }
|
||||
|
||||
CompletionStamp flushTask(
|
||||
LinearStream &commandStream,
|
||||
|
|
|
@ -15,10 +15,13 @@
|
|||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/fixtures/command_stream_receiver_fixture.inl"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/helpers/batch_buffer_helper.h"
|
||||
|
@ -32,6 +35,7 @@
|
|||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_internal_allocation_storage.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
@ -2463,3 +2467,89 @@ HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingTbxModeThenE
|
|||
ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX_WITH_AUB;
|
||||
EXPECT_TRUE(ultCsr.isTbxMode());
|
||||
}
|
||||
|
||||
// Verifies that an allocator created for root devices {0, 1} reports 1 as its highest root device index.
HWTEST_F(CommandStreamReceiverHwTest, GivenTwoRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForTwoDevicesCreated) {
    // Local accessor type that re-exports maxRootDeviceIndex so the test can read it.
    class MockTagAllocatorBase : public TagAllocatorBase {
      public:
        using TagAllocatorBase::maxRootDeviceIndex;
    };

    // Execution environment with two root devices; the raw pointer is released and passed by reference.
    auto environment = std::make_unique<MockExecutionEnvironment>(defaultHwInfo.get(), true, 2u);
    auto rootDevices = DeviceFactory::createDevices(*environment.release());

    const RootDeviceIndicesContainer requestedIndices = {0u, 1u};
    auto defaultCsr = rootDevices[0]->getDefaultEngine().commandStreamReceiver;
    auto multiRootAllocator = defaultCsr->createMultiRootDeviceTimestampPacketAllocator(requestedIndices);

    auto inspectableAllocator = reinterpret_cast<MockTagAllocatorBase *>(multiRootAllocator.get());
    EXPECT_EQ(inspectableAllocator->maxRootDeviceIndex, 1u);
}
|
||||
// Verifies that an allocator created for five root devices (indices 0..4) reports 4 as its
// highest root device index. The previous body set up only four devices, which did not match
// the scenario named by the test.
HWTEST_F(CommandStreamReceiverHwTest, GivenFiveRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForFiveDevicesCreated) {
    // Local accessor type that re-exports maxRootDeviceIndex so the test can read it.
    class MockTagAllocatorBase : public TagAllocatorBase {
      public:
        using TagAllocatorBase::maxRootDeviceIndex;
    };

    // Execution environment with five root devices; the raw pointer is released and passed by
    // reference (presumably the created devices keep the environment alive - confirm).
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>(defaultHwInfo.get(), true, 5u);
    auto devices = DeviceFactory::createDevices(*executionEnvironment.release());

    const RootDeviceIndicesContainer indices = {0u, 1u, 2u, 3u, 4u};
    auto csr = devices[0]->getDefaultEngine().commandStreamReceiver;
    auto allocator = csr->createMultiRootDeviceTimestampPacketAllocator(indices);

    // Fail cleanly instead of dereferencing a null allocator below.
    ASSERT_NE(nullptr, allocator.get());
    EXPECT_EQ(4u, reinterpret_cast<MockTagAllocatorBase *>(allocator.get())->maxRootDeviceIndex);
}
|
||||
// Verifies that flushBcsTask programs an MI_FLUSH_DW whose destination is the context-end
// address of the multi-root-device sync node attached to the blit properties.
HWTEST_F(CommandStreamReceiverHwTest, givenMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushAdded) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto tagAllocator = std::make_unique<MockTagAllocator<>>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u);

    // The CSR tag allocation serves as both the blit source allocation and the host pointer.
    auto tagAllocation = ultCsr.getTagAllocation();
    auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
                                                                          ultCsr, tagAllocation, nullptr,
                                                                          tagAllocation->getUnderlyingBuffer(),
                                                                          tagAllocation->getGpuAddress(), 0,
                                                                          0, 0, 0, 0, 0, 0, 0);

    // Attach the sync node that the flush is expected to signal.
    auto syncNode = tagAllocator->getTag();
    blitProperties.multiRootDeviceEventSync = syncNode;

    BlitPropertiesContainer blitContainer;
    blitContainer.push_back(blitProperties);
    ultCsr.flushBcsTask(blitContainer, true, false, *pDevice);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(ultCsr.commandStream, 0);

    // Walk every MI_FLUSH_DW in the parsed stream looking for the node's context-end address.
    const auto cmdListEnd = hwParser.cmdList.end();
    const auto expectedAddress = syncNode->getGpuAddress() + syncNode->getContextEndOffset();
    bool nodeAddressFound = false;
    for (auto flushIt = find<MI_FLUSH_DW *>(hwParser.cmdList.begin(), cmdListEnd);
         flushIt != cmdListEnd;
         flushIt = find<MI_FLUSH_DW *>(++flushIt, cmdListEnd)) {
        if (genCmdCast<MI_FLUSH_DW *>(*flushIt)->getDestinationAddress() == expectedAddress) {
            nodeAddressFound = true;
            break;
        }
    }
    EXPECT_TRUE(nodeAddressFound);
}
|
||||
// Verifies that flushBcsTask does not program an MI_FLUSH_DW targeting a tag node when the
// blit properties carry no multi-root-device sync node.
HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushNotAdded) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto tagAllocator = std::make_unique<MockTagAllocator<>>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u);

    // The CSR tag allocation serves as both the blit source allocation and the host pointer.
    auto tagAllocation = ultCsr.getTagAllocation();
    auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
                                                                          ultCsr, tagAllocation, nullptr,
                                                                          tagAllocation->getUnderlyingBuffer(),
                                                                          tagAllocation->getGpuAddress(), 0,
                                                                          0, 0, 0, 0, 0, 0, 0);

    // This node is deliberately NOT attached to the blit properties; it only supplies the
    // address that must be absent from the programmed commands.
    auto unusedNode = tagAllocator->getTag();

    BlitPropertiesContainer blitContainer;
    blitContainer.push_back(blitProperties);
    ultCsr.flushBcsTask(blitContainer, true, false, *pDevice);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(ultCsr.commandStream, 0);

    // Walk every MI_FLUSH_DW in the parsed stream; none may target the unattached node.
    const auto cmdListEnd = hwParser.cmdList.end();
    const auto unexpectedAddress = unusedNode->getGpuAddress() + unusedNode->getContextEndOffset();
    bool nodeAddressFound = false;
    for (auto flushIt = find<MI_FLUSH_DW *>(hwParser.cmdList.begin(), cmdListEnd);
         flushIt != cmdListEnd;
         flushIt = find<MI_FLUSH_DW *>(++flushIt, cmdListEnd)) {
        if (genCmdCast<MI_FLUSH_DW *>(*flushIt)->getDestinationAddress() == unexpectedAddress) {
            nodeAddressFound = true;
            break;
        }
    }
    EXPECT_FALSE(nodeAddressFound);
}
|
|
@ -15,6 +15,7 @@
|
|||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
||||
|
@ -663,3 +664,25 @@ HWTEST2_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlu
|
|||
auto cmdIterator = find<typename FamilyType::MI_FLUSH_DW *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_EQ(hwParser.cmdList.end(), cmdIterator);
|
||||
}
|
||||
|
||||
// Estimates the blit command size for three blits without a multi-root-device sync node and for
// three blits that carry one, and expects the two estimates to differ.
// NOTE(review): the test name refers to DispatchPreBlitCommand / MI_FLUSH_DW, while the body only
// exercises estimateBlitCommandsSize - confirm whether the name should be updated.
HWTEST_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlushDwIsProgramed) {
    constexpr auto blitCount = 3u;

    auto tagAllocator = std::make_unique<MockTagAllocator<>>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u);
    auto syncNode = tagAllocator->getTag();

    BlitProperties blitProperties{};
    blitProperties.copySize = {1, 1, 1};

    // Baseline: identical blits with no sync node set.
    BlitPropertiesContainer blitsWithoutNode;
    for (auto i = 0u; i < blitCount; i++) {
        blitsWithoutNode.push_back(blitProperties);
    }
    auto sizeWithoutNode = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
        blitsWithoutNode, false, true, false, pDevice->getRootDeviceEnvironment());

    // Same blits, now each one references the sync node.
    blitProperties.multiRootDeviceEventSync = syncNode;
    BlitPropertiesContainer blitsWithNode;
    for (auto i = 0u; i < blitCount; i++) {
        blitsWithNode.push_back(blitProperties);
    }
    auto sizeWithNode = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
        blitsWithNode, false, true, false, pDevice->getRootDeviceEnvironment());

    EXPECT_NE(sizeWithoutNode, sizeWithNode);
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -301,3 +301,35 @@ HWTEST_F(DeviceTimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacke
|
|||
|
||||
EXPECT_FALSE(tag->canBeReleased());
|
||||
}
|
||||
|
||||
// Fixture alias for the TimestampPacketHelper tests; they run on Test<DeviceFixture> and use its pDevice.
using TimestampPacketHelperTests = Test<DeviceFixture>;
|
||||
|
||||
// With one tag node in the multi-root sync container, programming the dependencies is expected
// to consume exactly sizeof(MI_SEMAPHORE_WAIT) bytes of the command stream.
HWTEST_F(TimestampPacketHelperTests, givenTagNodesInMultiRootSyncContainerWhenProgramingDependensiecThenSemaforesAreProgrammed) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    StackVec<char, 4096> storage(4096);
    LinearStream stream(storage.begin(), storage.size());
    CsrDependencies dependencies;

    // Declaration order is kept as-is: the container is declared after the allocator that provides its tag.
    auto tagAllocator = std::make_unique<MockTagAllocator<>>(0, pDevice->getMemoryManager());
    TimestampPacketContainer syncContainer{};
    syncContainer.add(tagAllocator->getTag());
    dependencies.multiRootTimeStampSyncContainer.push_back(&syncContainer);

    TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(stream, dependencies);

    EXPECT_EQ(stream.getUsed(), sizeof(MI_SEMAPHORE_WAIT));
}
|
||||
|
||||
// A registered sync container that holds no tag nodes is expected to leave the command stream untouched.
HWTEST_F(TimestampPacketHelperTests, givenEmptyContainerMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) {
    StackVec<char, 4096> storage(4096);
    LinearStream stream(storage.begin(), storage.size());
    CsrDependencies dependencies;

    // The container is pushed without any node added to it.
    TimestampPacketContainer emptyContainer{};
    dependencies.multiRootTimeStampSyncContainer.push_back(&emptyContainer);

    TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(stream, dependencies);

    EXPECT_EQ(stream.getUsed(), 0u);
}
|
||||
|
||||
// With nothing pushed into the multi-root sync container list, programming the dependencies
// is expected to leave the command stream untouched.
HWTEST_F(TimestampPacketHelperTests, givenEmptyMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) {
    StackVec<char, 4096> storage(4096);
    LinearStream stream(storage.begin(), storage.size());
    CsrDependencies dependencies;

    TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(stream, dependencies);

    EXPECT_EQ(stream.getUsed(), 0u);
}
|
Loading…
Reference in New Issue