mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Use MI_SEMAPHORE_WAIT command for event synchronization
Related-To: NEO-5508 Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
2b956651a7
commit
b01b8ba5ac
@ -585,46 +585,6 @@ bool CommandQueue::validateCapabilityForOperation(cl_command_queue_capabilities_
|
||||
return operationValid && waitListValid && outEventValid;
|
||||
}
|
||||
|
||||
/**
 * Handles wait-list events that were produced by queues on other root devices.
 *
 * For every root device index of the context other than this queue's own, the
 * wait list is scanned for events whose command queue lives on that root device.
 * The highest task count seen among them is passed to
 * waitForCompletionWithTimeout(false, 0, taskCount) on the command stream receiver
 * of the last matching event's queue.
 *
 * @param numEventsInWaitList                   number of entries in eventWaitList
 * @param eventWaitList                         events the caller wants to wait for
 * @param waitListCurrentRootDeviceIndex        output: when at least one event from another
 *                                              root device was found, receives the events that
 *                                              have no command queue or whose queue is on this
 *                                              queue's root device; otherwise left untouched
 * @param isEventWaitListFromPreviousRootDevice output: true when any event came from a queue
 *                                              on a different root device
 */
void CommandQueue::waitForEventsFromDifferentRootDeviceIndex(cl_uint numEventsInWaitList, const cl_event *eventWaitList,
                                                             StackVec<cl_event, 8> &waitListCurrentRootDeviceIndex, bool &isEventWaitListFromPreviousRootDevice) {
    isEventWaitListFromPreviousRootDevice = false;

    for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
        // Only foreign root devices are of interest in this pass.
        if (this->getDevice().getRootDeviceIndex() == rootDeviceIndex) {
            continue;
        }

        CommandQueue *foreignQueue = nullptr;
        auto highestForeignTaskCount = 0u;

        for (auto idx = 0u; idx < numEventsInWaitList; idx++) {
            auto waitEvent = castToObject<Event>(eventWaitList[idx]);
            auto producerQueue = waitEvent->getCommandQueue();

            if (!producerQueue || producerQueue->getDevice().getRootDeviceIndex() != rootDeviceIndex) {
                continue;
            }

            highestForeignTaskCount = std::max(highestForeignTaskCount, waitEvent->peekTaskCount());
            foreignQueue = producerQueue;
            isEventWaitListFromPreviousRootDevice = true;
        }

        // A zero task count means nothing to wait for on this root device.
        if (highestForeignTaskCount != 0u) {
            foreignQueue->getCommandStreamReceiver(false).waitForCompletionWithTimeout(false, 0, highestForeignTaskCount);
        }
    }

    if (!isEventWaitListFromPreviousRootDevice) {
        return;
    }

    // Build the reduced wait list: events without a queue, plus events from this root device.
    for (auto idx = 0u; idx < numEventsInWaitList; idx++) {
        auto waitEvent = castToObject<Event>(eventWaitList[idx]);
        auto producerQueue = waitEvent->getCommandQueue();

        const bool keepForThisRootDevice =
            !producerQueue ||
            producerQueue->getDevice().getRootDeviceIndex() == this->getDevice().getRootDeviceIndex();

        if (keepForThisRootDevice) {
            waitListCurrentRootDeviceIndex.push_back(static_cast<cl_event>(eventWaitList[idx]));
        }
    }
}
|
||||
|
||||
cl_uint CommandQueue::getQueueFamilyIndex() const {
|
||||
if (isQueueFamilySelected()) {
|
||||
return queueFamilyIndex;
|
||||
|
@ -303,8 +303,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
bool validateCapability(cl_command_queue_capabilities_intel capability) const;
|
||||
bool validateCapabilitiesForEventWaitList(cl_uint numEventsInWaitList, const cl_event *waitList) const;
|
||||
bool validateCapabilityForOperation(cl_command_queue_capabilities_intel capability, cl_uint numEventsInWaitList, const cl_event *waitList, const cl_event *outEvent) const;
|
||||
void waitForEventsFromDifferentRootDeviceIndex(cl_uint numEventsInWaitList, const cl_event *eventWaitList,
|
||||
StackVec<cl_event, 8> &waitListCurrentRootDeviceIndex, bool &isEventWaitListFromPreviousRootDevice);
|
||||
cl_uint getQueueFamilyIndex() const;
|
||||
cl_uint getQueueIndexWithinFamily() const { return queueIndexWithinFamily; }
|
||||
bool isQueueFamilySelected() const { return queueFamilySelected; }
|
||||
|
@ -159,16 +159,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
return;
|
||||
}
|
||||
|
||||
StackVec<cl_event, 8> waitListCurrentRootDeviceIndex;
|
||||
bool isEventWaitListFromPreviousRootDevice = false;
|
||||
|
||||
if (context->getRootDeviceIndices().size() > 1u) {
|
||||
waitForEventsFromDifferentRootDeviceIndex(numEventsInWaitList, eventWaitList, waitListCurrentRootDeviceIndex, isEventWaitListFromPreviousRootDevice);
|
||||
}
|
||||
|
||||
const cl_event *eventWaitListCurrentRootDevice = isEventWaitListFromPreviousRootDevice ? waitListCurrentRootDeviceIndex.data() : eventWaitList;
|
||||
cl_uint numEventsInWaitListCurrentRootDevice = isEventWaitListFromPreviousRootDevice ? static_cast<cl_uint>(waitListCurrentRootDeviceIndex.size()) : numEventsInWaitList;
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
@ -187,7 +177,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
auto blockQueue = false;
|
||||
auto taskLevel = 0u;
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitListCurrentRootDevice, eventWaitListCurrentRootDevice, blockQueue, commandType);
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
|
||||
|
||||
if (parentKernel && !blockQueue) {
|
||||
while (!devQueueHw->isEMCriticalSectionFree())
|
||||
@ -203,14 +193,16 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
|
||||
TimestampPacketDependencies timestampPacketDependencies;
|
||||
EventsRequest eventsRequest(numEventsInWaitListCurrentRootDevice, eventWaitListCurrentRootDevice, event);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
CsrDependencies csrDeps;
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, getGpgpuCommandStreamReceiver());
|
||||
|
||||
bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo);
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventsRequest.fillCsrDependencies(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
|
||||
|
||||
size_t nodesCount = 0u;
|
||||
@ -227,7 +219,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
if (nodesCount > 0) {
|
||||
obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, false);
|
||||
csrDeps.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
}
|
||||
}
|
||||
|
||||
@ -235,6 +227,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, csrDeps);
|
||||
|
||||
if (enqueueWithBlitAuxTranslation) {
|
||||
processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies,
|
||||
eventsRequest, blockQueue);
|
||||
@ -269,7 +263,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
if (flushDependenciesForNonKernelCommand) {
|
||||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, csrDeps, getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(
|
||||
commandStream,
|
||||
csrDeps,
|
||||
getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
}
|
||||
}
|
||||
|
||||
@ -325,10 +322,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
auto maxTaskCountCurrentRootDevice = this->taskCount;
|
||||
|
||||
for (auto eventId = 0u; eventId < numEventsInWaitListCurrentRootDevice; eventId++) {
|
||||
auto event = castToObject<Event>(eventWaitListCurrentRootDevice[eventId]);
|
||||
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
|
||||
auto event = castToObject<Event>(eventWaitList[eventId]);
|
||||
|
||||
if (!event->isUserEvent() && !event->isExternallySynchronized()) {
|
||||
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() == this->getDevice().getRootDeviceIndex()) {
|
||||
maxTaskCountCurrentRootDevice = std::max(maxTaskCountCurrentRootDevice, event->peekTaskCount());
|
||||
}
|
||||
}
|
||||
@ -467,12 +464,12 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
|
||||
auto blitProperties = ClBlitProperties::constructProperties(blitDirection, *blitCommandStreamReceiver,
|
||||
multiDispatchInfo.peekBuiltinOpParams());
|
||||
if (!queueBlocked) {
|
||||
eventsRequest.fillCsrDependencies(blitProperties.csrDependencies, *blitCommandStreamReceiver,
|
||||
CsrDependencies::DependenciesType::All);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(blitProperties.csrDependencies, *blitCommandStreamReceiver,
|
||||
CsrDependencies::DependenciesType::All);
|
||||
|
||||
blitProperties.csrDependencies.push_back(×tampPacketDependencies.cacheFlushNodes);
|
||||
blitProperties.csrDependencies.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.push_back(×tampPacketDependencies.barrierNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.cacheFlushNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.barrierNodes);
|
||||
}
|
||||
|
||||
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
||||
@ -537,7 +534,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const Multi
|
||||
|
||||
if (!queueBlocked) {
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependencies(csrDeps, *getBcsCommandStreamReceiver(), CsrDependencies::DependenciesType::All);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *getBcsCommandStreamReceiver(), CsrDependencies::DependenciesType::All);
|
||||
BlitProperties::setupDependenciesForAuxTranslation(blitPropertiesContainer, timestampPacketDependencies,
|
||||
*this->timestampPacketContainer, csrDeps,
|
||||
getGpgpuCommandStreamReceiver(), *getBcsCommandStreamReceiver());
|
||||
@ -550,7 +547,10 @@ void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
||||
LinearStream *commandStream,
|
||||
CsrDependencies &csrDeps) {
|
||||
|
||||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(*commandStream, csrDeps, getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(
|
||||
*commandStream,
|
||||
csrDeps,
|
||||
getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
|
||||
uint64_t postSyncAddress = 0;
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
@ -813,7 +813,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
@ -1027,7 +1027,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
1u); //numDevicesInContext
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
@ -1106,7 +1106,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
CsrDependencies csrDeps;
|
||||
|
||||
eventsRequest.fillCsrDependencies(csrDeps, *getBcsCommandStreamReceiver(), CsrDependencies::DependenciesType::All);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *getBcsCommandStreamReceiver(), CsrDependencies::DependenciesType::All);
|
||||
auto allocator = getBcsCommandStreamReceiver()->getTimestampPacketAllocator();
|
||||
|
||||
if (isCacheFlushForBcsRequired() && isGpgpuSubmissionForBcsRequired(blockQueue)) {
|
||||
@ -1118,7 +1118,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
}
|
||||
|
||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, true);
|
||||
csrDeps.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
|
||||
LinearStream *gpgpuCommandStream = {};
|
||||
size_t gpgpuCommandStreamStart = {};
|
||||
|
@ -219,6 +219,9 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
|
||||
expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(csrDeps);
|
||||
|
||||
return expectedSizeCS;
|
||||
}
|
||||
|
||||
|
@ -107,7 +107,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
}
|
||||
|
||||
auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices();
|
||||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(*commandStream, csrDependencies, numSupportedDevices);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDependencies, numSupportedDevices);
|
||||
|
||||
dsh->align(EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -17,7 +17,7 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
void EventsRequest::fillCsrDependencies(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const {
|
||||
void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const {
|
||||
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
|
||||
if (event->isUserEvent()) {
|
||||
@ -35,7 +35,26 @@ void EventsRequest::fillCsrDependencies(CsrDependencies &csrDeps, CommandStreamR
|
||||
(CsrDependencies::DependenciesType::All == depsType);
|
||||
|
||||
if (pushDependency) {
|
||||
csrDeps.push_back(timestampPacketContainer);
|
||||
csrDeps.timestampPacketContainer.push_back(timestampPacketContainer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const {
|
||||
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
|
||||
if (event->isUserEvent()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
|
||||
auto taskCountPreviousRootDevice = event->peekTaskCount();
|
||||
auto tagAddressPreviousRootDevice = event->getCommandQueue()->getCommandStreamReceiver(false).getTagAddress();
|
||||
|
||||
csrDeps.taskCountContainer.push_back({taskCountPreviousRootDevice, reinterpret_cast<uint64_t>(tagAddressPreviousRootDevice)});
|
||||
|
||||
auto graphicsAllocation = event->getCommandQueue()->getCommandStreamReceiver(false).getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
|
||||
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -43,7 +62,6 @@ void EventsRequest::fillCsrDependencies(CsrDependencies &csrDeps, CommandStreamR
|
||||
TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking,
|
||||
size_t *offsetPtr, size_t *sizePtr, void *ptr, bool doTransferOnCpu, uint32_t rootDeviceIndex)
|
||||
: memObj(memObj), ptr(ptr), cmdType(cmdType), mapFlags(mapFlags), blocking(blocking), doTransferOnCpu(doTransferOnCpu) {
|
||||
|
||||
// no size or offset passed for unmap operation
|
||||
if (cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) {
|
||||
if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) {
|
||||
|
@ -24,7 +24,8 @@ struct EventsRequest {
|
||||
EventsRequest(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *outEvent)
|
||||
: numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {}
|
||||
|
||||
void fillCsrDependencies(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const;
|
||||
void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const;
|
||||
void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const;
|
||||
|
||||
cl_uint numEventsInWaitList;
|
||||
const cl_event *eventWaitList;
|
||||
|
@ -205,7 +205,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||
auto &bcsCsr = *commandQueue.getBcsCommandStreamReceiver();
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependencies(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
|
||||
BlitProperties::setupDependenciesForAuxTranslation(kernelOperation->blitPropertiesContainer, *timestampPacketDependencies,
|
||||
*currentTimestampPacketNodes, csrDeps,
|
||||
@ -246,7 +246,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
kernel->areMultipleSubDevicesInContext()); //areMultipleSubDevicesInContext
|
||||
|
||||
if (timestampPacketDependencies) {
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies->barrierNodes;
|
||||
}
|
||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
|
||||
@ -303,10 +303,10 @@ void CommandWithoutKernel::dispatchBlitOperation() {
|
||||
|
||||
UNRECOVERABLE_IF(kernelOperation->blitPropertiesContainer.size() != 1);
|
||||
auto &blitProperties = *kernelOperation->blitPropertiesContainer.begin();
|
||||
eventsRequest.fillCsrDependencies(blitProperties.csrDependencies, *bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
blitProperties.csrDependencies.push_back(×tampPacketDependencies->cacheFlushNodes);
|
||||
blitProperties.csrDependencies.push_back(×tampPacketDependencies->previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.push_back(×tampPacketDependencies->barrierNodes);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(blitProperties.csrDependencies, *bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->cacheFlushNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->barrierNodes);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
|
||||
|
||||
auto bcsTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled());
|
||||
@ -372,7 +372,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
|
||||
UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled());
|
||||
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
makeTimestampPacketsResident(commandStreamReceiver);
|
||||
|
||||
gtpinNotifyPreFlushTask(&commandQueue);
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_svm_manager.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
@ -1944,7 +1946,11 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenGpuIsIdleWhenCsrIsEnabledToFl
|
||||
*commandStreamReceiver.getTagAddress() = 2u;
|
||||
}
|
||||
|
||||
TEST(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToMarkerThenCsrsAreWaitingForEventsFromPreviousDevices) {
|
||||
using MultiRootDeviceCommandStreamReceiverTests = CommandStreamReceiverFlushTaskTests;
|
||||
|
||||
HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithoutSubmissionThenCsIsWaitingForEventsFromPreviousDevices) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto deviceFactory = std::make_unique<UltClDeviceFactory>(4, 0);
|
||||
auto device1 = deviceFactory->rootDevices[1];
|
||||
auto device2 = deviceFactory->rootDevices[2];
|
||||
@ -1968,11 +1974,10 @@ TEST(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDev
|
||||
|
||||
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
|
||||
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
|
||||
Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
|
||||
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
|
||||
Event event6(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21);
|
||||
Event event7(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
|
||||
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
|
||||
Event event5(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21);
|
||||
Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
|
||||
UserEvent userEvent1(&pCmdQ1->getContext());
|
||||
UserEvent userEvent2(&pCmdQ2->getContext());
|
||||
|
||||
@ -1987,42 +1992,34 @@ TEST(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDev
|
||||
&event4,
|
||||
&event5,
|
||||
&event6,
|
||||
&event7,
|
||||
&userEvent1,
|
||||
&userEvent2,
|
||||
};
|
||||
|
||||
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
|
||||
|
||||
{
|
||||
cl_event eventWaitList[] =
|
||||
{
|
||||
&event1,
|
||||
&event3,
|
||||
&event4,
|
||||
};
|
||||
|
||||
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
|
||||
|
||||
pCmdQ1->enqueueMarkerWithWaitList(
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(0u, mockCsr1->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(0u, mockCsr2->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(0u, mockCsr3->waitForCompletionWithTimeoutCalled);
|
||||
}
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
{
|
||||
pCmdQ1->enqueueMarkerWithWaitList(
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
nullptr);
|
||||
EXPECT_EQ(3u, semaphores.size());
|
||||
|
||||
EXPECT_EQ(0u, mockCsr1->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(1u, mockCsr2->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(1u, mockCsr3->waitForCompletionWithTimeoutCalled);
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
|
||||
EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
{
|
||||
@ -2031,20 +2028,250 @@ TEST(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDev
|
||||
eventWaitList,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(1u, mockCsr1->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(1u, mockCsr2->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(2u, mockCsr3->waitForCompletionWithTimeoutCalled);
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ2->getCS(0));
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
EXPECT_EQ(3u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
|
||||
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
|
||||
|
||||
auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
|
||||
EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
{
|
||||
cl_event eventWaitList[] =
|
||||
{
|
||||
&event1,
|
||||
&event2,
|
||||
&event5,
|
||||
&userEvent1,
|
||||
};
|
||||
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
|
||||
|
||||
pCmdQ3->enqueueMarkerWithWaitList(
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(2u, mockCsr1->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(2u, mockCsr2->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(2u, mockCsr3->waitForCompletionWithTimeoutCalled);
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ3->getCS(0));
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
EXPECT_EQ(1u, semaphores.size());
|
||||
|
||||
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
|
||||
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
}
|
||||
|
||||
using MultiRootDeviceCommandStreamReceiverBufferTests = MultiRootDeviceFixture;
|
||||
|
||||
// Enqueues a kernel on two queues of different root devices with a wait list that mixes
// events from both queues, a queue-less event and user events, then checks that each
// queue's command stream contains one MI_SEMAPHORE_WAIT per event coming from the
// other root device, carrying that event's task count and the producing CSR's tag address.
HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithSubmissionThenCsIsWaitingForEventsFromPreviousDevices) {
    REQUIRE_SVM_OR_SKIP(device1);
    REQUIRE_SVM_OR_SKIP(device2);

    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    cl_int retVal = 0;
    size_t offset = 0;
    size_t size = 1;

    auto pCmdQ1 = context.get()->getSpecialQueue(1u);
    auto pCmdQ2 = context.get()->getSpecialQueue(2u);

    std::unique_ptr<MockProgram> program(Program::createBuiltInFromSource<MockProgram>("FillBufferBytes", context.get(), context.get()->getDevices(), &retVal));
    program->build(program->getDevices(), nullptr, false);
    std::unique_ptr<MockKernel> kernel(Kernel::create<MockKernel>(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context.get()->getDevice(0), &retVal));

    size_t svmSize = 4096;
    // Owned by a smart pointer so the memory is released even when an ASSERT_* below aborts the test.
    auto svmDeleter = [](void *ptr) { alignedFree(ptr); };
    std::unique_ptr<void, decltype(svmDeleter)> svmStorage(alignedMalloc(svmSize, MemoryConstants::pageSize), svmDeleter);
    void *svmPtr = svmStorage.get();
    MockGraphicsAllocation svmAlloc(svmPtr, svmSize);

    // The last constructor argument is the value expected in the semaphore data dword below.
    Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
    Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
    Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
    UserEvent userEvent1(&pCmdQ1->getContext());
    UserEvent userEvent2(&pCmdQ2->getContext());

    userEvent1.setStatus(CL_COMPLETE);
    userEvent2.setStatus(CL_COMPLETE);

    cl_event eventWaitList[] =
        {
            &event1,
            &event2,
            &event3,
            &event4,
            &event5,
            &userEvent1,
            &userEvent2,
        };
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);

    {
        kernel->setSvmKernelExecInfo(&svmAlloc);

        retVal = pCmdQ1->enqueueKernel(
            kernel.get(),
            1,
            &offset,
            &size,
            &size,
            numEventsInWaitList,
            eventWaitList,
            nullptr);

        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Fatal assertion: the indexing below is only valid when both semaphores were found.
        ASSERT_EQ(2u, semaphores.size());

        auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
        EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());

        auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
        EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
    }

    {
        kernel->setSvmKernelExecInfo(&svmAlloc);

        retVal = pCmdQ2->enqueueKernel(
            kernel.get(),
            1,
            &offset,
            &size,
            &size,
            numEventsInWaitList,
            eventWaitList,
            nullptr);

        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Fatal assertion: the indexing below is only valid when both semaphores were found.
        ASSERT_EQ(2u, semaphores.size());

        auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
        EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());

        auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
        EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
    }
}
|
||||
|
||||
// Marker enqueue in a context spanning two root devices.
// The test expects that wait-list events bound to the queue being enqueued on (as well as
// queue-less and user events) add no MI_SEMAPHORE_WAIT to that queue's stream, while each
// event bound to the other queue adds exactly one, and that
// getRequiredCmdStreamSizeForTaskCountContainer reports a matching size.
HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToMarkerThenMiSemaphoreWaitCommandSizeIsIncluded) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    // Three root devices are created; only indices 1 and 2 take part in the test.
    auto deviceFactory = std::make_unique<UltClDeviceFactory>(3, 0);
    auto device1 = deviceFactory->rootDevices[1];
    auto device2 = deviceFactory->rootDevices[2];

    // NOTE(review): the mocks are handed to the devices and never deleted here, so
    // resetCommandStreamReceiver presumably takes ownership - confirm.
    auto mockCsr1 = new MockCommandStreamReceiver(*device1->executionEnvironment, device1->getRootDeviceIndex(), device1->getDeviceBitfield());
    auto mockCsr2 = new MockCommandStreamReceiver(*device2->executionEnvironment, device2->getRootDeviceIndex(), device2->getDeviceBitfield());

    device1->resetCommandStreamReceiver(mockCsr1);
    device2->resetCommandStreamReceiver(mockCsr2);

    cl_device_id devices[] = {device1, device2};

    auto context = std::make_unique<MockContext>(ClDeviceVector(devices, 2), false);

    auto pCmdQ1 = context.get()->getSpecialQueue(1u);
    auto pCmdQ2 = context.get()->getSpecialQueue(2u);

    MockKernelWithInternals mockKernel(ClDeviceVector(devices, 2));
    DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
    dispatchInfo.setKernel(mockKernel.mockKernel);
    multiDispatchInfo.push(dispatchInfo);

    // event2 has no command queue; events 1, 3 and 4 belong to pCmdQ1, events 5 and 6 to pCmdQ2.
    Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
    Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
    Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
    Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
    UserEvent userEvent1(&pCmdQ1->getContext());
    UserEvent userEvent2(&pCmdQ2->getContext());

    userEvent1.setStatus(CL_COMPLETE);
    userEvent2.setStatus(CL_COMPLETE);

    // Enqueues a marker on `queue` with the given wait list, then checks the number of
    // MI_SEMAPHORE_WAIT commands parsed from the queue's stream and the size reported for
    // the task-count dependencies filled from the same wait list.
    auto enqueueMarkerAndVerify = [](auto *queue, cl_uint eventCount, const cl_event *events, size_t expectedSemaphoreCount) {
        queue->enqueueMarkerWithWaitList(
            eventCount,
            events,
            nullptr);

        EventsRequest request(eventCount, events, nullptr);
        CsrDependencies dependencies;
        request.fillCsrDependenciesForTaskCountContainer(dependencies, queue->getCommandStreamReceiver(false));

        HardwareParse parser;
        parser.parseCommands<FamilyType>(queue->getCS(0));
        auto foundSemaphores = findAll<MI_SEMAPHORE_WAIT *>(parser.cmdList.begin(), parser.cmdList.end());

        EXPECT_EQ(expectedSemaphoreCount, foundSemaphores.size());
        EXPECT_EQ(expectedSemaphoreCount * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(dependencies));
    };

    {
        // Every queue-bound event in this list belongs to pCmdQ1 itself: no semaphores expected.
        cl_event sameQueueWaitList[] =
            {
                &event1,
                &event2,
                &event3,
                &event4,
                &userEvent1,
                &userEvent2,
            };
        cl_uint sameQueueEventCount = sizeof(sameQueueWaitList) / sizeof(sameQueueWaitList[0]);

        enqueueMarkerAndVerify(pCmdQ1, sameQueueEventCount, sameQueueWaitList, 0u);
    }

    {
        // Enqueued on pCmdQ2: events 1, 3 and 4 come from pCmdQ1, so three semaphores are expected.
        cl_event crossQueueWaitList[] =
            {
                &event1,
                &event2,
                &event3,
                &event4,
                &event5,
                &event6,
                &userEvent1,
            };
        cl_uint crossQueueEventCount = sizeof(crossQueueWaitList) / sizeof(crossQueueWaitList[0]);

        enqueueMarkerAndVerify(pCmdQ2, crossQueueEventCount, crossQueueWaitList, 3u);
    }
}
|
||||
|
||||
|
@ -804,8 +804,8 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd
|
||||
|
||||
MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
csrDependencies.push_back(&timestamp0);
|
||||
csrDependencies.push_back(&timestamp1);
|
||||
csrDependencies.timestampPacketContainer.push_back(&timestamp0);
|
||||
csrDependencies.timestampPacketContainer.push_back(&timestamp1);
|
||||
|
||||
size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
|
||||
|
||||
|
@ -220,8 +220,8 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph
|
||||
|
||||
MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
blitProperties.csrDependencies.push_back(&timestamp0);
|
||||
blitProperties.csrDependencies.push_back(&timestamp1);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestamp0);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestamp1);
|
||||
|
||||
blitBuffer(&csr, blitProperties, true);
|
||||
|
||||
@ -278,8 +278,8 @@ HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommands
|
||||
|
||||
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), 1);
|
||||
MockTimestampPacketContainer timestamp2(*csr.getTimestampPacketAllocator(), 1);
|
||||
blitProperties1.csrDependencies.push_back(&timestamp1);
|
||||
blitProperties2.csrDependencies.push_back(&timestamp2);
|
||||
blitProperties1.csrDependencies.timestampPacketContainer.push_back(&timestamp1);
|
||||
blitProperties2.csrDependencies.timestampPacketContainer.push_back(&timestamp2);
|
||||
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties1);
|
||||
@ -1248,8 +1248,8 @@ HWTEST_F(BcsTests, givenBlitterDirectSubmissionEnabledWhenProgrammingBlitterThen
|
||||
|
||||
MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
blitProperties.csrDependencies.push_back(&timestamp0);
|
||||
blitProperties.csrDependencies.push_back(&timestamp1);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestamp0);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestamp1);
|
||||
|
||||
blitBuffer(&csr, blitProperties, true);
|
||||
|
||||
@ -1564,4 +1564,4 @@ TEST(BcsConstantsTests, givenBlitConstantsThenTheyHaveDesiredValues) {
|
||||
EXPECT_EQ(BlitterConstants::maxBlitHeight, 0x4000u);
|
||||
EXPECT_EQ(BlitterConstants::maxBlitSetWidth, 0x1FF80u);
|
||||
EXPECT_EQ(BlitterConstants::maxBlitSetHeight, 0x1FFC0u);
|
||||
}
|
||||
}
|
||||
|
@ -449,14 +449,14 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat
|
||||
|
||||
EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependencies(
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(
|
||||
csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0);
|
||||
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
for (auto timestampPacketContainer : csrDeps) {
|
||||
for (auto timestampPacketContainer : csrDeps.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(*node);
|
||||
}
|
||||
@ -499,13 +499,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
|
||||
|
||||
EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependencies(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0);
|
||||
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
for (auto timestampPacketContainer : csrDeps) {
|
||||
for (auto timestampPacketContainer : csrDeps.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(*node);
|
||||
}
|
||||
@ -531,8 +531,8 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWithEventsWithoutTimestampsWhen
|
||||
|
||||
EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr);
|
||||
CsrDependencies csrDepsEmpty;
|
||||
eventsRequest.fillCsrDependencies(csrDepsEmpty, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
EXPECT_EQ(0u, csrDepsEmpty.size());
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDepsEmpty, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
EXPECT_EQ(0u, csrDepsEmpty.timestampPacketContainer.size());
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
|
||||
@ -559,12 +559,12 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWithEventsWithoutTimestampsWhen
|
||||
cl_event waitlist2[] = {&event1, &eventWithEmptyTimestampContainer2, &event3, &eventWithEmptyTimestampContainer4, &event5};
|
||||
EventsRequest eventsRequest2(numEventsOnWaitlist, waitlist2, nullptr);
|
||||
CsrDependencies csrDepsSize3;
|
||||
eventsRequest2.fillCsrDependencies(csrDepsSize3, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
eventsRequest2.fillCsrDependenciesForTimestampPacketContainer(csrDepsSize3, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
EXPECT_EQ(3u, csrDepsSize3.size());
|
||||
EXPECT_EQ(3u, csrDepsSize3.timestampPacketContainer.size());
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
for (auto timestampPacketContainer : csrDepsSize3) {
|
||||
for (auto timestampPacketContainer : csrDepsSize3.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(*node);
|
||||
}
|
||||
@ -794,11 +794,11 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForCsrT
|
||||
|
||||
auto sizeWithoutEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
eventsRequest.fillCsrDependencies(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr);
|
||||
auto sizeWithEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
for (auto timestampPacketContainer : flags.csrDependencies) {
|
||||
for (auto timestampPacketContainer : flags.csrDependencies.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(*node);
|
||||
}
|
||||
@ -842,11 +842,11 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDiff
|
||||
|
||||
auto sizeWithoutEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
eventsRequest.fillCsrDependencies(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr);
|
||||
auto sizeWithEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
size_t sizeForNodeDependency = 0;
|
||||
for (auto timestampPacketContainer : flags.csrDependencies) {
|
||||
for (auto timestampPacketContainer : flags.csrDependencies.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(*node);
|
||||
}
|
||||
@ -991,8 +991,8 @@ HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDiffere
|
||||
EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr);
|
||||
|
||||
CsrDependencies csrDependencies;
|
||||
eventsRequest.fillCsrDependencies(csrDependencies, csr1, CsrDependencies::DependenciesType::All);
|
||||
EXPECT_EQ(static_cast<size_t>(eventsOnWaitlist), csrDependencies.size());
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDependencies, csr1, CsrDependencies::DependenciesType::All);
|
||||
EXPECT_EQ(static_cast<size_t>(eventsOnWaitlist), csrDependencies.timestampPacketContainer.size());
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
|
||||
@ -1177,7 +1177,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
|
||||
|
||||
EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependencies(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
@ -1260,7 +1260,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
|
||||
|
||||
EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependencies(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
@ -1769,12 +1769,12 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWi
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0), 0);
|
||||
|
||||
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
|
||||
auto expectedQueueSemaphoresCount = 1u;
|
||||
auto expectedQueueSemaphoresCount = 2u;
|
||||
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) {
|
||||
expectedQueueSemaphoresCount += 2;
|
||||
}
|
||||
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0), 0);
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), node1.getNode(0), 0);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) {
|
||||
@ -1812,12 +1812,12 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierW
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0), 0);
|
||||
|
||||
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
|
||||
auto expectedQueueSemaphoresCount = 1u;
|
||||
auto expectedQueueSemaphoresCount = 2u;
|
||||
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) {
|
||||
expectedQueueSemaphoresCount += 2;
|
||||
}
|
||||
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0), 0);
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), node1.getNode(0), 0);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) {
|
||||
|
@ -327,7 +327,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
|
||||
@ -1016,7 +1016,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||
programEnginePrologue(commandStream);
|
||||
|
||||
for (auto &blitProperties : blitPropertiesContainer) {
|
||||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
|
||||
if (blitProperties.outputTimestampPacket && profilingEnabled) {
|
||||
BlitCommandsHelper<GfxFamily>::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -12,7 +12,7 @@
|
||||
namespace NEO {
|
||||
|
||||
void CsrDependencies::makeResident(CommandStreamReceiver &commandStreamReceiver) const {
|
||||
for (auto &timestampPacketContainer : *this) {
|
||||
for (auto &timestampPacketContainer : timestampPacketContainer) {
|
||||
timestampPacketContainer->makeResident(commandStreamReceiver);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -13,7 +13,7 @@ namespace NEO {
|
||||
class TimestampPacketContainer;
|
||||
class CommandStreamReceiver;
|
||||
|
||||
class CsrDependencies : public StackVec<TimestampPacketContainer *, 32> {
|
||||
class CsrDependencies {
|
||||
public:
|
||||
enum class DependenciesType {
|
||||
OnCsr,
|
||||
@ -21,6 +21,9 @@ class CsrDependencies : public StackVec<TimestampPacketContainer *, 32> {
|
||||
All
|
||||
};
|
||||
|
||||
StackVec<std::pair<uint32_t, uint64_t>, 32> taskCountContainer;
|
||||
StackVec<TimestampPacketContainer *, 32> timestampPacketContainer;
|
||||
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -144,15 +144,15 @@ void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer
|
||||
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());
|
||||
|
||||
// wait for barrier and events before AuxToNonAux
|
||||
blitPropertiesContainer[0].csrDependencies.push_back(&timestampPacketDependencies.barrierNodes);
|
||||
blitPropertiesContainer[0].csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes);
|
||||
|
||||
for (auto dep : depsFromEvents) {
|
||||
blitPropertiesContainer[0].csrDependencies.push_back(dep);
|
||||
for (auto dep : depsFromEvents.timestampPacketContainer) {
|
||||
blitPropertiesContainer[0].csrDependencies.timestampPacketContainer.push_back(dep);
|
||||
}
|
||||
|
||||
// wait for NDR before NonAuxToAux
|
||||
blitPropertiesContainer[numObjects].csrDependencies.push_back(&timestampPacketDependencies.cacheFlushNodes);
|
||||
blitPropertiesContainer[numObjects].csrDependencies.push_back(&kernelTimestamps);
|
||||
blitPropertiesContainer[numObjects].csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.cacheFlushNodes);
|
||||
blitPropertiesContainer[numObjects].csrDependencies.timestampPacketContainer.push_back(&kernelTimestamps);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -6,8 +6,8 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/csr_deps.h"
|
||||
#include "shared/source/helpers/aux_translation.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
@ -183,14 +183,29 @@ struct TimestampPacketHelper {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programCsrDependencies(LinearStream &cmdStream, const CsrDependencies &csrDependencies, uint32_t numSupportedDevices) {
|
||||
for (auto timestampPacketContainer : csrDependencies) {
|
||||
static void programCsrDependenciesForTimestampPacketContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies, uint32_t numSupportedDevices) {
|
||||
for (auto timestampPacketContainer : csrDependencies.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node, numSupportedDevices);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programCsrDependenciesForForTaskCountContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
|
||||
auto taskCountContainer = csrDependencies.taskCountContainer;
|
||||
|
||||
for (auto &[taskCountPreviousRootDevice, tagAddressPreviousRootDevice] : taskCountContainer) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream,
|
||||
static_cast<uint64_t>(tagAddressPreviousRootDevice),
|
||||
taskCountPreviousRootDevice,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
|
||||
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
|
||||
const TimestampPacketDependencies *timestampPacketDependencies,
|
||||
@ -241,7 +256,7 @@ struct TimestampPacketHelper {
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSize(const CsrDependencies &csrDependencies) {
|
||||
size_t totalCommandsSize = 0;
|
||||
for (auto timestampPacketContainer : csrDependencies) {
|
||||
for (auto timestampPacketContainer : csrDependencies.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
totalCommandsSize += getRequiredCmdStreamSizeForNodeDependency<GfxFamily>(*node);
|
||||
}
|
||||
@ -249,6 +264,11 @@ struct TimestampPacketHelper {
|
||||
|
||||
return totalCommandsSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForTaskCountContainer(const CsrDependencies &csrDependencies) {
|
||||
return csrDependencies.taskCountContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
Reference in New Issue
Block a user