Optimize timestamp packet dependencies
- Clear dependencies even if last engine changed
- Do not program semaphore waiting for blit when blit is submitted with gpgpu
- Track barrier timestamps to correctly synchronize blits in OOQ

Related-To: NEO-6444
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
parent
f20236c7f2
commit
e3bb526067
|
@ -681,7 +681,6 @@ void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint
|
||||||
|
|
||||||
uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
|
uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
|
||||||
const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
|
const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
|
||||||
DEBUG_BREAK_IF(!state.isValid());
|
|
||||||
return state.taskCount;
|
return state.taskCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -706,10 +705,6 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp
|
||||||
|
|
||||||
previousNodes.swapNodes(*timestampPacketContainer);
|
previousNodes.swapNodes(*timestampPacketContainer);
|
||||||
|
|
||||||
if ((previousNodes.peekNodes().size() > 0) && (previousNodes.peekNodes()[0]->getAllocator() != allocator)) {
|
|
||||||
clearAllDependencies = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (clearAllDependencies) {
|
if (clearAllDependencies) {
|
||||||
previousNodes.moveNodesToNewContainer(*deferredTimestampPackets);
|
previousNodes.moveNodesToNewContainer(*deferredTimestampPackets);
|
||||||
}
|
}
|
||||||
|
@ -1007,4 +1002,61 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CommandQueue::setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies &timestampPacketDependencies) {
|
||||||
|
if (!getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure we have exactly 1 barrier node.
|
||||||
|
if (timestampPacketDependencies.barrierNodes.peekNodes().empty()) {
|
||||||
|
timestampPacketDependencies.barrierNodes.add(getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isOOQEnabled()) {
|
||||||
|
// Barrier node will be signalled on gpgpuCsr. Save it for later use on blitters.
|
||||||
|
for (auto currentBcsIndex = 0u; currentBcsIndex < bcsTimestampPacketContainers.size(); currentBcsIndex++) {
|
||||||
|
const auto currentBcsEngineType = EngineHelpers::mapBcsIndexToEngineType(currentBcsIndex, true);
|
||||||
|
if (currentBcsEngineType == engineType) {
|
||||||
|
// Node is already added to barrierNodes for this engine, no need to save it.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save latest timestamp (override previous, if any).
|
||||||
|
TimestampPacketContainer newContainer{};
|
||||||
|
newContainer.assignAndIncrementNodesRefCounts(timestampPacketDependencies.barrierNodes);
|
||||||
|
bcsTimestampPacketContainers[currentBcsIndex].lastBarrierToWaitFor.swapNodes(newContainer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandQueue::processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies) {
|
||||||
|
BcsTimestampPacketContainers &bcsContainers = bcsTimestampPacketContainers[EngineHelpers::getBcsIndex(bcsEngineType)];
|
||||||
|
bcsContainers.lastBarrierToWaitFor.moveNodesToNewContainer(blitDependencies.barrierNodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandQueue::setLastBcsPacket(aub_stream::EngineType bcsEngineType) {
|
||||||
|
if (isOOQEnabled()) {
|
||||||
|
TimestampPacketContainer dummyContainer{};
|
||||||
|
dummyContainer.assignAndIncrementNodesRefCounts(*this->timestampPacketContainer);
|
||||||
|
|
||||||
|
BcsTimestampPacketContainers &bcsContainers = bcsTimestampPacketContainers[EngineHelpers::getBcsIndex(bcsEngineType)];
|
||||||
|
bcsContainers.lastSignalledPacket.swapNodes(dummyContainer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandQueue::fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps) {
|
||||||
|
for (BcsTimestampPacketContainers &bcsContainers : bcsTimestampPacketContainers) {
|
||||||
|
if (bcsContainers.lastSignalledPacket.peekNodes().empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
csrDeps.timestampPacketContainer.push_back(&bcsContainers.lastSignalledPacket);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandQueue::clearLastBcsPackets() {
|
||||||
|
for (BcsTimestampPacketContainers &bcsContainers : bcsTimestampPacketContainers) {
|
||||||
|
bcsContainers.lastSignalledPacket.moveNodesToNewContainer(*deferredTimestampPackets);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -325,6 +325,12 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
|
|
||||||
void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; }
|
void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; }
|
||||||
|
|
||||||
|
void setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies &timestampPacketDependencies);
|
||||||
|
void processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies);
|
||||||
|
void setLastBcsPacket(aub_stream::EngineType bcsEngineType);
|
||||||
|
void fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps);
|
||||||
|
void clearLastBcsPackets();
|
||||||
|
|
||||||
// taskCount of last task
|
// taskCount of last task
|
||||||
uint32_t taskCount = 0;
|
uint32_t taskCount = 0;
|
||||||
|
|
||||||
|
@ -409,6 +415,11 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
|
|
||||||
std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
|
std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
|
||||||
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
||||||
|
struct BcsTimestampPacketContainers {
|
||||||
|
TimestampPacketContainer lastBarrierToWaitFor;
|
||||||
|
TimestampPacketContainer lastSignalledPacket;
|
||||||
|
};
|
||||||
|
std::array<BcsTimestampPacketContainers, bcsInfoMaskSize> bcsTimestampPacketContainers;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename PtrType>
|
template <typename PtrType>
|
||||||
|
|
|
@ -249,6 +249,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||||
timestampPacketDependencies, eventsRequest, blockQueue);
|
timestampPacketDependencies, eventsRequest, blockQueue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!blockQueue && isOOQEnabled()) {
|
||||||
|
setupBarrierTimestampForBcsEngines(getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), timestampPacketDependencies);
|
||||||
|
}
|
||||||
|
|
||||||
if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||||
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
|
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
|
||||||
|
@ -536,8 +540,6 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
|
||||||
device->getHardwareInfo(),
|
device->getHardwareInfo(),
|
||||||
args);
|
args);
|
||||||
}
|
}
|
||||||
|
|
||||||
TimestampPacketHelper::programSemaphore<GfxFamily>(*commandStream, *currentTimestampPacketNode);
|
|
||||||
}
|
}
|
||||||
return blitProperties;
|
return blitProperties;
|
||||||
}
|
}
|
||||||
|
@ -898,8 +900,13 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
|
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
|
||||||
|
|
||||||
|
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||||
|
|
||||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) {
|
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) {
|
||||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
||||||
|
}
|
||||||
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -937,6 +944,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||||
dispatchFlags,
|
dispatchFlags,
|
||||||
getDevice());
|
getDevice());
|
||||||
|
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
clearLastBcsPackets();
|
||||||
|
}
|
||||||
|
|
||||||
if (gtpinIsGTPinInitialized()) {
|
if (gtpinIsGTPinInitialized()) {
|
||||||
gtpinNotifyFlushTask(completionStamp.taskCount);
|
gtpinNotifyFlushTask(completionStamp.taskCount);
|
||||||
}
|
}
|
||||||
|
@ -1119,8 +1130,13 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||||
false, //memoryMigrationRequired
|
false, //memoryMigrationRequired
|
||||||
false); //textureCacheFlush
|
false); //textureCacheFlush
|
||||||
|
|
||||||
|
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||||
|
|
||||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
||||||
|
}
|
||||||
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1133,6 +1149,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||||
taskLevel,
|
taskLevel,
|
||||||
dispatchFlags,
|
dispatchFlags,
|
||||||
getDevice());
|
getDevice());
|
||||||
|
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
clearLastBcsPackets();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
|
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
|
||||||
|
@ -1208,9 +1228,10 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
|
if (!blockQueue) {
|
||||||
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
|
setupBarrierTimestampForBcsEngines(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
|
||||||
}
|
}
|
||||||
|
processBarrierTimestampForBcsEngine(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
|
||||||
|
|
||||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
||||||
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
||||||
|
@ -1243,6 +1264,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||||
}
|
}
|
||||||
|
|
||||||
this->latestSentEnqueueType = enqueueProperties.operation;
|
this->latestSentEnqueueType = enqueueProperties.operation;
|
||||||
|
|
||||||
|
setLastBcsPacket(bcsCsr.getOsContext().getEngineType());
|
||||||
}
|
}
|
||||||
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
||||||
|
|
||||||
|
|
|
@ -215,6 +215,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||||
commandQueue.getGpgpuCommandStreamReceiver(), *bcsCsrForAuxTranslation);
|
commandQueue.getGpgpuCommandStreamReceiver(), *bcsCsrForAuxTranslation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (timestampPacketDependencies && commandQueue.isOOQEnabled()) {
|
||||||
|
commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies);
|
||||||
|
}
|
||||||
|
|
||||||
const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
|
const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
|
||||||
|
|
||||||
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired(), commandQueue.getDevice().getHardwareInfo());
|
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired(), commandQueue.getDevice().getHardwareInfo());
|
||||||
|
@ -254,8 +258,13 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||||
|
|
||||||
if (timestampPacketDependencies) {
|
if (timestampPacketDependencies) {
|
||||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
||||||
|
}
|
||||||
dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies->barrierNodes;
|
dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies->barrierNodes;
|
||||||
}
|
}
|
||||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
|
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
|
||||||
|
@ -289,6 +298,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||||
dispatchFlags,
|
dispatchFlags,
|
||||||
commandQueue.getDevice());
|
commandQueue.getDevice());
|
||||||
|
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
commandQueue.clearLastBcsPackets();
|
||||||
|
}
|
||||||
|
|
||||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||||
const auto newTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
const auto newTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
||||||
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
|
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
|
||||||
|
@ -330,6 +343,7 @@ void CommandWithoutKernel::dispatchBlitOperation() {
|
||||||
|
|
||||||
const auto newTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
const auto newTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
||||||
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
||||||
|
commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType());
|
||||||
}
|
}
|
||||||
|
|
||||||
CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
|
CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
|
||||||
|
@ -361,6 +375,10 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (timestampPacketDependencies && commandQueue.isOOQEnabled()) {
|
||||||
|
commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies);
|
||||||
|
}
|
||||||
|
|
||||||
auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
|
auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
|
||||||
DispatchFlags dispatchFlags(
|
DispatchFlags dispatchFlags(
|
||||||
{}, //csrDependencies
|
{}, //csrDependencies
|
||||||
|
@ -397,8 +415,13 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||||
|
|
||||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
||||||
|
}
|
||||||
makeTimestampPacketsResident(commandStreamReceiver);
|
makeTimestampPacketsResident(commandStreamReceiver);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -413,6 +436,10 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||||
dispatchFlags,
|
dispatchFlags,
|
||||||
commandQueue.getDevice());
|
commandQueue.getDevice());
|
||||||
|
|
||||||
|
if (isHandlingBarrier) {
|
||||||
|
commandQueue.clearLastBcsPackets();
|
||||||
|
}
|
||||||
|
|
||||||
if (kernelOperation->blitEnqueue) {
|
if (kernelOperation->blitEnqueue) {
|
||||||
dispatchBlitOperation();
|
dispatchBlitOperation();
|
||||||
}
|
}
|
||||||
|
|
|
@ -236,6 +236,12 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||||
return commandItor;
|
return commandItor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Command>
|
||||||
|
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||||
|
auto commandItor = find<Command *>(itorStart, itorEnd);
|
||||||
|
EXPECT_TRUE(commandItor == itorEnd);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
|
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
|
||||||
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
||||||
|
@ -965,13 +971,10 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithNoTimestampPacketTests, givenNoTimestampPacket
|
||||||
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(bcsCommands.begin(), bcsCommands.end());
|
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(bcsCommands.begin(), bcsCommands.end());
|
||||||
|
|
||||||
cmdFound = expectMiFlush<MI_FLUSH_DW>(cmdFound++, bcsCommands.end());
|
cmdFound = expectMiFlush<MI_FLUSH_DW>(cmdFound++, bcsCommands.end());
|
||||||
auto miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
|
|
||||||
const auto bcsSignalAddress = miflushDwCmd->getDestinationAddress();
|
|
||||||
|
|
||||||
cmdFound = expectCommand<WALKER_TYPE>(ccsCommands.begin(), ccsCommands.end());
|
cmdFound = expectCommand<WALKER_TYPE>(ccsCommands.begin(), ccsCommands.end());
|
||||||
|
|
||||||
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdFound++, ccsCommands.end());
|
expectNoCommand<MI_SEMAPHORE_WAIT>(cmdFound++, ccsCommands.end());
|
||||||
verifySemaphore<FamilyType>(cmdFound, bcsSignalAddress);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct BlitEnqueueWithDebugCapabilityTests : public BlitEnqueueTests<0> {
|
struct BlitEnqueueWithDebugCapabilityTests : public BlitEnqueueTests<0> {
|
||||||
|
@ -1726,7 +1729,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushR
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenDontClearDependencies) {
|
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
|
||||||
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
|
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
|
||||||
const bool clearDependencies = true;
|
const bool clearDependencies = true;
|
||||||
|
|
||||||
|
@ -1736,12 +1739,6 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionT
|
||||||
EXPECT_EQ(0u, previousNodes.peekNodes().size());
|
EXPECT_EQ(0u, previousNodes.peekNodes().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
TimestampPacketContainer previousNodes;
|
|
||||||
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
|
|
||||||
EXPECT_EQ(1u, previousNodes.peekNodes().size());
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
{
|
||||||
TimestampPacketContainer previousNodes;
|
TimestampPacketContainer previousNodes;
|
||||||
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
|
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||||
|
@ -1506,20 +1507,23 @@ HWTEST_F(CommandQueueHwTest, WhenForcePerDssBackedBufferProgrammingSetThenDispat
|
||||||
EXPECT_TRUE(csr.recordedDispatchFlags.usePerDssBackedBuffer);
|
EXPECT_TRUE(csr.recordedDispatchFlags.usePerDssBackedBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool ooq>
|
||||||
struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test {
|
struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test {
|
||||||
using ContextFixture::SetUp;
|
using ContextFixture::SetUp;
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
REQUIRE_FULL_BLITTER_OR_SKIP(defaultHwInfo.get());
|
hwInfo = *::defaultHwInfo;
|
||||||
|
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||||
|
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
|
||||||
|
|
||||||
DebugManager.flags.EnableBlitterOperationsSupport.set(1);
|
DebugManager.flags.EnableBlitterOperationsSupport.set(1);
|
||||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||||
ClDeviceFixture::SetUp();
|
ClDeviceFixture::SetUpImpl(&hwInfo);
|
||||||
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
|
|
||||||
cl_device_id device = pClDevice;
|
cl_device_id device = pClDevice;
|
||||||
ContextFixture::SetUp(1, &device);
|
ContextFixture::SetUp(1, &device);
|
||||||
CommandQueueHwFixture::SetUp(pClDevice, 0);
|
cl_command_queue_properties queueProperties = ooq ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
|
||||||
|
CommandQueueHwFixture::SetUp(pClDevice, queueProperties);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TearDown() override {
|
void TearDown() override {
|
||||||
|
@ -1528,10 +1532,14 @@ struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwF
|
||||||
ClDeviceFixture::TearDown();
|
ClDeviceFixture::TearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HardwareInfo hwInfo{};
|
||||||
DebugManagerStateRestore state{};
|
DebugManagerStateRestore state{};
|
||||||
};
|
};
|
||||||
|
|
||||||
HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
|
using IoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<false>;
|
||||||
|
using OoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<true>;
|
||||||
|
|
||||||
|
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
|
||||||
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||||
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||||
|
@ -1561,7 +1569,7 @@ HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenG
|
||||||
EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled);
|
EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenGpgpuCommandStreamIsObtained) {
|
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenGpgpuCommandStreamIsObtained) {
|
||||||
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||||
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||||
|
@ -1586,3 +1594,185 @@ HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenG
|
||||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||||
EXPECT_NE(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled);
|
EXPECT_NE(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitAfterBarrierWhenEnqueueingCommandThenWaitForBarrierOnBlit) {
|
||||||
|
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||||
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
|
|
||||||
|
if (pCmdQ->getTimestampPacketContainer() == nullptr) {
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
|
DebugManagerStateRestore restore{};
|
||||||
|
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||||
|
DebugManager.flags.ForceCacheFlushForBcs.set(0);
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
MockKernelWithInternals mockKernelWithInternals(*pClDevice);
|
||||||
|
MockKernel *kernel = mockKernelWithInternals.mockKernel;
|
||||||
|
size_t offset = 0;
|
||||||
|
size_t gws = 1;
|
||||||
|
BufferDefaults::context = context;
|
||||||
|
auto buffer = clUniquePtr(BufferHelper<>::create());
|
||||||
|
char ptr[1] = {};
|
||||||
|
|
||||||
|
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||||
|
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||||
|
auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
|
||||||
|
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
|
||||||
|
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||||
|
|
||||||
|
uint64_t barrierNodeAddress = 0u;
|
||||||
|
{
|
||||||
|
HardwareParse ccsHwParser;
|
||||||
|
ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);
|
||||||
|
|
||||||
|
const auto pipeControlItor = find<PIPE_CONTROL *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
|
||||||
|
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
|
||||||
|
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
|
||||||
|
barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);
|
||||||
|
|
||||||
|
// There shouldn't be any semaphores before the barrier
|
||||||
|
const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), pipeControlItor);
|
||||||
|
EXPECT_EQ(pipeControlItor, semaphoreItor);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
HardwareParse bcsHwParser;
|
||||||
|
bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), 0u);
|
||||||
|
|
||||||
|
const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
|
||||||
|
auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||||
|
EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());
|
||||||
|
|
||||||
|
const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end());
|
||||||
|
EXPECT_EQ(bcsHwParser.cmdList.end(), pipeControlItor);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueues two read-buffers, a kernel, a barrier, another kernel and two more read-buffers, then inspects
// the command streams programmed from the barrier onwards:
//  - on the gpgpu stream the first MI_SEMAPHORE_WAIT must target the packet captured after the first two
//    read-buffers, and it must be the only semaphore before the barrier PIPE_CONTROL,
//  - on the BCS stream the first MI_SEMAPHORE_WAIT must target the address written by that PIPE_CONTROL and
//    no PIPE_CONTROL may follow it.
// NOTE(review): read-buffers are presumably serviced by the blitter in this fixture - confirm against the fixture setup.
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    if (pCmdQ->getTimestampPacketContainer() == nullptr) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restore{};
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.ForceCacheFlushForBcs.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    MockKernel *kernel = mockKernelWithInternals.mockKernel;
    size_t offset = 0;
    size_t gws = 1;
    BufferDefaults::context = context;
    auto buffer = clUniquePtr(BufferHelper<>::create());
    char ptr[1] = {};

    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    // Fail cleanly instead of indexing an empty container.
    ASSERT_FALSE(pCmdQ->getTimestampPacketContainer()->peekNodes().empty());
    uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]);
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    // Remember how much of each stream is already used, so that only later commands are parsed below.
    auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
    auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed();
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));

    uint64_t barrierNodeAddress = 0u;
    {
        HardwareParse ccsHwParser;
        ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
        // Dereferencing end() would be undefined behaviour, so stop the test if no semaphore was programmed.
        ASSERT_NE(ccsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(lastBlitNodeAddress, semaphore->getSemaphoreGraphicsAddress());

        const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), pipeControlItor);
        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        // Combine the low and high address parts into the full 64-bit post-sync address.
        barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);

        // There shouldn't be any more semaphores before the barrier
        EXPECT_EQ(pipeControlItor, find<MI_SEMAPHORE_WAIT *>(std::next(semaphoreItor), pipeControlItor));
    }

    {
        HardwareParse bcsHwParser;
        bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
        ASSERT_NE(bcsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(bcsHwParser.cmdList.end(), find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end()));
    }

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
|
||||||
|
|
||||||
|
// Same scenario as the non-blocked variant, except that the only read-buffer enqueued after the barrier
// waits on a user event, which is completed before the command streams are inspected:
//  - on the gpgpu stream the first MI_SEMAPHORE_WAIT must target the packet captured after the first two
//    read-buffers, and it must be the only semaphore before the barrier PIPE_CONTROL,
//  - on the BCS stream the first MI_SEMAPHORE_WAIT must target the address written by that PIPE_CONTROL and
//    no PIPE_CONTROL may follow it.
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlockedBlitAfterBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    if (pCmdQ->getTimestampPacketContainer() == nullptr) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restore{};
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.ForceCacheFlushForBcs.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    UserEvent userEvent;
    cl_event userEventWaitlist[] = {&userEvent};
    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    MockKernel *kernel = mockKernelWithInternals.mockKernel;
    size_t offset = 0;
    size_t gws = 1;
    BufferDefaults::context = context;
    auto buffer = clUniquePtr(BufferHelper<>::create());
    char ptr[1] = {};

    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
    // Fail cleanly instead of indexing an empty container.
    ASSERT_FALSE(pCmdQ->getTimestampPacketContainer()->peekNodes().empty());
    uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]);
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
    // Remember how much of each stream is already used, so that only later commands are parsed below.
    auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
    auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed();
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
    // This read-buffer is blocked on the user event until setStatus() below.
    EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 1, userEventWaitlist, nullptr));

    userEvent.setStatus(CL_COMPLETE);

    uint64_t barrierNodeAddress = 0u;
    {
        HardwareParse ccsHwParser;
        ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
        // Dereferencing end() would be undefined behaviour, so stop the test if no semaphore was programmed.
        ASSERT_NE(ccsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(lastBlitNodeAddress, semaphore->getSemaphoreGraphicsAddress());

        const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, ccsHwParser.cmdList.end());
        ASSERT_NE(ccsHwParser.cmdList.end(), pipeControlItor);
        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        // Combine the low and high address parts into the full 64-bit post-sync address.
        barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);

        // There shouldn't be any more semaphores before the barrier
        EXPECT_EQ(pipeControlItor, find<MI_SEMAPHORE_WAIT *>(std::next(semaphoreItor), pipeControlItor));
    }

    {
        HardwareParse bcsHwParser;
        bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart);

        const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
        ASSERT_NE(bcsHwParser.cmdList.end(), semaphoreItor);
        const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, semaphore);
        EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(bcsHwParser.cmdList.end(), find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end()));
    }

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
|
||||||
|
|
|
@ -1837,6 +1837,204 @@ TEST(CommandQueue, givenSupportForOutEventAndOutEventIsPassedWhenValidatingSuppo
|
||||||
EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent));
|
EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct CommandQueueWithTimestampPacketTests : ::testing::Test {
|
||||||
|
void SetUp() override {
|
||||||
|
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugManagerStateRestore restore{};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Default-constructed (in-order) queue: setupBarrierTimestampForBcsEngines() must add a single barrier node
// to the dependencies only once stalling commands were requested, must not add a second node when called
// again, and must leave every per-BCS lastBarrierToWaitFor container empty.
TEST_F(CommandQueueWithTimestampPacketTests, givenInOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledThenEnsureBarrierNodeIsPresent) {
    MockContext context{};
    MockCommandQueue queue{context};
    TimestampPacketDependencies dependencies{};
    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }

    // No pending barrier, skip
    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies);
    EXPECT_EQ(0u, dependencies.barrierNodes.peekNodes().size());

    // Add barrier node
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies);
    // Fatal check: the node is read by index right below.
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    auto node1 = dependencies.barrierNodes.peekNodes()[0];

    // Do not add new node, if it exists
    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies);
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    auto node2 = dependencies.barrierNodes.peekNodes()[0];
    EXPECT_EQ(node2, node1);

    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }
}
|
||||||
|
|
||||||
|
// Out-of-order queue, barrier set up on ENGINE_BCS: the barrier node must land in the dependencies and be
// saved in lastBarrierToWaitFor of every BCS container except index 0. The node's reference count must
// equal the number of BCS containers.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledOnBcsEngineThenEnsureBarrierNodeIsPresentAndSaveItForOtherBcses) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), props, false};
    TimestampPacketDependencies dependencies{};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }

    queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies);
    // Fatal check: the node is read by index right below.
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    auto barrierNode = dependencies.barrierNodes.peekNodes()[0];

    for (auto currentBcsIndex = 0u; currentBcsIndex < queue.bcsTimestampPacketContainers.size(); currentBcsIndex++) {
        auto &containers = queue.bcsTimestampPacketContainers[currentBcsIndex];
        if (currentBcsIndex == 0) {
            EXPECT_EQ(0u, containers.lastBarrierToWaitFor.peekNodes().size());
        } else {
            ASSERT_EQ(1u, containers.lastBarrierToWaitFor.peekNodes().size());
            EXPECT_EQ(barrierNode, containers.lastBarrierToWaitFor.peekNodes()[0]);
        }
    }
    // refCountFetchSub(0) subtracts nothing; its return value is compared as the current reference count.
    EXPECT_EQ(queue.bcsTimestampPacketContainers.size(), barrierNode->refCountFetchSub(0));
}
|
||||||
|
|
||||||
|
// Out-of-order queue, barrier set up on a compute engine (RCS, then CCS): the barrier node must land in the
// dependencies and be saved in lastBarrierToWaitFor of every BCS container. The node's reference count
// must equal the number of BCS containers plus one.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledOnNonBcsEngineThenEnsureBarrierNodeIsPresentAndSaveItForBcses) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), props, false};
    TimestampPacketDependencies dependencies{};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
    for (auto &containers : queue.bcsTimestampPacketContainers) {
        EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty());
    }

    for (auto engineType : {aub_stream::EngineType::ENGINE_RCS,
                            aub_stream::EngineType::ENGINE_CCS}) {
        queue.setupBarrierTimestampForBcsEngines(engineType, dependencies);
        // Fatal check: the node is read by index right below.
        ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
        auto barrierNode = dependencies.barrierNodes.peekNodes()[0];

        for (auto &containers : queue.bcsTimestampPacketContainers) {
            ASSERT_EQ(1u, containers.lastBarrierToWaitFor.peekNodes().size());
            EXPECT_EQ(barrierNode, containers.lastBarrierToWaitFor.peekNodes()[0]);
        }
        // refCountFetchSub(0) subtracts nothing; its return value is compared as the current reference count.
        EXPECT_EQ(1u + queue.bcsTimestampPacketContainers.size(), barrierNode->refCountFetchSub(0));
    }
}
|
||||||
|
|
||||||
|
// processBarrierTimestampForBcsEngine() must leave the dependencies untouched when no barrier was saved,
// and otherwise move the saved node from lastBarrierToWaitFor into dependencies.barrierNodes.
TEST_F(CommandQueueWithTimestampPacketTests, givenSavedBarrierWhenProcessBarrierTimestampForBcsEngineCalledThenMoveSaveBarrierPacketToBarrierNodes) {
    MockContext context{};
    MockCommandQueue queue{context};
    TimestampPacketDependencies dependencies{};

    // No saved barriers
    queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies);
    EXPECT_TRUE(dependencies.barrierNodes.peekNodes().empty());

    // Save barrier
    TagNodeBase *node = queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.add(node);
    queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies);
    // Fatal check: the node is read by index right below.
    ASSERT_EQ(1u, dependencies.barrierNodes.peekNodes().size());
    EXPECT_EQ(node, dependencies.barrierNodes.peekNodes()[0]);
    EXPECT_TRUE(queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.peekNodes().empty());
}
|
||||||
|
|
||||||
|
// For each compute engine (RCS, CCS): set up the barrier timestamp on that engine, then process it for
// ENGINE_BCS with a fresh dependencies object and expect exactly one barrier node there.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenBarrierTimestampAreSetupOnComputeEngineAndProcessedOnBcsThenPacketIsInBarrierNodes) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();

    const aub_stream::EngineType computeEngines[] = {aub_stream::EngineType::ENGINE_RCS,
                                                     aub_stream::EngineType::ENGINE_CCS};
    for (const auto computeEngine : computeEngines) {
        TimestampPacketDependencies computeDependencies{};
        queue.setupBarrierTimestampForBcsEngines(computeEngine, computeDependencies);

        TimestampPacketDependencies bcsDependencies{};
        queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, bcsDependencies);

        const auto &barrierNodes = bcsDependencies.barrierNodes.peekNodes();
        EXPECT_EQ(1u, barrierNodes.size());
    }
}
|
||||||
|
|
||||||
|
// Setting up and then processing the barrier timestamp on the same BCS engine, with the same dependencies
// object, must leave exactly one barrier node in it.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenBarrierTimestampAreSetupOnBcsEngineAndProcessedOnBcsThenPacketIsInBarrierNodes) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};
    queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();

    const auto bcsEngine = aub_stream::EngineType::ENGINE_BCS;
    TimestampPacketDependencies bcsDependencies{};
    queue.setupBarrierTimestampForBcsEngines(bcsEngine, bcsDependencies);
    queue.processBarrierTimestampForBcsEngine(bcsEngine, bcsDependencies);

    const auto &barrierNodes = bcsDependencies.barrierNodes.peekNodes();
    EXPECT_EQ(1u, barrierNodes.size());
}
|
||||||
|
|
||||||
|
// On a default-constructed (in-order) queue, setLastBcsPacket() must not store anything in lastSignalledPacket.
TEST_F(CommandQueueWithTimestampPacketTests, givenInOrderQueueWhenSettingLastBcsPacketThenDoNotSaveThePacket) {
    MockContext context{};
    MockCommandQueue queue{context};

    queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);

    const auto &savedPackets = queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes();
    EXPECT_TRUE(savedPackets.empty());
}
|
||||||
|
|
||||||
|
// On an out-of-order queue, setLastBcsPacket() must make lastSignalledPacket mirror the queue's current
// timestamp packet container. The check is run twice, with the queue's nodes moved to the deferred
// container in between, and both containers must hold exactly one node each time.
TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSettingLastBcsPacketThenSaveOnlyOneLastPacket) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};

    auto &lastSignalledPacket = queue.bcsTimestampPacketContainers[0].lastSignalledPacket;

    // Adds a fresh packet to the queue, marks it as the last BCS packet and verifies both containers.
    const auto addPacketAndSetAsLast = [&]() {
        TagNodeBase *packet = queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
        queue.timestampPacketContainer->add(packet);
        queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);
        EXPECT_EQ(queue.timestampPacketContainer->peekNodes(), lastSignalledPacket.peekNodes());
        EXPECT_EQ(1u, queue.timestampPacketContainer->peekNodes().size());
    };

    addPacketAndSetAsLast();

    queue.timestampPacketContainer->moveNodesToNewContainer(*queue.getDeferredTimestampPackets());

    addPacketAndSetAsLast();
}
|
||||||
|
|
||||||
|
// fillCsrDependenciesWithLastBcsPackets() must expose the lastSignalledPacket container through
// csrDeps.timestampPacketContainer by pointer, while the node itself stays in lastSignalledPacket.
// NOTE(review): the test name says "Move", but the assertions verify the packet is referenced, not moved.
TEST_F(CommandQueueWithTimestampPacketTests, givenLastSignalledPacketWhenFillingCsrDependenciesThenMovePacketToCsrDependencies) {
    MockContext context{};
    MockCommandQueue queue{context};
    queue.bcsTimestampPacketContainers[0].lastSignalledPacket.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());

    CsrDependencies csrDeps;
    queue.fillCsrDependenciesWithLastBcsPackets(csrDeps);
    EXPECT_EQ(1u, queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().size());
    // Fatal check: element 0 is read right below.
    ASSERT_FALSE(csrDeps.timestampPacketContainer.empty());
    EXPECT_EQ(&queue.bcsTimestampPacketContainers[0].lastSignalledPacket, csrDeps.timestampPacketContainer[0]);
}
|
||||||
|
|
||||||
|
// clearLastBcsPackets() must drop the packet previously stored in lastSignalledPacket.
TEST_F(CommandQueueWithTimestampPacketTests, givenLastSignalledPacketWhenClearingPacketsThenClearThePacket) {
    MockContext context{};
    MockCommandQueue queue{context};
    queue.bcsTimestampPacketContainers[0].lastSignalledPacket.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
    // Precondition: there really is a packet to clear.
    EXPECT_EQ(1u, queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().size());

    queue.clearLastBcsPackets();
    // Verify the container that was populated above; checking only lastBarrierToWaitFor would pass
    // regardless of what clearLastBcsPackets() does, since this test never adds anything to it.
    EXPECT_EQ(0u, queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().size());
    EXPECT_EQ(0u, queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.peekNodes().size());
}
|
||||||
|
|
||||||
|
// End-to-end check on an out-of-order queue: after setLastBcsPacket() the CSR dependencies filled from the
// queue must not be empty, and after clearLastBcsPackets() every lastSignalledPacket container must be empty.
TEST_F(CommandQueueWithTimestampPacketTests, givenQueueWhenSettingAndQueryingLastBcsPacketThenReturnCorrectResults) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    MockContext context{};
    MockCommandQueue queue{&context, context.getDevice(0), queueProperties, false};

    TagNodeBase *packet = queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    queue.timestampPacketContainer->add(packet);

    queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS);

    CsrDependencies lastPacketDependencies;
    queue.fillCsrDependenciesWithLastBcsPackets(lastPacketDependencies);
    EXPECT_FALSE(lastPacketDependencies.timestampPacketContainer.empty());

    queue.clearLastBcsPackets();
    for (auto bcsIndex = 0u; bcsIndex < queue.bcsTimestampPacketContainers.size(); bcsIndex++) {
        const auto &savedPackets = queue.bcsTimestampPacketContainers[bcsIndex].lastSignalledPacket.peekNodes();
        EXPECT_TRUE(savedPackets.empty());
    }
}
|
||||||
|
|
||||||
using KernelExecutionTypesTests = DispatchFlagsTests;
|
using KernelExecutionTypesTests = DispatchFlagsTests;
|
||||||
HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingNonBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) {
|
HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingNonBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) {
|
||||||
using CsrType = MockCsrHw2<FamilyType>;
|
using CsrType = MockCsrHw2<FamilyType>;
|
||||||
|
|
|
@ -489,7 +489,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBu
|
||||||
EXPECT_EQ(mapAllocation, mockCmdQ->kernelParams.transferAllocation);
|
EXPECT_EQ(mapAllocation, mockCmdQ->kernelParams.transferAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {
|
HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWithGpgpuSubmissionWhenProgrammingCommandStreamThenDoNotAddSemaphoreWaitOnGpgpu) {
|
||||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||||
|
|
||||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||||
|
@ -503,7 +503,6 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommand
|
||||||
void *hostPtr = reinterpret_cast<void *>(0x12340000);
|
void *hostPtr = reinterpret_cast<void *>(0x12340000);
|
||||||
|
|
||||||
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
||||||
auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0);
|
|
||||||
|
|
||||||
HardwareParse hwParser;
|
HardwareParse hwParser;
|
||||||
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
||||||
|
@ -515,15 +514,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommand
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
semaphoresCount++;
|
semaphoresCount++;
|
||||||
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
|
|
||||||
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPECT_EQ(1u, semaphoresCount);
|
EXPECT_EQ(0u, semaphoresCount);
|
||||||
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {
|
HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWithGpgpuSubmissionWhenProgrammingCommandStreamThenDoNotAddSemaphoreWaitOnGpgpu) {
|
||||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||||
|
|
||||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||||
|
@ -537,7 +534,6 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
|
||||||
void *hostPtr = reinterpret_cast<void *>(0x12340000);
|
void *hostPtr = reinterpret_cast<void *>(0x12340000);
|
||||||
|
|
||||||
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
||||||
auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0);
|
|
||||||
|
|
||||||
HardwareParse hwParser;
|
HardwareParse hwParser;
|
||||||
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
||||||
|
@ -549,11 +545,9 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
semaphoresCount++;
|
semaphoresCount++;
|
||||||
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
|
|
||||||
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPECT_EQ(1u, semaphoresCount);
|
EXPECT_EQ(0u, semaphoresCount);
|
||||||
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ class MockCommandQueue : public CommandQueue {
|
||||||
public:
|
public:
|
||||||
using CommandQueue::bcsEngines;
|
using CommandQueue::bcsEngines;
|
||||||
using CommandQueue::bcsEngineTypes;
|
using CommandQueue::bcsEngineTypes;
|
||||||
|
using CommandQueue::bcsTimestampPacketContainers;
|
||||||
using CommandQueue::blitEnqueueAllowed;
|
using CommandQueue::blitEnqueueAllowed;
|
||||||
using CommandQueue::blitEnqueueImageAllowed;
|
using CommandQueue::blitEnqueueImageAllowed;
|
||||||
using CommandQueue::bufferCpuCopyAllowed;
|
using CommandQueue::bufferCpuCopyAllowed;
|
||||||
|
|
|
@ -31,5 +31,10 @@ uint32_t getBcsIndex(aub_stream::EngineType engineType) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps a BCS index to its engine type. This variant always reports ENGINE_BCS and expects index 0;
// includeMainCopyEngine is not consulted here.
// NOTE(review): DEBUG_BREAK_IF presumably only triggers in debug builds - confirm in its definition.
aub_stream::EngineType mapBcsIndexToEngineType(uint32_t index, bool includeMainCopyEngine) {
    DEBUG_BREAK_IF(index != 0);
    const aub_stream::EngineType onlyCopyEngine = aub_stream::ENGINE_BCS;
    return onlyCopyEngine;
}
|
||||||
|
|
||||||
} // namespace EngineHelpers
|
} // namespace EngineHelpers
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -54,8 +54,6 @@ class TagNodeBase : public NonCopyableOrMovableClass {
|
||||||
|
|
||||||
bool isProfilingCapable() const { return profilingCapable; }
|
bool isProfilingCapable() const { return profilingCapable; }
|
||||||
|
|
||||||
const TagAllocatorBase *getAllocator() const { return allocator; }
|
|
||||||
|
|
||||||
// TagType specific calls
|
// TagType specific calls
|
||||||
virtual void assignDataToAllTimestamps(uint32_t packetIndex, void *source) = 0;
|
virtual void assignDataToAllTimestamps(uint32_t packetIndex, void *source) = 0;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue