Correct event reset

Related-To: NEO-7391
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2022-10-27 22:14:39 +00:00 committed by Compute-Runtime-Automation
parent 623f471321
commit af3f370ff0
19 changed files with 206 additions and 84 deletions

View File

@ -159,12 +159,10 @@ void CommandList::migrateSharedAllocations() {
}
bool CommandList::isTimestampEventForMultiTile(Event *signalEvent) {
if (this->partitionCount > 1 &&
signalEvent) {
if (signalEvent->isEventTimestampFlagSet()) {
return true;
}
if (this->partitionCount > 1 && signalEvent && signalEvent->isEventTimestampFlagSet()) {
return true;
}
return false;
}

View File

@ -267,9 +267,9 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *threadGroupDimensions);
void appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition);
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
void appendEventForProfiling(Event *event, bool beforeWalker, bool workloadPartition);
void appendEventForProfiling(Event *event, bool beforeWalker);
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
void appendSignalEventPostWalker(Event *event, bool workloadPartition);
void appendSignalEventPostWalker(Event *event);
virtual void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
void appendComputeBarrierCommand();
NEO::PipeControlArgs createBarrierFlags();

View File

@ -254,6 +254,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
Event *event = nullptr;
if (hEvent) {
event = Event::fromHandle(hEvent);
if (!launchParams.isKernelSplitOperation) {
event->resetKernelCountAndPacketUsedCount();
}
}
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
@ -285,6 +288,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
Event *event = nullptr;
if (signalEvent) {
event = Event::fromHandle(signalEvent);
event->resetKernelCountAndPacketUsedCount();
}
CmdListKernelLaunchParams launchParams = {};
@ -312,11 +316,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, true);
launchParams.isIndirect = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams);
appendSignalEventPostWalker(event, false);
appendSignalEventPostWalker(event);
return ret;
}
@ -345,7 +349,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, true);
const bool haveLaunchArguments = pLaunchArgumentsBuffer != nullptr;
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pNumLaunchArguments);
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
@ -362,7 +366,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
}
}
appendSignalEventPostWalker(event, false);
appendSignalEventPostWalker(event);
return ret;
}
@ -393,8 +397,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
if (event->isEventTimestampFlagSet()) {
packetsToReset = event->getMaxPacketsCount();
}
event->resetPackets();
event->resetCompletion();
event->resetPackets(false);
event->resetCompletionStatus();
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
const auto &hwInfo = this->device->getHwInfo();
if (isCopyOnly()) {
@ -469,11 +473,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
signalEvent = Event::fromHandle(hSignalEvent);
}
bool workloadPartition = setupTimestampEventForMultiTile(signalEvent);
appendEventForProfiling(signalEvent, true, workloadPartition);
appendEventForProfiling(signalEvent, true);
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
appendSignalEventPostWalker(signalEvent, workloadPartition);
appendSignalEventPostWalker(signalEvent);
return ZE_RESULT_SUCCESS;
}
@ -1035,14 +1037,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, true);
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
if (copyRegionPreferred) {
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
} else {
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
}
appendSignalEventPostWalker(signalEvent, false);
appendSignalEventPostWalker(signalEvent);
return ZE_RESULT_SUCCESS;
}
@ -1067,9 +1069,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
commandContainer.addToResidencyContainer(src);
commandContainer.addToResidencyContainer(clearColorAllocation);
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
appendSignalEventPostWalker(signalEvent, false);
appendSignalEventPostWalker(signalEvent);
return ZE_RESULT_SUCCESS;
}
@ -1753,7 +1755,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
return ret;
}
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, true);
NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr,
size,
neoDevice->getRootDeviceIndex(),
@ -1779,19 +1781,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
*commandContainer.getCommandStream(),
size,
*neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
appendSignalEventPostWalker(signalEvent, false);
appendSignalEventPostWalker(signalEvent);
}
return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, bool workloadPartition) {
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event) {
if (event == nullptr) {
return;
}
if (event->isEventTimestampFlagSet()) {
appendEventForProfiling(event, false, workloadPartition);
appendEventForProfiling(event, false);
} else {
event->resetKernelCountAndPacketUsedCount();
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
uint64_t baseAddr = event->getGpuAddress(this->device);
if (event->isUsingContextEndOffset()) {
@ -1829,9 +1832,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
}
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
const auto &hwInfo = this->device->getHwInfo();
if (!beforeWalker) {
if (beforeWalker) {
event->resetKernelCountAndPacketUsedCount();
} else {
NEO::MiFlushArgs args;
const auto &hwInfo = this->device->getHwInfo();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
}
appendWriteKernelTimestamp(event, beforeWalker, false, false);
@ -1946,6 +1951,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
auto event = Event::fromHandle(hEvent);
event->resetKernelCountAndPacketUsedCount();
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
uint64_t baseAddr = event->getGpuAddress(this->device);
@ -2121,10 +2127,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event, bool beforeWalker, bool workloadPartition) {
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event, bool beforeWalker) {
if (!event) {
return;
}
if (isCopyOnly()) {
appendEventForProfilingCopyCommand(event, beforeWalker);
} else {
@ -2135,6 +2142,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
if (beforeWalker) {
event->resetKernelCountAndPacketUsedCount();
bool workloadPartition = setupTimestampEventForMultiTile(event);
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
} else {
const auto &hwInfo = this->device->getHwInfo();
@ -2147,6 +2156,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
uint64_t baseAddr = event->getGpuAddress(this->device);
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo);
bool workloadPartition = isTimestampEventForMultiTile(event);
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
}
}
@ -2170,8 +2180,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
signalEvent = Event::fromHandle(hSignalEvent);
}
bool workloadPartition = setupTimestampEventForMultiTile(signalEvent);
appendEventForProfiling(signalEvent, true, workloadPartition);
appendEventForProfiling(signalEvent, true);
const auto &hwInfo = this->device->getHwInfo();
if (isCopyOnly()) {
@ -2195,7 +2204,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
args);
}
appendSignalEventPostWalker(signalEvent, workloadPartition);
appendSignalEventPostWalker(signalEvent);
auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false);
commandContainer.addToResidencyContainer(allocationStruct.alloc);
@ -2525,8 +2534,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
signalEvent = Event::fromHandle(hSignalEvent);
}
bool workloadPartition = setupTimestampEventForMultiTile(signalEvent);
appendEventForProfiling(signalEvent, true, workloadPartition);
appendEventForProfiling(signalEvent, true);
if (isCopyOnly()) {
NEO::MiFlushArgs args;
@ -2535,7 +2543,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendComputeBarrierCommand();
}
appendSignalEventPostWalker(signalEvent, workloadPartition);
appendSignalEventPostWalker(signalEvent);
return ZE_RESULT_SUCCESS;
}

View File

@ -50,7 +50,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(*kernelInfo));
}
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, true);
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
@ -175,7 +175,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
appendSignalEventPostWalker(event, false);
appendSignalEventPostWalker(event);
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer();
@ -242,9 +242,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent) {
if (beforeWalker) {
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, true);
} else {
appendSignalEventPostWalker(event, false);
appendSignalEventPostWalker(event);
}
}

View File

@ -289,9 +289,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, false, true);
} else if (event) {
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
event->setPacketsInUse(partitionCount);
if (l3FlushEnable) {
programEventL3Flush<gfxCoreFamily>(event, this->device, partitionCount, commandContainer);
}
@ -432,15 +430,14 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent) {
if (isCopyOnly() || singlePacketEvent) {
if (beforeWalker) {
bool workloadPartition = setupTimestampEventForMultiTile(event);
appendEventForProfiling(event, true, workloadPartition);
appendEventForProfiling(event, true);
} else {
bool workloadPartition = isTimestampEventForMultiTile(event);
appendSignalEventPostWalker(event, workloadPartition);
appendSignalEventPostWalker(event);
}
} else {
if (event) {
if (beforeWalker) {
event->resetKernelCountAndPacketUsedCount();
event->zeroKernelCount();
} else {
if (event->getKernelCount() > 1 && getDcFlushRequired(!!event->signalScope)) {

View File

@ -62,7 +62,8 @@ struct Event : _ze_event_handle_t {
virtual uint32_t getPacketsInUse() = 0;
virtual uint32_t getPacketsUsedInLastKernel() = 0;
virtual uint64_t getPacketAddress(Device *device) = 0;
virtual void resetPackets() = 0;
virtual void resetPackets(bool resetAllPackets) = 0;
virtual void resetKernelCountAndPacketUsedCount() = 0;
void *getHostAddress() { return hostAddress; }
virtual void setPacketsInUse(uint32_t value) = 0;
uint32_t getCurrKernelDataIndex() const { return kernelCount - 1; }
@ -120,7 +121,7 @@ struct Event : _ze_event_handle_t {
l3FlushAppliedOnKernel.set(kernelCount - 1);
}
void resetCompletion() {
void resetCompletionStatus() {
this->isCompleted = false;
}
@ -212,8 +213,10 @@ struct EventImp : public Event {
uint64_t getGpuAddress(Device *device) override;
void resetPackets() override;
void resetDeviceCompletionData();
void resetPackets(bool resetAllPackets) override;
void resetDeviceCompletionData(bool resetAllPackets);
void resetKernelCountAndPacketUsedCount() override;
uint64_t getPacketAddress(Device *device) override;
uint32_t getPacketsInUse() override;
uint32_t getPacketsUsedInLastKernel() override;

View File

@ -53,7 +53,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
// do not reset even if it has been imported, since event pool
// might have been imported after events being already signaled
if (eventPoolImp->isImportedIpcPool == false) {
event->resetDeviceCompletionData();
event->resetDeviceCompletionData(true);
}
return event;
@ -295,20 +295,24 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::reset() {
this->resetCompletion();
this->resetDeviceCompletionData();
this->resetCompletionStatus();
this->resetDeviceCompletionData(false);
this->l3FlushAppliedOnKernel.reset();
return ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
void EventImp<TagSizeT>::resetDeviceCompletionData() {
this->kernelCount = this->maxKernelCount;
for (uint32_t i = 0; i < kernelCount; i++) {
this->kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
void EventImp<TagSizeT>::resetDeviceCompletionData(bool resetAllPackets) {
if (resetAllPackets) {
this->kernelCount = this->maxKernelCount;
for (uint32_t i = 0; i < kernelCount; i++) {
this->kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}
}
this->hostEventSetValue(Event::STATE_INITIAL);
this->resetPackets();
this->resetPackets(resetAllPackets);
}
template <typename TagSizeT>
@ -395,11 +399,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
}
template <typename TagSizeT>
void EventImp<TagSizeT>::resetPackets() {
for (uint32_t i = 0; i < kernelCount; i++) {
kernelEventCompletionData[i].setPacketsUsed(1);
void EventImp<TagSizeT>::resetPackets(bool resetAllPackets) {
if (resetAllPackets) {
resetKernelCountAndPacketUsedCount();
}
kernelCount = 1;
cpuStartTimestamp = 0;
gpuStartTimestamp = 0;
gpuEndTimestamp = 0;
@ -425,6 +428,14 @@ void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
}
// Returns the event to its single-kernel bookkeeping state: every kernel slot
// that is currently counted is set back to one used packet, and afterwards the
// kernel count itself becomes one.
template <typename TagSizeT>
void EventImp<TagSizeT>::resetKernelCountAndPacketUsedCount() {
    auto slot = 0u;
    // Walk the slots while the old kernel count is still in place; it is only
    // overwritten once all of them have been visited.
    while (slot < this->kernelCount) {
        this->kernelEventCompletionData[slot].setPacketsUsed(1);
        slot++;
    }
    this->kernelCount = 1;
}
template <typename TagSizeT>
uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
uint64_t address = getGpuAddress(device);

View File

@ -65,6 +65,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::indirectAllocationsAllowed;
using BaseClass::initialize;
using BaseClass::isTimestampEventForMultiTile;
using BaseClass::partitionCount;
using BaseClass::patternAllocations;
using BaseClass::pipeControlMultiKernelEventSync;

View File

@ -121,7 +121,8 @@ class MockEvent : public ::L0::Event {
}
uint32_t getPacketsUsedInLastKernel() override { return 1; }
uint32_t getPacketsInUse() override { return 1; }
void resetPackets() override {}
void resetPackets(bool resetAllPackets) override {}
void resetKernelCountAndPacketUsedCount() override {}
void setPacketsInUse(uint32_t value) override {}
uint64_t getPacketAddress(L0::Device *) override { return 0; }
void setGpuStartTimestamp() override {}

View File

@ -1997,7 +1997,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
commandList->appendSignalEventPostWalker(nullptr, false);
commandList->appendSignalEventPostWalker(nullptr);
EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore);
}

View File

@ -307,6 +307,45 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
using CommandListAppendLaunchKernelResetKernelCount = Test<DeviceFixture>;
// Checks how appendLaunchKernel treats the signal event's kernel count:
// it must be left untouched while launchParams.isKernelSplitOperation is set,
// and must be back at one after a launch that is not a split operation.
HWTEST2_F(CommandListAppendLaunchKernelResetKernelCount, givenIsKernelSplitOperationFalseWhenAppendLaunchKernelThenResetKernelCount, IsAtLeastXeHpCore) {
Mock<::L0::Kernel> kernel;
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
ze_group_count_t groupCount{1, 1, 1};
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue));
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
CmdListKernelLaunchParams launchParams = {};
// Phase 1 - split operation: drive the kernel count to two by hand and expect
// the append to preserve it.
{
event->zeroKernelCount();
event->increaseKernelCount();
event->increaseKernelCount();
launchParams.isKernelSplitOperation = true;
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, event->getKernelCount());
}
// Phase 2 - regular launch on the same event (still carrying the count from
// phase 1): the kernel count is expected to be one afterwards.
{
launchParams.isKernelSplitOperation = false;
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, event->getKernelCount());
}
}
HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendPageFaultThenSuccessIsReturned) {
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1);

View File

@ -570,6 +570,28 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventWithScopeThenPi
EXPECT_NE(cmdList.end(), itor);
}
using CommandListTimestampEvent = Test<DeviceFixture>;
// isTimestampEventForMultiTile must answer true only when all three conditions
// hold at once: more than one partition, a non-null event, and the event's
// timestamp flag being set.
HWTEST2_F(CommandListTimestampEvent, WhenIsTimestampEventForMultiTileThenCorrectResultIsReturned, IsAtLeastSkl) {
    using WhiteBoxCommandList = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;
    auto commandList = std::make_unique<WhiteBoxCommandList>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    MockEvent timestampCandidate;

    // Single partition, no event.
    commandList->partitionCount = 1u;
    EXPECT_FALSE(commandList->isTimestampEventForMultiTile(nullptr));

    // Multiple partitions, still no event.
    commandList->partitionCount = 2u;
    EXPECT_FALSE(commandList->isTimestampEventForMultiTile(nullptr));

    // Multiple partitions, event present but not a timestamp event.
    timestampCandidate.setEventTimestampFlag(false);
    EXPECT_FALSE(commandList->isTimestampEventForMultiTile(&timestampCandidate));

    // Multiple partitions and a timestamp event: the only positive case.
    timestampCandidate.setEventTimestampFlag(true);
    EXPECT_TRUE(commandList->isTimestampEventForMultiTile(&timestampCandidate));
}
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) {
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
ze_result_t returnValue;

View File

@ -246,7 +246,7 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
event->setPacketsInUse(16u);
commandList->appendEventReset(event->toHandle());
ASSERT_EQ(1u, event->getPacketsInUse());
ASSERT_EQ(16u, event->getPacketsInUse());
auto contextOffset = event->getContextEndOffset();
auto baseAddr = event->getGpuAddress(device);
@ -353,7 +353,7 @@ HWTEST2_F(CommandListAppendEventReset,
commandList->partitionCount = packets;
returnValue = commandList->appendEventReset(event->toHandle());
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(1u, event->getPacketsInUse());
EXPECT_EQ(2u, event->getPacketsInUse());
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
auto &hwInfo = device->getNEODevice()->getHardwareInfo();

View File

@ -318,7 +318,7 @@ HWTEST2_F(CommandListAppendSignalEvent,
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
commandList->partitionCount = packets;
commandList->appendSignalEventPostWalker(event.get(), false);
commandList->appendSignalEventPostWalker(event.get());
EXPECT_EQ(packets, event->getPacketsInUse());
auto gpuAddress = event->getGpuAddress(device);

View File

@ -1490,19 +1490,19 @@ HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWh
constexpr uint32_t waStartValue = 2u;
constexpr uint32_t waEndValue = 15u;
//1st kernel 1st packet
// 1st kernel 1st packet
packetData[0].contextStart = kernelStartValue;
packetData[0].contextEnd = kernelEndValue;
packetData[0].globalStart = kernelStartValue;
packetData[0].globalEnd = kernelEndValue;
//1st kernel 2nd packet for L3 Flush
// 1st kernel 2nd packet for L3 Flush
packetData[1].contextStart = waStartValue;
packetData[1].contextEnd = waEndValue;
packetData[1].globalStart = waStartValue;
packetData[1].globalEnd = waEndValue;
//2nd kernel 1st packet
// 2nd kernel 1st packet
packetData[2].contextStart = kernelStartValue;
packetData[2].contextEnd = kernelEndValue;
packetData[2].globalStart = kernelStartValue;
@ -1534,19 +1534,19 @@ HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWh
constexpr uint32_t waStartValue = 2u;
constexpr uint32_t waEndValue = 15u;
//1st kernel 1st packet
// 1st kernel 1st packet
packetData[0].contextStart = kernelStartValue;
packetData[0].contextEnd = kernelEndValue;
packetData[0].globalStart = kernelStartValue;
packetData[0].globalEnd = kernelEndValue;
//2nd kernel 1st packet
// 2nd kernel 1st packet
packetData[1].contextStart = kernelStartValue;
packetData[1].contextEnd = kernelEndValue;
packetData[1].globalStart = kernelStartValue;
packetData[1].globalEnd = kernelEndValue;
//2nd kernel 2nd packet for L3 Flush
// 2nd kernel 2nd packet for L3 Flush
packetData[2].contextStart = waStartValue;
packetData[2].contextEnd = waEndValue;
packetData[2].globalStart = waStartValue;
@ -1573,19 +1573,19 @@ HWTEST2_F(TimestampEventCreateMultiKernel, givenOverflowingTimeStampDataOnTwoKer
uint32_t maxTimeStampValue = std::numeric_limits<uint32_t>::max();
//1st kernel 1st packet (overflowing context timestamp)
// 1st kernel 1st packet (overflowing context timestamp)
packetData[0].contextStart = maxTimeStampValue - 1;
packetData[0].contextEnd = maxTimeStampValue + 1;
packetData[0].globalStart = maxTimeStampValue - 2;
packetData[0].globalEnd = maxTimeStampValue - 1;
//2nd kernel 1st packet (overflowing global timestamp)
// 2nd kernel 1st packet (overflowing global timestamp)
packetData[1].contextStart = maxTimeStampValue - 2;
packetData[1].contextEnd = maxTimeStampValue - 1;
packetData[1].globalStart = maxTimeStampValue - 1;
packetData[1].globalEnd = maxTimeStampValue + 1;
//2nd kernel 2nd packet (overflowing context timestamp)
// 2nd kernel 2nd packet (overflowing context timestamp)
memcpy(&packetData[2], &packetData[0], sizeof(MockTimestampPackets32::Packet));
packetData[2].contextStart = maxTimeStampValue;
packetData[2].contextEnd = maxTimeStampValue + 2;
@ -2325,6 +2325,48 @@ TEST_F(EventTests, WhenResetEventThenZeroCpuTimestamps) {
EXPECT_EQ(event->gpuEndTimestamp, 0u);
}
// reset() is expected to clear the recorded GPU timestamps while leaving the
// kernel count and the reported packets-in-use exactly as they were.
TEST_F(EventTests, WhenEventResetIsCalledThenKernelCountAndPacketsUsedHaveNotBeenReset) {
auto event = std::make_unique<MockEventCompletion>(eventPool, 1u, device);
// Non-zero timestamps so that clearing them is observable.
event->gpuStartTimestamp = 10u;
event->gpuEndTimestamp = 20u;
// Start from a kernel count of zero so an unwanted reset back to one would show up.
event->zeroKernelCount();
EXPECT_EQ(ZE_RESULT_SUCCESS, event->reset());
EXPECT_EQ(0u, event->getKernelCount());
EXPECT_EQ(0u, event->getPacketsInUse());
EXPECT_EQ(event->gpuStartTimestamp, 0u);
EXPECT_EQ(event->gpuEndTimestamp, 0u);
}
// resetPackets(true) must put the event back to one kernel with one packet in
// use and clear the recorded GPU timestamps.
TEST_F(EventTests, GivenResetAllPacketsWhenResetPacketsThenOneKernelCountAndOnePacketUsed) {
    auto completionEvent = std::make_unique<MockEventCompletion>(eventPool, 1u, device);

    // Arrange: zero kernels and non-zero timestamps, so both effects are visible.
    completionEvent->zeroKernelCount();
    completionEvent->gpuStartTimestamp = 10u;
    completionEvent->gpuEndTimestamp = 20u;

    completionEvent->resetPackets(true);

    EXPECT_EQ(1u, completionEvent->getKernelCount());
    EXPECT_EQ(1u, completionEvent->getPacketsInUse());
    EXPECT_EQ(completionEvent->gpuStartTimestamp, 0u);
    EXPECT_EQ(completionEvent->gpuEndTimestamp, 0u);
}
// resetPackets(false) must clear the recorded GPU timestamps only; the kernel
// count and the packets-in-use value have to stay as they were.
TEST_F(EventTests, GivenResetAllPacketsFalseWhenResetPacketsThenKernelCountAndPacketsUsedHaveNotBeenReset) {
    auto completionEvent = std::make_unique<MockEventCompletion>(eventPool, 1u, device);

    // Arrange: zero kernels, so an unwanted reset back to one would be detected.
    completionEvent->zeroKernelCount();
    completionEvent->gpuStartTimestamp = 10u;
    completionEvent->gpuEndTimestamp = 20u;

    completionEvent->resetPackets(false);

    EXPECT_EQ(0u, completionEvent->getKernelCount());
    EXPECT_EQ(0u, completionEvent->getPacketsInUse());
    EXPECT_EQ(completionEvent->gpuStartTimestamp, 0u);
    EXPECT_EQ(completionEvent->gpuEndTimestamp, 0u);
}
TEST_F(EventSynchronizeTest, whenEventSetCsrThenCorrectCsrSet) {
auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
const auto mockCsr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());

View File

@ -495,7 +495,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), true, false);
commandList->appendEventForProfiling(event.get(), true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@ -531,7 +531,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@ -566,7 +566,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

View File

@ -59,7 +59,7 @@ PVCTEST_F(CommandListEventFenceTestsPvc, givenCommandListWithProfilingEventAfter
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(

View File

@ -411,7 +411,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@ -441,7 +441,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendSignalEventPostWalker(event.get(), false);
commandList->appendSignalEventPostWalker(event.get());
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(

View File

@ -89,7 +89,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), true, false);
commandList->appendEventForProfiling(event.get(), true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@ -125,7 +125,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@ -160,7 +160,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));