feature: replace in-order sync allocation with TimestampPacket

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-05-24 15:23:48 +00:00
committed by Compute-Runtime-Automation
parent f8b375cae5
commit 814de81aca
16 changed files with 157 additions and 105 deletions

View File

@@ -170,7 +170,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
void appendWaitOnInOrderDependency(bool relaxedOrderingAllowed);
void appendSignalInOrderDependencyCounter();
void appendSignalInOrderDependencyTimestampPacket();
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,

View File

@@ -1367,7 +1367,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter();
appendSignalInOrderDependencyTimestampPacket();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -2203,7 +2203,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (signalInOrderCompletion) {
obtainNewTimestampPacketNode();
appendSignalInOrderDependencyCounter();
appendSignalInOrderDependencyTimestampPacket();
}
makeResidentDummyAllocation();
@@ -2220,7 +2220,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
}
// Emits the in-order completion signal into this command list's command stream.
// The body hands the first node held in timestampPacketContainer to
// NEO::TimestampPacketHelper::nonStallingContextEndNodeSignal, so callers are expected to have a
// current node in the container before calling this.
// NOTE(review): the meaning of the trailing `false` argument is not visible here - confirm against the helper.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyTimestampPacket() {
NEO::TimestampPacketHelper::nonStallingContextEndNodeSignal<GfxFamily>(*commandContainer.getCommandStream(), *this->timestampPacketContainer->peekNodes()[0], false);
}

View File

@@ -174,8 +174,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
protected:
using BaseClass::deferredTimestampPackets;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::timestampPacketContainer;
void printKernelsPrintfOutput(bool hangDetected);

View File

@@ -730,17 +730,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
inputRet = executeCommandListImmediate(performMigration);
}
}
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->setCsr(this->csr);
}
if (isInOrderExecutionEnabled()) {
inOrderDependencyCounter++;
auto signalEvent = Event::fromHandle(hSignalEvent);
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->enableInOrderExecMode(*inOrderDependencyCounterAllocation, inOrderDependencyCounter);
if (signalEvent) {
signalEvent->setCsr(this->csr);
if (isInOrderExecutionEnabled()) {
signalEvent->enableInOrderExecMode(*this->timestampPacketContainer);
}
}
return inputRet;
}
@@ -819,7 +819,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
}
if (isInOrderExecutionEnabled()) {
this->dependenciesPresent = false; // wait only for waitlist and in-order sync value
this->dependenciesPresent = false; // wait only for waitlist and in-order TimestampPacket value
}
if (numWaitEvents > 0) {
@@ -1065,28 +1065,32 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
// Reports whether relaxed-ordering dispatch may be used for a submission that waits on numWaitEvents events.
// The final decision is delegated to NEO::RelaxedOrderingHelper, which receives this->csr and the
// effective dependency count computed below.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
auto numEvents = numWaitEvents + ((inOrderDependencyCounter > 0) ? 1 : 0);
auto numEvents = numWaitEvents;
// With in-order execution enabled, every node currently held in timestampPacketContainer
// is counted as one additional dependency on top of the caller's wait list.
if (this->isInOrderExecutionEnabled()) {
numEvents += static_cast<uint32_t>(this->timestampPacketContainer->peekNodes().size());
}
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout) const {
using TSPacketType = typename GfxFamily::TimestampPacketType;
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
uint64_t timeDiff = 0;
ze_result_t status = ZE_RESULT_NOT_READY;
auto hostAddress = static_cast<uint32_t *>(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto waitValue = this->inOrderDependencyCounter;
auto node = this->timestampPacketContainer->peekNodes()[0];
lastHangCheckTime = std::chrono::high_resolution_clock::now();
waitStartTime = lastHangCheckTime;
do {
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
this->csr->downloadAllocation(*node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex()));
if (NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
if (NEO::WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(node->getContextEndAddress(0)), 1, std::not_equal_to<TSPacketType>())) {
status = ZE_RESULT_SUCCESS;
break;
}

View File

@@ -316,7 +316,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
Event::State::STATE_CLEARED,
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
appendSignalInOrderDependencyCounter();
appendSignalInOrderDependencyTimestampPacket();
}
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
@@ -407,9 +407,12 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
uint64_t writeValue = 0;
if (this->inOrderExecutionEnabled) {
obtainNewTimestampPacketNode();
auto node = this->timestampPacketContainer->peekNodes()[0];
postSyncMode = NEO::PostSyncMode::ImmediateData;
gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
writeValue = this->inOrderDependencyCounter + 1;
gpuWriteAddress = node->getGpuAddress() + node->getContextEndOffset();
writeValue = 0;
}
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args);

View File

@@ -78,8 +78,6 @@ ze_result_t CommandListImp::destroy() {
}
}
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation);
delete this;
return ZE_RESULT_SUCCESS;
}
@@ -240,18 +238,6 @@ void CommandListImp::enableInOrderExecution() {
timestampPacketContainer = std::make_unique<NEO::TimestampPacketContainer>();
deferredTimestampPackets = std::make_unique<NEO::TimestampPacketContainer>();
auto device = this->device->getNEODevice();
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), sizeof(uint32_t), NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
commandContainer.addToResidencyContainer(inOrderDependencyCounterAllocation);
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
inOrderExecutionEnabled = true;
}

View File

@@ -41,8 +41,6 @@ struct CommandListImp : CommandList {
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
std::unique_ptr<NEO::TimestampPacketContainer> deferredTimestampPackets;
std::unique_ptr<NEO::TimestampPacketContainer> timestampPacketContainer;
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
uint32_t inOrderDependencyCounter = 0;
bool inOrderExecutionEnabled = false;
~CommandListImp() override = default;

View File

@@ -391,10 +391,14 @@ void Event::setIsCompleted() {
}
}
// Marks this event as an in-order execution event and attaches the nodes from inOrderSyncNodes to it.
// inOrderSyncNodes: container whose nodes the event should track; it is taken by const reference.
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue) {
void Event::enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes) {
inOrderExecEvent = true;
inOrderExecSignalValue = signalValue;
inOrderExecDataAllocation = &inOrderDependenciesAllocation;
// The event's own container is created lazily on first use and kept for later calls.
if (!inOrderTimestampPacket) {
inOrderTimestampPacket = std::make_unique<NEO::TimestampPacketContainer>();
}
// Going by the callee's name, this takes the nodes over and bumps their reference counts,
// presumably so they stay valid while the event refers to them - TODO confirm in TimestampPacketContainer.
inOrderTimestampPacket->assignAndIncrementNodesRefCounts(inOrderSyncNodes);
}
} // namespace L0

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "shared/source/helpers/timestamp_packet_container.h"
#include "shared/source/helpers/timestamp_packet_size_control.h"
#include "shared/source/memory_manager/multi_graphics_allocation.h"
@@ -203,7 +204,7 @@ struct Event : _ze_event_handle_t {
// Stores the given MetricStreamer pointer on the event; no ownership handling is visible here.
void setMetricStreamer(MetricStreamer *metricStreamer) {
this->metricStreamer = metricStreamer;
}
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue);
void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes);
protected:
// Protected constructor: records the owning event pool, the event's index and the device; has no other effects.
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
@@ -235,13 +236,12 @@ struct Event : _ze_event_handle_t {
Device *device = nullptr;
EventPool *eventPool = nullptr;
Kernel *kernelWithPrintf = nullptr;
NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr;
std::unique_ptr<NEO::TimestampPacketContainer> inOrderTimestampPacket;
uint32_t maxKernelCount = 0;
uint32_t kernelCount = 1u;
uint32_t maxPacketCount = 0;
uint32_t totalEventSize = 0;
uint32_t inOrderExecSignalValue = 0;
ze_event_scope_flags_t signalScope = 0u;
ze_event_scope_flags_t waitScope = 0u;

View File

@@ -130,9 +130,9 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
auto hostAddress = static_cast<uint32_t *>(this->inOrderExecDataAllocation->getUnderlyingBuffer());
auto hostAddress = static_cast<TagSizeT const *>(this->inOrderTimestampPacket->peekNodes()[0]->getContextEndAddress(0));
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint32_t>())) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(hostAddress, 1, std::not_equal_to<TagSizeT>())) {
return ZE_RESULT_NOT_READY;
}
@@ -207,7 +207,10 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
for (auto &csr : csrs) {
csr->downloadAllocation(this->getAllocation(this->device));
if (inOrderExecEvent) {
csr->downloadAllocation(*this->inOrderExecDataAllocation);
auto node = this->inOrderTimestampPacket->peekNodes()[0];
auto nodeAlloc = node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex());
csr->downloadAllocation(*nodeAlloc);
}
}
}
@@ -378,9 +381,8 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::reset() {
if (inOrderExecEvent) {
inOrderExecDataAllocation = nullptr;
inOrderExecSignalValue = 0;
inOrderExecEvent = false;
inOrderTimestampPacket->releaseNodes();
}
this->resetCompletionStatus();
this->resetDeviceCompletionData(false);

View File

@@ -161,8 +161,6 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::getDcFlushRequired;
using BaseClass::getHostPtrAlloc;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;

View File

@@ -667,9 +667,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur
struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
// Test-only event type: the using-declarations re-export in-order bookkeeping members of
// EventImp<uint32_t> so the tests below can read them directly.
// NOTE(review): presumably these members are non-public in the base class - confirm against event.h.
struct MockEvent : public EventImp<uint32_t> {
using EventImp<uint32_t>::inOrderExecDataAllocation;
using EventImp<uint32_t>::inOrderTimestampPacket;
using EventImp<uint32_t>::inOrderExecEvent;
using EventImp<uint32_t>::inOrderExecSignalValue;
};
void SetUp() override {
@@ -679,6 +678,12 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
createKernel();
}
// Fixture teardown: destroys the events created by the test first, then runs the base fixture teardown.
void TearDown() override {
// Order matters: events are dropped before the base teardown runs.
// NOTE(review): presumably because events can hold timestamp packet nodes that must be released
// while the device and its allocators are still alive - confirm.
events.clear();
CommandListAppendLaunchKernel::TearDown();
}
std::unique_ptr<L0::EventPool> createEvents(uint32_t numEvents, bool timestampEvent) {
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
@@ -723,15 +728,26 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
return cmdList;
}
// Test helper: overwrites the timestamp data of `node` (index argument 0) with a four-element
// pattern in which the third element is contextEndValue and the remaining elements are 1.
// NOTE(review): the third element is assumed to be the context-end slot, as the helper's name suggests.
template <typename GfxFamily>
void setTimestampPacketContextEndValue(TagNodeBase *node, typename GfxFamily::TimestampPacketType contextEndValue) {
    using PacketValueType = typename GfxFamily::TimestampPacketType;

    PacketValueType packetValues[4] = {1, 1, contextEndValue, 1};
    node->assignDataToAllTimestamps(0, packetValues);
}
// Test helper: returns the node stored at index 0 of the command list's timestampPacketContainer.
// The container is not checked for emptiness, so the caller must ensure a node exists.
template <GFXCORE_FAMILY gfxCoreFamily>
TagNodeBase *getLatestTsNode(WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *immCmdList) {
    const auto &currentNodes = immCmdList->timestampPacketContainer->peekNodes();
    return currentNodes[0];
}
DebugManagerStateRestore restorer;
uint32_t createdCmdLists = 0;
std::vector<std::unique_ptr<MockEvent>> events;
std::vector<std::unique_ptr<Mock<CommandQueue>>> mockCmdQs;
ze_result_t returnValue = ZE_RESULT_SUCCESS;
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
std::vector<std::unique_ptr<MockEvent>> events;
};
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEventState, IsAtLeastXeHpCore) {
@@ -739,17 +755,19 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven
auto eventPool = createEvents(3, false);
EXPECT_EQ(nullptr, events[0]->inOrderTimestampPacket.get());
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_TRUE(events[0]->inOrderExecEvent);
EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation);
ASSERT_NE(nullptr, events[0]->inOrderTimestampPacket.get());
EXPECT_EQ(1u, events[0]->inOrderTimestampPacket->peekNodes().size());
events[0]->reset();
EXPECT_FALSE(events[0]->inOrderExecEvent);
EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr);
ASSERT_NE(nullptr, events[0]->inOrderTimestampPacket.get());
EXPECT_EQ(0u, events[0]->inOrderTimestampPacket->peekNodes().size());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) {
@@ -763,7 +781,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
auto offset = cmdStream->getUsed();
auto previousNode = immCmdList->timestampPacketContainer->peekNodes()[0];
auto previousNode = getLatestTsNode(immCmdList.get());
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
@@ -833,27 +851,22 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleTimestam
auto immCmdList = createImmCmdList<gfxCoreFamily>();
EXPECT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation);
EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType());
EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
ultCsr->storeMakeResidentAllocations = true;
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto node0 = immCmdList->timestampPacketContainer->peekNodes()[0];
auto node0 = getLatestTsNode(immCmdList.get());
ultCsr->getTimestampPacketAllocator()->getTag();
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto node1 = immCmdList->timestampPacketContainer->peekNodes()[0];
auto node1 = getLatestTsNode(immCmdList.get());
ultCsr->getTimestampPacketAllocator()->getTag();
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node1->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto node2 = immCmdList->timestampPacketContainer->peekNodes()[0];
auto node2 = getLatestTsNode(immCmdList.get());
ultCsr->getTimestampPacketAllocator()->getTag();
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); // not used anymore
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node1->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
@@ -902,14 +915,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGettingNewNodeThenSwapWithDef
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa0 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
uint64_t nodeGpuVa0 = getLatestTsNode(immCmdList.get())->getGpuAddress();
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa1 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
uint64_t nodeGpuVa1 = getLatestTsNode(immCmdList.get())->getGpuAddress();
EXPECT_NE(nodeGpuVa0, nodeGpuVa1);
@@ -918,7 +931,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGettingNewNodeThenSwapWithDef
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(2u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa2 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
uint64_t nodeGpuVa2 = getLatestTsNode(immCmdList.get())->getGpuAddress();
EXPECT_NE(nodeGpuVa0, nodeGpuVa2);
EXPECT_NE(nodeGpuVa1, nodeGpuVa2);
@@ -953,7 +966,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
nodeGpuVa0 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
nodeGpuVa0 = getLatestTsNode(immCmdList.get())->getGpuAddress();
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation());
EXPECT_EQ(nodeGpuVa0, postSync.getDestinationAddress());
@@ -979,7 +992,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa1 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
auto node = getLatestTsNode(immCmdList.get());
uint64_t nodeGpuVa1 = getLatestTsNode(immCmdList.get())->getGpuAddress();
EXPECT_NE(nodeGpuVa0, nodeGpuVa1);
EXPECT_EQ(nodeGpuVa0, immCmdList->deferredTimestampPackets->peekNodes()[0]->getGpuAddress());
@@ -1000,20 +1014,17 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(++semaphoreCmd);
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(nodeGpuVa1 + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset(), sdiCmd->getAddress());
EXPECT_EQ(nodeGpuVa1 + node->getContextEndOffset(), sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getDataDword0());
}
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto node = getLatestTsNode(immCmdList.get());
*hostAddress = 1;
setTimestampPacketContextEndValue<FamilyType>(node, 1);
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1));
*hostAddress = 2;
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
*hostAddress = 3;
setTimestampPacketContextEndValue<FamilyType>(node, 0x12345);
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
}
@@ -1059,7 +1070,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa0 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
auto node = getLatestTsNode(immCmdList.get());
uint64_t nodeGpuVa0 = node->getGpuAddress() + node->getContextEndOffset();
EXPECT_EQ(nodeGpuVa0, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
@@ -1136,7 +1148,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
auto node = getLatestTsNode(immCmdList.get());
;
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
@@ -1166,14 +1180,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenS
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*pcItor);
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto node = getLatestTsNode(immCmdList.get());
auto gpuAddress = node->getGpuAddress() + node->getContextEndOffset();
auto lowAddress = static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL);
auto highAddress = static_cast<uint32_t>(gpuAddress >> 32);
EXPECT_EQ(lowAddress, pcCmd->getAddress());
EXPECT_EQ(highAddress, pcCmd->getAddressHigh());
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pcCmd->getPostSyncOperation());
EXPECT_EQ(2u, pcCmd->getImmediateData());
EXPECT_EQ(0u, pcCmd->getImmediateData());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) {
@@ -1183,8 +1199,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddress = 0;
auto node = getLatestTsNode(immCmdList.get());
setTimestampPacketContextEndValue<FamilyType>(node, 1);
const uint32_t failCounter = 3;
uint32_t callCounter = 0;
@@ -1193,7 +1210,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
callCounter++;
if (callCounter >= failCounter && !forceFail) {
(*hostAddress)++;
setTimestampPacketContextEndValue<FamilyType>(node, 0x123);
}
};
@@ -1203,7 +1220,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
EXPECT_EQ(1u, callCounter);
EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(0u, *hostAddress);
EXPECT_EQ(1u, node->getContextEndValue(0));
}
// timeout - not ready
@@ -1213,7 +1230,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
EXPECT_TRUE(callCounter > 1);
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
EXPECT_EQ(0u, *hostAddress);
EXPECT_EQ(1u, node->getContextEndValue(0));
}
// gpu hang
@@ -1224,7 +1241,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
EXPECT_TRUE(callCounter > 1);
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
EXPECT_EQ(0u, *hostAddress);
EXPECT_EQ(1u, node->getContextEndValue(0));
}
// success
@@ -1237,7 +1254,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
EXPECT_EQ(failCounter, callCounter);
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(1u, *hostAddress);
EXPECT_EQ(0x123u, node->getContextEndValue(0));
}
}
@@ -1250,16 +1267,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
auto eventHandle = events[0]->toHandle();
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddress = 0;
const uint32_t failCounter = 3;
uint32_t callCounter = 0;
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
callCounter++;
if (callCounter >= failCounter) {
(*hostAddress)++;
auto node = getLatestTsNode<gfxCoreFamily>(immCmdList.get());
setTimestampPacketContextEndValue<FamilyType>(node, 0x123);
}
};
@@ -1278,8 +1293,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, nullptr, 1, &eventHandle, false);
auto node = getLatestTsNode<gfxCoreFamily>(immCmdList.get());
EXPECT_EQ(3u, callCounter);
EXPECT_EQ(1u, *hostAddress);
EXPECT_EQ(0x123u, node->getContextEndValue(0));
EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
@@ -1293,9 +1310,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddress = 0;
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
void *deviceAlloc = nullptr;
@@ -1351,7 +1365,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
auto node = getLatestTsNode(immCmdList.get());
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
@@ -1406,7 +1421,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
uint64_t nodeGpuVa = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
auto node = getLatestTsNode(immCmdList.get());
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());

View File

@@ -2864,16 +2864,23 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostEventSyncThenExpectDownloadEventAl
downloadAllocationTrack[&gfxAllocation]++;
};
uint32_t storage = 1;
auto node = ultCsr->getTimestampPacketAllocator()->getTag();
NEO::TimestampPacketContainer container;
container.add(node);
NEO::MockGraphicsAllocation allocation(&storage, sizeof(storage));
event->enableInOrderExecMode(allocation, 1);
typename FamilyType::TimestampPacketType data[] = {0, 0, 0, 0};
node->assignDataToAllTimestamps(0, data);
event->enableInOrderExecMode(container);
auto allocation = node->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
constexpr uint64_t timeout = std::numeric_limits<std::uint64_t>::max();
auto result = event->hostSynchronize(timeout);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, downloadAllocationTrack[&allocation]);
EXPECT_NE(0u, downloadAllocationTrack[allocation]);
EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount);
}