mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-06 02:18:05 +08:00
feature: replace in-order sync allocation with TimestampPacket
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f8b375cae5
commit
814de81aca
@@ -170,7 +170,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
void appendWaitOnInOrderDependency(bool relaxedOrderingAllowed);
|
||||
void appendSignalInOrderDependencyCounter();
|
||||
void appendSignalInOrderDependencyTimestampPacket();
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
|
||||
|
||||
@@ -1367,7 +1367,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
appendSignalInOrderDependencyCounter();
|
||||
appendSignalInOrderDependencyTimestampPacket();
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2203,7 +2203,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
if (signalInOrderCompletion) {
|
||||
obtainNewTimestampPacketNode();
|
||||
|
||||
appendSignalInOrderDependencyCounter();
|
||||
appendSignalInOrderDependencyTimestampPacket();
|
||||
}
|
||||
|
||||
makeResidentDummyAllocation();
|
||||
@@ -2220,7 +2220,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyTimestampPacket() {
|
||||
NEO::TimestampPacketHelper::nonStallingContextEndNodeSignal<GfxFamily>(*commandContainer.getCommandStream(), *this->timestampPacketContainer->peekNodes()[0], false);
|
||||
}
|
||||
|
||||
|
||||
@@ -174,8 +174,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
|
||||
protected:
|
||||
using BaseClass::deferredTimestampPackets;
|
||||
using BaseClass::inOrderDependencyCounter;
|
||||
using BaseClass::inOrderDependencyCounterAllocation;
|
||||
using BaseClass::timestampPacketContainer;
|
||||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
|
||||
@@ -730,17 +730,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
||||
inputRet = executeCommandListImmediate(performMigration);
|
||||
}
|
||||
}
|
||||
if (hSignalEvent) {
|
||||
Event::fromHandle(hSignalEvent)->setCsr(this->csr);
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
inOrderDependencyCounter++;
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
if (hSignalEvent) {
|
||||
Event::fromHandle(hSignalEvent)->enableInOrderExecMode(*inOrderDependencyCounterAllocation, inOrderDependencyCounter);
|
||||
if (signalEvent) {
|
||||
signalEvent->setCsr(this->csr);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
signalEvent->enableInOrderExecMode(*this->timestampPacketContainer);
|
||||
}
|
||||
}
|
||||
|
||||
return inputRet;
|
||||
}
|
||||
|
||||
@@ -819,7 +819,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
this->dependenciesPresent = false; // wait only for waitlist and in-order sync value
|
||||
this->dependenciesPresent = false; // wait only for waitlist and in-order TimestampPacket value
|
||||
}
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
@@ -1065,28 +1065,32 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
|
||||
auto numEvents = numWaitEvents + ((inOrderDependencyCounter > 0) ? 1 : 0);
|
||||
auto numEvents = numWaitEvents;
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
numEvents += static_cast<uint32_t>(this->timestampPacketContainer->peekNodes().size());
|
||||
}
|
||||
|
||||
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout) const {
|
||||
using TSPacketType = typename GfxFamily::TimestampPacketType;
|
||||
|
||||
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
|
||||
uint64_t timeDiff = 0;
|
||||
|
||||
ze_result_t status = ZE_RESULT_NOT_READY;
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto waitValue = this->inOrderDependencyCounter;
|
||||
auto node = this->timestampPacketContainer->peekNodes()[0];
|
||||
|
||||
lastHangCheckTime = std::chrono::high_resolution_clock::now();
|
||||
waitStartTime = lastHangCheckTime;
|
||||
|
||||
do {
|
||||
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
|
||||
this->csr->downloadAllocation(*node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex()));
|
||||
|
||||
if (NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
|
||||
if (NEO::WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(node->getContextEndAddress(0)), 1, std::not_equal_to<TSPacketType>())) {
|
||||
status = ZE_RESULT_SUCCESS;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -316,7 +316,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
Event::State::STATE_CLEARED,
|
||||
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
|
||||
appendSignalInOrderDependencyCounter();
|
||||
appendSignalInOrderDependencyTimestampPacket();
|
||||
}
|
||||
|
||||
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
|
||||
@@ -407,9 +407,12 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
uint64_t writeValue = 0;
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
obtainNewTimestampPacketNode();
|
||||
auto node = this->timestampPacketContainer->peekNodes()[0];
|
||||
|
||||
postSyncMode = NEO::PostSyncMode::ImmediateData;
|
||||
gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
writeValue = this->inOrderDependencyCounter + 1;
|
||||
gpuWriteAddress = node->getGpuAddress() + node->getContextEndOffset();
|
||||
writeValue = 0;
|
||||
}
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args);
|
||||
|
||||
@@ -78,8 +78,6 @@ ze_result_t CommandListImp::destroy() {
|
||||
}
|
||||
}
|
||||
|
||||
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation);
|
||||
|
||||
delete this;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -240,18 +238,6 @@ void CommandListImp::enableInOrderExecution() {
|
||||
timestampPacketContainer = std::make_unique<NEO::TimestampPacketContainer>();
|
||||
deferredTimestampPackets = std::make_unique<NEO::TimestampPacketContainer>();
|
||||
|
||||
auto device = this->device->getNEODevice();
|
||||
|
||||
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), sizeof(uint32_t), NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
|
||||
|
||||
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
|
||||
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
|
||||
|
||||
commandContainer.addToResidencyContainer(inOrderDependencyCounterAllocation);
|
||||
|
||||
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
|
||||
inOrderExecutionEnabled = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,8 +41,6 @@ struct CommandListImp : CommandList {
|
||||
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
|
||||
std::unique_ptr<NEO::TimestampPacketContainer> deferredTimestampPackets;
|
||||
std::unique_ptr<NEO::TimestampPacketContainer> timestampPacketContainer;
|
||||
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
|
||||
uint32_t inOrderDependencyCounter = 0;
|
||||
bool inOrderExecutionEnabled = false;
|
||||
|
||||
~CommandListImp() override = default;
|
||||
|
||||
@@ -391,10 +391,14 @@ void Event::setIsCompleted() {
|
||||
}
|
||||
}
|
||||
|
||||
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue) {
|
||||
void Event::enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes) {
|
||||
inOrderExecEvent = true;
|
||||
inOrderExecSignalValue = signalValue;
|
||||
inOrderExecDataAllocation = &inOrderDependenciesAllocation;
|
||||
|
||||
if (!inOrderTimestampPacket) {
|
||||
inOrderTimestampPacket = std::make_unique<NEO::TimestampPacketContainer>();
|
||||
}
|
||||
|
||||
inOrderTimestampPacket->assignAndIncrementNodesRefCounts(inOrderSyncNodes);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/timestamp_packet_container.h"
|
||||
#include "shared/source/helpers/timestamp_packet_size_control.h"
|
||||
#include "shared/source/memory_manager/multi_graphics_allocation.h"
|
||||
|
||||
@@ -203,7 +204,7 @@ struct Event : _ze_event_handle_t {
|
||||
// Stores the given MetricStreamer pointer in this event's `metricStreamer` member.
void setMetricStreamer(MetricStreamer *metricStreamer) {
|
||||
this->metricStreamer = metricStreamer;
|
||||
}
|
||||
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue);
|
||||
void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes);
|
||||
|
||||
protected:
|
||||
// Protected constructor: records the device, the owning event pool and this event's index; the body is empty.
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
|
||||
@@ -235,13 +236,12 @@ struct Event : _ze_event_handle_t {
|
||||
Device *device = nullptr;
|
||||
EventPool *eventPool = nullptr;
|
||||
Kernel *kernelWithPrintf = nullptr;
|
||||
NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr;
|
||||
std::unique_ptr<NEO::TimestampPacketContainer> inOrderTimestampPacket;
|
||||
|
||||
uint32_t maxKernelCount = 0;
|
||||
uint32_t kernelCount = 1u;
|
||||
uint32_t maxPacketCount = 0;
|
||||
uint32_t totalEventSize = 0;
|
||||
uint32_t inOrderExecSignalValue = 0;
|
||||
|
||||
ze_event_scope_flags_t signalScope = 0u;
|
||||
ze_event_scope_flags_t waitScope = 0u;
|
||||
|
||||
@@ -130,9 +130,9 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
|
||||
auto hostAddress = static_cast<uint32_t *>(this->inOrderExecDataAllocation->getUnderlyingBuffer());
|
||||
auto hostAddress = static_cast<TagSizeT const *>(this->inOrderTimestampPacket->peekNodes()[0]->getContextEndAddress(0));
|
||||
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint32_t>())) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(hostAddress, 1, std::not_equal_to<TagSizeT>())) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
@@ -207,7 +207,10 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
|
||||
for (auto &csr : csrs) {
|
||||
csr->downloadAllocation(this->getAllocation(this->device));
|
||||
if (inOrderExecEvent) {
|
||||
csr->downloadAllocation(*this->inOrderExecDataAllocation);
|
||||
auto node = this->inOrderTimestampPacket->peekNodes()[0];
|
||||
auto nodeAlloc = node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex());
|
||||
|
||||
csr->downloadAllocation(*nodeAlloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -378,9 +381,8 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::reset() {
|
||||
if (inOrderExecEvent) {
|
||||
inOrderExecDataAllocation = nullptr;
|
||||
inOrderExecSignalValue = 0;
|
||||
inOrderExecEvent = false;
|
||||
inOrderTimestampPacket->releaseNodes();
|
||||
}
|
||||
this->resetCompletionStatus();
|
||||
this->resetDeviceCompletionData(false);
|
||||
|
||||
@@ -161,8 +161,6 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::getDcFlushRequired;
|
||||
using BaseClass::getHostPtrAlloc;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::inOrderDependencyCounter;
|
||||
using BaseClass::inOrderDependencyCounterAllocation;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::isSyncModeQueue;
|
||||
using BaseClass::isTbxMode;
|
||||
|
||||
@@ -667,9 +667,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur
|
||||
|
||||
struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||
struct MockEvent : public EventImp<uint32_t> {
|
||||
using EventImp<uint32_t>::inOrderExecDataAllocation;
|
||||
using EventImp<uint32_t>::inOrderTimestampPacket;
|
||||
using EventImp<uint32_t>::inOrderExecEvent;
|
||||
using EventImp<uint32_t>::inOrderExecSignalValue;
|
||||
};
|
||||
|
||||
void SetUp() override {
|
||||
@@ -679,6 +678,12 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||
createKernel();
|
||||
}
|
||||
|
||||
// Fixture teardown: empties the `events` vector first, then runs the base
// CommandListAppendLaunchKernel::TearDown().
// NOTE(review): presumably the events must be destroyed before the base fixture
// tears down, because in-order events hold references to timestamp-packet
// nodes (see Event::enableInOrderExecMode) — confirm.
void TearDown() override {
|
||||
events.clear();
|
||||
|
||||
CommandListAppendLaunchKernel::TearDown();
|
||||
}
|
||||
|
||||
std::unique_ptr<L0::EventPool> createEvents(uint32_t numEvents, bool timestampEvent) {
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
@@ -723,15 +728,26 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||
return cmdList;
|
||||
}
|
||||
|
||||
// Test helper: fills `node` with the four-element record {1, 1, contextEndValue, 1}
// by calling assignDataToAllTimestamps(0, ...).
// Tests in this file read the value back through node->getContextEndValue(0),
// so the third element is presumably the context-end slot — TODO confirm
// against the timestamp packet layout.
template <typename GfxFamily>
|
||||
void setTimestampPacketContextEndValue(TagNodeBase *node, typename GfxFamily::TimestampPacketType contextEndValue) {
|
||||
typename GfxFamily::TimestampPacketType data[] = {1, 1, contextEndValue, 1};
|
||||
|
||||
node->assignDataToAllTimestamps(0, data);
|
||||
}
|
||||
|
||||
// Test helper: returns the first node (index 0) of the immediate command list's
// timestampPacketContainer. There is no bounds check, so the container must
// already hold at least one node when this is called.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
TagNodeBase *getLatestTsNode(WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *immCmdList) {
|
||||
return immCmdList->timestampPacketContainer->peekNodes()[0];
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
uint32_t createdCmdLists = 0;
|
||||
std::vector<std::unique_ptr<MockEvent>> events;
|
||||
std::vector<std::unique_ptr<Mock<CommandQueue>>> mockCmdQs;
|
||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
||||
ze_group_count_t groupCount = {3, 2, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
std::vector<std::unique_ptr<MockEvent>> events;
|
||||
};
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEventState, IsAtLeastXeHpCore) {
|
||||
@@ -739,17 +755,19 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven
|
||||
|
||||
auto eventPool = createEvents(3, false);
|
||||
|
||||
EXPECT_EQ(nullptr, events[0]->inOrderTimestampPacket.get());
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
|
||||
EXPECT_TRUE(events[0]->inOrderExecEvent);
|
||||
EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter);
|
||||
EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation);
|
||||
ASSERT_NE(nullptr, events[0]->inOrderTimestampPacket.get());
|
||||
EXPECT_EQ(1u, events[0]->inOrderTimestampPacket->peekNodes().size());
|
||||
|
||||
events[0]->reset();
|
||||
|
||||
EXPECT_FALSE(events[0]->inOrderExecEvent);
|
||||
EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
|
||||
EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr);
|
||||
ASSERT_NE(nullptr, events[0]->inOrderTimestampPacket.get());
|
||||
EXPECT_EQ(0u, events[0]->inOrderTimestampPacket->peekNodes().size());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) {
|
||||
@@ -763,7 +781,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
auto previousNode = immCmdList->timestampPacketContainer->peekNodes()[0];
|
||||
auto previousNode = getLatestTsNode(immCmdList.get());
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
@@ -833,27 +851,22 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleTimestam
|
||||
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
EXPECT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation);
|
||||
EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType());
|
||||
|
||||
EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->storeMakeResidentAllocations = true;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
auto node0 = immCmdList->timestampPacketContainer->peekNodes()[0];
|
||||
auto node0 = getLatestTsNode(immCmdList.get());
|
||||
ultCsr->getTimestampPacketAllocator()->getTag();
|
||||
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
auto node1 = immCmdList->timestampPacketContainer->peekNodes()[0];
|
||||
auto node1 = getLatestTsNode(immCmdList.get());
|
||||
ultCsr->getTimestampPacketAllocator()->getTag();
|
||||
|
||||
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
|
||||
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node1->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
auto node2 = immCmdList->timestampPacketContainer->peekNodes()[0];
|
||||
auto node2 = getLatestTsNode(immCmdList.get());
|
||||
ultCsr->getTimestampPacketAllocator()->getTag();
|
||||
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); // not used anymore
|
||||
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node1->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]);
|
||||
@@ -902,14 +915,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGettingNewNodeThenSwapWithDef
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa0 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
|
||||
uint64_t nodeGpuVa0 = getLatestTsNode(immCmdList.get())->getGpuAddress();
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa1 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
|
||||
uint64_t nodeGpuVa1 = getLatestTsNode(immCmdList.get())->getGpuAddress();
|
||||
|
||||
EXPECT_NE(nodeGpuVa0, nodeGpuVa1);
|
||||
|
||||
@@ -918,7 +931,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGettingNewNodeThenSwapWithDef
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(2u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa2 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
|
||||
uint64_t nodeGpuVa2 = getLatestTsNode(immCmdList.get())->getGpuAddress();
|
||||
|
||||
EXPECT_NE(nodeGpuVa0, nodeGpuVa2);
|
||||
EXPECT_NE(nodeGpuVa1, nodeGpuVa2);
|
||||
@@ -953,7 +966,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
nodeGpuVa0 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
|
||||
nodeGpuVa0 = getLatestTsNode(immCmdList.get())->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation());
|
||||
EXPECT_EQ(nodeGpuVa0, postSync.getDestinationAddress());
|
||||
@@ -979,7 +992,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa1 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress();
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
uint64_t nodeGpuVa1 = getLatestTsNode(immCmdList.get())->getGpuAddress();
|
||||
EXPECT_NE(nodeGpuVa0, nodeGpuVa1);
|
||||
EXPECT_EQ(nodeGpuVa0, immCmdList->deferredTimestampPackets->peekNodes()[0]->getGpuAddress());
|
||||
|
||||
@@ -1000,20 +1014,17 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(++semaphoreCmd);
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(nodeGpuVa1 + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(nodeGpuVa1 + node->getContextEndOffset(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword0());
|
||||
}
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
|
||||
*hostAddress = 1;
|
||||
setTimestampPacketContextEndValue<FamilyType>(node, 1);
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1));
|
||||
|
||||
*hostAddress = 2;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
|
||||
|
||||
*hostAddress = 3;
|
||||
setTimestampPacketContextEndValue<FamilyType>(node, 0x12345);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
|
||||
}
|
||||
|
||||
@@ -1059,7 +1070,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa0 = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
uint64_t nodeGpuVa0 = node->getGpuAddress() + node->getContextEndOffset();
|
||||
|
||||
EXPECT_EQ(nodeGpuVa0, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
@@ -1136,7 +1148,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
;
|
||||
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
|
||||
|
||||
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
@@ -1166,14 +1180,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenS
|
||||
|
||||
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*pcItor);
|
||||
|
||||
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
|
||||
auto gpuAddress = node->getGpuAddress() + node->getContextEndOffset();
|
||||
auto lowAddress = static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL);
|
||||
auto highAddress = static_cast<uint32_t>(gpuAddress >> 32);
|
||||
|
||||
EXPECT_EQ(lowAddress, pcCmd->getAddress());
|
||||
EXPECT_EQ(highAddress, pcCmd->getAddressHigh());
|
||||
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pcCmd->getPostSyncOperation());
|
||||
EXPECT_EQ(2u, pcCmd->getImmediateData());
|
||||
EXPECT_EQ(0u, pcCmd->getImmediateData());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) {
|
||||
@@ -1183,8 +1199,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddress = 0;
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
|
||||
setTimestampPacketContextEndValue<FamilyType>(node, 1);
|
||||
|
||||
const uint32_t failCounter = 3;
|
||||
uint32_t callCounter = 0;
|
||||
@@ -1193,7 +1210,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
|
||||
callCounter++;
|
||||
if (callCounter >= failCounter && !forceFail) {
|
||||
(*hostAddress)++;
|
||||
setTimestampPacketContextEndValue<FamilyType>(node, 0x123);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1203,7 +1220,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
EXPECT_EQ(1u, callCounter);
|
||||
EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
EXPECT_EQ(1u, node->getContextEndValue(0));
|
||||
}
|
||||
|
||||
// timeout - not ready
|
||||
@@ -1213,7 +1230,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
EXPECT_TRUE(callCounter > 1);
|
||||
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
EXPECT_EQ(1u, node->getContextEndValue(0));
|
||||
}
|
||||
|
||||
// gpu hang
|
||||
@@ -1224,7 +1241,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
EXPECT_TRUE(callCounter > 1);
|
||||
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
EXPECT_EQ(1u, node->getContextEndValue(0));
|
||||
}
|
||||
|
||||
// success
|
||||
@@ -1237,7 +1254,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
EXPECT_EQ(failCounter, callCounter);
|
||||
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
EXPECT_EQ(0x123u, node->getContextEndValue(0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1250,16 +1267,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddress = 0;
|
||||
|
||||
const uint32_t failCounter = 3;
|
||||
uint32_t callCounter = 0;
|
||||
|
||||
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
|
||||
callCounter++;
|
||||
if (callCounter >= failCounter) {
|
||||
(*hostAddress)++;
|
||||
auto node = getLatestTsNode<gfxCoreFamily>(immCmdList.get());
|
||||
setTimestampPacketContextEndValue<FamilyType>(node, 0x123);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1278,8 +1293,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
|
||||
|
||||
immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, nullptr, 1, &eventHandle, false);
|
||||
|
||||
auto node = getLatestTsNode<gfxCoreFamily>(immCmdList.get());
|
||||
|
||||
EXPECT_EQ(3u, callCounter);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
EXPECT_EQ(0x123u, node->getContextEndValue(0));
|
||||
EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
|
||||
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
|
||||
@@ -1293,9 +1310,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddress = 0;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
void *deviceAlloc = nullptr;
|
||||
@@ -1351,7 +1365,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
|
||||
|
||||
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
@@ -1406,7 +1421,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
|
||||
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||
|
||||
uint64_t nodeGpuVa = immCmdList->timestampPacketContainer->peekNodes()[0]->getGpuAddress() + immCmdList->timestampPacketContainer->peekNodes()[0]->getContextEndOffset();
|
||||
auto node = getLatestTsNode(immCmdList.get());
|
||||
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
|
||||
|
||||
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
|
||||
@@ -2864,16 +2864,23 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostEventSyncThenExpectDownloadEventAl
|
||||
downloadAllocationTrack[&gfxAllocation]++;
|
||||
};
|
||||
|
||||
uint32_t storage = 1;
|
||||
auto node = ultCsr->getTimestampPacketAllocator()->getTag();
|
||||
NEO::TimestampPacketContainer container;
|
||||
container.add(node);
|
||||
|
||||
NEO::MockGraphicsAllocation allocation(&storage, sizeof(storage));
|
||||
event->enableInOrderExecMode(allocation, 1);
|
||||
typename FamilyType::TimestampPacketType data[] = {0, 0, 0, 0};
|
||||
|
||||
node->assignDataToAllTimestamps(0, data);
|
||||
|
||||
event->enableInOrderExecMode(container);
|
||||
|
||||
auto allocation = node->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
|
||||
|
||||
constexpr uint64_t timeout = std::numeric_limits<std::uint64_t>::max();
|
||||
auto result = event->hostSynchronize(timeout);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_NE(0u, downloadAllocationTrack[&allocation]);
|
||||
EXPECT_NE(0u, downloadAllocationTrack[allocation]);
|
||||
EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user