mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-01 12:33:12 +08:00
fix: improve TBX downloading after L0 Event sync
Related-To: HSD-18038498579 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4b01058706
commit
696b02bfd3
@@ -1015,9 +1015,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
}
|
||||
|
||||
if (this->isTbxMode && (status == ZE_RESULT_SUCCESS)) {
|
||||
mainQueueCsr->downloadAllocations();
|
||||
mainQueueCsr->downloadAllocations(true);
|
||||
if (isCopyOffloadEnabled()) {
|
||||
copyOffloadCsr->downloadAllocations();
|
||||
copyOffloadCsr->downloadAllocations(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ struct EventImp : public Event {
|
||||
|
||||
protected:
|
||||
ze_result_t waitForUserFence(uint64_t timeout);
|
||||
void downloadAllTbxAllocations();
|
||||
|
||||
bool handlePreQueryStatusOperationsAndCheckCompletion();
|
||||
bool tbxDownload(NEO::CommandStreamReceiver &csr, bool &downloadedAllocation, bool &downloadedInOrdedAllocation);
|
||||
|
||||
@@ -250,12 +250,38 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
void EventImp<TagSizeT>::downloadAllTbxAllocations() {
|
||||
for (auto &csr : csrs) {
|
||||
csr->downloadAllocations(true);
|
||||
}
|
||||
|
||||
for (auto &subDevice : this->device->getNEODevice()->getRootDevice()->getSubDevices()) {
|
||||
for (auto const &engine : subDevice->getAllEngines()) {
|
||||
auto osContextId = engine.commandStreamReceiver->getOsContext().getContextId();
|
||||
|
||||
auto poolAllocation = getPoolAllocation(this->device);
|
||||
bool isUsed = (poolAllocation && poolAllocation->isUsedByOsContext(osContextId));
|
||||
|
||||
if (inOrderExecInfo) {
|
||||
if (inOrderExecInfo->getDeviceCounterAllocation()) {
|
||||
isUsed |= inOrderExecInfo->getDeviceCounterAllocation()->isUsedByOsContext(osContextId);
|
||||
} else {
|
||||
DEBUG_BREAK_IF(true); // external allocation - not able to download
|
||||
}
|
||||
}
|
||||
|
||||
if (isUsed) {
|
||||
engine.commandStreamReceiver->downloadAllocations(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
void EventImp<TagSizeT>::handleSuccessfulHostSynchronization() {
|
||||
if (this->tbxMode) {
|
||||
for (auto &csr : csrs) {
|
||||
csr->downloadAllocations();
|
||||
}
|
||||
downloadAllTbxAllocations();
|
||||
}
|
||||
this->setIsCompleted();
|
||||
unsetCmdQueue();
|
||||
|
||||
@@ -25,7 +25,7 @@ Fence *Fence::create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc) {
|
||||
|
||||
ze_result_t Fence::queryStatus() {
|
||||
auto csr = cmdQueue->getCsr();
|
||||
csr->downloadAllocations();
|
||||
csr->downloadAllocations(true);
|
||||
|
||||
auto *hostAddr = csr->getTagAddress();
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ struct WhiteBox<::L0::Event> : public ::L0::Event {
|
||||
using BaseClass::counterBasedMode;
|
||||
using BaseClass::csrs;
|
||||
using BaseClass::Event;
|
||||
using BaseClass::eventPoolAllocation;
|
||||
using BaseClass::gpuHangCheckPeriod;
|
||||
using BaseClass::hostAddress;
|
||||
using BaseClass::isFromIpcPool;
|
||||
|
||||
@@ -4332,14 +4332,82 @@ HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenEventUsedCreatedOnSubDeviceBu
|
||||
auto eventAllocation = event->getPoolAllocation(device);
|
||||
ultCsr0->makeResident(*eventAllocation);
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(event->getCompletionFieldHostAddress());
|
||||
*hostAddress = Event::STATE_SIGNALED;
|
||||
|
||||
event->hostSynchronize(1);
|
||||
|
||||
EXPECT_EQ(1u, ultCsr0->downloadAllocationsCalledCount);
|
||||
EXPECT_FALSE(ultCsr0->latestDownloadAllocationsBlocking);
|
||||
EXPECT_EQ(1u, downloadCounter0);
|
||||
|
||||
EXPECT_EQ(1u, ultCsr1->downloadAllocationsCalledCount);
|
||||
EXPECT_TRUE(ultCsr1->latestDownloadAllocationsBlocking);
|
||||
EXPECT_EQ(0u, downloadCounter1);
|
||||
|
||||
event->destroy();
|
||||
}
|
||||
|
||||
HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenEventCounterBasedUsedCreatedOnSubDeviceButUsedOnDifferentSubdeviceWhenQueryingThenDownload, IsAtLeastXeHpCore) {
|
||||
neoDevice->getExecutionEnvironment()->calculateMaxOsContextCount();
|
||||
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique<NEO::MockMemoryOperations>();
|
||||
|
||||
auto rootDevice = static_cast<MockDeviceImp *>(device);
|
||||
|
||||
ASSERT_TRUE(rootDevice->subDevices.size() > 1);
|
||||
|
||||
auto subDevice0 = rootDevice->subDevices[0];
|
||||
auto subDevice1 = rootDevice->subDevices[1];
|
||||
|
||||
auto ultCsr0 = static_cast<UltCommandStreamReceiver<FamilyType> *>(subDevice0->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
auto ultCsr1 = static_cast<UltCommandStreamReceiver<FamilyType> *>(subDevice1->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
ultCsr0->commandStreamReceiverType = CommandStreamReceiverType::tbx;
|
||||
ultCsr1->commandStreamReceiverType = CommandStreamReceiverType::tbx;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
|
||||
eventPoolDesc.count = 2;
|
||||
ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
|
||||
auto event0 = whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, subDevice1));
|
||||
auto event1 = whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, subDevice1));
|
||||
event0->eventPoolAllocation = nullptr;
|
||||
event1->eventPoolAllocation = nullptr;
|
||||
|
||||
auto inOrderExecInfo0 = NEO::InOrderExecInfo::create(device->getDeviceInOrderCounterAllocator()->getTag(), nullptr, *device->getNEODevice(), 1, false);
|
||||
inOrderExecInfo0->setLastWaitedCounterValue(1);
|
||||
event0->updateInOrderExecState(inOrderExecInfo0, 1, 0);
|
||||
|
||||
auto inOrderExecInfo1 = NEO::InOrderExecInfo::createFromExternalAllocation(*device->getNEODevice(), 0x1, nullptr, nullptr, 1);
|
||||
inOrderExecInfo1->setLastWaitedCounterValue(1);
|
||||
event1->updateInOrderExecState(inOrderExecInfo1, 1, 0);
|
||||
|
||||
ultCsr0->makeResident(*inOrderExecInfo0->getDeviceCounterAllocation());
|
||||
|
||||
event0->hostSynchronize(1);
|
||||
|
||||
EXPECT_EQ(1u, ultCsr0->downloadAllocationsCalledCount);
|
||||
EXPECT_FALSE(ultCsr0->latestDownloadAllocationsBlocking);
|
||||
|
||||
EXPECT_EQ(1u, ultCsr1->downloadAllocationsCalledCount);
|
||||
EXPECT_TRUE(ultCsr1->latestDownloadAllocationsBlocking);
|
||||
|
||||
event1->hostSynchronize(1);
|
||||
|
||||
EXPECT_EQ(1u, ultCsr0->downloadAllocationsCalledCount);
|
||||
EXPECT_FALSE(ultCsr0->latestDownloadAllocationsBlocking);
|
||||
|
||||
EXPECT_EQ(2u, ultCsr1->downloadAllocationsCalledCount);
|
||||
EXPECT_TRUE(ultCsr1->latestDownloadAllocationsBlocking);
|
||||
|
||||
event0->destroy();
|
||||
event1->destroy();
|
||||
}
|
||||
|
||||
HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenDynamicPacketEstimationWhenGettingMaxPacketFromSingleOneTileDeviceThenMaxFromThisDeviceSelected, IsAtLeastXeHpCore) {
|
||||
testSingleDevice();
|
||||
}
|
||||
|
||||
@@ -178,10 +178,10 @@ bool CommandQueueHw<Family>::waitForTimestamps(Range<CopyEngineState> copyEngine
|
||||
}
|
||||
|
||||
if (waited) {
|
||||
getGpgpuCommandStreamReceiver().downloadAllocations();
|
||||
getGpgpuCommandStreamReceiver().downloadAllocations(true);
|
||||
for (const auto &copyEngine : copyEnginesToWait) {
|
||||
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
|
||||
bcsCsr->downloadAllocations();
|
||||
bcsCsr->downloadAllocations(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -832,12 +832,12 @@ bool Event::areTimestampsCompleted() {
|
||||
}
|
||||
}
|
||||
}
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocations();
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocations(true);
|
||||
const auto &bcsStates = this->cmdQueue->peekActiveBcsStates();
|
||||
for (auto currentBcsIndex = 0u; currentBcsIndex < bcsStates.size(); currentBcsIndex++) {
|
||||
const auto &state = bcsStates[currentBcsIndex];
|
||||
if (state.isValid()) {
|
||||
this->cmdQueue->getBcsCommandStreamReceiver(state.engineType)->downloadAllocations();
|
||||
this->cmdQueue->getBcsCommandStreamReceiver(state.engineType)->downloadAllocations(true);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -1035,7 +1035,7 @@ bool CommandStreamReceiver::testTaskCountReady(volatile TagAddressType *pollAddr
|
||||
pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset);
|
||||
}
|
||||
|
||||
downloadAllocations();
|
||||
downloadAllocations(true);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -229,7 +229,7 @@ class CommandStreamReceiver {
|
||||
virtual WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait);
|
||||
WaitStatus baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams &params, TaskCountType taskCountToWait);
|
||||
MOCKABLE_VIRTUAL bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait);
|
||||
virtual void downloadAllocations(){};
|
||||
virtual void downloadAllocations(bool blockingWait){};
|
||||
virtual void removeDownloadAllocation(GraphicsAllocation *alloc){};
|
||||
|
||||
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
|
||||
|
||||
@@ -45,7 +45,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
|
||||
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
|
||||
WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override;
|
||||
void downloadAllocations() override;
|
||||
void downloadAllocations(bool blockingWait) override;
|
||||
void downloadAllocationTbx(GraphicsAllocation &gfxAllocation);
|
||||
void removeDownloadAllocation(GraphicsAllocation *alloc) override;
|
||||
|
||||
|
||||
@@ -576,19 +576,34 @@ void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocationTbx(GraphicsAlloca
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations(bool blockingWait) {
|
||||
TaskCountType taskCountToWait = this->latestFlushedTaskCount;
|
||||
|
||||
volatile TagAddressType *pollAddress = this->getTagAddress();
|
||||
|
||||
for (uint32_t i = 0; i < this->activePartitions; i++) {
|
||||
while (*pollAddress < this->latestFlushedTaskCount) {
|
||||
while (*pollAddress < taskCountToWait) {
|
||||
if (!blockingWait) {
|
||||
return;
|
||||
}
|
||||
this->downloadAllocation(*this->getTagAllocation());
|
||||
}
|
||||
pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset);
|
||||
}
|
||||
auto lockCSR = this->obtainUniqueOwnership();
|
||||
|
||||
std::vector<GraphicsAllocation *> notReadyAllocations;
|
||||
|
||||
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
|
||||
this->downloadAllocation(*graphicsAllocation);
|
||||
|
||||
// Used again while waiting for completion. Another download will be needed.
|
||||
if (graphicsAllocation->getTaskCount(this->osContext->getContextId()) > taskCountToWait) {
|
||||
notReadyAllocations.push_back(graphicsAllocation);
|
||||
}
|
||||
}
|
||||
this->allocationsForDownload.clear();
|
||||
this->allocationsForDownload = std::set<GraphicsAllocation *>(notReadyAllocations.begin(), notReadyAllocations.end());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -272,8 +272,9 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
}
|
||||
void setPreemptionAllocation(GraphicsAllocation *allocation) { this->preemptionAllocation = allocation; }
|
||||
|
||||
void downloadAllocations() override {
|
||||
void downloadAllocations(bool blockingWait) override {
|
||||
downloadAllocationsCalledCount++;
|
||||
latestDownloadAllocationsBlocking = blockingWait;
|
||||
}
|
||||
|
||||
void downloadAllocationUlt(GraphicsAllocation &gfxAllocation) {
|
||||
@@ -558,6 +559,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
std::optional<SubmissionStatus> flushReturnValue{};
|
||||
CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::hardware;
|
||||
std::atomic<uint32_t> downloadAllocationsCalledCount = 0;
|
||||
std::atomic<bool> latestDownloadAllocationsBlocking = false;
|
||||
|
||||
bool renderStateCacheFlushed = false;
|
||||
bool renderStateCacheDcFlushForced = false;
|
||||
|
||||
@@ -177,7 +177,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
return commandStreamReceiverType;
|
||||
}
|
||||
|
||||
void downloadAllocations() override {
|
||||
void downloadAllocations(bool blockingWait) override {
|
||||
downloadAllocationsCalledCount++;
|
||||
}
|
||||
|
||||
|
||||
@@ -350,7 +350,7 @@ TEST_F(CommandStreamReceiverTest, givenBaseDownloadAllocationCalledThenDoesNotCh
|
||||
|
||||
ASSERT_NE(nullptr, graphicsAllocation);
|
||||
auto numEvictionAllocsBefore = commandStreamReceiver->getEvictionAllocations().size();
|
||||
commandStreamReceiver->CommandStreamReceiver::downloadAllocations();
|
||||
commandStreamReceiver->CommandStreamReceiver::downloadAllocations(true);
|
||||
auto numEvictionAllocsAfter = commandStreamReceiver->getEvictionAllocations().size();
|
||||
EXPECT_EQ(numEvictionAllocsBefore, numEvictionAllocsAfter);
|
||||
EXPECT_EQ(0u, numEvictionAllocsAfter);
|
||||
|
||||
@@ -473,14 +473,48 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenDownloadAllocatoinsCalledThen
|
||||
MockGraphicsAllocation allocation1, allocation2, allocation3;
|
||||
tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3};
|
||||
|
||||
allocation1.updateTaskCount(0, tbxCsr.getOsContext().getContextId());
|
||||
allocation2.updateTaskCount(0, tbxCsr.getOsContext().getContextId());
|
||||
allocation3.updateTaskCount(0, tbxCsr.getOsContext().getContextId());
|
||||
|
||||
EXPECT_EQ(0u, tbxCsr.obtainUniqueOwnershipCalled);
|
||||
tbxCsr.downloadAllocations();
|
||||
tbxCsr.downloadAllocations(true);
|
||||
EXPECT_EQ(1u, tbxCsr.obtainUniqueOwnershipCalled);
|
||||
|
||||
std::set<GraphicsAllocation *> expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3};
|
||||
EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size());
|
||||
}
|
||||
|
||||
HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenUpdatingTaskCountDuringWaitThenDontRemoveFromContainer) {
|
||||
MockTbxCsrRegisterDownloadedAllocations<FamilyType> tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()};
|
||||
MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(pDevice->getDeviceBitfield()));
|
||||
|
||||
tbxCsr.setupContext(osContext);
|
||||
tbxCsr.initializeTagAllocation();
|
||||
*tbxCsr.getTagAddress() = 0u;
|
||||
tbxCsr.latestFlushedTaskCount = 1;
|
||||
|
||||
MockGraphicsAllocation allocation1, allocation2, allocation3;
|
||||
tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3};
|
||||
|
||||
tbxCsr.makeResident(allocation1);
|
||||
tbxCsr.makeResident(allocation2);
|
||||
tbxCsr.makeResident(allocation3);
|
||||
|
||||
allocation2.updateTaskCount(2u, tbxCsr.getOsContext().getContextId());
|
||||
|
||||
tbxCsr.downloadAllocations(false);
|
||||
EXPECT_EQ(0u, tbxCsr.obtainUniqueOwnershipCalled);
|
||||
EXPECT_EQ(3u, tbxCsr.allocationsForDownload.size());
|
||||
|
||||
*tbxCsr.getTagAddress() = 1u;
|
||||
|
||||
tbxCsr.downloadAllocations(false);
|
||||
EXPECT_EQ(1u, tbxCsr.obtainUniqueOwnershipCalled);
|
||||
EXPECT_EQ(1u, tbxCsr.allocationsForDownload.size());
|
||||
EXPECT_NE(tbxCsr.allocationsForDownload.find(&allocation2), tbxCsr.allocationsForDownload.end());
|
||||
}
|
||||
|
||||
HWTEST_F(TbxCommandSteamSimpleTest, whenTbxCommandStreamReceiverIsCreatedThenPPGTTAndGGTTCreatedHavePhysicalAddressAllocatorSet) {
|
||||
MockTbxCsr<FamilyType> tbxCsr(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,4 +11,4 @@
|
||||
#include "shared/test/unit_test/gen12lp/compute_mode_tests_gen12lp.inl"
|
||||
#include "shared/test/unit_test/gen12lp/tbx_command_stream_receiver_tests_gen12lp.inl"
|
||||
#include "shared/test/unit_test/gen12lp/test_device_caps_gen12lp.inl"
|
||||
#include "shared/test/unit_test/gen12lp/test_sample_gen12lp.inl"
|
||||
#include "shared/test/unit_test/gen12lp/test_sample_gen12lp.inl"
|
||||
|
||||
Reference in New Issue
Block a user