mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
feature: wait only for 1 semaphore in atomic signalling mode
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
93c2634c93
commit
9c53d9a712
@@ -340,7 +340,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
|
||||
void addCmdForPatching(std::shared_ptr<InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
|
||||
|
||||
bool inOrderAtomicSignallingEnabled() const;
|
||||
bool inOrderAtomicSignallingEnabled() const override;
|
||||
uint64_t getInOrderIncrementValue() const;
|
||||
|
||||
InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;
|
||||
|
||||
@@ -2420,7 +2420,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
|
||||
uint64_t gpuAddress = dependencyCounterAllocation.getGpuAddress() + offset;
|
||||
|
||||
for (uint32_t i = 0; i < this->partitionCount; i++) {
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true, isQwordInOrderCounter());
|
||||
|
||||
|
||||
@@ -1284,9 +1284,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
|
||||
|
||||
bool signaled = true;
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(inOrderExecInfo->getDeviceCounterAllocation().getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getHostAddress(), this->inOrderAllocationOffset);
|
||||
|
||||
for (uint32_t i = 0; i < this->partitionCount; i++) {
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
||||
signaled = false;
|
||||
break;
|
||||
|
||||
@@ -234,9 +234,7 @@ void CommandListImp::enableInOrderExecution() {
|
||||
|
||||
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
|
||||
|
||||
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
|
||||
inOrderExecInfo = std::make_shared<InOrderExecInfo>(*inOrderDependencyCounterAllocation, *device->getMemoryManager(), (this->cmdListType == TYPE_REGULAR));
|
||||
inOrderExecInfo = std::make_shared<InOrderExecInfo>(*inOrderDependencyCounterAllocation, *device->getMemoryManager(), this->partitionCount, (this->cmdListType == TYPE_REGULAR), inOrderAtomicSignallingEnabled());
|
||||
}
|
||||
|
||||
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
|
||||
|
||||
@@ -44,6 +44,8 @@ struct CommandListImp : public CommandList {
|
||||
|
||||
~CommandListImp() override = default;
|
||||
|
||||
// Pure virtual query for whether in-order atomic signalling is enabled.
// CommandListCoreFamily declares the override, and enableInOrderExecution()
// forwards the result to the InOrderExecInfo constructor.
virtual bool inOrderAtomicSignallingEnabled() const = 0;
|
||||
|
||||
static constexpr int32_t cmdListDefaultEngineInstancedDevice = NEO::StreamProperty::initValue;
|
||||
static constexpr bool cmdListDefaultCoherency = false;
|
||||
static constexpr bool cmdListDefaultDisableOverdispatch = true;
|
||||
|
||||
@@ -163,11 +163,11 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(inOrderExecInfo->getDeviceCounterAllocation().getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getHostAddress(), this->inOrderAllocationOffset);
|
||||
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
|
||||
bool signaled = true;
|
||||
|
||||
for (uint32_t i = 0; i < this->getPacketsInUse(); i++) {
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
||||
signaled = false;
|
||||
break;
|
||||
@@ -438,7 +438,7 @@ ze_result_t EventImp<TagSizeT>::waitForUserFence(uint64_t timeout) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
uint64_t waitAddress = castToUint64(ptrOffset(inOrderExecInfo->getDeviceCounterAllocation().getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
uint64_t waitAddress = castToUint64(ptrOffset(inOrderExecInfo->getHostAddress(), this->inOrderAllocationOffset));
|
||||
|
||||
if (!csrs[0]->waitUserFence(getInOrderExecSignalValueWithSubmissionCounter(), waitAddress, timeout)) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
|
||||
@@ -19,8 +19,15 @@ InOrderExecInfo::~InOrderExecInfo() {
|
||||
memoryManager.freeGraphicsMemory(&deviceCounterAllocation);
|
||||
}
|
||||
|
||||
// Constructor of InOrderExecInfo. The two signature lines below are the
// before/after pair of this diff hunk: the second one adds `partitionCount`
// and `atomicDeviceSignalling`.
// Stores the counter allocation, memory manager and regular-cmdlist flag,
// derives the device/host partition wait counts, caches the host pointer of
// the counter allocation and finally calls reset().
InOrderExecInfo::InOrderExecInfo(NEO::GraphicsAllocation &deviceCounterAllocation, NEO::MemoryManager &memoryManager, bool regularCmdList)
|
||||
InOrderExecInfo::InOrderExecInfo(NEO::GraphicsAllocation &deviceCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling)
|
||||
: deviceCounterAllocation(deviceCounterAllocation), memoryManager(memoryManager), regularCmdList(regularCmdList) {
|
||||
|
||||
// Device-side waits: a single partition when atomic device signalling is
// requested, otherwise one per partition.
numDevicePartitionsToWait = atomicDeviceSignalling ? 1 : partitionCount;
|
||||
// Host-side waits always use the full partition count, regardless of the
// atomic signalling flag.
numHostPartitionsToWait = partitionCount;
|
||||
|
||||
// Host pointer is the underlying buffer of the device counter allocation,
// viewed as 64-bit counters.
hostAddress = reinterpret_cast<uint64_t *>(deviceCounterAllocation.getUnderlyingBuffer());
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
void InOrderExecInfo::reset() {
|
||||
|
||||
@@ -27,9 +27,10 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
|
||||
InOrderExecInfo() = delete;
|
||||
|
||||
InOrderExecInfo(NEO::GraphicsAllocation &deviceCounterAllocation, NEO::MemoryManager &memoryManager, bool regularCmdList);
|
||||
InOrderExecInfo(NEO::GraphicsAllocation &deviceCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling);
|
||||
|
||||
NEO::GraphicsAllocation &getDeviceCounterAllocation() const { return deviceCounterAllocation; }
|
||||
// Returns the cached host pointer to the counter storage; the constructor
// sets it from deviceCounterAllocation.getUnderlyingBuffer().
uint64_t *getHostAddress() const { return hostAddress; }
|
||||
|
||||
uint64_t getCounterValue() const { return counterValue; }
|
||||
void addCounterValue(uint64_t addValue) { counterValue += addValue; }
|
||||
@@ -40,6 +41,9 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
|
||||
bool isRegularCmdList() const { return regularCmdList; }
|
||||
|
||||
// Number of partitions a device-side wait iterates over. The constructor sets
// this to 1 when atomic device signalling is enabled, else to partitionCount.
uint32_t getNumDevicePartitionsToWait() const { return numDevicePartitionsToWait; }
|
||||
// Number of partitions a host-side wait iterates over. The constructor always
// sets this to partitionCount.
uint32_t getNumHostPartitionsToWait() const { return numHostPartitionsToWait; }
|
||||
|
||||
void reset();
|
||||
|
||||
protected:
|
||||
@@ -47,6 +51,9 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
NEO::MemoryManager &memoryManager;
|
||||
uint64_t counterValue = 0;
|
||||
uint64_t regularCmdListSubmissionCounter = 0;
|
||||
uint64_t *hostAddress = nullptr;
|
||||
uint32_t numDevicePartitionsToWait = 0;
|
||||
uint32_t numHostPartitionsToWait = 0;
|
||||
bool regularCmdList = false;
|
||||
};
|
||||
|
||||
|
||||
@@ -293,6 +293,7 @@ struct MockCommandList : public CommandList {
|
||||
ADDMETHOD_NOBASE(close, ze_result_t, ZE_RESULT_SUCCESS, ());
|
||||
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(patchInOrderCmds, (void));
|
||||
ADDMETHOD_CONST_NOBASE(inOrderAtomicSignallingEnabled, bool, false, (void));
|
||||
|
||||
ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(ze_kernel_handle_t kernelHandle,
|
||||
|
||||
@@ -4164,6 +4164,47 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenSignalli
|
||||
EXPECT_EQ(0u, atomicCmd->getCsStall());
|
||||
}
|
||||
|
||||
// With the InOrderAtomicSignallingEnabled debug flag set, a multi-tile
// immediate command list that waits on its own implicit dependency and on an
// event from another command list is expected to program one
// MI_SEMAPHORE_WAIT per dependency (plus an optional one tied to the
// pipe-control stall requirement).
HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenWaitingForDependencyThenUseOnlyOneSemaphore, IsAtLeastXeHpCore) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
// Set the atomic signalling debug flag to 1 before the command lists are created.
debugManager.flags.InOrderAtomicSignallingEnabled.set(1);
|
||||
|
||||
auto immCmdList1 = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
auto immCmdList2 = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
auto handle = events[0]->toHandle();
|
||||
|
||||
// First command list launches a kernel and passes the event handle in the
// signal-event position (no wait events).
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, handle, 0, nullptr, launchParams, false);
|
||||
|
||||
// After that single append the in-order counter is expected to equal partitionCount.
EXPECT_EQ(partitionCount, immCmdList1->inOrderExecInfo->getCounterValue());
|
||||
|
||||
auto cmdStream = immCmdList2->getCmdContainer().getCommandStream();
|
||||
|
||||
// First launch on the second command list has no wait events; it is issued
// before `offset` is captured, so its commands are excluded from parsing.
immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
// Second launch waits on events[0]; only commands written from `offset`
// onward are inspected below.
immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &handle, launchParams, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
|
||||
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
// Two semaphores are expected, plus one more when ImplicitScalingDispatch
// reports that a pipe-control stall is required.
ASSERT_EQ(2u + (ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired() ? 1 : 0), semaphores.size());
|
||||
|
||||
auto itor = cmdList.begin();
|
||||
|
||||
// implicit dependency
// (checked against the GPU address of immCmdList2's own counter allocation)
|
||||
auto gpuAddress = immCmdList2->inOrderExecInfo->getDeviceCounterAllocation().getGpuAddress();
|
||||
|
||||
ASSERT_TRUE(verifyInOrderDependency<FamilyType>(itor, partitionCount, gpuAddress, immCmdList2->isQwordInOrderCounter()));
|
||||
|
||||
// event
// (checked against the counter allocation held by events[0]'s inOrderExecInfo)
|
||||
ASSERT_TRUE(verifyInOrderDependency<FamilyType>(itor, partitionCount, events[0]->inOrderExecInfo->getDeviceCounterAllocation().getGpuAddress(), immCmdList2->isQwordInOrderCounter()));
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgrammingWaitOnEventsThenHandleAllEventPackets, IsAtLeastXeHpCore) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
@@ -3274,7 +3274,8 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI
|
||||
|
||||
auto syncAllocation = new NEO::MockGraphicsAllocation(&storage, sizeof(storage));
|
||||
|
||||
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(*syncAllocation, *neoDevice->getMemoryManager(), false);
|
||||
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(*syncAllocation, *neoDevice->getMemoryManager(), 1, false, false);
|
||||
*inOrderExecInfo->getHostAddress() = 1;
|
||||
|
||||
event->enableCounterBasedMode(true);
|
||||
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
|
||||
|
||||
Reference in New Issue
Block a user