feature: add 64-bit semaphore command

Related-To: NEO-15636

Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
Naklicki, Mateusz
2025-12-03 17:12:39 +00:00
committed by Compute-Runtime-Automation
parent 6bd1076039
commit 2c3b6a8760
21 changed files with 331 additions and 110 deletions

View File

@@ -1920,7 +1920,6 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::addHostFunctionToPatchCommands(const NEO::HostFunction &hostFunction) {
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
auto additionalSize = 2u;
@@ -1932,7 +1931,7 @@ void CommandListCoreFamily<gfxCoreFamily>::addHostFunctionToPatchCommands(const
.type = CommandToPatch::HostFunctionId,
.isInOrder = hostFunction.isInOrder});
commandsToPatch.push_back({.pCommand = commandContainer.getCommandStream()->getSpace(sizeof(MI_SEMAPHORE_WAIT)),
commandsToPatch.push_back({.pCommand = commandContainer.getCommandStream()->getSpace(NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait()),
.type = CommandToPatch::HostFunctionWait});
}
@@ -3276,13 +3275,13 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
}
}
auto semaphoreCommand = reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandContainer.getCommandStream()->getSpace(sizeof(MI_SEMAPHORE_WAIT)));
auto semaphoreCommand = reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandContainer.getCommandStream()->getSpace(NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait()));
if (!noopDispatch) {
NEO::EncodeSemaphore<GfxFamily>::programMiSemaphoreWait(semaphoreCommand, gpuAddress, waitValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD,
false, true, isQwordInOrderCounter(), indirectMode, false);
} else {
memset(semaphoreCommand, 0, sizeof(MI_SEMAPHORE_WAIT));
memset(semaphoreCommand, 0, NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait());
}
if (patchingRequired && !isQwordInOrderCounter()) {

View File

@@ -58,8 +58,7 @@ void MutableSemaphoreWaitHw<GfxFamily>::restoreWithSemaphoreAddress(GpuAddress s
// Updates the compare value of the semaphore wait command tracked by this object.
//
// value - new semaphore compare value; passed through unmodified as a 64-bit quantity.
//
// The write is delegated to EncodeSemaphore::setMiSemaphoreWaitValue so that the
// encoder decides how the value is stored in the command. The command is not
// patched directly here: a direct setSemaphoreDataDword(static_cast<uint32_t>(value))
// would truncate the value to 32 bits and would duplicate the encoder's write.
template <typename GfxFamily>
void MutableSemaphoreWaitHw<GfxFamily>::setSemaphoreValue(uint64_t value) {
    NEO::EncodeSemaphore<GfxFamily>::setMiSemaphoreWaitValue(semWait, value);
}
} // namespace L0::MCL

View File

@@ -1837,7 +1837,7 @@ HWTEST_F(CommandListCreate, givenImmediateCommandListWhenThereIsNoEnoughSpaceFor
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
constexpr uint32_t numEvents = 100;
constexpr size_t eventWaitSize = numEvents * NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait();
size_t eventWaitSize = numEvents * NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait();
size_t useSize = commandList->getCmdContainer().getCommandStream()->getMaxAvailableSpace() - (commonImmediateCommandSize + eventWaitSize) + 1;

View File

@@ -589,13 +589,12 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
auto cmdStream = immediateCommandList->getCmdContainer().getCommandStream();
constexpr size_t sizeBarrierCommands = sizeof(PIPE_CONTROL) +
sizeof(MI_ATOMIC) +
NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START);
size_t sizeBarrierCommands = sizeof(PIPE_CONTROL) +
sizeof(MI_ATOMIC) +
NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START);
constexpr size_t expectedSize = sizeBarrierCommands +
2 * sizeof(uint32_t);
size_t expectedSize = sizeBarrierCommands + 2 * sizeof(uint32_t);
size_t estimatedSize = immediateCommandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment());
size_t usedBeforeSize = cmdStream->getUsed();

View File

@@ -91,11 +91,6 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir
constexpr uint32_t numberOfBlts = 3;
constexpr size_t bltSize = (numberOfBlts * max2DBlitSize);
waArgs.isWaRequired = true;
auto expectedSize = (cmdsSizePerBlit * numberOfBlts) + debugCommandsSize + (2 * MemorySynchronizationCommands<FamilyType>::getSizeForAdditionalSynchronization(NEO::FenceType::release, pDevice->getRootDeviceEnvironment())) +
EncodeMiFlushDW<FamilyType>::getCommandSizeWithWa(waArgs) + sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);
MockGraphicsAllocation bufferMockAllocation(0, 1u, AllocationType::buffer, reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::localMemory, MemoryManager::maxOsContextCount);
MockGraphicsAllocation hostMockAllocation(0, 1u, AllocationType::externalHostPtr, reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::system64KBPages, MemoryManager::maxOsContextCount);
@@ -106,6 +101,19 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
waArgs.isWaRequired = true;
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
auto expectedSize = (cmdsSizePerBlit * numberOfBlts) + debugCommandsSize + (2 * MemorySynchronizationCommands<FamilyType>::getSizeForAdditionalSynchronization(NEO::FenceType::release, rootDeviceEnvironment)) +
EncodeMiFlushDW<FamilyType>::getCommandSizeWithWa(waArgs) + sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
bool deviceToHostPostSyncFenceRequired = rootDeviceEnvironment.getProductHelper().isDeviceToHostCopySignalingFenceRequired() &&
!hostMockAllocation.isAllocatedInLocalMemoryPool() &&
bufferMockAllocation.isAllocatedInLocalMemoryPool();
if (deviceToHostPostSyncFenceRequired) {
expectedSize += MemorySynchronizationCommands<FamilyType>::getSizeForAdditionalSynchronization(NEO::FenceType::release, rootDeviceEnvironment);
}
expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
blitPropertiesContainer, false, true, false, false, pClDevice->getRootDeviceEnvironment());

View File

@@ -556,7 +556,7 @@ struct EncodeSemaphore {
static constexpr uint32_t invalidHardwareTag = -2;
static void programMiSemaphoreWaitCommand(LinearStream *commandStream,
MI_SEMAPHORE_WAIT *cmd,
void *cmdBuffer,
uint64_t compareAddress,
uint64_t compareData,
COMPARE_OPERATION compareMode,
@@ -586,13 +586,9 @@ struct EncodeSemaphore {
bool switchOnUnsuccessful,
void **outSemWaitCmd);
static void applyMiSemaphoreWaitCommand(LinearStream &commandStream,
std::list<void *> &commandsList);
static size_t getSizeMiSemaphoreWait();
static constexpr size_t getSizeMiSemaphoreWait() { return sizeof(MI_SEMAPHORE_WAIT); }
protected:
static void appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool indirect, bool useQwordData, bool switchOnUnsuccessful);
static void setMiSemaphoreWaitValue(void *cmd, uint64_t semaphoreValue);
};
template <typename GfxFamily>

View File

@@ -874,26 +874,9 @@ inline size_t EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(con
return requiredSize;
}
// Appends one MI_SEMAPHORE_WAIT to commandStream and programs it.
//
// Space for the command is taken from the stream first; if outSemWaitCmd is not
// null, the address of that space is written through it before programming.
// The wait-mode argument passed to programMiSemaphoreWait is always true.
template <typename Family>
void EncodeSemaphore<Family>::addMiSemaphoreWaitCommand(LinearStream &commandStream,
                                                        uint64_t compareAddress,
                                                        uint64_t compareData,
                                                        COMPARE_OPERATION compareMode,
                                                        bool registerPollMode,
                                                        bool useQwordData,
                                                        bool indirect,
                                                        bool switchOnUnsuccessful,
                                                        void **outSemWaitCmd) {
    auto *const cmdSpace = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();

    const bool callerWantsPointer = (outSemWaitCmd != nullptr);
    if (callerWantsPointer) {
        *outSemWaitCmd = cmdSpace;
    }

    constexpr bool waitMode = true;
    programMiSemaphoreWait(cmdSpace,
                           compareAddress,
                           compareData,
                           compareMode,
                           registerPollMode,
                           waitMode,
                           useQwordData,
                           indirect,
                           switchOnUnsuccessful);
}
template <typename Family>
void EncodeSemaphore<Family>::programMiSemaphoreWaitCommand(LinearStream *commandStream,
MI_SEMAPHORE_WAIT *semaphoreCommand,
void *cmdBuffer,
uint64_t compareAddress,
uint64_t compareData,
COMPARE_OPERATION compareMode,
@@ -902,17 +885,11 @@ void EncodeSemaphore<Family>::programMiSemaphoreWaitCommand(LinearStream *comman
bool useQwordData,
bool indirect,
bool switchOnUnsuccessful) {
if (semaphoreCommand == nullptr) {
if (cmdBuffer == nullptr) {
DEBUG_BREAK_IF(commandStream == nullptr);
semaphoreCommand = commandStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
cmdBuffer = commandStream->getSpace(EncodeSemaphore<Family>::getSizeMiSemaphoreWait());
}
programMiSemaphoreWait(semaphoreCommand, compareAddress, compareData, compareMode, registerPollMode, waitMode, useQwordData, indirect, switchOnUnsuccessful);
}
template <typename Family>
void EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(LinearStream &commandStream, std::list<void *> &commandsList) {
MI_SEMAPHORE_WAIT *semaphoreCommand = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
commandsList.push_back(semaphoreCommand);
programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(cmdBuffer), compareAddress, compareData, compareMode, registerPollMode, waitMode, useQwordData, indirect, switchOnUnsuccessful);
}
template <typename Family>

View File

@@ -26,9 +26,54 @@ void EncodeSurfaceState<Family>::convertSurfaceStateToPacked(R_SURFACE_STATE *su
}
template <typename Family>
void EncodeSemaphore<Family>::appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool indirect, bool useQwordData, bool switchOnUnsuccessful) {
void EncodeSemaphore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
                                                     uint64_t compareAddress,
                                                     uint64_t compareData,
                                                     COMPARE_OPERATION compareMode,
                                                     bool registerPollMode,
                                                     bool waitMode,
                                                     bool useQwordData,
                                                     bool indirect,
                                                     bool switchOnUnsuccessful) {
    // This variant only programs a 32-bit compare value: qword data requests and
    // compare values with any of the upper 32 bits set are rejected.
    constexpr uint64_t highDwordMask = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) << 32;
    const bool dataExceedsDword = (compareData & highDwordMask) != 0;
    UNRECOVERABLE_IF(useQwordData || dataExceedsDword);

    // Build the command in a local copy of the family's initial value and store
    // it to *cmd with a single assignment at the end.
    MI_SEMAPHORE_WAIT semaphore = Family::cmdInitMiSemaphoreWait;
    semaphore.setCompareOperation(compareMode);
    semaphore.setSemaphoreDataDword(static_cast<uint32_t>(compareData));
    semaphore.setSemaphoreGraphicsAddress(compareAddress);

    if (waitMode) {
        semaphore.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
    } else {
        semaphore.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE);
    }

    if (registerPollMode) {
        semaphore.setRegisterPollMode(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_REGISTER_POLL);
    } else {
        semaphore.setRegisterPollMode(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL);
    }

    semaphore.setIndirectSemaphoreDataDword(indirect);

    *cmd = semaphore;
}
// Overwrites the compare value of an already-programmed MI_SEMAPHORE_WAIT.
//
// cmd            - untyped pointer to the command; it is treated as
//                  Family::MI_SEMAPHORE_WAIT.
// semaphoreValue - new compare value; only the low 32 bits are stored, because
//                  the value is narrowed to uint32_t before being written.
template <typename Family>
void EncodeSemaphore<Family>::setMiSemaphoreWaitValue(void *cmd, uint64_t semaphoreValue) {
    // 'typename' is spelled out for the dependent type so the cast does not rely on
    // the C++20 implicit-typename rules; static_cast is sufficient for void * -> T *.
    auto *semaphoreCmd = static_cast<typename Family::MI_SEMAPHORE_WAIT *>(cmd);
    semaphoreCmd->setSemaphoreDataDword(static_cast<uint32_t>(semaphoreValue));
}
// Returns the number of bytes one MI_SEMAPHORE_WAIT command occupies,
// i.e. sizeof(MI_SEMAPHORE_WAIT).
template <typename Family>
size_t EncodeSemaphore<Family>::getSizeMiSemaphoreWait() {
    constexpr size_t semaphoreWaitSize = sizeof(MI_SEMAPHORE_WAIT);
    return semaphoreWaitSize;
}
// Reserves space for one MI_SEMAPHORE_WAIT in commandStream and programs it in place.
//
// outSemWaitCmd is optional: when non-null it receives the address of the reserved
// command. The command is always programmed with the wait-mode argument set to true.
template <typename Family>
void EncodeSemaphore<Family>::addMiSemaphoreWaitCommand(LinearStream &commandStream,
                                                        uint64_t compareAddress,
                                                        uint64_t compareData,
                                                        COMPARE_OPERATION compareMode,
                                                        bool registerPollMode,
                                                        bool useQwordData,
                                                        bool indirect,
                                                        bool switchOnUnsuccessful,
                                                        void **outSemWaitCmd) {
    auto *const reservedCmd = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
    if (outSemWaitCmd) {
        *outSemWaitCmd = reservedCmd;
    }

    constexpr bool usePollingWaitMode = true;
    programMiSemaphoreWait(reservedCmd, compareAddress, compareData, compareMode, registerPollMode,
                           usePollingWaitMode, useQwordData, indirect, switchOnUnsuccessful);
}
template <typename Family>

View File

@@ -340,7 +340,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
void *semaphoreCmd = listCmdBufferStream->getSpace(EncodeSemaphore<Family>::getSizeMiSemaphoreWait());
args.additionalCommands->push_back(semaphoreCmd);
}
}
@@ -480,7 +481,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
void *semaphoreCmd = listCmdBufferStream->getSpace(EncodeSemaphore<Family>::getSizeMiSemaphoreWait());
args.additionalCommands->push_back(semaphoreCmd);
}
}
}
@@ -830,29 +832,6 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs
surfaceState->setCompressionFormat(compressionFormat);
}
// Fills *cmd with an MI_SEMAPHORE_WAIT built from the given parameters.
//
// The command is assembled in a local copy of Family::cmdInitMiSemaphoreWait,
// passed to appendSemaphoreCommand once the common fields are set, and then
// stored to *cmd in a single assignment. Only the low 32 bits of compareData are
// written by this function itself.
template <typename Family>
void EncodeSemaphore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
                                                     uint64_t compareAddress,
                                                     uint64_t compareData,
                                                     COMPARE_OPERATION compareMode,
                                                     bool registerPollMode,
                                                     bool waitMode,
                                                     bool useQwordData,
                                                     bool indirect,
                                                     bool switchOnUnsuccessful) {
    MI_SEMAPHORE_WAIT semaphore = Family::cmdInitMiSemaphoreWait;

    const auto compareDword = static_cast<uint32_t>(compareData);
    semaphore.setCompareOperation(compareMode);
    semaphore.setSemaphoreDataDword(compareDword);
    semaphore.setSemaphoreGraphicsAddress(compareAddress);

    if (waitMode) {
        semaphore.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
    } else {
        semaphore.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE);
    }

    if (registerPollMode) {
        semaphore.setRegisterPollMode(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_REGISTER_POLL);
    } else {
        semaphore.setRegisterPollMode(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL);
    }

    semaphore.setIndirectSemaphoreDataDword(indirect);

    EncodeSemaphore<Family>::appendSemaphoreCommand(semaphore, compareData, indirect, useQwordData, switchOnUnsuccessful);

    *cmd = semaphore;
}
// This definition is a no-op: the body is empty, so nothing is written to
// 'stream' and none of the other arguments are read.
template <typename Family>
inline void EncodeWA<Family>::encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline,
const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs) {}

View File

@@ -40,13 +40,11 @@ void HostFunctionHelper<GfxFamily>::programHostFunctionId(LinearStream *commandS
template <typename GfxFamily>
void HostFunctionHelper<GfxFamily>::programHostFunctionWaitForCompletion(LinearStream *commandStream, void *cmdBuffer, const HostFunctionStreamer &streamer) {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
auto idGpuAddress = streamer.getHostFunctionIdGpuAddress();
auto waitValue = HostFunctionStatus::completed;
EncodeSemaphore<GfxFamily>::programMiSemaphoreWaitCommand(commandStream,
static_cast<MI_SEMAPHORE_WAIT *>(cmdBuffer),
cmdBuffer,
idGpuAddress,
waitValue,
GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,

View File

@@ -311,7 +311,7 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired) {
size_t semaphoreSize = (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
size_t semaphoreSize = (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize()
: EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait();
semaphoreSize += getSizePrefetchMitigation();

View File

@@ -158,7 +158,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
{
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::schedulerLoopCheckSectionStart);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionSize), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getSemaphoreSectionSize()), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10 + 4, 0, true, isBcs);
EncodeAluHelper<GfxFamily, 4> aluHelper({{
@@ -181,7 +181,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingSchedulerSection(uint32_t value) {
LinearStream schedulerCmdStream(this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
LinearStream schedulerCmdStream(this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize());
// 1. Init section
@@ -208,9 +208,9 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
// skip patching End section
auto dst = ringCommandStream.getSpace(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
memcpy_s(dst, RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize,
this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
auto dst = ringCommandStream.getSpace(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize());
memcpy_s(dst, RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize(),
this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize());
}
template <typename GfxFamily, typename Dispatcher>
@@ -313,8 +313,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
UNRECOVERABLE_IF(stream.getUsed() != RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
// Scheduler section
preinitializedRelaxedOrderingScheduler = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
LinearStream schedulerStream(preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
preinitializedRelaxedOrderingScheduler = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize());
LinearStream schedulerStream(preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize());
uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress();
@@ -340,7 +340,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR5, 0, true, isBcs);
}
UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::getTotalSize());
}
template <typename GfxFamily, typename Dispatcher>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -93,15 +93,23 @@ struct DynamicSchedulerSizeAndOffsetSection {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
static constexpr uint64_t initSectionSize = (3 * sizeof(MI_LOAD_REGISTER_IMM)) + sizeof(MI_BATCH_BUFFER_START);
static constexpr size_t initSectionSize = (3 * sizeof(MI_LOAD_REGISTER_IMM)) + sizeof(MI_BATCH_BUFFER_START);
static constexpr uint64_t semaphoreSectionStart = initSectionSize;
static constexpr uint64_t semaphoreSectionSize = EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() + EncodeMiPredicate<GfxFamily>::getCmdSize();
static constexpr size_t semaphoreSectionStart = initSectionSize;
static constexpr uint64_t endSectionStart = semaphoreSectionStart + semaphoreSectionSize;
static constexpr uint64_t endSectionSize = sizeof(MI_LOAD_REGISTER_IMM) + EncodeMiPredicate<GfxFamily>::getCmdSize();
static size_t getSemaphoreSectionSize() {
return EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() + EncodeMiPredicate<GfxFamily>::getCmdSize();
}
static constexpr uint64_t totalSize = endSectionStart + endSectionSize;
static size_t getEndSectionStart() {
return semaphoreSectionStart + getSemaphoreSectionSize();
}
static constexpr size_t endSectionSize = sizeof(MI_LOAD_REGISTER_IMM) + EncodeMiPredicate<GfxFamily>::getCmdSize();
static size_t getTotalSize() {
return getEndSectionStart() + endSectionSize;
}
};
} // namespace RelaxedOrderingHelper

View File

@@ -312,7 +312,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
void *semaphoreCmd = listCmdBufferStream->getSpace(EncodeSemaphore<Family>::getSizeMiSemaphoreWait());
args.additionalCommands->push_back(semaphoreCmd);
}
auto buffer = listCmdBufferStream->getSpaceForCmd<DefaultWalkerType>();
@@ -329,7 +330,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
void *semaphoreCmd = listCmdBufferStream->getSpace(EncodeSemaphore<Family>::getSizeMiSemaphoreWait());
args.additionalCommands->push_back(semaphoreCmd);
}
}
@@ -582,6 +584,11 @@ void EncodeSemaphore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
*cmd = localCmd;
}
// Replaces the compare value stored in an existing MI_SEMAPHORE_WAIT command.
//
// cmd            - untyped pointer to the command; interpreted as
//                  Family::MI_SEMAPHORE_WAIT.
// semaphoreValue - value to store; it is narrowed to uint32_t first, so only
//                  the low 32 bits reach the command.
template <typename Family>
void EncodeSemaphore<Family>::setMiSemaphoreWaitValue(void *cmd, uint64_t semaphoreValue) {
    // Explicit 'typename' keeps the dependent type valid without depending on
    // C++20 implicit-typename support; void * -> T * only needs static_cast.
    auto *semaphoreCmd = static_cast<typename Family::MI_SEMAPHORE_WAIT *>(cmd);
    semaphoreCmd->setSemaphoreDataDword(static_cast<uint32_t>(semaphoreValue));
}
// This definition is a no-op: the body is empty, so nothing is added to
// 'commandStream' and 'backBuffer' is not read.
template <typename GfxFamily>
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
}
@@ -706,12 +713,6 @@ template <typename Family>
void EncodeSurfaceState<Family>::convertSurfaceStateToPacked(R_SURFACE_STATE *surfaceState, ImageInfo &imgInfo) {
}
// Validation-only step for this variant: it does not modify 'cmd'.
// Aborts (UNRECOVERABLE_IF) when qword compare data is requested or when
// compareData has any of its upper 32 bits set.
template <typename Family>
void EncodeSemaphore<Family>::appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool indirect, bool useQwordData, bool switchOnUnsuccessful) {
    const uint64_t upperDword = compareData >> 32;
    const bool valueNeedsMoreThan32Bits = (upperDword != 0);
    UNRECOVERABLE_IF(useQwordData || valueNeedsMoreThan32Bits);
}
template <typename Family>
template <bool isHeapless>
void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr) {
@@ -842,6 +843,28 @@ template <typename Family>
void EncodeSurfaceState<Family>::setAdditionalCacheSettings(R_SURFACE_STATE *surfaceState) {
}
// Size in bytes of a single MI_SEMAPHORE_WAIT command (sizeof of the command struct).
template <typename Family>
size_t EncodeSemaphore<Family>::getSizeMiSemaphoreWait() {
    const size_t commandSize = sizeof(MI_SEMAPHORE_WAIT);
    return commandSize;
}
// Takes space for one MI_SEMAPHORE_WAIT from commandStream and programs it.
//
// If outSemWaitCmd is provided (non-null), the location of the new command is
// reported through it. programMiSemaphoreWait is invoked with its wait-mode
// argument fixed to true.
template <typename Family>
void EncodeSemaphore<Family>::addMiSemaphoreWaitCommand(LinearStream &commandStream,
                                                        uint64_t compareAddress,
                                                        uint64_t compareData,
                                                        COMPARE_OPERATION compareMode,
                                                        bool registerPollMode,
                                                        bool useQwordData,
                                                        bool indirect,
                                                        bool switchOnUnsuccessful,
                                                        void **outSemWaitCmd) {
    auto *const newCmd = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();

    // Report the command location before it is programmed, as the caller may want to patch it later.
    if (nullptr != outSemWaitCmd) {
        *outSemWaitCmd = newCmd;
    }

    const bool waitMode = true;
    programMiSemaphoreWait(newCmd, compareAddress, compareData, compareMode,
                           registerPollMode, waitMode, useQwordData, indirect, switchOnUnsuccessful);
}
} // namespace NEO
#include "shared/source/command_container/command_encoder_enablers.inl"

View File

@@ -8084,6 +8084,187 @@ typedef struct tagMI_SEMAPHORE_WAIT {
} MI_SEMAPHORE_WAIT;
STATIC_ASSERT(20 == sizeof(MI_SEMAPHORE_WAIT));
// Layout and accessors for the 64-bit variant of the MI_SEMAPHORE_WAIT command.
// The command is 7 dwords long (see RawData[7]): one header dword of bitfields,
// followed by a 64-bit compare value, a 64-bit address field and a 64-bit token.
// NOTE(review): the 28-byte size asserted right after this struct presumably relies
// on a packing directive being in effect for this header - confirm.
typedef struct tagMI_SEMAPHORE_WAIT_64 {
union tagTheStructure {
struct tagCommon {
// DWORD 0
uint32_t DwordLength : BITFIELD_RANGE(0, 7);
uint32_t SwTokenInfo : BITFIELD_RANGE(8, 10);
uint32_t QueueSwitchMode : BITFIELD_RANGE(11, 11);
uint32_t CompareOperation : BITFIELD_RANGE(12, 14);
uint32_t CommandControlledInhibitContextSwitch : BITFIELD_RANGE(15, 15);
uint32_t RegisterPollMode : BITFIELD_RANGE(16, 16);
uint32_t IndirectSemaphoreDataDword : BITFIELD_RANGE(17, 17);
uint32_t WorkloadPartitionIdOffsetEnable : BITFIELD_RANGE(18, 18);
uint32_t _64BCompareDisable : BITFIELD_RANGE(19, 19);
uint32_t SemaphoreInterrupt : BITFIELD_RANGE(20, 20);
uint32_t FastModePoll : BITFIELD_RANGE(21, 21);
uint32_t MemoryType : BITFIELD_RANGE(22, 22);
uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28);
uint32_t CommandType : BITFIELD_RANGE(29, 31);
// DWORD 1-2 (64-bit compare value; the field keeps the "Dword" name of the 32-bit command)
uint64_t SemaphoreDataDword;
// DWORD 3-4 (address field; the two lowest bits are reserved)
uint64_t Reserved_96 : BITFIELD_RANGE(0, 1);
uint64_t SemaphoreAddress : BITFIELD_RANGE(2, 63);
// DWORD 5-6
uint64_t SemaphoreToken;
} Common;
// Raw dword view of the same storage.
uint32_t RawData[7];
} TheStructure;
typedef enum tagDWORD_LENGTH {
DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x5,
} DWORD_LENGTH;
typedef enum tagQUEUE_SWITCH_MODE {
QUEUE_SWITCH_MODE_SWITCH_AFTER_COMMAND_IS_PARSED = 0x0,
QUEUE_SWITCH_MODE_SWITCH_QUEUE_ON_UNSUCCESSFUL = 0x1,
} QUEUE_SWITCH_MODE;
typedef enum tagCOMPARE_OPERATION {
COMPARE_OPERATION_SAD_GREATER_THAN_SDD = 0x0,
COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD = 0x1,
COMPARE_OPERATION_SAD_LESS_THAN_SDD = 0x2,
COMPARE_OPERATION_SAD_LESS_THAN_OR_EQUAL_SDD = 0x3,
COMPARE_OPERATION_SAD_EQUAL_SDD = 0x4,
COMPARE_OPERATION_SAD_NOT_EQUAL_SDD = 0x5,
} COMPARE_OPERATION;
// No bitfield in tagCommon stores a wait mode; this enum exists as the return type of getWaitMode() below.
typedef enum tagWAIT_MODE { // patched
WAIT_MODE_SIGNAL_MODE = 0x0,
WAIT_MODE_POLLING_MODE = 0x1,
} WAIT_MODE;
typedef enum tagREGISTER_POLL_MODE {
REGISTER_POLL_MODE_MEMORY_POLL = 0x0,
REGISTER_POLL_MODE_REGISTER_POLL = 0x1,
} REGISTER_POLL_MODE;
typedef enum tag_64B_COMPARE_DISABLE {
_64B_COMPARE_DISABLE_64B_COMPARE = 0x0,
_64B_COMPARE_DISABLE_32B_COMPARE = 0x1,
} _64B_COMPARE_DISABLE;
typedef enum tagMEMORY_TYPE {
MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS = 0x0,
MEMORY_TYPE_GLOBAL_GRAPHICS_ADDRESS = 0x1,
} MEMORY_TYPE;
typedef enum tagMI_COMMAND_OPCODE {
MI_COMMAND_OPCODE_MI_SEMAPHORE_WAIT = 0x3b, // patched (name only): for compatibility between MI_SEMAPHORE_WAIT and MI_SEMAPHORE_WAIT_64
} MI_COMMAND_OPCODE;
typedef enum tagCOMMAND_TYPE {
COMMAND_TYPE_MI_COMMAND = 0x0,
} COMMAND_TYPE;
// Zeroes the whole command, then sets the header fields to their defaults
// (64-bit compare enabled, memory poll, per-process address).
inline void init() {
memset(&TheStructure, 0, sizeof(TheStructure));
TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1;
TheStructure.Common.QueueSwitchMode = QUEUE_SWITCH_MODE_SWITCH_AFTER_COMMAND_IS_PARSED;
TheStructure.Common.CompareOperation = COMPARE_OPERATION_SAD_GREATER_THAN_SDD;
TheStructure.Common.RegisterPollMode = REGISTER_POLL_MODE_MEMORY_POLL; // patched
TheStructure.Common._64BCompareDisable = _64B_COMPARE_DISABLE_64B_COMPARE;
TheStructure.Common.MemoryType = MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS;
TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_SEMAPHORE_WAIT; // patched
TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND;
}
// Returns a command by value after calling init() on it.
static tagMI_SEMAPHORE_WAIT_64 sInit() {
MI_SEMAPHORE_WAIT_64 state;
state.init();
return state;
}
// Mutable access to raw dword 'index'; aborts for index >= 7.
inline uint32_t &getRawData(const uint32_t index) {
UNRECOVERABLE_IF(index >= 7);
return TheStructure.RawData[index];
}
// SwTokenInfo is a 3-bit field; values above 0x7 abort.
inline void setSwTokenInfo(const uint32_t value) {
UNRECOVERABLE_IF(value > 0x7);
TheStructure.Common.SwTokenInfo = value;
}
inline uint32_t getSwTokenInfo() const {
return TheStructure.Common.SwTokenInfo;
}
inline void setQueueSwitchMode(const QUEUE_SWITCH_MODE value) {
TheStructure.Common.QueueSwitchMode = value;
}
inline QUEUE_SWITCH_MODE getQueueSwitchMode() const {
return static_cast<QUEUE_SWITCH_MODE>(TheStructure.Common.QueueSwitchMode);
}
inline void setCompareOperation(const COMPARE_OPERATION value) {
TheStructure.Common.CompareOperation = value;
}
inline COMPARE_OPERATION getCompareOperation() const {
return static_cast<COMPARE_OPERATION>(TheStructure.Common.CompareOperation);
}
inline void setCommandControlledInhibitContextSwitch(const bool value) {
TheStructure.Common.CommandControlledInhibitContextSwitch = value;
}
inline bool getCommandControlledInhibitContextSwitch() const {
return TheStructure.Common.CommandControlledInhibitContextSwitch;
}
inline void setRegisterPollMode(const REGISTER_POLL_MODE value) {
TheStructure.Common.RegisterPollMode = value;
}
inline REGISTER_POLL_MODE getRegisterPollMode() const {
return static_cast<REGISTER_POLL_MODE>(TheStructure.Common.RegisterPollMode);
}
inline void setIndirectSemaphoreDataDword(const bool value) {
TheStructure.Common.IndirectSemaphoreDataDword = value;
}
inline bool getIndirectSemaphoreDataDword() const {
return TheStructure.Common.IndirectSemaphoreDataDword;
}
inline void setWorkloadPartitionIdOffsetEnable(const bool value) {
TheStructure.Common.WorkloadPartitionIdOffsetEnable = value;
}
inline bool getWorkloadPartitionIdOffsetEnable() const {
return TheStructure.Common.WorkloadPartitionIdOffsetEnable;
}
inline void set64BCompareDisable(const _64B_COMPARE_DISABLE value) {
TheStructure.Common._64BCompareDisable = value;
}
inline _64B_COMPARE_DISABLE get64BCompareDisable() const {
return static_cast<_64B_COMPARE_DISABLE>(TheStructure.Common._64BCompareDisable);
}
inline void setSemaphoreInterrupt(const bool value) {
TheStructure.Common.SemaphoreInterrupt = value;
}
inline bool getSemaphoreInterrupt() const {
return TheStructure.Common.SemaphoreInterrupt;
}
inline void setFastModePoll(const bool value) {
TheStructure.Common.FastModePoll = value;
}
inline bool getFastModePoll() const {
return TheStructure.Common.FastModePoll;
}
inline void setMemoryType(const MEMORY_TYPE value) {
TheStructure.Common.MemoryType = value;
}
inline MEMORY_TYPE getMemoryType() const {
return static_cast<MEMORY_TYPE>(TheStructure.Common.MemoryType);
}
// Despite the "Dword" in the name, the value is a full 64-bit quantity in this command.
inline void setSemaphoreDataDword(const uint64_t value) {
TheStructure.Common.SemaphoreDataDword = value;
}
inline uint64_t getSemaphoreDataDword() const {
return TheStructure.Common.SemaphoreDataDword;
}
typedef enum tagSEMAPHOREADDRESS {
SEMAPHOREADDRESS_BIT_SHIFT = 0x2,
SEMAPHOREADDRESS_ALIGN_SIZE = 0x4,
} SEMAPHOREADDRESS;
// Stores the address shifted right by SEMAPHOREADDRESS_BIT_SHIFT; the two lowest
// bits of 'value' are dropped and no alignment check is performed here.
inline void setSemaphoreGraphicsAddress(const uint64_t value) {
TheStructure.Common.SemaphoreAddress = value >> SEMAPHOREADDRESS_BIT_SHIFT;
}
// Returns the stored field shifted back left, i.e. the address with its two lowest bits zero.
inline uint64_t getSemaphoreGraphicsAddress() const { // patched
return TheStructure.Common.SemaphoreAddress << SEMAPHOREADDRESS_BIT_SHIFT;
}
inline void setSemaphoreToken(const uint64_t value) {
TheStructure.Common.SemaphoreToken = value;
}
inline uint64_t getSemaphoreToken() const {
return TheStructure.Common.SemaphoreToken;
}
// Always reports polling mode; the value is a constant, not read from the command.
inline WAIT_MODE getWaitMode() const {
return WAIT_MODE_POLLING_MODE; // patched - 64-bit semaphore always polls
}
} MI_SEMAPHORE_WAIT_64;
STATIC_ASSERT(28 == sizeof(MI_SEMAPHORE_WAIT_64));
typedef struct tagMI_STORE_DATA_IMM {
union tagTheStructure {
struct tagCommon {

View File

@@ -7,6 +7,7 @@
#pragma once
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/mt_helpers.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
@@ -248,8 +249,7 @@ struct PatchCmd {
}
}
auto semaphoreCmd = reinterpret_cast<typename GfxFamily::MI_SEMAPHORE_WAIT *>(cmd1);
semaphoreCmd->setSemaphoreDataDword(static_cast<uint32_t>(baseCounterValue + appendCounterValue));
EncodeSemaphore<GfxFamily>::setMiSemaphoreWaitValue(cmd1, static_cast<uint32_t>(baseCounterValue + appendCounterValue));
}
void patchComputeWalker(uint64_t appendCounterValue);

View File

@@ -137,6 +137,7 @@ struct Xe3pCoreFamily : public Xe3pCore {
static const STATE_COMPUTE_MODE cmdInitStateComputeMode;
static const _3DSTATE_BINDING_TABLE_POOL_ALLOC cmdInitStateBindingTablePoolAlloc;
static const MI_SEMAPHORE_WAIT cmdInitMiSemaphoreWait;
static const MI_SEMAPHORE_WAIT_64 cmdInitMiSemaphoreWait64;
static const RENDER_SURFACE_STATE cmdInitRenderSurfaceState;
static const POSTSYNC_DATA cmdInitPostSyncData;
static const MI_SET_PREDICATE cmdInitSetPredicate;

View File

@@ -1086,3 +1086,11 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenAggregatedEventUsageCounte
inOrderExecInfo->addAggregatedEventUsageCounter(7);
EXPECT_EQ(7u, inOrderExecInfo->getAggregatedEventUsageCounter());
}
// Checks that EncodeSemaphore::setMiSemaphoreWaitValue stores the given value in the
// semaphore data field of a command copied from the family's initial command.
HWTEST_F(CommandEncoderTests, givenMiSemaphoreWaitCommandWhenSettingSemaphoreValueThenValueIsSet) {
    constexpr uint32_t expectedValue = 0x12345678ul;

    auto cmd = FamilyType::cmdInitMiSemaphoreWait;
    void *rawCmd = &cmd;
    EncodeSemaphore<FamilyType>::setMiSemaphoreWaitValue(rawCmd, expectedValue);

    EXPECT_EQ(expectedValue, cmd.getSemaphoreDataDword());
}

View File

@@ -1111,7 +1111,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
// 6. Scheduler loop check section
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionSize))) {
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::getSemaphoreSectionSize()))) {
return false;
}

View File

@@ -1247,7 +1247,7 @@ HWTEST2_F(DrmDirectSubmissionTest, givenRelaxedOrderingSchedulerRequiredWhenAski
expectedBaseSemaphoreSectionSize += MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(device->getRootDeviceEnvironment());
}
EXPECT_EQ(expectedBaseSemaphoreSectionSize + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::totalSize, directSubmission.getSizeSemaphoreSection(true));
EXPECT_EQ(expectedBaseSemaphoreSectionSize + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::getTotalSize(), directSubmission.getSizeSemaphoreSection(true));
EXPECT_EQ(expectedBaseSemaphoreSectionSize + EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait(), directSubmission.getSizeSemaphoreSection(false));
size_t expectedBaseEndSize = Dispatcher::getSizeStopCommandBuffer() +

View File

@@ -1045,7 +1045,7 @@ HWTEST2_F(WddmDirectSubmissionTest, givenRelaxedOrderingSchedulerRequiredWhenAsk
expectedBaseSemaphoreSectionSize += MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(device->getRootDeviceEnvironment());
}
EXPECT_EQ(expectedBaseSemaphoreSectionSize + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::totalSize, directSubmission.getSizeSemaphoreSection(true));
EXPECT_EQ(expectedBaseSemaphoreSectionSize + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::getTotalSize(), directSubmission.getSizeSemaphoreSection(true));
EXPECT_EQ(expectedBaseSemaphoreSectionSize + EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait(), directSubmission.getSizeSemaphoreSection(false));
size_t expectedBaseEndSize = Dispatcher::getSizeStopCommandBuffer() +