refactor: Passing information about the engine
Extend the interface so that information about the engine type is passed to the function.

Related-To: NEO-10678
Signed-off-by: Andrzej Koska <andrzej.koska@intel.com>
This commit is contained in:
parent
31b2dcfe57
commit
ae139aeffd
|
@ -154,7 +154,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
const size_t *pOffsets, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
|
||||
virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0;
|
||||
virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) = 0;
|
||||
virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0;
|
||||
virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0;
|
||||
virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -14,7 +14,7 @@
|
|||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value) {
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
|
|
@ -162,7 +162,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
|||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
|
||||
ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override;
|
||||
ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) override;
|
||||
ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override;
|
||||
ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) override;
|
||||
ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) override;
|
||||
|
|
|
@ -543,7 +543,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
|||
commandContainer.addToResidencyContainer(alloc);
|
||||
|
||||
for (uint32_t i = 0; i < numKernels; i++) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly());
|
||||
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
|
||||
pLaunchArgumentsBuffer[i],
|
||||
|
@ -2518,7 +2518,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
|||
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter());
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly());
|
||||
|
||||
} else {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
@ -2784,8 +2784,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
|
|||
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
|
||||
|
||||
if (maskLsb) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
|
||||
} else {
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
|
||||
|
@ -3593,8 +3593,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
|||
if (isQwordInOrderCounter()) {
|
||||
indirectMode = true;
|
||||
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true);
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true);
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly());
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly());
|
||||
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
|
@ -3850,7 +3850,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
|
|||
for (uint32_t i = 0u; i < packetsToWait; i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
|
||||
NEO::CompareOperation::equal, true, false);
|
||||
NEO::CompareOperation::equal, true, false, isCopyOnly());
|
||||
} else {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
gpuAddr,
|
||||
|
@ -4045,7 +4045,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
|
|||
|
||||
// Patch Primary Tile section skip (to Secondary Tile section)
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(skipPrimaryTileSectionCmdStream, cmdStream->getCurrentGpuAddressPosition(), workPartitionAllocationGpuVa, 0,
|
||||
NEO::CompareOperation::notEqual, false, false);
|
||||
NEO::CompareOperation::notEqual, false, false, isCopyOnly());
|
||||
|
||||
// Secondary Tile section
|
||||
{
|
||||
|
@ -4059,7 +4059,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
|
|||
|
||||
// Patch Primary Tile section jump to end
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(jumpToEndSectionFromPrimaryTile, cmdStream->getCurrentGpuAddressPosition(), syncAllocationGpuVa + sizeof(uint32_t), queueId,
|
||||
NEO::CompareOperation::equal, false, false);
|
||||
NEO::CompareOperation::equal, false, false, isCopyOnly());
|
||||
|
||||
// End section
|
||||
NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
|
||||
|
|
|
@ -473,13 +473,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
partitionDataSize);
|
||||
partitionDataSize,
|
||||
isCopyOnly());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset());
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset(),
|
||||
isCopyOnly());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -568,7 +570,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
|
|||
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
|
||||
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
|
||||
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/heap_base_address_model.h"
|
||||
#include "shared/source/helpers/pause_on_gpu_properties.h"
|
||||
|
@ -1005,7 +1006,8 @@ void CommandQueueHw<gfxCoreFamily>::programCommandQueueDebugCmdsForSourceLevelOr
|
|||
if (isDebugEnabled && !this->commandQueueDebugCmdsProgrammed) {
|
||||
if (this->device->getL0Debugger()) {
|
||||
this->device->getL0Debugger()->programSbaAddressLoad(cmdStream,
|
||||
device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress());
|
||||
device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress(),
|
||||
NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType()));
|
||||
this->commandQueueDebugCmdsProgrammed = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -517,7 +517,8 @@ struct MockCommandList : public CommandList {
|
|||
|
||||
ADDMETHOD_NOBASE(appendMILoadRegImm, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint32_t reg,
|
||||
uint32_t value));
|
||||
uint32_t value,
|
||||
bool isBcs));
|
||||
|
||||
ADDMETHOD_NOBASE(appendMILoadRegReg, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint32_t reg1,
|
||||
|
|
|
@ -1857,7 +1857,7 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering
|
|||
lrrCmd++;
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, cmdList->inOrderExecInfo->getBaseDeviceAddress(), 2,
|
||||
NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter));
|
||||
NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter, false));
|
||||
}
|
||||
|
||||
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
|
||||
|
|
|
@ -1969,7 +1969,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
|
|||
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, eventEndGpuVa, static_cast<uint64_t>(Event::STATE_CLEARED),
|
||||
NEO::CompareOperation::equal, true, false));
|
||||
NEO::CompareOperation::equal, true, false, false));
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
@ -6393,7 +6393,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
|
|||
}
|
||||
|
||||
// Primary Tile section skip - patching
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -6409,7 +6409,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
|
|||
}
|
||||
|
||||
// Jump to end from Primary Tile section - patching
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -386,6 +386,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
bool getHeaplessStateInitEnabled() const { return this->heaplessStateInitEnabled; }
|
||||
|
||||
bool isBcsSplitInitialized() const { return this->bcsSplitInitialized; }
|
||||
bool isBcs() const { return isCopyOnly; };
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
|
|
|
@ -269,7 +269,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
if (programBarrierInTaskStream) {
|
||||
CsrDependencies csrDeps{};
|
||||
fillCsrDependenciesWithLastBcsPackets(csrDeps);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps, false);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps, false, isCopyOnly);
|
||||
|
||||
setupBarrierTimestampForBcsEngines(getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), timestampPacketDependencies);
|
||||
getGpgpuCommandStreamReceiver().programStallingCommandsForBarrier(commandStream, &timestampPacketDependencies.barrierNodes, isDcFlushRequiredOnStallingCommandsOnNextFlush());
|
||||
|
@ -314,7 +314,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(commandStream);
|
||||
}
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps, relaxedOrderingEnabled);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps, relaxedOrderingEnabled, isCopyOnly);
|
||||
}
|
||||
|
||||
if (isNonStallingIoqBarrierWithDependencies) {
|
||||
|
@ -660,7 +660,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
|||
LinearStream *commandStream,
|
||||
CsrDependencies &csrDeps) {
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDeps, false);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDeps, false, isCopyOnly);
|
||||
|
||||
uint64_t postSyncAddress = 0;
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -45,6 +45,7 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
|||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
RegisterOffsets::csGprR1,
|
||||
mask,
|
||||
false,
|
||||
false);
|
||||
|
||||
// Add instruction MI_MATH with 4 MI_MATH_ALU_INST_INLINE operands
|
||||
|
|
|
@ -117,7 +117,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||
RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandStream);
|
||||
}
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDependencies, walkerArgs.relaxedOrderingEnabled);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDependencies, walkerArgs.relaxedOrderingEnabled, commandQueue.isBcs());
|
||||
|
||||
dsh->align(EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
|
||||
|
||||
|
@ -165,7 +165,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
|
||||
uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();
|
||||
uint32_t registerData = debugManager.flags.GpuScratchRegWriteRegisterData.get();
|
||||
LriHelper<GfxFamily>::program(commandStream, registerOffset, registerData, EncodeSetMMIO<GfxFamily>::isRemapApplicable(registerOffset));
|
||||
LriHelper<GfxFamily>::program(commandStream, registerOffset, registerData, EncodeSetMMIO<GfxFamily>::isRemapApplicable(registerOffset), commandQueue.isBcs());
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(debugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -40,7 +40,8 @@ void HardwareInterface<Family>::dispatchWorkarounds(
|
|||
NEO::LriHelper<Family>::program(commandStream,
|
||||
0x7010,
|
||||
value,
|
||||
false);
|
||||
false,
|
||||
commandQueue.isBcs());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -586,7 +586,7 @@ void ConditionalBbStartTests<T>::whenDispatchingEqualModeThenResultsAreValidImpl
|
|||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::equal, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::equal, false, isQwordData, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
|
@ -599,7 +599,7 @@ void ConditionalBbStartTests<T>::whenDispatchingEqualModeThenResultsAreValidImpl
|
|||
// Greater
|
||||
{
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::equal, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::equal, false, isQwordData, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
|
@ -609,7 +609,7 @@ void ConditionalBbStartTests<T>::whenDispatchingEqualModeThenResultsAreValidImpl
|
|||
|
||||
// Less
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::equal, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::equal, false, isQwordData, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
|
@ -641,7 +641,7 @@ void ConditionalBbStartTests<T>::whenDispatchingNotEqualModeThenResultsAreValidI
|
|||
// Equal
|
||||
{
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::notEqual, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::notEqual, false, isQwordData, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
|
@ -654,7 +654,7 @@ void ConditionalBbStartTests<T>::whenDispatchingNotEqualModeThenResultsAreValidI
|
|||
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::notEqual, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::notEqual, false, isQwordData, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
|
@ -668,7 +668,7 @@ void ConditionalBbStartTests<T>::whenDispatchingNotEqualModeThenResultsAreValidI
|
|||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::notEqual, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::notEqual, false, isQwordData, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
|
@ -703,7 +703,7 @@ void ConditionalBbStartTests<T>::whenDispatchingGreaterOrEqualModeThenResultsAre
|
|||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::greaterOrEqual, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::greaterOrEqual, false, isQwordData, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
|
@ -718,7 +718,7 @@ void ConditionalBbStartTests<T>::whenDispatchingGreaterOrEqualModeThenResultsAre
|
|||
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::greaterOrEqual, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::greaterOrEqual, false, isQwordData, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
|
@ -730,7 +730,7 @@ void ConditionalBbStartTests<T>::whenDispatchingGreaterOrEqualModeThenResultsAre
|
|||
|
||||
// Less
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::greaterOrEqual, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::greaterOrEqual, false, isQwordData, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
|
@ -761,7 +761,7 @@ void ConditionalBbStartTests<T>::whenDispatchingLessModeThenResultsAreValidImpl(
|
|||
|
||||
// Equal
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::less, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::less, false, isQwordData, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
|
@ -771,7 +771,7 @@ void ConditionalBbStartTests<T>::whenDispatchingLessModeThenResultsAreValidImpl(
|
|||
|
||||
// Greater
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::less, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::less, false, isQwordData, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
|
@ -783,7 +783,7 @@ void ConditionalBbStartTests<T>::whenDispatchingLessModeThenResultsAreValidImpl(
|
|||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::less, false, isQwordData);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::less, false, isQwordData, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -140,6 +140,11 @@ TEST(CommandQueue, WhenGettingErrorCodeFromTaskCountThenProperValueIsReturned) {
|
|||
EXPECT_EQ(CL_OUT_OF_RESOURCES, CommandQueue::getErrorCodeFromTaskCount(CompletionStamp::failed));
|
||||
}
|
||||
|
||||
TEST(CommandQueue, GivenCommandQueueWhenIsBcsIsCalledThenIsCopyOnlyIsReturned) {
|
||||
MockCommandQueue cmdQ(nullptr, nullptr, 0, false);
|
||||
EXPECT_EQ(cmdQ.isBcs(), cmdQ.isCopyOnly);
|
||||
}
|
||||
|
||||
TEST(CommandQueue, WhenConstructingCommandQueueThenTaskLevelAndTaskCountAreZero) {
|
||||
MockCommandQueue cmdQ(nullptr, nullptr, 0, false);
|
||||
EXPECT_EQ(0u, cmdQ.taskLevel);
|
||||
|
|
|
@ -1171,7 +1171,7 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenEnqueueKernelWhenProgrammingDe
|
|||
auto eventNode = castToObject<Event>(outEvent)->getTimestampPacketNodes()->peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::equal, true, false));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::equal, true, false, false));
|
||||
|
||||
mockCmdQueueHw.enqueueBarrierWithWaitList(1, &outEvent, nullptr);
|
||||
|
||||
|
@ -1269,10 +1269,10 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenBarrierWithDependenciesWhenFlu
|
|||
auto eventNode = castToObject<Event>(outEvent)->getTimestampPacketNodes()->peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::equal, true, false));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::equal, true, false, false));
|
||||
|
||||
auto conditionalBbStart2 = reinterpret_cast<void *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(conditionalBbStart2, 0, compareAddress, 1, CompareOperation::equal, true, false));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(conditionalBbStart2, 0, compareAddress, 1, CompareOperation::equal, true, false, false));
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(conditionalBbStart2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
|
||||
EXPECT_NE(nullptr, sdiCmd);
|
||||
|
|
|
@ -1068,8 +1068,7 @@ HWTEST2_F(RelaxedOrderingBcsTests, givenDependenciesWhenFlushingThenProgramCorre
|
|||
|
||||
auto eventNode = timestamp.peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::equal, true, false));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::equal, true, false, true));
|
||||
}
|
||||
|
||||
HWTEST2_F(RelaxedOrderingBcsTests, givenDependenciesWhenFlushingThenProgramProgramRelaxedOrderingOnlyIfAllowed, IsAtLeastXeHpcCore) {
|
||||
|
|
|
@ -321,7 +321,7 @@ GEN12LPTEST_F(LriHelperTestsGen12Lp, whenProgrammingLriCommandThenExpectMmioRema
|
|||
expectedLri.setDataDword(data);
|
||||
expectedLri.setMmioRemapEnable(false);
|
||||
|
||||
LriHelper<FamilyType>::program(&stream, address, data, false);
|
||||
LriHelper<FamilyType>::program(&stream, address, data, false, false);
|
||||
MI_LOAD_REGISTER_IMM *lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(buffer.get());
|
||||
ASSERT_NE(nullptr, lri);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -50,7 +50,7 @@ GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenChangedL3ConfigWhenL3IsProgramm
|
|||
|
||||
uint32_t l3Config = 0x12345678;
|
||||
|
||||
csr.programL3(stream, l3Config);
|
||||
csr.programL3(stream, l3Config, false);
|
||||
|
||||
this->parseCommands<FamilyType>(stream);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -691,7 +691,7 @@ XE_HPC_CORETEST_F(LriHelperTestsXeHpcCore, whenProgrammingLriCommandThenExpectMm
|
|||
expectedLri.setDataDword(data);
|
||||
expectedLri.setMmioRemapEnable(true);
|
||||
|
||||
LriHelper<FamilyType>::program(&stream, address, data, true);
|
||||
LriHelper<FamilyType>::program(&stream, address, data, true, false);
|
||||
MI_LOAD_REGISTER_IMM *lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(buffer.get());
|
||||
ASSERT_NE(nullptr, lri);
|
||||
|
||||
|
|
|
@ -266,16 +266,17 @@ struct EncodeMathMMIO {
|
|||
|
||||
static const size_t size = sizeof(MI_STORE_REGISTER_MEM);
|
||||
|
||||
static void encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress);
|
||||
static void encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress, bool isBcs);
|
||||
|
||||
static void encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal);
|
||||
static void encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal, bool isBcs);
|
||||
|
||||
static void encodeBitwiseAndVal(CommandContainer &container,
|
||||
uint32_t regOffset,
|
||||
uint32_t immVal,
|
||||
uint64_t dstAddress,
|
||||
bool workloadPartition,
|
||||
void **outCmdBuffer);
|
||||
void **outCmdBuffer,
|
||||
bool isBcs);
|
||||
|
||||
static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result);
|
||||
|
||||
|
@ -291,8 +292,8 @@ struct EncodeMathMMIO {
|
|||
AluRegisters secondOperandRegister,
|
||||
AluRegisters finalResultRegister);
|
||||
|
||||
static void encodeIncrement(LinearStream &cmdStream, AluRegisters operandRegister);
|
||||
static void encodeDecrement(LinearStream &cmdStream, AluRegisters operandRegister);
|
||||
static void encodeIncrement(LinearStream &cmdStream, AluRegisters operandRegister, bool isBcs);
|
||||
static void encodeDecrement(LinearStream &cmdStream, AluRegisters operandRegister, bool isBcs);
|
||||
static constexpr size_t getCmdSizeForIncrementOrDecrement() {
|
||||
return (EncodeAluHelper<GfxFamily, 4>::getCmdsSize() + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
|
||||
}
|
||||
|
@ -303,7 +304,7 @@ struct EncodeMathMMIO {
|
|||
decrement = 1,
|
||||
};
|
||||
|
||||
static void encodeIncrementOrDecrement(LinearStream &cmdStream, AluRegisters operandRegister, IncrementOrDecrementOperation operationType);
|
||||
static void encodeIncrementOrDecrement(LinearStream &cmdStream, AluRegisters operandRegister, IncrementOrDecrementOperation operationType, bool isBcs);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -333,11 +334,11 @@ struct EncodeSetMMIO {
|
|||
static const size_t sizeMEM = sizeof(MI_LOAD_REGISTER_MEM);
|
||||
static const size_t sizeREG = sizeof(MI_LOAD_REGISTER_REG);
|
||||
|
||||
static void encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap);
|
||||
static void encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap, bool isBcs);
|
||||
static void encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address);
|
||||
static void encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset);
|
||||
|
||||
static void encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap);
|
||||
static void encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap, bool isBcs);
|
||||
static void encodeMEM(LinearStream &cmdStream, uint32_t offset, uint64_t address);
|
||||
static void encodeREG(LinearStream &cmdStream, uint32_t dstOffset, uint32_t srcOffset);
|
||||
|
||||
|
@ -492,10 +493,10 @@ struct EncodeBatchBufferStartOrEnd {
|
|||
static void programBatchBufferEnd(CommandContainer &container);
|
||||
static void programBatchBufferEnd(LinearStream &commandStream);
|
||||
|
||||
static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData);
|
||||
static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData);
|
||||
static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData, bool isBcs);
|
||||
static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData, bool isBcs);
|
||||
static void programConditionalRegRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect);
|
||||
static void programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, CompareOperation compareOperation, bool indirect);
|
||||
static void programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, CompareOperation compareOperation, bool indirect, bool isBcs);
|
||||
|
||||
static size_t constexpr getCmdSizeConditionalDataMemBatchBufferStart(bool useQwordData) {
|
||||
size_t size = (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
|
||||
|
|
|
@ -100,7 +100,7 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
|
|||
} // namespace NEO
|
||||
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) {
|
||||
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress, bool isBcs) {
|
||||
int logLws = 0;
|
||||
int i = val;
|
||||
while (val >> logLws) {
|
||||
|
@ -108,7 +108,7 @@ void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32
|
|||
}
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, offset);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, 0, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, 0, true, isBcs);
|
||||
|
||||
i = 0;
|
||||
while (i < logLws) {
|
||||
|
@ -134,9 +134,9 @@ void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32
|
|||
* set, then (*firstOperand) is greater than secondOperand.
|
||||
*/
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &container, uint64_t firstOperand, uint32_t secondOperand) {
|
||||
void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &container, uint64_t firstOperand, uint32_t secondOperand, bool isBcs) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(container, RegisterOffsets::csGprR0, firstOperand);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, secondOperand, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, secondOperand, true, isBcs);
|
||||
|
||||
/* RegisterOffsets::csGprR* registers map to AluRegisters::gpr* registers */
|
||||
EncodeMath<Family>::greaterThan(container, AluRegisters::gpr0,
|
||||
|
@ -151,9 +151,9 @@ void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &contai
|
|||
*/
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress,
|
||||
bool workloadPartition, void **outCmdBuffer) {
|
||||
bool workloadPartition, void **outCmdBuffer, bool isBcs) {
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR13, regOffset);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true, isBcs);
|
||||
EncodeMath<Family>::bitwiseAnd(container, AluRegisters::gpr13,
|
||||
AluRegisters::gpr14,
|
||||
AluRegisters::gpr12);
|
||||
|
@ -250,9 +250,9 @@ void EncodeMathMMIO<Family>::encodeAluAnd(MI_MATH_ALU_INST_INLINE *pAluParam,
|
|||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeIncrementOrDecrement(LinearStream &cmdStream, AluRegisters operandRegister, IncrementOrDecrementOperation operationType) {
|
||||
LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7, 1, true);
|
||||
LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
void EncodeMathMMIO<Family>::encodeIncrementOrDecrement(LinearStream &cmdStream, AluRegisters operandRegister, IncrementOrDecrementOperation operationType, bool isBcs) {
|
||||
LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7, 1, true, isBcs);
|
||||
LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
|
||||
|
||||
EncodeAluHelper<Family, 4> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, operandRegister);
|
||||
|
@ -265,13 +265,13 @@ void EncodeMathMMIO<Family>::encodeIncrementOrDecrement(LinearStream &cmdStream,
|
|||
}
|
||||
|
||||
// Increment entry point: delegates to encodeIncrementOrDecrement with
// IncrementOrDecrementOperation::increment for the given operandRegister.
// isBcs (engine-type flag) is passed through unchanged to the shared helper.
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeIncrement(LinearStream &cmdStream, AluRegisters operandRegister) {
|
||||
encodeIncrementOrDecrement(cmdStream, operandRegister, IncrementOrDecrementOperation::increment);
|
||||
void EncodeMathMMIO<Family>::encodeIncrement(LinearStream &cmdStream, AluRegisters operandRegister, bool isBcs) {
|
||||
encodeIncrementOrDecrement(cmdStream, operandRegister, IncrementOrDecrementOperation::increment, isBcs);
|
||||
}
|
||||
|
||||
// Decrement entry point: delegates to encodeIncrementOrDecrement with
// IncrementOrDecrementOperation::decrement for the given operandRegister.
// isBcs (engine-type flag) is passed through unchanged to the shared helper.
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeDecrement(LinearStream &cmdStream, AluRegisters operandRegister) {
|
||||
encodeIncrementOrDecrement(cmdStream, operandRegister, IncrementOrDecrementOperation::decrement);
|
||||
void EncodeMathMMIO<Family>::encodeDecrement(LinearStream &cmdStream, AluRegisters operandRegister, bool isBcs) {
|
||||
encodeIncrementOrDecrement(cmdStream, operandRegister, IncrementOrDecrementOperation::decrement, isBcs);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -332,8 +332,8 @@ void EncodeMath<Family>::bitwiseAnd(CommandContainer &container,
|
|||
}
|
||||
|
||||
// CommandContainer overload: forwards to the LinearStream overload using the
// container's command stream (container.getCommandStream()). offset, data,
// remap and isBcs are passed through without modification.
template <typename Family>
|
||||
inline void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap) {
|
||||
EncodeSetMMIO<Family>::encodeIMM(*container.getCommandStream(), offset, data, remap);
|
||||
inline void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap, bool isBcs) {
|
||||
EncodeSetMMIO<Family>::encodeIMM(*container.getCommandStream(), offset, data, remap, isBcs);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
@ -347,11 +347,12 @@ inline void EncodeSetMMIO<Family>::encodeREG(CommandContainer &container, uint32
|
|||
}
|
||||
|
||||
// LinearStream overload: thin wrapper over LriHelper<Family>::program, which
// receives the stream pointer plus offset, data, remap and isBcs exactly as
// given. No other work is done here.
template <typename Family>
|
||||
inline void EncodeSetMMIO<Family>::encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap) {
|
||||
inline void EncodeSetMMIO<Family>::encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap, bool isBcs) {
|
||||
LriHelper<Family>::program(&cmdStream,
|
||||
offset,
|
||||
data,
|
||||
remap);
|
||||
remap,
|
||||
isBcs);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
@ -630,7 +631,7 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
|
|||
*/
|
||||
|
||||
if (groupSize[2] > 1) {
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, resultRegister, 3 << (8 * (dstPtr & 0b11)), true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, resultRegister, 3 << (8 * (dstPtr & 0b11)), true, false);
|
||||
} else {
|
||||
|
||||
constexpr uint32_t groupCount2Register = RegisterOffsets::csGprR1;
|
||||
|
@ -664,20 +665,20 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
|
|||
|
||||
if (offset) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(container, backupRegister, dstPtr);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, memoryMaskRegister, memoryMask, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, memoryMaskRegister, memoryMask, true, false);
|
||||
EncodeMath<Family>::bitwiseAnd(container, memoryMaskAluRegister, backupAluRegister, backupAluRegister);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, offsetRegister, offset, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, offsetRegister, offset, true, false);
|
||||
}
|
||||
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, constantOneRegister, 1, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, constantTwoRegister, 2, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, constantOneRegister, 1, true, false);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, constantTwoRegister, 2, true, false);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(container, groupCount2Register, RegisterOffsets::gpgpuDispatchDim[2]);
|
||||
|
||||
EncodeMath<Family>::greaterThan(container, groupCount2AluRegister, constantOneAluRegister, workDimEq3AluRegister);
|
||||
EncodeMath<Family>::bitwiseAnd(container, workDimEq3AluRegister, constantOneAluRegister, workDimEq3AluRegister);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, groupSize1Register, groupSize[1], true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, groupSize1Register, groupSize[1], true, false);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, groupCount1Register, RegisterOffsets::gpgpuDispatchDim[1]);
|
||||
|
||||
EncodeMath<Family>::addition(container, groupSize1AluRegister, groupCount1AluRegister, sumAluRegister);
|
||||
|
@ -897,7 +898,7 @@ void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &c
|
|||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeMathMMIO<Family>::encodeMulRegVal(container, RegisterOffsets::gpgpuDispatchDim[i], lws[i], ptrOffset(crossThreadAddress, offsets[i]));
|
||||
EncodeMathMMIO<Family>::encodeMulRegVal(container, RegisterOffsets::gpgpuDispatchDim[i], lws[i], ptrOffset(crossThreadAddress, offsets[i]), false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -994,39 +995,39 @@ void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
|
|||
|
||||
// Conditional batch-buffer start comparing a value read from memory against
// immediate data.
//   - csGprR7 (low dword) is loaded from compareAddress; the high dword
//     (csGprR7 + 4) is loaded from compareAddress + 4 when useQwordData is
//     set, otherwise it is written with 0.
//   - csGprR8 / csGprR8 + 4 receive the low / high 32 bits of compareData;
//     the high part is forced to 0 when useQwordData is false.
//   - The comparison and jump are then emitted by
//     programConditionalBatchBufferStartBase using gpr7 and gpr8.
// isBcs is forwarded only to the LriHelper::program calls.
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress,
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData, bool isBcs) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7, compareAddress);
|
||||
|
||||
if (useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7 + 4, compareAddress + 4);
|
||||
} else {
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
|
||||
}
|
||||
|
||||
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
|
||||
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
|
||||
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true, isBcs);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true, isBcs);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::gpr7, AluRegisters::gpr8, compareOperation, indirect);
|
||||
}
|
||||
|
||||
// Conditional batch-buffer start comparing a register value against
// immediate data.
//   - csGprR7 (low dword) is copied from compareReg; the high dword
//     (csGprR7 + 4) is copied from compareReg + 4 when useQwordData is set,
//     otherwise it is written with 0.
//   - csGprR8 / csGprR8 + 4 receive the low / high 32 bits of compareData;
//     the high part is forced to 0 when useQwordData is false.
//   - The comparison and jump are then emitted by
//     programConditionalBatchBufferStartBase using gpr7 and gpr8.
// isBcs is forwarded only to the LriHelper::program calls.
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg,
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData, bool isBcs) {
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR7, compareReg);
|
||||
if (useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR7 + 4, compareReg + 4);
|
||||
} else {
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
|
||||
}
|
||||
|
||||
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
|
||||
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
|
||||
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true, isBcs);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true, isBcs);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::gpr7, AluRegisters::gpr8, compareOperation, indirect);
|
||||
}
|
||||
|
@ -1040,12 +1041,12 @@ void EncodeBatchBufferStartOrEnd<Family>::programConditionalRegRegBatchBufferSta
|
|||
|
||||
// Conditional batch-buffer start comparing a value read from memory against
// a register value.
//   - csGprR7 is loaded from compareAddress and csGprR7 + 4 is written with 0.
//   - csGprR8 is copied from compareReg and csGprR8 + 4 is written with 0.
//   - The comparison and jump are then emitted by
//     programConditionalBatchBufferStartBase using gpr7 and gpr8.
// isBcs is forwarded only to the LriHelper::program calls.
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg,
|
||||
CompareOperation compareOperation, bool indirect) {
|
||||
CompareOperation compareOperation, bool indirect, bool isBcs) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7, compareAddress);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR8, compareReg);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, 0, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, 0, true, isBcs);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::gpr7, AluRegisters::gpr8, compareOperation, indirect);
|
||||
}
|
||||
|
|
|
@ -507,7 +507,7 @@ template <typename Family>
|
|||
void EncodeL3State<Family>::encode(CommandContainer &container, bool enableSLM) {
// Writes the L3 configuration as an immediate MMIO value: the target is
// L3CNTLRegisterOffset<Family>::registerOffset and the value comes from
// PreambleHelper<Family>::getL3Config for the container's device hardware
// info and the enableSLM flag. remap is false; the isBcs argument of the
// updated call is hard-coded to false.
|
||||
auto offset = L3CNTLRegisterOffset<Family>::registerOffset;
|
||||
auto data = PreambleHelper<Family>::getL3Config(container.getDevice()->getHardwareInfo(), enableSLM);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, offset, data, false);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, offset, data, false, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -79,11 +79,12 @@ struct ImplicitScalingDispatch {
|
|||
static size_t getRegisterConfigurationSize();
|
||||
static void dispatchRegisterConfiguration(LinearStream &commandStream,
|
||||
uint64_t workPartitionSurfaceAddress,
|
||||
uint32_t addressOffset);
|
||||
uint32_t addressOffset,
|
||||
bool isBcs);
|
||||
|
||||
static size_t getOffsetRegisterSize();
|
||||
static void dispatchOffsetRegister(LinearStream &commandStream,
|
||||
uint32_t addressOffset);
|
||||
uint32_t addressOffset, bool isBcs);
|
||||
|
||||
static uint32_t getImmediateWritePostSyncOffset();
|
||||
static uint32_t getTimeStampPostSyncOffset();
|
||||
|
|
|
@ -44,7 +44,7 @@ inline size_t ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize()
|
|||
}
|
||||
|
||||
// Empty implementation in this variant: the body contains no statements, so
// nothing is written to commandStream and all parameters (including isBcs)
// are unused.
template <typename GfxFamily>
|
||||
inline void ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(LinearStream &commandStream, uint64_t workPartitionSurfaceAddress, uint32_t addressOffset) {
|
||||
inline void ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(LinearStream &commandStream, uint64_t workPartitionSurfaceAddress, uint32_t addressOffset, bool isBcs) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -53,7 +53,7 @@ inline size_t ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize() {
|
|||
}
|
||||
|
||||
// Empty implementation in this variant: the body contains no statements, so
// nothing is written to commandStream and all parameters (including isBcs)
// are unused.
template <typename GfxFamily>
|
||||
inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStream &commandStream, uint32_t addressOffset) {
|
||||
inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStream &commandStream, uint32_t addressOffset, bool isBcs) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -218,11 +218,12 @@ inline size_t ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize()
|
|||
// Programs the partition registers in two steps:
//   1. loads PartitionRegisters<GfxFamily>::wparidCCSOffset from memory at
//      workPartitionSurfaceAddress (EncodeSetMMIO::encodeMEM);
//   2. calls dispatchOffsetRegister to program the address offset, passing
//      isBcs through.
template <typename GfxFamily>
|
||||
inline void ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(LinearStream &commandStream,
|
||||
uint64_t workPartitionSurfaceAddress,
|
||||
uint32_t addressOffset) {
|
||||
uint32_t addressOffset,
|
||||
bool isBcs) {
|
||||
EncodeSetMMIO<GfxFamily>::encodeMEM(commandStream,
|
||||
PartitionRegisters<GfxFamily>::wparidCCSOffset,
|
||||
workPartitionSurfaceAddress);
|
||||
dispatchOffsetRegister(commandStream, addressOffset);
|
||||
dispatchOffsetRegister(commandStream, addressOffset, isBcs);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -232,11 +233,12 @@ inline size_t ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize() {
|
|||
|
||||
// Writes addressOffset as an immediate value to
// PartitionRegisters<GfxFamily>::addressOffsetCCSOffset via
// EncodeSetMMIO::encodeIMM, with remap set to true and isBcs forwarded.
template <typename GfxFamily>
|
||||
inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStream &commandStream,
|
||||
uint32_t addressOffset) {
|
||||
uint32_t addressOffset, bool isBcs) {
|
||||
EncodeSetMMIO<GfxFamily>::encodeIMM(commandStream,
|
||||
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
addressOffset,
|
||||
true);
|
||||
true,
|
||||
isBcs);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -202,7 +202,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
|
||||
protected:
|
||||
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programL3(LinearStream &csr, uint32_t &newL3Config);
|
||||
void programL3(LinearStream &csr, uint32_t &newL3Config, bool isBcs);
|
||||
void programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config);
|
||||
void programPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs);
|
||||
void programEpilogue(LinearStream &csr, Device &device, void **batchBufferEndLocation, DispatchFlags &dispatchFlags);
|
||||
|
|
|
@ -481,7 +481,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
auto &commandStreamCSR = this->getCS(estimatedSize);
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, false);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, false, EngineHelpers::isBcs(this->osContext->getEngineType()));
|
||||
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
|
||||
programActivePartitionConfigFlushTask(commandStreamCSR);
|
||||
|
@ -494,7 +494,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
programHardwareContext(commandStreamCSR);
|
||||
programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs);
|
||||
programComputeMode(commandStreamCSR, dispatchFlags, hwInfo);
|
||||
programL3(commandStreamCSR, newL3Config);
|
||||
programL3(commandStreamCSR, newL3Config, EngineHelpers::isBcs(this->osContext->getEngineType()));
|
||||
programPreamble(commandStreamCSR, device, newL3Config);
|
||||
programMediaSampler(commandStreamCSR, dispatchFlags);
|
||||
addPipeControlBefore3dState(commandStreamCSR, dispatchFlags);
|
||||
|
@ -882,7 +882,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateSip(LinearStream &cm
|
|||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config) {
|
||||
if (!this->isPreambleSent) {
|
||||
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->preemptionAllocation);
|
||||
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->preemptionAllocation, EngineHelpers::isBcs(osContext->getEngineType()));
|
||||
this->isPreambleSent = true;
|
||||
this->lastSentL3Config = newL3Config;
|
||||
}
|
||||
|
@ -1045,7 +1045,7 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
|
|||
MiFlushArgs args{waArgs};
|
||||
|
||||
for (auto &blitProperties : blitPropertiesContainer) {
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies, isRelaxedOrderingDispatch);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies, isRelaxedOrderingDispatch, EngineHelpers::isBcs(this->osContext->getEngineType()));
|
||||
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
|
||||
BlitCommandsHelper<GfxFamily>::encodeWa(commandStream, blitProperties, latestSentBcsWaValue);
|
||||
|
@ -1740,7 +1740,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressCommon(
|
|||
bool sbaTrackingEnabled = debuggingEnabled;
|
||||
if (sbaTrackingEnabled) {
|
||||
device.getL0Debugger()->programSbaAddressLoad(csrCommandStream,
|
||||
device.getL0Debugger()->getSbaTrackingBuffer(this->getOsContext().getContextId())->getGpuAddress());
|
||||
device.getL0Debugger()->getSbaTrackingBuffer(this->getOsContext().getContextId())->getGpuAddress(),
|
||||
EngineHelpers::isBcs(this->osContext->getEngineType()));
|
||||
}
|
||||
|
||||
NEO::EncodeStateBaseAddress<GfxFamily>::setSbaTrackingForL0DebuggerIfEnabled(sbaTrackingEnabled,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -15,7 +15,7 @@ template <typename GfxFamily>
|
|||
bool CommandStreamReceiverHw<GfxFamily>::are4GbHeapsAvailable() const { return true; }
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programL3(LinearStream &csr, uint32_t &newL3Config) {
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programL3(LinearStream &csr, uint32_t &newL3Config, bool isBcs) {
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
if (csrSizeRequestFlags.l3ConfigChanged && this->isPreambleSent) {
|
||||
// Add a PIPE_CONTROL w/ CS_stall
|
||||
|
@ -24,7 +24,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programL3(LinearStream &csr, uin
|
|||
setClearSlmWorkAroundParameter(args);
|
||||
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(csr, args);
|
||||
|
||||
PreambleHelper<GfxFamily>::programL3(&csr, newL3Config);
|
||||
PreambleHelper<GfxFamily>::programL3(&csr, newL3Config, isBcs);
|
||||
this->lastSentL3Config = newL3Config;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -20,7 +20,7 @@ template <typename GfxFamily>
|
|||
bool CommandStreamReceiverHw<GfxFamily>::are4GbHeapsAvailable() const { return is64bit; }
|
||||
|
||||
// Empty implementation in this variant: the body is `{}`, so no commands are
// written to csr and newL3Config / isBcs are not used.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programL3(LinearStream &csr, uint32_t &newL3Config) {}
|
||||
void CommandStreamReceiverHw<GfxFamily>::programL3(LinearStream &csr, uint32_t &newL3Config, bool isBcs) {}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredStateBaseAddressSize(const Device &device) const {
|
||||
|
@ -127,7 +127,8 @@ template <typename GfxFamily>
|
|||
inline void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig(LinearStream &csr) {
// When staticWorkPartitioningEnabled is set, dispatches the implicit-scaling
// register configuration into csr using the work-partition allocation GPU
// address and immWritePostSyncWriteOffset; the updated call also passes
// EngineHelpers::isBcs(osContext->getEngineType()) as the isBcs argument.
// Regardless of that flag, activePartitionsConfig is then set to
// activePartitions.
|
||||
if (this->staticWorkPartitioningEnabled) {
|
||||
uint64_t workPartitionAddress = getWorkPartitionAllocationGpuAddress();
|
||||
ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(csr, workPartitionAddress, this->immWritePostSyncWriteOffset);
|
||||
ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(csr, workPartitionAddress,
|
||||
this->immWritePostSyncWriteOffset, EngineHelpers::isBcs(osContext->getEngineType()));
|
||||
}
|
||||
this->activePartitionsConfig = this->activePartitions;
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode
|
|||
regVal = PreemptionConfig<GfxFamily>::cmdLevelVal | PreemptionConfig<GfxFamily>::mask;
|
||||
}
|
||||
|
||||
LriHelper<GfxFamily>::program(&cmdStream, PreemptionConfig<GfxFamily>::mmioAddress, regVal, true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, PreemptionConfig<GfxFamily>::mmioAddress, regVal, true, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -101,7 +101,7 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
|
|||
void initSbaTrackingMode();
|
||||
|
||||
virtual size_t getSbaAddressLoadCommandsSize() = 0;
|
||||
virtual void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) = 0;
|
||||
virtual void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa, bool isBcs) = 0;
|
||||
|
||||
MOCKABLE_VIRTUAL bool attachZebinModuleToSegmentAllocations(const StackVec<NEO::GraphicsAllocation *, 32> &kernelAlloc, uint32_t &moduleHandle, uint32_t elfHandle);
|
||||
MOCKABLE_VIRTUAL bool removeZebinModule(uint32_t moduleHandle);
|
||||
|
@ -145,7 +145,7 @@ class DebuggerL0Hw : public DebuggerL0 {
|
|||
void captureStateBaseAddress(NEO::LinearStream &cmdStream, SbaAddresses sba, bool useFirstLevelBB) override;
|
||||
size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override;
|
||||
size_t getSbaAddressLoadCommandsSize() override;
|
||||
void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) override;
|
||||
void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa, bool isBcs) override;
|
||||
|
||||
void programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba, bool useFirstLevelBB);
|
||||
|
||||
|
|
|
@ -121,7 +121,7 @@ size_t DebuggerL0Hw<GfxFamily>::getSbaAddressLoadCommandsSize() {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DebuggerL0Hw<GfxFamily>::programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) {
|
||||
void DebuggerL0Hw<GfxFamily>::programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa, bool isBcs) {
|
||||
if (!singleAddressSpaceSbaTracking) {
|
||||
return;
|
||||
}
|
||||
|
@ -131,12 +131,14 @@ void DebuggerL0Hw<GfxFamily>::programSbaAddressLoad(NEO::LinearStream &cmdStream
|
|||
NEO::LriHelper<GfxFamily>::program(&cmdStream,
|
||||
DebuggerRegisterOffsets::csGprR15,
|
||||
low,
|
||||
true);
|
||||
true,
|
||||
isBcs);
|
||||
|
||||
NEO::LriHelper<GfxFamily>::program(&cmdStream,
|
||||
DebuggerRegisterOffsets::csGprR15 + 4,
|
||||
high,
|
||||
true);
|
||||
true,
|
||||
isBcs);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -75,7 +75,7 @@ void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommandsSingleAddressSpace(NEO::
|
|||
for (const auto &pair : fieldOffsetAndValue) {
|
||||
|
||||
// Store SBA field offset to R0
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(cmdStream, RegisterOffsets::csGprR0, static_cast<uint32_t>(pair.first), true);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(cmdStream, RegisterOffsets::csGprR0, static_cast<uint32_t>(pair.first), true, false);
|
||||
// Add GPR0 to GPR15, store result in GPR1
|
||||
NEO::EncodeMath<GfxFamily>::addition(cmdStream, AluRegisters::gpr0, static_cast<AluRegisters>(DebuggerAluRegisters::gpr15), AluRegisters::gpr1);
|
||||
|
||||
|
|
|
@ -124,6 +124,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
|
||||
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
|
||||
bool isBcsEngine = EngineHelpers::isBcs(this->osContext.getEngineType());
|
||||
|
||||
// 1. Init section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
|
||||
|
@ -131,18 +133,18 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::equal, true, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::equal, true, false, isBcsEngine);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true, isBcsEngine);
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart;
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true, isBcsEngine);
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart;
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true, isBcsEngine);
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
|
@ -151,11 +153,11 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6, 8, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6 + 4, 0, true, isBcsEngine);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true, isBcsEngine);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 10> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
|
@ -181,19 +183,19 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::gpr1);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::gpr2);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::gpr1, isBcsEngine);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::gpr2, isBcsEngine);
|
||||
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::equal, true, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::equal, true, false, isBcsEngine);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7, 8, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcsEngine);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true, isBcsEngine);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 14> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
|
@ -221,15 +223,15 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(schedulerCmdStream, AluRegisters::gpr2);
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(schedulerCmdStream, AluRegisters::gpr2, isBcsEngine);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
AluRegisters::gpr1, AluRegisters::gpr2, CompareOperation::notEqual, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true, isBcsEngine);
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
|
@ -249,20 +251,20 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
RegisterOffsets::csGprR1, currentRelaxedOrderingQueueSize, CompareOperation::greaterOrEqual, false, false);
|
||||
RegisterOffsets::csGprR1, currentRelaxedOrderingQueueSize, CompareOperation::greaterOrEqual, false, false, isBcsEngine);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
RegisterOffsets::csGprR5, 1, CompareOperation::equal, false, false);
|
||||
RegisterOffsets::csGprR5, 1, CompareOperation::equal, false, false, isBcsEngine);
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
{
|
||||
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::schedulerLoopCheckSectionStart);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionSize), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionSize), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10 + 4, 0, true, isBcsEngine);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 4> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
|
@ -272,7 +274,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr0, AluRegisters::accu);
|
||||
aluHelper.copyToCmdStream(schedulerCmdStream);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegMemBatchBufferStart(schedulerCmdStream, 0, semaphoreGpuVa, RegisterOffsets::csGprR11, CompareOperation::greaterOrEqual, true);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegMemBatchBufferStart(schedulerCmdStream, 0, semaphoreGpuVa, RegisterOffsets::csGprR11, CompareOperation::greaterOrEqual, true, isBcsEngine);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
false, false, false);
|
||||
|
@ -291,9 +293,10 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
|||
|
||||
uint64_t semaphoreSectionVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart;
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR11, value, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32), true);
|
||||
bool isBcsEngine = EngineHelpers::isBcs(this->osContext.getEngineType());
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR11, value, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32), true, isBcsEngine);
|
||||
|
||||
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
||||
|
||||
|
@ -746,12 +749,13 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
|
|||
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false)),
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 1, true);
|
||||
bool isBcsEngine = EngineHelpers::isBcs(this->osContext.getEngineType());
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 1, true, isBcsEngine);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
// patch conditional bb_start with current GPU address
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
|
||||
RegisterOffsets::csGprR1, 0, CompareOperation::equal, false, false);
|
||||
RegisterOffsets::csGprR1, 0, CompareOperation::equal, false, false, isBcsEngine);
|
||||
|
||||
relaxedOrderingSchedulerRequired = false;
|
||||
}
|
||||
|
@ -764,23 +768,27 @@ size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrdering
|
|||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr) {
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
|
||||
|
||||
bool isBcsEngine = EngineHelpers::isBcs(this->osContext.getEngineType());
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(returnPtr >> 32), true, isBcsEngine);
|
||||
|
||||
uint64_t returnPtrAfterTaskStoreSection = returnPtr;
|
||||
|
||||
returnPtrAfterTaskStoreSection += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true, isBcsEngine);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::initRelaxedOrderingRegisters() {
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5 + 4, 0, true);
|
||||
|
||||
bool isBcsEngine = EngineHelpers::isBcs(this->osContext.getEngineType());
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1 + 4, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5 + 4, 0, true, isBcsEngine);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
|
@ -793,16 +801,18 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
|||
EncodeMiPredicate<GfxFamily>::encode(stream, MiPredicateType::disable);
|
||||
|
||||
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
bool isBcsEngine = EngineHelpers::isBcs(this->osContext.getEngineType());
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true, isBcsEngine);
|
||||
|
||||
// Task start VA
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7 + 4, 0, true, isBcsEngine);
|
||||
|
||||
// Shift by 8 = multiply by 256. Address must be 64b aligned (shift by 6), but SHL accepts only 1, 2, 4, 8, 16 and 32
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8, 8, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8, 8, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8 + 4, 0, true, isBcsEngine);
|
||||
|
||||
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
|
||||
|
@ -820,7 +830,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
|||
|
||||
aluHelper.copyToCmdStream(stream);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(stream, AluRegisters::gpr1);
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(stream, AluRegisters::gpr1, isBcsEngine);
|
||||
|
||||
UNRECOVERABLE_IF(stream.getUsed() != RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
|
||||
|
||||
|
@ -831,9 +841,9 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
|||
uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
|
||||
// 1. Init section
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR11, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR11, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9, 0, true, isBcsEngine);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9 + 4, 0, true, isBcsEngine);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerStream, schedulerStartAddress, false, false, false);
|
||||
|
||||
// 2. Semaphore section
|
||||
|
@ -849,7 +859,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
|||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerStream, MiPredicateType::disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR5, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR5, 0, true, isBcsEngine);
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -15,7 +15,8 @@ template <typename GfxFamily, typename Dispatcher>
|
|||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchPartitionRegisterConfiguration() {
|
||||
ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(ringCommandStream,
|
||||
this->workPartitionAllocation->getGpuAddress(),
|
||||
this->immWritePostSyncOffset);
|
||||
this->immWritePostSyncOffset,
|
||||
EngineHelpers::isBcs(this->osContext.getEngineType()));
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
|
|
|
@ -66,6 +66,7 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
|
|||
LriHelper<Family>::program(&csr,
|
||||
RowChickenReg4::address,
|
||||
RowChickenReg4::regDataForArbitrationPolicy[properties.threadArbitrationPolicy.value],
|
||||
false,
|
||||
false);
|
||||
}
|
||||
if (properties.isCoherencyRequired.isDirty) {
|
||||
|
@ -73,6 +74,7 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
|
|||
LriHelper<Family>::program(&csr,
|
||||
gen11HdcModeRegister::address,
|
||||
DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true, nonCoherentEnable),
|
||||
false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ void CommandStreamReceiverHw<Family>::programMediaSampler(LinearStream &stream,
|
|||
LriHelper<Family>::program(&stream,
|
||||
PWR_CLK_STATE_REGISTER::REG_ADDRESS,
|
||||
reg.TheStructure.RawData[0],
|
||||
false,
|
||||
false);
|
||||
|
||||
args = {};
|
||||
|
@ -96,6 +97,7 @@ void CommandStreamReceiverHw<Family>::programMediaSampler(LinearStream &stream,
|
|||
LriHelper<Family>::program(&stream,
|
||||
PWR_CLK_STATE_REGISTER::REG_ADDRESS,
|
||||
reg.TheStructure.RawData[0],
|
||||
false,
|
||||
false);
|
||||
|
||||
MemorySynchronizationCommands<Family>::addSingleBarrier(stream, args);
|
||||
|
|
|
@ -21,7 +21,7 @@ namespace NEO {
|
|||
static auto gfxCore = IGFX_GEN12LP_CORE;
|
||||
|
||||
template <>
|
||||
void CommandStreamReceiverHw<Family>::programL3(LinearStream &csr, uint32_t &newL3Config) {
|
||||
void CommandStreamReceiverHw<Family>::programL3(LinearStream &csr, uint32_t &newL3Config, bool isBcs) {
|
||||
}
|
||||
|
||||
template <>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -90,7 +90,7 @@ void PreambleHelper<Family>::addPipeControlBeforeVfeCmd(LinearStream *pCommandSt
|
|||
}
|
||||
|
||||
template <>
|
||||
void PreambleHelper<Family>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
|
||||
void PreambleHelper<Family>::programL3(LinearStream *pCommandStream, uint32_t l3Config, bool isBcs) {
|
||||
}
|
||||
|
||||
template <>
|
||||
|
|
|
@ -41,7 +41,7 @@ void PreemptionHelper::programCmdStream<GfxFamily>(LinearStream &cmdStream, Pree
|
|||
regVal = PreemptionConfig<GfxFamily>::cmdLevelVal;
|
||||
}
|
||||
|
||||
LriHelper<GfxFamily>::program(&cmdStream, PreemptionConfig<GfxFamily>::mmioAddress, regVal, false);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, PreemptionConfig<GfxFamily>::mmioAddress, regVal, false, false);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -94,6 +94,7 @@ void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pComm
|
|||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueBeforeWalker,
|
||||
false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
@ -109,6 +110,7 @@ void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pComman
|
|||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueAfterWalker,
|
||||
false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,6 +58,7 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
|
|||
LriHelper<Gen9Family>::program(&csr,
|
||||
DebugControlReg2::address,
|
||||
DebugControlReg2::getRegData(properties.threadArbitrationPolicy.value),
|
||||
false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pComm
|
|||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueBeforeWalker,
|
||||
false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
@ -58,6 +59,7 @@ void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pComman
|
|||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueAfterWalker,
|
||||
false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -288,7 +288,7 @@ void BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(LinearStream &co
|
|||
if (debugManager.flags.GlobalSequencerFlushOnCopyEngine.get() != 0) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
constexpr uint32_t globalInvalidationRegister = 0xB404u;
|
||||
LriHelper<GfxFamily>::program(&commandStream, globalInvalidationRegister, 1u, false);
|
||||
LriHelper<GfxFamily>::program(&commandStream, globalInvalidationRegister, 1u, false, true);
|
||||
EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(commandStream,
|
||||
globalInvalidationRegister,
|
||||
0u,
|
||||
|
|
|
@ -453,7 +453,7 @@ template <typename GfxFamily>
|
|||
struct LriHelper {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
static void *program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap);
|
||||
static void *program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap, bool isBcs);
|
||||
static void *program(MI_LOAD_REGISTER_IMM *lriCmd, uint32_t address, uint32_t value, bool remap);
|
||||
};
|
||||
|
||||
|
|
|
@ -782,7 +782,7 @@ bool GfxCoreHelperHw<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32_t ac
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void *LriHelper<GfxFamily>::program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap) {
|
||||
void *LriHelper<GfxFamily>::program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap, bool isBcs) {
|
||||
auto lri = cmdStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
return LriHelper<GfxFamily>::program(lri, address, value, remap);
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ struct PreambleHelper {
|
|||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using FrontEndStateCommand = typename GfxFamily::FrontEndStateCommand;
|
||||
|
||||
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
|
||||
static void programL3(LinearStream *pCommandStream, uint32_t l3Config, bool isBcs);
|
||||
static void programPipelineSelect(LinearStream *pCommandStream,
|
||||
const PipelineSelectArgs &pipelineSelectArgs,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
@ -47,8 +47,8 @@ struct PreambleHelper {
|
|||
const StreamProperties &streamProperties);
|
||||
static uint64_t getScratchSpaceAddressOffsetForVfeState(LinearStream *pCommandStream, void *pVfeState);
|
||||
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
|
||||
GraphicsAllocation *preemptionCsr);
|
||||
static void programSemaphoreDelay(LinearStream *pCommandStream);
|
||||
GraphicsAllocation *preemptionCsr, bool isBcs);
|
||||
static void programSemaphoreDelay(LinearStream *pCommandStream, bool isBcs);
|
||||
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
|
||||
static bool isSystolicModeConfigurable(const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static size_t getAdditionalCommandsSize(const Device &device);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -30,13 +30,14 @@ void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStre
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programSemaphoreDelay(LinearStream *pCommandStream) {
|
||||
void PreambleHelper<GfxFamily>::programSemaphoreDelay(LinearStream *pCommandStream, bool isBcs) {
|
||||
if (debugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
|
||||
uint32_t valueOfNewSemaphoreDelay = debugManager.flags.ForceSemaphoreDelayBetweenWaits.get();
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
RegisterOffsets::semaWaitPoll,
|
||||
valueOfNewSemaphoreDelay,
|
||||
true);
|
||||
true,
|
||||
isBcs);
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -55,11 +56,11 @@ size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device
|
|||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
|
||||
GraphicsAllocation *preemptionCsr) {
|
||||
programL3(pCommandStream, l3Config);
|
||||
GraphicsAllocation *preemptionCsr, bool isBcs) {
|
||||
programL3(pCommandStream, l3Config, isBcs);
|
||||
programPreemption(pCommandStream, device, preemptionCsr);
|
||||
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
|
||||
programSemaphoreDelay(pCommandStream);
|
||||
programSemaphoreDelay(pCommandStream, isBcs);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -13,11 +13,12 @@
|
|||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config, bool isBcs) {
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
L3CNTLRegisterOffset<GfxFamily>::registerOffset,
|
||||
l3Config,
|
||||
false);
|
||||
false,
|
||||
isBcs);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -22,7 +22,7 @@ void PreambleHelper<GfxFamily>::addPipeControlBeforeVfeCmd(LinearStream *pComman
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config, bool isBcs) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -109,23 +109,23 @@ struct TimestampPacketHelper {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programConditionalBbStartForRelaxedOrdering(LinearStream &cmdStream, TagNodeBase &timestampPacketNode) {
|
||||
static void programConditionalBbStartForRelaxedOrdering(LinearStream &cmdStream, TagNodeBase &timestampPacketNode, bool isBcs) {
|
||||
auto compareAddress = getContextEndGpuAddress(timestampPacketNode);
|
||||
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) {
|
||||
uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(cmdStream, 0, compareAddress + compareOffset, TimestampPacketConstants::initValue,
|
||||
NEO::CompareOperation::equal, true, false);
|
||||
NEO::CompareOperation::equal, true, false, isBcs);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programCsrDependenciesForTimestampPacketContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies, bool relaxedOrderingEnabled) {
|
||||
static void programCsrDependenciesForTimestampPacketContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies, bool relaxedOrderingEnabled, bool isBcs) {
|
||||
for (auto timestampPacketContainer : csrDependencies.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
if (relaxedOrderingEnabled) {
|
||||
TimestampPacketHelper::programConditionalBbStartForRelaxedOrdering<GfxFamily>(cmdStream, *node);
|
||||
TimestampPacketHelper::programConditionalBbStartForRelaxedOrdering<GfxFamily>(cmdStream, *node, isBcs);
|
||||
} else {
|
||||
TimestampPacketHelper::programSemaphore<GfxFamily>(cmdStream, *node);
|
||||
}
|
||||
|
|
|
@ -39,12 +39,14 @@ int __stdcall TTCallbacks<GfxFamily>::writeL3Address(void *queueHandle, uint64_t
|
|||
LriHelper<GfxFamily>::program(&csr->getCS(0),
|
||||
static_cast<uint32_t>(regOffset & 0xFFFFFFFF),
|
||||
static_cast<uint32_t>(l3GfxAddress & 0xFFFFFFFF),
|
||||
true);
|
||||
true,
|
||||
false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&csr->getCS(0),
|
||||
static_cast<uint32_t>(regOffset >> 32),
|
||||
static_cast<uint32_t>(l3GfxAddress >> 32),
|
||||
true);
|
||||
true,
|
||||
false);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
* Copyright (C) 2023-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -29,7 +29,7 @@ template <typename FamilyType>
|
|||
bool verifyConditionalDataRegBbStart(void *cmd, uint64_t startAddress, uint32_t compareReg, uint32_t compareData, CompareOperation compareOperation, bool indirect);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool qwordData);
|
||||
bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool qwordData, bool isBcs);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyConditionalRegRegBbStart(void *cmd, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect);
|
||||
|
@ -224,7 +224,7 @@ bool verifyConditionalRegRegBbStart(void *cmd, uint64_t startAddress, AluRegiste
|
|||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool qwordData) {
|
||||
bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool qwordData, bool isBcs) {
|
||||
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -25,7 +25,7 @@ class MockDebuggerL0 : public NEO::DebuggerL0 {
|
|||
}
|
||||
|
||||
size_t getSbaAddressLoadCommandsSize() override { return 0; };
|
||||
void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) override{};
|
||||
void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa, bool isBcs) override{};
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -620,12 +620,12 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur
|
|||
|
||||
EXPECT_EQ(0u, ImplicitScalingDispatch<FamilyType>::getRegisterConfigurationSize());
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchRegisterConfiguration(linearStream, 0, 0);
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchRegisterConfiguration(linearStream, 0, 0, false);
|
||||
EXPECT_EQ(0u, linearStream.getUsed());
|
||||
|
||||
EXPECT_EQ(0u, ImplicitScalingDispatch<FamilyType>::getOffsetRegisterSize());
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchOffsetRegister(linearStream, 0);
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchOffsetRegister(linearStream, 0, 0);
|
||||
EXPECT_EQ(0u, linearStream.getUsed());
|
||||
|
||||
EXPECT_EQ(static_cast<uint32_t>(sizeof(uint64_t)), ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset());
|
||||
|
|
|
@ -152,7 +152,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
|
|||
constexpr uint32_t immVal = 0xbaau;
|
||||
constexpr uint64_t dstAddress = 0xDEADCAF0u;
|
||||
void *storeRegMem = nullptr;
|
||||
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true, &storeRegMem);
|
||||
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true, &storeRegMem, false);
|
||||
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands,
|
||||
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
|
||||
|
|
|
@ -183,7 +183,7 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
|
|||
constexpr uint32_t immVal = 0xbaau;
|
||||
constexpr uint64_t dstAddress = 0xDEADCAF0u;
|
||||
void *storeRegMem = nullptr;
|
||||
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false, &storeRegMem);
|
||||
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false, &storeRegMem, false);
|
||||
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands,
|
||||
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
|
||||
|
@ -296,7 +296,7 @@ HWTEST_F(CommandEncodeAluTests, whenProgrammingIncrementOperationThenUseCorrectA
|
|||
uint8_t buffer[bufferSize] = {};
|
||||
LinearStream cmdStream(buffer, bufferSize);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeIncrement(cmdStream, incRegister);
|
||||
EncodeMathMMIO<FamilyType>::encodeIncrement(cmdStream, incRegister, false);
|
||||
|
||||
EXPECT_EQ(bufferSize, cmdStream.getUsed());
|
||||
|
||||
|
@ -343,7 +343,7 @@ HWTEST_F(CommandEncodeAluTests, whenProgrammingDecrementOperationThenUseCorrectA
|
|||
uint8_t buffer[bufferSize] = {};
|
||||
LinearStream cmdStream(buffer, bufferSize);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeDecrement(cmdStream, decRegister);
|
||||
EncodeMathMMIO<FamilyType>::encodeDecrement(cmdStream, decRegister, false);
|
||||
|
||||
EXPECT_EQ(bufferSize, cmdStream.getUsed());
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -27,7 +27,7 @@ HWTEST2_F(XeHPAndLaterCommandEncoderMathTest, WhenAppendsAGreaterThanThenPredica
|
|||
CommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u);
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u, false);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -107,7 +107,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
|
|||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, false);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, false, false);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
|
@ -152,7 +152,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDa
|
|||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, true);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, true, false);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
|
@ -197,7 +197,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
|
|||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, false);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, false, false);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
|
@ -242,7 +242,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDa
|
|||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, true);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, true, false);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -31,7 +31,7 @@ class CommandSetMMIOFixture : public DeviceFixture {
|
|||
using CommandSetMMIOTest = Test<CommandSetMMIOFixture>;
|
||||
|
||||
HWTEST_F(CommandSetMMIOTest, WhenProgrammingThenLoadRegisterImmIsUsed) {
|
||||
EncodeSetMMIO<FamilyType>::encodeIMM(*cmdContainer.get(), 0x2000, 0xbaa, false);
|
||||
EncodeSetMMIO<FamilyType>::encodeIMM(*cmdContainer.get(), 0x2000, 0xbaa, false, false);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -29,7 +29,7 @@ GEN11TEST_F(CommandEncoderMathTestGen11, WhenAppendsAGreaterThanThenPredicateCor
|
|||
CommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u);
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u, false);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -26,7 +26,7 @@ GEN11TEST_F(IclSlm, WhenL3ConfigIsDispatchedThenProperRegisterAddressAndValueAre
|
|||
typedef Gen11Family::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(*defaultHwInfo, true);
|
||||
PreambleHelper<FamilyType>::programL3(&cs, l3Config);
|
||||
PreambleHelper<FamilyType>::programL3(&cs, l3Config, false);
|
||||
|
||||
parseCommands<Gen11Family>(cs);
|
||||
|
||||
|
@ -121,7 +121,7 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb
|
|||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(*defaultHwInfo, true);
|
||||
MockDevice mockDevice;
|
||||
PreambleHelper<FamilyType>::programPreamble(&linearStream, mockDevice, l3Config, nullptr);
|
||||
PreambleHelper<FamilyType>::programPreamble(&linearStream, mockDevice, l3Config, nullptr, false);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -27,7 +27,7 @@ GEN12LPTEST_F(CommandEncoderMathTestGen12Lp, WhenAppendsAGreaterThanThenPredicat
|
|||
CommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u);
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u, false);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -24,7 +24,7 @@ HWTEST2_F(TglLpSlm, givenTglLpWhenPreambleIsBeingProgrammedThenThreadArbitration
|
|||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<Gen12LpFamily>::getL3Config(pDevice->getHardwareInfo(), true);
|
||||
MockDevice mockDevice;
|
||||
PreambleHelper<Gen12LpFamily>::programPreamble(&linearStream, mockDevice, l3Config, nullptr);
|
||||
PreambleHelper<Gen12LpFamily>::programPreamble(&linearStream, mockDevice, l3Config, nullptr, false);
|
||||
|
||||
parseCommands<Gen12LpFamily>(cs);
|
||||
|
||||
|
@ -36,7 +36,7 @@ HWTEST2_F(TglLpSlm, WhenPreambleIsCreatedThenSlmIsDisabled, IsTGLLP) {
|
|||
typedef Gen12LpFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(pDevice->getHardwareInfo(), true);
|
||||
PreambleHelper<FamilyType>::programL3(&cs, l3Config);
|
||||
PreambleHelper<FamilyType>::programL3(&cs, l3Config, false);
|
||||
|
||||
parseCommands<Gen12LpFamily>(cs);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -29,7 +29,7 @@ GEN8TEST_F(CommandEncoderMathTestGen8, WhenAppendsAGreaterThanThenPredicateCorre
|
|||
CommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u);
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u, false);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -24,7 +24,7 @@ BDWTEST_F(BdwSlm, WhenL3ConfigIsDispatchedThenProperRegisterAddressAndValueArePr
|
|||
typedef Gen8Family::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<Gen8Family>::getL3Config(*defaultHwInfo, true);
|
||||
PreambleHelper<Gen8Family>::programL3(&cs, l3Config);
|
||||
PreambleHelper<Gen8Family>::programL3(&cs, l3Config, false);
|
||||
|
||||
parseCommands<Gen8Family>(cs);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -60,7 +60,7 @@ GEN9TEST_F(ThreadArbitrationGen9, givenPreambleWhenItIsProgrammedThenThreadArbit
|
|||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(*defaultHwInfo, true);
|
||||
MockDevice mockDevice;
|
||||
PreambleHelper<Gen9Family>::programPreamble(&linearStream, mockDevice, l3Config, nullptr);
|
||||
PreambleHelper<Gen9Family>::programPreamble(&linearStream, mockDevice, l3Config, nullptr, false);
|
||||
|
||||
parseCommands<Gen9Family>(cs);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -29,7 +29,7 @@ GEN9TEST_F(CommandEncoderMathTestGen9, WhenAppendsAGreaterThanThenPredicateCorre
|
|||
CommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
|
||||
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u);
|
||||
EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u, false);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -26,7 +26,7 @@ GEN9TEST_F(Gen9Slm, WhenL3ConfigIsDispatchedThenProperRegisterAddressAndValueAre
|
|||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(*defaultHwInfo, true);
|
||||
|
||||
PreambleHelper<FamilyType>::programL3(&cs, l3Config);
|
||||
PreambleHelper<FamilyType>::programL3(&cs, l3Config, false);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
|
|
|
@ -228,7 +228,7 @@ HWTEST_F(LriHelperTests, givenAddressAndOffsetWhenHelperIsUsedThenProgramCmdStre
|
|||
expectedLri.setRegisterOffset(address);
|
||||
expectedLri.setDataDword(data);
|
||||
|
||||
LriHelper<FamilyType>::program(&stream, address, data, false);
|
||||
LriHelper<FamilyType>::program(&stream, address, data, false, false);
|
||||
auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(stream.getCpuBase());
|
||||
ASSERT_NE(nullptr, lri);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -79,7 +79,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidThreadPreemptionWhenPreambleIs
|
|||
uintptr_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte;
|
||||
MockGraphicsAllocation csrSurface(reinterpret_cast<void *>(minCsrAlignment), 1024);
|
||||
|
||||
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U, &csrSurface);
|
||||
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U, &csrSurface, false);
|
||||
|
||||
PreemptionHelper::programStateSip<FamilyType>(preemptionStream, *mockDevice, nullptr);
|
||||
|
||||
|
@ -220,7 +220,7 @@ HWTEST_F(PreambleTest, givenSetForceSemaphoreDelayBetweenWaitsWhenProgramSemapho
|
|||
auto buffer = std::unique_ptr<char[]>(new char[bufferSize]);
|
||||
|
||||
LinearStream stream(buffer.get(), bufferSize);
|
||||
PreambleHelper<FamilyType>::programSemaphoreDelay(&stream);
|
||||
PreambleHelper<FamilyType>::programSemaphoreDelay(&stream, false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(stream);
|
||||
|
@ -244,7 +244,7 @@ HWTEST_F(PreambleTest, givenNotSetForceSemaphoreDelayBetweenWaitsWhenProgramSema
|
|||
auto buffer = std::unique_ptr<char[]>(new char[bufferSize]);
|
||||
|
||||
LinearStream stream(buffer.get(), bufferSize);
|
||||
PreambleHelper<FamilyType>::programSemaphoreDelay(&stream);
|
||||
PreambleHelper<FamilyType>::programSemaphoreDelay(&stream, false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(stream);
|
||||
|
|
|
@ -89,7 +89,7 @@ XE_HPG_CORETEST_F(LriHelperTestsXeHpgCore, whenProgrammingLriCommandThenExpectMm
|
|||
expectedLri.setDataDword(data);
|
||||
expectedLri.setMmioRemapEnable(true);
|
||||
|
||||
LriHelper<FamilyType>::program(&stream, address, data, true);
|
||||
LriHelper<FamilyType>::program(&stream, address, data, true, false);
|
||||
MI_LOAD_REGISTER_IMM *lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(buffer.get());
|
||||
ASSERT_NE(nullptr, lri);
|
||||
|
||||
|
|
Loading…
Reference in New Issue