refactor: Pass information about the engine type

Extend the interface so that information about the engine type
(whether the target is a copy/BCS engine) is passed to the affected functions

Related-To: NEO-10678

Signed-off-by: Andrzej Koska <andrzej.koska@intel.com>
Andrzej Koska authored on 2024-04-19 14:20:27 +00:00, committed by Compute-Runtime-Automation
parent 31b2dcfe57
commit ae139aeffd
73 changed files with 296 additions and 251 deletions
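
Every hunk below applies the same pattern: an append/encode entry point gains a boolean saying whether the commands are emitted for a copy (BCS) engine, and callers derive that flag either from CommandList::isCopyOnly() or from the engine type of the OS context. A minimal, self-contained sketch of the idea, with simplified stand-in types rather than the actual NEO classes:

    #include <cstdint>

    // Stand-in for the shared encoders (EncodeMathMMIO, LriHelper, ...).
    struct Encoder {
        // Before: program(reg, value). After: the encoder is also told whether
        // it emits commands for a copy (BCS) engine.
        static void program(uint32_t reg, uint32_t value, bool isBcs) {
            // Any engine-specific decision now lives in the encoder,
            // not in every caller.
            (void)reg; (void)value; (void)isBcs;
        }
    };

    // Stand-in for L0::CommandList.
    struct CommandListSketch {
        bool copyOnly = false;
        bool isCopyOnly() const { return copyOnly; }

        void appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) {
            Encoder::program(reg, value, isBcs);
        }
    };

    int main() {
        CommandListSketch cmdList;
        cmdList.copyOnly = true; // a copy-only (BCS) command list
        cmdList.appendMILoadRegImm(0u, 0u, cmdList.isCopyOnly());
        return 0;
    }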

View File

@@ -154,7 +154,7 @@ struct CommandList : _ze_command_list_handle_t {
const size_t *pOffsets, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
-virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0;
+virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) = 0;
virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0;
virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0;
virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0;
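
Implementations override the three-argument form (see the CommandListCoreFamily hunks below); callers of the interface supply the flag themselves, typically from the command list. An illustrative call, where reg and value are placeholders:

    ze_result_t res = commandList->appendMILoadRegImm(reg, value, commandList->isCopyOnly());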

View File

@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2020-2023 Intel Corporation
+* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -14,7 +14,7 @@
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
-ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value) {
+ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -162,7 +162,7 @@ struct CommandListCoreFamily : public CommandListImp {
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
-ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override;
+ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) override;
ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override;
ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) override;
ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) override;

View File

@@ -543,7 +543,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
commandContainer.addToResidencyContainer(alloc);
for (uint32_t i = 0; i < numKernels; i++) {
-NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i);
+NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly());
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
pLaunchArgumentsBuffer[i],
@@ -2518,7 +2518,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
if (relaxedOrderingAllowed) {
-NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter());
+NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly());
} else {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
@@ -2784,8 +2784,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
if (maskLsb) {
-NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
-NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
+NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
+NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
} else {
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
@@ -3593,8 +3593,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
if (isQwordInOrderCounter()) {
indirectMode = true;
-NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true);
-NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true);
+NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly());
+NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly());
} else {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
@@ -3850,7 +3850,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
for (uint32_t i = 0u; i < packetsToWait; i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
-NEO::CompareOperation::equal, true, false);
+NEO::CompareOperation::equal, true, false, isCopyOnly());
} else {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
gpuAddr,
@@ -4045,7 +4045,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
// Patch Primary Tile section skip (to Secondary Tile section)
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(skipPrimaryTileSectionCmdStream, cmdStream->getCurrentGpuAddressPosition(), workPartitionAllocationGpuVa, 0,
-NEO::CompareOperation::notEqual, false, false);
+NEO::CompareOperation::notEqual, false, false, isCopyOnly());
// Secondary Tile section
{
@@ -4059,7 +4059,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
// Patch Primary Tile section jump to end
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(jumpToEndSectionFromPrimaryTile, cmdStream->getCurrentGpuAddressPosition(), syncAllocationGpuVa + sizeof(uint32_t), queueId,
-NEO::CompareOperation::equal, false, false);
+NEO::CompareOperation::equal, false, false, isCopyOnly());
// End section
NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
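
All of the call sites in this file take the flag from isCopyOnly(), so the value ultimately reflects how the command list was created: a list created on a copy-only queue group should reach the encoders with isBcs == true. A hedged, user-level sketch of such a creation (standard Level Zero API, not part of this commit; hContext, hDevice and copyOrdinal are assumed to exist):

    // copyOrdinal is assumed to be the ordinal of a queue group that reports
    // only ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY.
    ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC};
    desc.commandQueueGroupOrdinal = copyOrdinal;
    ze_command_list_handle_t hCommandList = nullptr;
    ze_result_t res = zeCommandListCreate(hContext, hDevice, &desc, &hCommandList);
    // Inside the driver such a list is copy-only, so isCopyOnly() returns true and
    // the new isBcs argument is true for the encoder calls appended to it.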

View File

@@ -473,13 +473,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
-partitionDataSize);
+partitionDataSize,
+isCopyOnly());
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
-NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset());
+NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset(),
+isCopyOnly());
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -568,7 +570,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
-NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);
+NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly());
}
}

View File

@@ -23,6 +23,7 @@
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/heap_base_address_model.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
@@ -1005,7 +1006,8 @@ void CommandQueueHw<gfxCoreFamily>::programCommandQueueDebugCmdsForSourceLevelOr
if (isDebugEnabled && !this->commandQueueDebugCmdsProgrammed) {
if (this->device->getL0Debugger()) {
this->device->getL0Debugger()->programSbaAddressLoad(cmdStream,
-device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress());
+device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress(),
+NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType()));
this->commandQueueDebugCmdsProgrammed = true;
}
}
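
The command queue has no isCopyOnly() helper, so the flag is derived from the engine type of the CSR's OS context; the change above is roughly equivalent to:

    const bool isBcs = NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType());
    this->device->getL0Debugger()->programSbaAddressLoad(cmdStream, sbaTrackingBufferGpuVa, isBcs);

where sbaTrackingBufferGpuVa stands for the getSbaTrackingBuffer(...)->getGpuAddress() expression from the hunk.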

View File

@@ -517,7 +517,8 @@ struct MockCommandList : public CommandList {
ADDMETHOD_NOBASE(appendMILoadRegImm, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t reg,
-uint32_t value));
+uint32_t value,
+bool isBcs));
ADDMETHOD_NOBASE(appendMILoadRegReg, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t reg1,
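
Mocks generated with ADDMETHOD_NOBASE must mirror the new signature, so tests driving the mock pass the extra argument explicitly. Illustrative only; mockCmdList stands for a MockCommandList instance and the register/value are placeholders:

    // The trailing 'false' is the new isBcs argument (non-copy engine).
    EXPECT_EQ(ZE_RESULT_SUCCESS, mockCmdList.appendMILoadRegImm(0u, 0u, false));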

View File

@@ -1857,7 +1857,7 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering
lrrCmd++;
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, cmdList->inOrderExecInfo->getBaseDeviceAddress(), 2,
-NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter));
+NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter, false));
}
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {

View File

@@ -1969,7 +1969,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, eventEndGpuVa, static_cast<uint64_t>(Event::STATE_CLEARED),
-NEO::CompareOperation::equal, true, false));
+NEO::CompareOperation::equal, true, false, false));
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
ASSERT_NE(nullptr, sdiCmd);
@@ -6393,7 +6393,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
}
// Primary Tile section skip - patching
-if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false)) {
+if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false, false)) {
return false;
}
@@ -6409,7 +6409,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
}
// Jump to end from Primary Tile section - patching
-if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false)) {
+if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false, false)) {
return false;
}
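
In these existing CCS-oriented tests the new trailing argument of verifyConditionalDataMemBbStart is simply false. A copy-engine variant of the same check would presumably pass true instead; hypothetical, not part of this commit:

    EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(
        lrrCmd, 0, gpuAddress, waitValue, NEO::CompareOperation::less, true,
        FamilyType::isQwordInOrderCounter, true /* isBcs: command list on a copy engine */));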