mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
performance: Add debug flag to set BCS split minimal size
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
eb8e69d2d4
commit
0e758e4bb5
@@ -379,7 +379,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
NEO::CommandStreamReceiver *csr = nullptr;
|
||||
Device *device = nullptr;
|
||||
|
||||
inline static constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte;
|
||||
size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte;
|
||||
size_t cmdListCurrentStartOffset = 0;
|
||||
|
||||
unsigned long numThreads = 1u;
|
||||
|
||||
@@ -34,6 +34,9 @@
|
||||
namespace L0 {
|
||||
|
||||
// Constructs the command list and forwards numIddsPerBlock to the command container.
// A non-negative SplitBcsSize debug flag overrides minimalSizeForBcsSplit; the flag
// value is scaled by MemoryConstants::kiloByte before being stored.
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
    const auto splitBcsSize = NEO::DebugManager.flags.SplitBcsSize.get();
    // -1 keeps the built-in default. Other negative values are ignored as well:
    // stored in the size_t member they would wrap around to a huge threshold.
    if (splitBcsSize >= 0) {
        this->minimalSizeForBcsSplit = static_cast<size_t>(splitBcsSize) * MemoryConstants::kiloByte;
    }
}
|
||||
|
||||
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
|
||||
|
||||
@@ -226,6 +226,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::isSyncModeQueue;
|
||||
using BaseClass::isTbxMode;
|
||||
using BaseClass::minimalSizeForBcsSplit;
|
||||
using BaseClass::nonImmediateLogicalStateHelper;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
|
||||
@@ -1363,6 +1363,21 @@ TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppen
|
||||
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = oldCsr;
|
||||
}
|
||||
|
||||
// Sets the SplitBcsSize debug flag to 120, creates an immediate command list and
// checks that minimalSizeForBcsSplit equals 120 * MemoryConstants::kiloByte.
TEST_F(CommandListCreate, givenSplitBcsSizeWhenCreateCommandListThenProperSizeSet) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsSize.set(120);

    ze_result_t returnValue;
    ze_command_queue_desc_t desc = {};
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));

    // Creation must have succeeded before the list is inspected.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    auto cmdListUnderTest = static_cast<CommandList *>(commandList.get());
    const auto expectedMinimalSize = 120 * MemoryConstants::kiloByte;
    EXPECT_EQ(cmdListUnderTest->minimalSizeForBcsSplit, expectedMinimalSize);
}
|
||||
|
||||
HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppendingSignalEventsThenDeviceLostIsReturned) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableFlushTaskSubmission.set(1);
|
||||
|
||||
@@ -439,6 +439,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
BcsInfoMask splitEngines = EngineHelpers::oddLinkedCopyEnginesMask;
|
||||
BcsInfoMask h2dEngines = NEO::EngineHelpers::h2dCopyEngineMask;
|
||||
BcsInfoMask d2hEngines = NEO::EngineHelpers::d2hCopyEngineMask;
|
||||
size_t minimalSizeForBcsSplit = 16 * MemoryConstants::megaByte;
|
||||
|
||||
LinearStream *commandStream = nullptr;
|
||||
|
||||
|
||||
@@ -39,6 +39,9 @@ class CommandQueueHw : public CommandQueue {
|
||||
ClDevice *device,
|
||||
const cl_queue_properties *properties,
|
||||
bool internalUsage) : BaseClass(context, device, properties, internalUsage) {
|
||||
if (DebugManager.flags.SplitBcsSize.get() != -1) {
|
||||
this->minimalSizeForBcsSplit = DebugManager.flags.SplitBcsSize.get() * MemoryConstants::kiloByte;
|
||||
}
|
||||
|
||||
auto clPriority = getCmdQueueProperties<cl_queue_priority_khr>(properties, CL_QUEUE_PRIORITY_KHR);
|
||||
|
||||
|
||||
@@ -1217,8 +1217,6 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
|
||||
constexpr size_t minimalSizeForBcsSplit = 16 * MemoryConstants::megaByte;
|
||||
|
||||
auto bcsSplit = getDevice().isBcsSplitSupported() &&
|
||||
csr.getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
|
||||
transferSize >= minimalSizeForBcsSplit &&
|
||||
|
||||
@@ -348,6 +348,23 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenCheckIsSplitEnqueueBlit
|
||||
}
|
||||
}
|
||||
|
||||
// With SplitBcsCopy enabled and SplitBcsSize set to 100, a 150 KB host-to-local
// transfer is expected not to need a split on the fixture queue, but to need one
// on a queue constructed after the flag was set.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsSizeSetWhenCheckIsSplitEnqueueBlitNeededThenReturnProperValue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);
    DebugManager.flags.SplitBcsSize.set(100);

    VariableBackup<UltHwConfig> backup{&ultHwConfig};
    ultHwConfig.useBlitSplit = true;

    const auto transferSize = 150 * MemoryConstants::kiloByte;

    // NOTE(review): pCmdQ is presumably created by the fixture before SplitBcsSize
    // is set above, so it still carries the default threshold - confirm against the fixture.
    auto *fixtureQueue = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
    auto *fixtureBcsCsr = fixtureQueue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS);
    EXPECT_FALSE(fixtureQueue->isSplitEnqueueBlitNeeded(TransferDirection::HostToLocal, transferSize, *fixtureBcsCsr));

    // A queue constructed now reads the flag in its constructor.
    MockCommandQueueHw<FamilyType> queue(this->pContext, this->pClDevice, nullptr);
    auto *mockBcsCsr = queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS);
    EXPECT_TRUE(queue.isSplitEnqueueBlitNeeded(TransferDirection::HostToLocal, transferSize, *mockBcsCsr));
    EXPECT_EQ(queue.minimalSizeForBcsSplit, 100 * MemoryConstants::kiloByte);
}
|
||||
|
||||
char hostPtr[16 * MemoryConstants::megaByte];
|
||||
struct BcsSplitBufferTraits {
|
||||
enum { flags = CL_MEM_READ_WRITE };
|
||||
|
||||
@@ -255,6 +255,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
using BaseClass::gpgpuEngine;
|
||||
using BaseClass::isBlitAuxTranslationRequired;
|
||||
using BaseClass::latestSentEnqueueType;
|
||||
using BaseClass::minimalSizeForBcsSplit;
|
||||
using BaseClass::obtainCommandStream;
|
||||
using BaseClass::obtainNewTimestampPacketNodes;
|
||||
using BaseClass::overrideEngine;
|
||||
|
||||
@@ -319,6 +319,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default,
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal BCS engine for internal transfers, when disabled use regular engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsCopy, -1, "-1: default, 0:disabled, 1: enabled. When enqueues copy to main copy engine then split between even linked copy engines")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsSize, -1, "-1: default, >=0: Size to apply BCS split from (in KB)")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMask, 0, "0: default, >0: bitmask: indicates bcs engines for split")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMaskH2D, 0, "0: default, >0: bitmask: indicates bcs engines for H2D split")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMaskD2H, 0, "0: default, >0: bitmask: indicates bcs engines for D2H split")
|
||||
|
||||
@@ -419,6 +419,7 @@ AssignBCSAtEnqueue = -1
|
||||
DeferCmdQGpgpuInitialization = -1
|
||||
DeferCmdQBcsInitialization = -1
|
||||
SplitBcsCopy = -1
|
||||
SplitBcsSize = -1
|
||||
SplitBcsMask = 0
|
||||
SplitBcsMaskH2D = 0
|
||||
SplitBcsMaskD2H = 0
|
||||
|
||||
Reference in New Issue
Block a user