mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
feature: create copy offload queue under debug flag
Related-To: NEO-11376 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4c5d567b40
commit
cb9977b8f4
@@ -30,6 +30,9 @@ CommandList::~CommandList() {
|
||||
if (cmdQImmediate) {
|
||||
cmdQImmediate->destroy();
|
||||
}
|
||||
if (cmdQImmediateCopyOffload) {
|
||||
cmdQImmediateCopyOffload->destroy();
|
||||
}
|
||||
removeDeallocationContainerData();
|
||||
if (!isImmediateType() || !this->isFlushTaskSubmissionEnabled) {
|
||||
removeHostPtrAllocations();
|
||||
|
||||
@@ -412,6 +412,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
|
||||
ze_context_handle_t hContext = nullptr;
|
||||
CommandQueue *cmdQImmediate = nullptr;
|
||||
CommandQueue *cmdQImmediateCopyOffload = nullptr;
|
||||
NEO::CommandStreamReceiver *csr = nullptr;
|
||||
Device *device = nullptr;
|
||||
|
||||
|
||||
@@ -255,12 +255,36 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
pNext = reinterpret_cast<const ze_base_desc_t *>(pNext->pNext);
|
||||
}
|
||||
|
||||
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1) && !commandList->isCopyOnly() && commandList->isInOrderExecutionEnabled()) {
|
||||
commandList->enableCopyOperationOffload(productFamily, device, desc);
|
||||
}
|
||||
|
||||
return commandList;
|
||||
}
|
||||
|
||||
return commandList;
|
||||
}
|
||||
|
||||
void CommandListImp::enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc) {
|
||||
NEO::CommandStreamReceiver *copyCsr = nullptr;
|
||||
uint32_t ordinal = static_cast<DeviceImp *>(device)->getCopyEngineOrdinal();
|
||||
|
||||
device->getCsrForOrdinalAndIndexWithPriority(©Csr, ordinal, 0, desc->priority);
|
||||
UNRECOVERABLE_IF(!copyCsr);
|
||||
|
||||
ze_command_queue_desc_t copyQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
|
||||
copyQueueDesc.ordinal = ordinal;
|
||||
copyQueueDesc.mode = desc->mode;
|
||||
copyQueueDesc.priority = desc->priority;
|
||||
|
||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
||||
auto offloadCommandQueue = CommandQueue::create(productFamily, device, copyCsr, ©QueueDesc, true, false, true, returnValue);
|
||||
UNRECOVERABLE_IF(!offloadCommandQueue);
|
||||
|
||||
this->cmdQImmediateCopyOffload = offloadCommandQueue;
|
||||
this->copyOperationOffloadEnabled = true;
|
||||
}
|
||||
|
||||
void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties) {
|
||||
if (this->stateComputeModeTracking) {
|
||||
streamProperties.stateComputeMode.setPropertiesCoherencyDevicePreemption(cmdListDefaultCoherency, this->commandListPreemptionMode, true);
|
||||
|
||||
@@ -46,6 +46,7 @@ struct CommandListImp : public CommandList {
|
||||
virtual void patchInOrderCmds() = 0;
|
||||
void enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode);
|
||||
NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const { return synchronizedDispatchMode; }
|
||||
void enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc);
|
||||
|
||||
protected:
|
||||
std::shared_ptr<NEO::InOrderExecInfo> inOrderExecInfo;
|
||||
@@ -61,6 +62,7 @@ struct CommandListImp : public CommandList {
|
||||
static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
|
||||
static constexpr bool cmdListDefaultGlobalAtomics = false;
|
||||
std::vector<Event *> mappedTsEventList{};
|
||||
bool copyOperationOffloadEnabled = false;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -417,6 +417,20 @@ uint32_t DeviceImp::getCopyQueueGroupsFromSubDevice(uint32_t numberOfSubDeviceCo
|
||||
return subDeviceQueueGroupsIter;
|
||||
}
|
||||
|
||||
uint32_t DeviceImp::getCopyEngineOrdinal() const {
|
||||
auto &engineGroups = neoDevice->getRegularEngineGroups();
|
||||
uint32_t i = 0;
|
||||
for (; i < static_cast<uint32_t>(engineGroups.size()); i++) {
|
||||
if (engineGroups[i].engineGroupType == NEO::EngineGroupType::copy) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(this->subDeviceCopyEngineGroups.size() == 0);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
|
||||
ze_command_queue_group_properties_t *pCommandQueueGroupProperties) {
|
||||
NEO::Device *activeDevice = getActiveDevice();
|
||||
|
||||
@@ -170,6 +170,7 @@ struct DeviceImp : public Device, NEO::NonCopyableOrMovableClass {
|
||||
uint32_t getEventMaxKernelCount() const override;
|
||||
uint32_t queryDeviceNodeMask();
|
||||
NEO::EngineGroupType getInternalEngineGroupType();
|
||||
uint32_t getCopyEngineOrdinal() const;
|
||||
|
||||
protected:
|
||||
void adjustCommandQueueDesc(uint32_t &ordinal, uint32_t &index);
|
||||
|
||||
@@ -50,6 +50,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::cmdListHeapAddressModel;
|
||||
using BaseClass::cmdListType;
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::cmdQImmediateCopyOffload;
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::commandListPerThreadScratchSize;
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
@@ -58,6 +59,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::containsAnyKernel;
|
||||
using BaseClass::containsCooperativeKernelsFlag;
|
||||
using BaseClass::copyOperationOffloadEnabled;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::currentBindingTablePoolBaseAddress;
|
||||
using BaseClass::currentDynamicStateBaseAddress;
|
||||
@@ -182,10 +184,12 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::cmdListHeapAddressModel;
|
||||
using BaseClass::cmdListType;
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::cmdQImmediateCopyOffload;
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::commandsToPatch;
|
||||
using BaseClass::compactL3FlushEvent;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::copyOperationOffloadEnabled;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::dcFlushSupport;
|
||||
using BaseClass::device;
|
||||
@@ -263,9 +267,11 @@ struct WhiteBox<::L0::CommandListImp> : public ::L0::CommandListImp {
|
||||
using BaseClass::cmdListHeapAddressModel;
|
||||
using BaseClass::cmdListType;
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::cmdQImmediateCopyOffload;
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::commandsToPatch;
|
||||
using BaseClass::copyOperationOffloadEnabled;
|
||||
using BaseClass::copyThroughLockedPtrEnabled;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::currentBindingTablePoolBaseAddress;
|
||||
|
||||
@@ -28,6 +28,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||
using BaseClass::cmdListWithAssertExecuted;
|
||||
using BaseClass::commandStream;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::desc;
|
||||
using BaseClass::device;
|
||||
using BaseClass::preemptionCmdSyncProgramming;
|
||||
using BaseClass::printfKernelContainer;
|
||||
|
||||
@@ -6810,5 +6810,102 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenLimitedSyncDispatchWhenAppend
|
||||
EXPECT_TRUE(verifyTokenCleanup());
|
||||
}
|
||||
|
||||
struct CopyOffloadInOrderTests : public InOrderCmdListTests {
|
||||
void SetUp() override {
|
||||
backupHwInfo = std::make_unique<VariableBackup<NEO::HardwareInfo>>(defaultHwInfo.get());
|
||||
|
||||
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
|
||||
defaultHwInfo->featureTable.ftrBcsInfo = 0b111;
|
||||
|
||||
InOrderCmdListTests::SetUp();
|
||||
}
|
||||
|
||||
std::unique_ptr<VariableBackup<NEO::HardwareInfo>> backupHwInfo;
|
||||
};
|
||||
|
||||
// Checks when an immediate command list receives a copy-offload queue:
//  - debug flag == 1 on an in-order compute list -> queue created, mirroring priority and mode,
//  - list that is not in-order, list created on the copy ordinal, or flag left at -1 -> no queue.
HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) {
    using ImmediateCmdList = WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>;

    auto &offloadFlag = NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList;
    offloadFlag.set(1);

    ze_command_list_handle_t cmdListHandle;

    ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    cmdQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
    cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    // Creates an immediate command list from the current descriptor and exposes its internals.
    auto createCmdList = [&]() {
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdListHandle));
        return static_cast<ImmediateCmdList *>(CommandList::fromHandle(cmdListHandle));
    };

    // Expects an offload queue on a BCS engine that mirrors the descriptor's priority and mode.
    auto verifyOffloadEnabled = [&]() {
        auto cmdList = createCmdList();
        EXPECT_TRUE(cmdList->copyOperationOffloadEnabled);
        EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload);

        auto offloadQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
        EXPECT_EQ(cmdQueueDesc.priority, offloadQueue->desc.priority);
        EXPECT_EQ(cmdQueueDesc.mode, offloadQueue->desc.mode);
        EXPECT_TRUE(offloadQueue->peekIsCopyOnlyCommandQueue());
        EXPECT_TRUE(NEO::EngineHelpers::isBcs(offloadQueue->getCsr()->getOsContext().getEngineType()));

        zeCommandListDestroy(cmdListHandle);
    };

    // Expects that no offload queue was attached to the created command list.
    auto verifyOffloadDisabled = [&]() {
        auto cmdList = createCmdList();
        EXPECT_FALSE(cmdList->copyOperationOffloadEnabled);
        EXPECT_EQ(nullptr, cmdList->cmdQImmediateCopyOffload);

        zeCommandListDestroy(cmdListHandle);
    };

    // In-order compute list, normal priority, asynchronous mode.
    verifyOffloadEnabled();

    // In-order compute list, high priority, synchronous mode.
    cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH;
    cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
    verifyOffloadEnabled();
    cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    // Command list without the in-order flag.
    cmdQueueDesc.flags = 0;
    verifyOffloadDisabled();
    cmdQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER;

    // Command list created directly on the copy engine ordinal.
    cmdQueueDesc.ordinal = static_cast<DeviceImp *>(device)->getCopyEngineOrdinal();
    verifyOffloadDisabled();
    cmdQueueDesc.ordinal = 0;

    // Debug flag back at its default value.
    offloadFlag.set(-1);
    verifyOffloadDisabled();
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -175,6 +175,54 @@ TEST(L0DeviceTest, givenMultipleMaskedSubDevicesWhenCreatingL0DeviceThenDontAddD
|
||||
EXPECT_EQ(0b100u, deviceImp->subDevices[1]->getNEODevice()->getDeviceBitfield().to_ulong());
|
||||
}
|
||||
|
||||
using DeviceCopyEngineTests = ::testing::Test;
|
||||
|
||||
// Builds a root device with two sub-devices (implicit scaling enabled) and checks that:
//  - the root device reports a copy ordinal equal to the number of its regular engine groups,
//  - each sub-device reports the index of the first copy group among its regular engine groups.
HWTEST2_F(DeviceCopyEngineTests, givenRootOrSubDeviceWhenAskingForCopyOrdinalThenReturnCorrectId, IsAtLeastXeHpCore) {
    DebugManagerStateRestore restorer;
    debugManager.flags.CreateMultipleSubDevices.set(2);
    debugManager.flags.EnableImplicitScaling.set(1);

    auto hwInfo = *defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    auto executionEnvironment = std::make_unique<NEO::ExecutionEnvironment>();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    rootDeviceEnvironment.setHwInfoAndInitHelpers(&hwInfo);
    rootDeviceEnvironment.initGmm();
    executionEnvironment->parseAffinityMask();

    // The execution environment is released here and handed to the factory by reference.
    auto deviceFactory = std::make_unique<NEO::UltDeviceFactory>(1, 2, *executionEnvironment.release());
    auto rootDevice = deviceFactory->rootDevices[0];
    EXPECT_NE(nullptr, rootDevice);
    EXPECT_EQ(2u, rootDevice->getNumSubDevices());

    auto driverHandle = std::make_unique<DriverHandleImp>();

    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    auto device = std::unique_ptr<L0::Device>(Device::create(driverHandle.get(), rootDevice, false, &returnValue));
    ASSERT_NE(nullptr, device);

    auto deviceImp = static_cast<DeviceImp *>(device.get());
    ASSERT_EQ(2u, deviceImp->numSubDevices);

    // Root device: expected ordinal equals the count of its regular engine groups.
    const auto rootRegularGroupCount = static_cast<uint32_t>(device->getNEODevice()->getRegularEngineGroups().size());
    EXPECT_EQ(rootRegularGroupCount, deviceImp->getCopyEngineOrdinal());

    // Sub-devices: locate the first copy group by hand and compare against the queried ordinal.
    auto &subDeviceEngines = deviceImp->subDevices[0]->getNEODevice()->getRegularEngineGroups();

    uint32_t subDeviceCopyEngineId = std::numeric_limits<uint32_t>::max();
    uint32_t groupId = 0;
    while (groupId < subDeviceEngines.size()) {
        if (subDeviceEngines[groupId].engineGroupType == EngineGroupType::copy) {
            subDeviceCopyEngineId = groupId;
            break;
        }
        groupId++;
    }
    EXPECT_NE(std::numeric_limits<uint32_t>::max(), subDeviceCopyEngineId);

    auto firstSubDevice = static_cast<DeviceImp *>(deviceImp->subDevices[0]);
    auto secondSubDevice = static_cast<DeviceImp *>(deviceImp->subDevices[1]);
    EXPECT_EQ(subDeviceCopyEngineId, firstSubDevice->getCopyEngineOrdinal());
    EXPECT_EQ(subDeviceCopyEngineId, secondSubDevice->getCopyEngineOrdinal());
}
|
||||
|
||||
TEST(L0DeviceTest, givenMidThreadPreemptionWhenCreatingDeviceThenSipKernelIsInitialized) {
|
||||
|
||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -255,6 +255,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X s
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionMode, 0, "Exit on X submission mode. 0: Any context type, 1: Compute context only, 2: Copy context only ")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1: default, 0: disabled, 1: all Immediate Command Lists are switched to in-order execution")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderEvents, -1, "-1: default, 0: disabled, 1: Enable all Events as in-order, to rely on command list counter value")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled. If set, all immediate compute in-order cmdlist will try to offload copy operations to copy engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitConvertionToCounterBasedEvents, -1, "-1: default, 0: Disable, 1: Enable. If enabled, try to convert Regular Events used on Immediate CL to CounterBased")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AllowDcFlush, -1, "-1: default, 0: DC flush disabled, 1: DC flush enabled")
|
||||
|
||||
@@ -602,4 +602,5 @@ DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1
|
||||
ReadOnlyAllocationsTypeMask = 0
|
||||
EnableLogLevel = 6
|
||||
EnableReusingGpuTimestamps = 0
|
||||
ForceCopyOperationOffloadForComputeCmdList = -1
|
||||
# Please don't edit below this line
|
||||
|
||||
Reference in New Issue
Block a user