mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Assign gpgpu engine at first enqueue
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
a6490062a9
commit
73d3d83e60
@ -73,14 +73,10 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
|
||||
gpgpuEngine = &device->getDefaultEngine();
|
||||
|
||||
UNRECOVERABLE_IF(gpgpuEngine->getEngineType() >= aub_stream::EngineType::NUM_ENGINES);
|
||||
|
||||
bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
|
||||
hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS);
|
||||
|
||||
if (bcsAllowed || gpgpuEngine->commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
|
||||
if (bcsAllowed || device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
|
||||
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
|
||||
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
|
||||
}
|
||||
@ -104,9 +100,8 @@ CommandQueue::~CommandQueue() {
|
||||
}
|
||||
|
||||
if (device) {
|
||||
auto storageForAllocation = gpgpuEngine->commandStreamReceiver->getInternalAllocationStorage();
|
||||
|
||||
if (commandStream) {
|
||||
auto storageForAllocation = gpgpuEngine->commandStreamReceiver->getInternalAllocationStorage();
|
||||
storageForAllocation->storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION);
|
||||
}
|
||||
delete commandStream;
|
||||
@ -130,7 +125,59 @@ CommandQueue::~CommandQueue() {
|
||||
gtpinRemoveCommandQueue(this);
|
||||
}
|
||||
|
||||
void CommandQueue::initializeGpgpu() const {
|
||||
if (gpgpuEngine == nullptr) {
|
||||
auto &hwInfo = device->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
auto assignEngineRoundRobin =
|
||||
!this->isSpecialCommandQueue &&
|
||||
!this->queueFamilySelected &&
|
||||
!(getCmdQueueProperties<cl_queue_priority_khr>(propertiesVector.data(), CL_QUEUE_PRIORITY_KHR) & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
|
||||
hwHelper.isAssignEngineRoundRobinSupported() &&
|
||||
this->isAssignEngineRoundRobinEnabled();
|
||||
|
||||
if (assignEngineRoundRobin) {
|
||||
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
|
||||
} else {
|
||||
this->gpgpuEngine = &device->getDefaultEngine();
|
||||
}
|
||||
|
||||
this->initializeGpgpuInternals();
|
||||
}
|
||||
}
|
||||
|
||||
void CommandQueue::initializeGpgpuInternals() const {
|
||||
auto &hwInfo = device->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (getCmdQueueProperties<cl_queue_properties>(propertiesVector.data(), CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
|
||||
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
|
||||
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get()));
|
||||
}
|
||||
this->gpgpuEngine->commandStreamReceiver->enableNTo1SubmissionModel();
|
||||
}
|
||||
|
||||
if (device->getDevice().getDebugger() && !this->gpgpuEngine->commandStreamReceiver->getDebugSurfaceAllocation()) {
|
||||
auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo);
|
||||
auto debugSurface = this->gpgpuEngine->commandStreamReceiver->allocateDebugSurface(maxDbgSurfaceSize);
|
||||
memset(debugSurface->getUnderlyingBuffer(), 0, debugSurface->getUnderlyingBufferSize());
|
||||
|
||||
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
|
||||
if (stateSaveAreaHeader.size() > 0) {
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
|
||||
device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(),
|
||||
stateSaveAreaHeader.size());
|
||||
}
|
||||
}
|
||||
|
||||
gpgpuEngine->osContext->ensureContextInitialized();
|
||||
gpgpuEngine->commandStreamReceiver->initDirectSubmission();
|
||||
}
|
||||
|
||||
// Returns the command stream receiver of this queue's gpgpu engine.
// Calls initializeGpgpu() first, so the engine is assigned on the first call
// if it has not been assigned yet.
CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const {
    initializeGpgpu();

    auto *engine = this->gpgpuEngine;
    return *engine->commandStreamReceiver;
}
|
||||
|
||||
@ -700,7 +747,7 @@ cl_uint CommandQueue::getQueueFamilyIndex() const {
|
||||
} else {
|
||||
const auto &hwInfo = device->getHardwareInfo();
|
||||
const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
const auto engineGroupType = hwHelper.getEngineGroupType(gpgpuEngine->getEngineType(), gpgpuEngine->getEngineUsage(), hwInfo);
|
||||
const auto engineGroupType = hwHelper.getEngineGroupType(getGpgpuEngine().getEngineType(), getGpgpuEngine().getEngineUsage(), hwInfo);
|
||||
const auto familyIndex = device->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
|
||||
return static_cast<cl_uint>(familyIndex);
|
||||
}
|
||||
|
@ -222,6 +222,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList);
|
||||
|
||||
void initializeGpgpu() const;
|
||||
void initializeGpgpuInternals() const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const;
|
||||
CommandStreamReceiver *getBcsForAuxTranslation() const;
|
||||
@ -230,7 +232,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
ClDevice &getClDevice() const { return *device; }
|
||||
Context &getContext() const { return *context; }
|
||||
Context *getContextPtr() const { return context; }
|
||||
EngineControl &getGpgpuEngine() const { return *gpgpuEngine; }
|
||||
// Returns the gpgpu engine bound to this queue; initializeGpgpu() is called
// first, so the engine is assigned here if it was still unset.
EngineControl &getGpgpuEngine() const {
    initializeGpgpu();
    return *this->gpgpuEngine;
}
|
||||
|
||||
MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize);
|
||||
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
|
||||
@ -387,7 +392,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
Context *context = nullptr;
|
||||
ClDevice *device = nullptr;
|
||||
EngineControl *gpgpuEngine = nullptr;
|
||||
mutable EngineControl *gpgpuEngine = nullptr;
|
||||
std::array<EngineControl *, bcsInfoMaskSize> bcsEngines = {};
|
||||
std::vector<aub_stream::EngineType> bcsEngineTypes = {};
|
||||
|
||||
|
@ -62,39 +62,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
this->gpgpuEngine = &device->getInternalEngine();
|
||||
}
|
||||
|
||||
auto &hwInfo = device->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
auto assignEngineRoundRobin =
|
||||
!internalUsage &&
|
||||
!this->queueFamilySelected &&
|
||||
!(clPriority & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
|
||||
hwHelper.isAssignEngineRoundRobinSupported() &&
|
||||
this->isAssignEngineRoundRobinEnabled();
|
||||
|
||||
if (assignEngineRoundRobin) {
|
||||
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
|
||||
}
|
||||
|
||||
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
|
||||
getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
|
||||
getGpgpuCommandStreamReceiver().overrideDispatchPolicy(static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get()));
|
||||
}
|
||||
getGpgpuCommandStreamReceiver().enableNTo1SubmissionModel();
|
||||
}
|
||||
|
||||
if (device->getDevice().getDebugger() && !getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()) {
|
||||
auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo);
|
||||
auto debugSurface = getGpgpuCommandStreamReceiver().allocateDebugSurface(maxDbgSurfaceSize);
|
||||
memset(debugSurface->getUnderlyingBuffer(), 0, debugSurface->getUnderlyingBufferSize());
|
||||
|
||||
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
|
||||
if (stateSaveAreaHeader.size() > 0) {
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
|
||||
device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(),
|
||||
stateSaveAreaHeader.size());
|
||||
}
|
||||
if (gpgpuEngine) {
|
||||
this->initializeGpgpuInternals();
|
||||
}
|
||||
|
||||
uint64_t requestedSliceCount = getCmdQueueProperties<cl_command_queue_properties>(properties, CL_QUEUE_SLICE_COUNT_INTEL);
|
||||
@ -102,8 +71,16 @@ class CommandQueueHw : public CommandQueue {
|
||||
sliceCount = requestedSliceCount;
|
||||
}
|
||||
|
||||
gpgpuEngine->osContext->ensureContextInitialized();
|
||||
gpgpuEngine->commandStreamReceiver->initDirectSubmission();
|
||||
auto initializeGpgpu = false;
|
||||
|
||||
if (DebugManager.flags.DeferCmdQGpgpuInitialization.get() != -1) {
|
||||
initializeGpgpu = !DebugManager.flags.DeferCmdQGpgpuInitialization.get();
|
||||
}
|
||||
|
||||
if (initializeGpgpu) {
|
||||
this->initializeGpgpu();
|
||||
}
|
||||
|
||||
for (const EngineControl *engine : bcsEngines) {
|
||||
if (engine != nullptr) {
|
||||
engine->osContext->ensureContextInitialized();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -71,6 +71,7 @@ TEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenQueueIs
|
||||
}
|
||||
|
||||
HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToBatchingMode) {
|
||||
using BaseType = typename CommandQueue::BaseType;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
auto clDevice = castToObject<ClDevice>(testedClDevice);
|
||||
@ -79,7 +80,8 @@ HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenComma
|
||||
EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode);
|
||||
|
||||
auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal);
|
||||
EXPECT_EQ(DispatchMode::BatchedDispatch, csr.dispatchMode);
|
||||
auto queue = castToObject<CommandQueue>(static_cast<BaseType *>(cmdq));
|
||||
EXPECT_EQ(DispatchMode::BatchedDispatch, queue->getGpgpuCommandStreamReceiver().getDispatchMode());
|
||||
retVal = clReleaseCommandQueue(cmdq);
|
||||
}
|
||||
|
||||
@ -100,6 +102,7 @@ HWTEST_F(clCreateCommandQueueTest, GivenForcedDispatchModeAndOoqParametersWhenQu
|
||||
}
|
||||
|
||||
HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToNTo1SubmissionModel) {
|
||||
using BaseType = typename CommandQueue::BaseType;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
auto clDevice = castToObject<ClDevice>(testedClDevice);
|
||||
@ -108,7 +111,8 @@ HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenComma
|
||||
EXPECT_FALSE(csr.isNTo1SubmissionModelEnabled());
|
||||
|
||||
auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal);
|
||||
EXPECT_TRUE(csr.isNTo1SubmissionModelEnabled());
|
||||
auto queue = castToObject<CommandQueue>(static_cast<BaseType *>(cmdq));
|
||||
EXPECT_TRUE(queue->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled());
|
||||
retVal = clReleaseCommandQueue(cmdq);
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
auto mockProgram = mockKernel->mockProgram;
|
||||
mockProgram->setAllowNonUniform(true);
|
||||
|
||||
gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver;
|
||||
gpgpuCsr = &mockCmdQueue->getGpgpuCommandStreamReceiver();
|
||||
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
|
||||
}
|
||||
|
||||
|
@ -32,11 +32,11 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw1(context, device.get(), nullptr);
|
||||
|
||||
auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
auto dbgSurface = mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
EXPECT_NE(dbgSurface, nullptr);
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw2(context, device.get(), nullptr);
|
||||
EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
|
||||
EXPECT_EQ(dbgSurface, mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoWaitAndTagIsNotUpdated) {
|
||||
@ -63,7 +63,7 @@ HWTEST_F(CommandQueueHwTest, WhenDebugSurfaceIsAllocatedThenBufferIsZeroed) {
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw1(context, device.get(), nullptr);
|
||||
|
||||
auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
auto dbgSurface = mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
EXPECT_NE(dbgSurface, nullptr);
|
||||
auto mem = dbgSurface->getUnderlyingBuffer();
|
||||
ASSERT_NE(nullptr, mem);
|
||||
@ -96,7 +96,7 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingCommandQueueDebugOnButIgcDoesNotRet
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw1(context, device.get(), nullptr);
|
||||
|
||||
auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
auto dbgSurface = mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
|
||||
EXPECT_NE(dbgSurface, nullptr);
|
||||
|
||||
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
|
||||
|
@ -1089,7 +1089,7 @@ HWTEST_F(WaitUntilCompletionTests, givenCleanTemporaryAllocationListEqualsFalseW
|
||||
cmdStream->waitForTaskCountReturnValue = WaitStatus::Ready;
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
|
||||
|
||||
constexpr uint32_t taskCount = 0u;
|
||||
@ -1109,7 +1109,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndCleanTemporaryAllocationListEq
|
||||
cmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang;
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
|
||||
|
||||
constexpr uint32_t taskCount = 0u;
|
||||
@ -1128,7 +1128,7 @@ HWTEST_F(WaitUntilCompletionTests, givenEmptyBcsStatesAndSkipWaitEqualsTrueWhenW
|
||||
cmdStream->initializeTagAllocation();
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
|
||||
|
||||
constexpr uint32_t taskCount = 0u;
|
||||
@ -1147,7 +1147,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndSkipWaitEqualsFalseWhenWaiting
|
||||
cmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
|
||||
|
||||
constexpr uint32_t taskCount = 0u;
|
||||
@ -1174,7 +1174,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT
|
||||
bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
|
||||
cmdQ->bcsCsrToReturn = bcsCmdStream.get();
|
||||
|
||||
@ -1207,7 +1207,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT
|
||||
bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang;
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
|
||||
cmdQ->bcsCsrToReturn = bcsCmdStream.get();
|
||||
|
||||
@ -1241,7 +1241,7 @@ HWTEST_F(WaitUntilCompletionTests, givenSuccessOnBcsCsrWhenWaitingUntilCompleteT
|
||||
bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::Ready;
|
||||
|
||||
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
|
||||
cmdQ->bcsCsrToReturn = bcsCmdStream.get();
|
||||
|
||||
@ -2803,7 +2803,7 @@ TEST_F(MultiTileFixture, givenDefaultContextWithRootDeviceWhenQueueIsCreatedThen
|
||||
auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
MockCommandQueue queue(&context, rootDevice, nullptr, false);
|
||||
ASSERT_NE(nullptr, queue.gpgpuEngine);
|
||||
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
|
||||
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
|
||||
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
|
||||
}
|
||||
@ -2814,7 +2814,7 @@ TEST_F(MultiTileFixture, givenDefaultContextWithSubdeviceWhenQueueIsCreatedThenQ
|
||||
context.contextType = ContextType::CONTEXT_TYPE_DEFAULT;
|
||||
|
||||
MockCommandQueue queue(&context, subdevice, nullptr, false);
|
||||
ASSERT_NE(nullptr, queue.gpgpuEngine);
|
||||
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
|
||||
EXPECT_FALSE(queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
|
||||
}
|
||||
|
||||
@ -2826,7 +2826,7 @@ TEST_F(MultiTileFixture, givenUnrestrictiveContextWithRootDeviceWhenQueueIsCreat
|
||||
auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
MockCommandQueue queue(&context, rootDevice, nullptr, false);
|
||||
ASSERT_NE(nullptr, queue.gpgpuEngine);
|
||||
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
|
||||
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
|
||||
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
|
||||
}
|
||||
@ -2840,7 +2840,7 @@ TEST_F(MultiTileFixture, givenNotDefaultContextWithRootDeviceAndTileIdMaskWhenQu
|
||||
auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
MockCommandQueue queue(&context, rootClDevice, nullptr, false);
|
||||
ASSERT_NE(nullptr, queue.gpgpuEngine);
|
||||
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
|
||||
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
|
||||
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
|
||||
}
|
||||
|
@ -144,12 +144,48 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhe
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
propertiesCooperativeQueue[3] = i;
|
||||
auto pCommandQueue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, pDevice.get(), propertiesCooperativeQueue);
|
||||
EXPECT_EQ(aub_stream::ENGINE_CCS + i, pCommandQueue->gpgpuEngine->osContext->getEngineType());
|
||||
EXPECT_EQ(EngineUsage::Cooperative, pCommandQueue->gpgpuEngine->osContext->getEngineUsage());
|
||||
EXPECT_EQ(aub_stream::ENGINE_CCS + i, pCommandQueue->getGpgpuEngine().osContext->getEngineType());
|
||||
EXPECT_EQ(EngineUsage::Cooperative, pCommandQueue->getGpgpuEngine().osContext->getEngineUsage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// With DeferCmdQGpgpuInitialization set to 1, a freshly constructed queue must
// not have a gpgpu engine assigned yet.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQGpgpuInitializationEnabledWhenCreateCommandQueueThenGpgpuIsNullptr, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQGpgpuInitialization.set(1u);

    HardwareInfo hwInfo = *defaultHwInfo;
    MockDevice *pMockDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
    MockClDevice mockClDevice{pMockDevice};

    cl_device_id deviceId = static_cast<cl_device_id>(&mockClDevice);
    ClDeviceVector deviceVector{&deviceId, 1u};

    cl_int retVal{};
    std::unique_ptr<Context> context{Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, retVal)};
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), &mockClDevice, nullptr);

    // The member is read directly: the accessors would trigger initialization.
    EXPECT_EQ(nullptr, cmdQ->gpgpuEngine);
}
|
||||
|
||||
// With DeferCmdQGpgpuInitialization set to 0, the queue must already have a
// gpgpu engine assigned right after construction.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQGpgpuInitializationDisabledWhenCreateCommandQueueThenGpgpuIsnotNullptr, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQGpgpuInitialization.set(0u);

    HardwareInfo hwInfo = *defaultHwInfo;
    MockDevice *pMockDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
    MockClDevice mockClDevice{pMockDevice};

    cl_device_id deviceId = static_cast<cl_device_id>(&mockClDevice);
    ClDeviceVector deviceVector{&deviceId, 1u};

    cl_int retVal{};
    std::unique_ptr<Context> context{Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, retVal)};
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), &mockClDevice, nullptr);

    // The member is read directly so the check observes construction-time state only.
    EXPECT_NE(nullptr, cmdQ->gpgpuEngine);
}
|
||||
|
||||
struct BcsCsrSelectionCommandQueueTests : ::testing::Test {
|
||||
void SetUp() override {
|
||||
HardwareInfo hwInfo = *::defaultHwInfo;
|
||||
|
@ -108,7 +108,7 @@ HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfa
|
||||
auto failCsr = std::make_unique<CreateAllocationForHostSurfaceFailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
|
||||
failCsr->setupContext(*pDevice->getDefaultEngine().osContext);
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
|
||||
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
|
||||
|
||||
auto srcImage = Image2dHelper<>::create(context);
|
||||
@ -132,7 +132,7 @@ HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfa
|
||||
auto failCsr = std::make_unique<CreateAllocationForHostSurfaceFailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
|
||||
failCsr->setupContext(*pDevice->getDefaultEngine().osContext);
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
|
||||
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
|
||||
|
||||
auto srcImage = Image2dHelper<>::create(context);
|
||||
@ -175,7 +175,7 @@ HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndPtrCopyAllowedForHostSurfaceW
|
||||
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
|
||||
|
||||
csr->setupContext(*pDevice->getDefaultEngine().osContext);
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = csr.get();
|
||||
csr->initializeTagAllocation();
|
||||
|
||||
@ -199,7 +199,7 @@ HWTEST_F(EnqueueReadImageTest, givenGpuHangAndCommandQueueAndPtrCopyAllowedForHo
|
||||
cmdQ->waitForAllEnginesReturnValue = WaitStatus::GpuHang;
|
||||
|
||||
csr->setupContext(*pDevice->getDefaultEngine().osContext);
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = csr.get();
|
||||
csr->initializeTagAllocation();
|
||||
|
||||
|
@ -2135,7 +2135,7 @@ HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenRe
|
||||
void *pSrcSVM = ptrSVM;
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
|
||||
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
|
||||
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
|
||||
retVal = cmdQ.enqueueSVMMemcpy(
|
||||
false, // cl_bool blocking_copy
|
||||
@ -2156,7 +2156,7 @@ HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreation
|
||||
void *pSrcSVM = srcHostPtr;
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
|
||||
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
|
||||
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
|
||||
retVal = cmdQ.enqueueSVMMemcpy(
|
||||
false, // cl_bool blocking_copy
|
||||
@ -2178,7 +2178,7 @@ HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenHostPtrAllocationCreati
|
||||
void *pSrcSVM = srcHostPtr;
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
|
||||
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
|
||||
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
|
||||
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
|
||||
retVal = cmdQ.enqueueSVMMemcpy(
|
||||
false, // cl_bool blocking_copy
|
||||
|
@ -500,7 +500,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWithGpgpuSubmissionWhe
|
||||
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
|
||||
auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
auto queueCsr = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
auto initialTaskCount = queueCsr->peekTaskCount();
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
@ -531,7 +531,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWithGpgpuSubmissionWhen
|
||||
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
|
||||
auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver;
|
||||
auto queueCsr = &cmdQ->getGpgpuCommandStreamReceiver();
|
||||
auto initialTaskCount = queueCsr->peekTaskCount();
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
@ -627,7 +627,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
|
||||
|
||||
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
|
||||
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&cmdQ->getGpgpuCommandStreamReceiver());
|
||||
queueCsr->stallingCommandsOnNextFlushRequired = true;
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
@ -726,7 +726,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
|
||||
|
||||
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
|
||||
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&cmdQ->getGpgpuCommandStreamReceiver());
|
||||
queueCsr->stallingCommandsOnNextFlushRequired = true;
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
@ -282,7 +282,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
}
|
||||
|
||||
UltCommandStreamReceiver<GfxFamily> &getUltCommandStreamReceiver() {
|
||||
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(*BaseClass::gpgpuEngine->commandStreamReceiver);
|
||||
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(BaseClass::getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
cl_int enqueueWriteImage(Image *dstImage,
|
||||
|
@ -382,6 +382,7 @@ ForceExtendedKernelIsaSize = -1
|
||||
MakeIndirectAllocationsResidentAsPack = -1
|
||||
MakeEachAllocationResident = -1
|
||||
AssignBCSAtEnqueue = -1
|
||||
DeferCmdQGpgpuInitialization = -1
|
||||
ReuseKernelBinaries = -1
|
||||
EnableChipsetUniqueUUID = -1
|
||||
ForceSimdMessageSizeInWalker = -1
|
||||
|
@ -267,6 +267,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: def
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
|
||||
|
||||
/*DIRECT SUBMISSION FLAGS*/
|
||||
|
Reference in New Issue
Block a user