Assign gpgpu engine at first enqueue

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-04-29 08:02:40 +00:00
committed by Compute-Runtime-Automation
parent a6490062a9
commit 73d3d83e60
14 changed files with 149 additions and 78 deletions

View File

@ -73,14 +73,10 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
gpgpuEngine = &device->getDefaultEngine();
UNRECOVERABLE_IF(gpgpuEngine->getEngineType() >= aub_stream::EngineType::NUM_ENGINES);
bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS);
if (bcsAllowed || gpgpuEngine->commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
if (bcsAllowed || device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
}
@ -104,9 +100,8 @@ CommandQueue::~CommandQueue() {
}
if (device) {
auto storageForAllocation = gpgpuEngine->commandStreamReceiver->getInternalAllocationStorage();
if (commandStream) {
auto storageForAllocation = gpgpuEngine->commandStreamReceiver->getInternalAllocationStorage();
storageForAllocation->storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION);
}
delete commandStream;
@ -130,7 +125,59 @@ CommandQueue::~CommandQueue() {
gtpinRemoveCommandQueue(this);
}
// Binds this queue to a GPGPU engine on first use.
// Returns immediately when gpgpuEngine is already set. Otherwise the engine is taken from
// getNextEngineForCommandQueue() when all of the round-robin conditions below hold, or from
// the device default engine when any of them fails; initializeGpgpuInternals() runs afterwards.
// NOTE(review): the null check and the assignment are not guarded by any lock inside this
// function — confirm that the first use of a queue cannot happen from two threads at once.
void CommandQueue::initializeGpgpu() const {
    if (gpgpuEngine != nullptr) {
        return;
    }

    auto &hardwareInfo = device->getDevice().getHardwareInfo();
    auto &helper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    const auto lowPriorityBit = static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR);

    // Round-robin is skipped for special queues, for queues with an explicitly selected
    // family and for low-priority queues; it also needs helper support and the enable check.
    const bool useRoundRobin =
        !(this->isSpecialCommandQueue ||
          this->queueFamilySelected ||
          (getCmdQueueProperties<cl_queue_priority_khr>(propertiesVector.data(), CL_QUEUE_PRIORITY_KHR) & lowPriorityBit)) &&
        helper.isAssignEngineRoundRobinSupported() &&
        this->isAssignEngineRoundRobinEnabled();

    if (!useRoundRobin) {
        this->gpgpuEngine = &device->getDefaultEngine();
    } else {
        this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
    }

    this->initializeGpgpuInternals();
}
void CommandQueue::initializeGpgpuInternals() const {
auto &hwInfo = device->getDevice().getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (getCmdQueueProperties<cl_queue_properties>(propertiesVector.data(), CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get()));
}
this->gpgpuEngine->commandStreamReceiver->enableNTo1SubmissionModel();
}
if (device->getDevice().getDebugger() && !this->gpgpuEngine->commandStreamReceiver->getDebugSurfaceAllocation()) {
auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo);
auto debugSurface = this->gpgpuEngine->commandStreamReceiver->allocateDebugSurface(maxDbgSurfaceSize);
memset(debugSurface->getUnderlyingBuffer(), 0, debugSurface->getUnderlyingBufferSize());
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
if (stateSaveAreaHeader.size() > 0) {
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(),
stateSaveAreaHeader.size());
}
}
gpgpuEngine->osContext->ensureContextInitialized();
gpgpuEngine->commandStreamReceiver->initDirectSubmission();
}
// Returns the command stream receiver of the queue's GPGPU engine.
// Calls initializeGpgpu() first, so the engine is assigned on the first call if it was not yet.
CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const {
    initializeGpgpu();
    auto *engine = gpgpuEngine;
    return *engine->commandStreamReceiver;
}
@ -700,7 +747,7 @@ cl_uint CommandQueue::getQueueFamilyIndex() const {
} else {
const auto &hwInfo = device->getHardwareInfo();
const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
const auto engineGroupType = hwHelper.getEngineGroupType(gpgpuEngine->getEngineType(), gpgpuEngine->getEngineUsage(), hwInfo);
const auto engineGroupType = hwHelper.getEngineGroupType(getGpgpuEngine().getEngineType(), getGpgpuEngine().getEngineUsage(), hwInfo);
const auto familyIndex = device->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
return static_cast<cl_uint>(familyIndex);
}

View File

@ -222,6 +222,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
void initializeGpgpu() const;
void initializeGpgpuInternals() const;
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const;
CommandStreamReceiver *getBcsForAuxTranslation() const;
@ -230,7 +232,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
ClDevice &getClDevice() const { return *device; }
Context &getContext() const { return *context; }
Context *getContextPtr() const { return context; }
EngineControl &getGpgpuEngine() const { return *gpgpuEngine; }
// Returns the queue's GPGPU engine; initializeGpgpu() is invoked first so that
// gpgpuEngine is assigned before it is dereferenced.
EngineControl &getGpgpuEngine() const {
    initializeGpgpu();
    EngineControl *engine = gpgpuEngine;
    return *engine;
}
MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize);
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
@ -387,7 +392,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
Context *context = nullptr;
ClDevice *device = nullptr;
EngineControl *gpgpuEngine = nullptr;
mutable EngineControl *gpgpuEngine = nullptr;
std::array<EngineControl *, bcsInfoMaskSize> bcsEngines = {};
std::vector<aub_stream::EngineType> bcsEngineTypes = {};

View File

@ -62,39 +62,8 @@ class CommandQueueHw : public CommandQueue {
this->gpgpuEngine = &device->getInternalEngine();
}
auto &hwInfo = device->getDevice().getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto assignEngineRoundRobin =
!internalUsage &&
!this->queueFamilySelected &&
!(clPriority & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
hwHelper.isAssignEngineRoundRobinSupported() &&
this->isAssignEngineRoundRobinEnabled();
if (assignEngineRoundRobin) {
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
}
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
getGpgpuCommandStreamReceiver().overrideDispatchPolicy(static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get()));
}
getGpgpuCommandStreamReceiver().enableNTo1SubmissionModel();
}
if (device->getDevice().getDebugger() && !getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()) {
auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo);
auto debugSurface = getGpgpuCommandStreamReceiver().allocateDebugSurface(maxDbgSurfaceSize);
memset(debugSurface->getUnderlyingBuffer(), 0, debugSurface->getUnderlyingBufferSize());
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
if (stateSaveAreaHeader.size() > 0) {
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(),
stateSaveAreaHeader.size());
}
if (gpgpuEngine) {
this->initializeGpgpuInternals();
}
uint64_t requestedSliceCount = getCmdQueueProperties<cl_command_queue_properties>(properties, CL_QUEUE_SLICE_COUNT_INTEL);
@ -102,8 +71,16 @@ class CommandQueueHw : public CommandQueue {
sliceCount = requestedSliceCount;
}
gpgpuEngine->osContext->ensureContextInitialized();
gpgpuEngine->commandStreamReceiver->initDirectSubmission();
auto initializeGpgpu = false;
if (DebugManager.flags.DeferCmdQGpgpuInitialization.get() != -1) {
initializeGpgpu = !DebugManager.flags.DeferCmdQGpgpuInitialization.get();
}
if (initializeGpgpu) {
this->initializeGpgpu();
}
for (const EngineControl *engine : bcsEngines) {
if (engine != nullptr) {
engine->osContext->ensureContextInitialized();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -71,6 +71,7 @@ TEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenQueueIs
}
HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToBatchingMode) {
using BaseType = typename CommandQueue::BaseType;
cl_int retVal = CL_SUCCESS;
cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto clDevice = castToObject<ClDevice>(testedClDevice);
@ -79,7 +80,8 @@ HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenComma
EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode);
auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal);
EXPECT_EQ(DispatchMode::BatchedDispatch, csr.dispatchMode);
auto queue = castToObject<CommandQueue>(static_cast<BaseType *>(cmdq));
EXPECT_EQ(DispatchMode::BatchedDispatch, queue->getGpgpuCommandStreamReceiver().getDispatchMode());
retVal = clReleaseCommandQueue(cmdq);
}
@ -100,6 +102,7 @@ HWTEST_F(clCreateCommandQueueTest, GivenForcedDispatchModeAndOoqParametersWhenQu
}
HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToNTo1SubmissionModel) {
using BaseType = typename CommandQueue::BaseType;
cl_int retVal = CL_SUCCESS;
cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto clDevice = castToObject<ClDevice>(testedClDevice);
@ -108,7 +111,8 @@ HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenComma
EXPECT_FALSE(csr.isNTo1SubmissionModelEnabled());
auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal);
EXPECT_TRUE(csr.isNTo1SubmissionModelEnabled());
auto queue = castToObject<CommandQueue>(static_cast<BaseType *>(cmdq));
EXPECT_TRUE(queue->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled());
retVal = clReleaseCommandQueue(cmdq);
}

View File

@ -106,7 +106,7 @@ struct BlitEnqueueTests : public ::testing::Test {
auto mockProgram = mockKernel->mockProgram;
mockProgram->setAllowNonUniform(true);
gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver;
gpgpuCsr = &mockCmdQueue->getGpgpuCommandStreamReceiver();
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
}

View File

@ -32,11 +32,11 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur
MockCommandQueueHw<FamilyType> mockCmdQueueHw1(context, device.get(), nullptr);
auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
auto dbgSurface = mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
EXPECT_NE(dbgSurface, nullptr);
MockCommandQueueHw<FamilyType> mockCmdQueueHw2(context, device.get(), nullptr);
EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
EXPECT_EQ(dbgSurface, mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
}
HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoWaitAndTagIsNotUpdated) {
@ -63,7 +63,7 @@ HWTEST_F(CommandQueueHwTest, WhenDebugSurfaceIsAllocatedThenBufferIsZeroed) {
MockCommandQueueHw<FamilyType> mockCmdQueueHw1(context, device.get(), nullptr);
auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
auto dbgSurface = mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
EXPECT_NE(dbgSurface, nullptr);
auto mem = dbgSurface->getUnderlyingBuffer();
ASSERT_NE(nullptr, mem);
@ -96,7 +96,7 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingCommandQueueDebugOnButIgcDoesNotRet
MockCommandQueueHw<FamilyType> mockCmdQueueHw1(context, device.get(), nullptr);
auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
auto dbgSurface = mockCmdQueueHw1.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
EXPECT_NE(dbgSurface, nullptr);
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();

View File

@ -1089,7 +1089,7 @@ HWTEST_F(WaitUntilCompletionTests, givenCleanTemporaryAllocationListEqualsFalseW
cmdStream->waitForTaskCountReturnValue = WaitStatus::Ready;
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
constexpr uint32_t taskCount = 0u;
@ -1109,7 +1109,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndCleanTemporaryAllocationListEq
cmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang;
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
constexpr uint32_t taskCount = 0u;
@ -1128,7 +1128,7 @@ HWTEST_F(WaitUntilCompletionTests, givenEmptyBcsStatesAndSkipWaitEqualsTrueWhenW
cmdStream->initializeTagAllocation();
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
constexpr uint32_t taskCount = 0u;
@ -1147,7 +1147,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndSkipWaitEqualsFalseWhenWaiting
cmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
constexpr uint32_t taskCount = 0u;
@ -1174,7 +1174,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT
bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
cmdQ->bcsCsrToReturn = bcsCmdStream.get();
@ -1207,7 +1207,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT
bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang;
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
cmdQ->bcsCsrToReturn = bcsCmdStream.get();
@ -1241,7 +1241,7 @@ HWTEST_F(WaitUntilCompletionTests, givenSuccessOnBcsCsrWhenWaitingUntilCompleteT
bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::Ready;
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
cmdQ->bcsCsrToReturn = bcsCmdStream.get();
@ -2803,7 +2803,7 @@ TEST_F(MultiTileFixture, givenDefaultContextWithRootDeviceWhenQueueIsCreatedThen
auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
MockCommandQueue queue(&context, rootDevice, nullptr, false);
ASSERT_NE(nullptr, queue.gpgpuEngine);
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
}
@ -2814,7 +2814,7 @@ TEST_F(MultiTileFixture, givenDefaultContextWithSubdeviceWhenQueueIsCreatedThenQ
context.contextType = ContextType::CONTEXT_TYPE_DEFAULT;
MockCommandQueue queue(&context, subdevice, nullptr, false);
ASSERT_NE(nullptr, queue.gpgpuEngine);
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
EXPECT_FALSE(queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
}
@ -2826,7 +2826,7 @@ TEST_F(MultiTileFixture, givenUnrestrictiveContextWithRootDeviceWhenQueueIsCreat
auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
MockCommandQueue queue(&context, rootDevice, nullptr, false);
ASSERT_NE(nullptr, queue.gpgpuEngine);
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
}
@ -2840,7 +2840,7 @@ TEST_F(MultiTileFixture, givenNotDefaultContextWithRootDeviceAndTileIdMaskWhenQu
auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver;
MockCommandQueue queue(&context, rootClDevice, nullptr, false);
ASSERT_NE(nullptr, queue.gpgpuEngine);
ASSERT_NE(nullptr, &queue.getGpgpuEngine());
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
}

View File

@ -144,12 +144,48 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhe
for (size_t i = 0; i < 4; i++) {
propertiesCooperativeQueue[3] = i;
auto pCommandQueue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, pDevice.get(), propertiesCooperativeQueue);
EXPECT_EQ(aub_stream::ENGINE_CCS + i, pCommandQueue->gpgpuEngine->osContext->getEngineType());
EXPECT_EQ(EngineUsage::Cooperative, pCommandQueue->gpgpuEngine->osContext->getEngineUsage());
EXPECT_EQ(aub_stream::ENGINE_CCS + i, pCommandQueue->getGpgpuEngine().osContext->getEngineType());
EXPECT_EQ(EngineUsage::Cooperative, pCommandQueue->getGpgpuEngine().osContext->getEngineUsage());
}
}
}
// With DeferCmdQGpgpuInitialization set to 1, a freshly constructed queue is expected
// to have no GPGPU engine assigned yet.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQGpgpuInitializationEnabledWhenCreateCommandQueueThenGpgpuIsNullptr, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQGpgpuInitialization.set(1u);

    HardwareInfo localHwInfo = *defaultHwInfo;
    MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(&localHwInfo, 0)};
    cl_device_id deviceId = static_cast<cl_device_id>(&mockClDevice);
    ClDeviceVector deviceVector{&deviceId, 1u};

    cl_int status{};
    std::unique_ptr<Context> ctx{Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, status)};
    EXPECT_EQ(CL_SUCCESS, status);

    auto cmdQueue = std::make_unique<MockCommandQueueHw<FamilyType>>(ctx.get(), &mockClDevice, nullptr);

    EXPECT_EQ(nullptr, cmdQueue->gpgpuEngine);
}
// With DeferCmdQGpgpuInitialization set to 0, the queue is expected to have its GPGPU
// engine assigned as soon as construction finishes.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQGpgpuInitializationDisabledWhenCreateCommandQueueThenGpgpuIsnotNullptr, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQGpgpuInitialization.set(0u);

    HardwareInfo localHwInfo = *defaultHwInfo;
    MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(&localHwInfo, 0)};
    cl_device_id deviceId = static_cast<cl_device_id>(&mockClDevice);
    ClDeviceVector deviceVector{&deviceId, 1u};

    cl_int status{};
    std::unique_ptr<Context> ctx{Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, status)};
    EXPECT_EQ(CL_SUCCESS, status);

    auto cmdQueue = std::make_unique<MockCommandQueueHw<FamilyType>>(ctx.get(), &mockClDevice, nullptr);

    EXPECT_NE(nullptr, cmdQueue->gpgpuEngine);
}
struct BcsCsrSelectionCommandQueueTests : ::testing::Test {
void SetUp() override {
HardwareInfo hwInfo = *::defaultHwInfo;

View File

@ -108,7 +108,7 @@ HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfa
auto failCsr = std::make_unique<CreateAllocationForHostSurfaceFailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
failCsr->setupContext(*pDevice->getDefaultEngine().osContext);
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
auto srcImage = Image2dHelper<>::create(context);
@ -132,7 +132,7 @@ HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfa
auto failCsr = std::make_unique<CreateAllocationForHostSurfaceFailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
failCsr->setupContext(*pDevice->getDefaultEngine().osContext);
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
auto srcImage = Image2dHelper<>::create(context);
@ -175,7 +175,7 @@ HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndPtrCopyAllowedForHostSurfaceW
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
csr->setupContext(*pDevice->getDefaultEngine().osContext);
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = csr.get();
csr->initializeTagAllocation();
@ -199,7 +199,7 @@ HWTEST_F(EnqueueReadImageTest, givenGpuHangAndCommandQueueAndPtrCopyAllowedForHo
cmdQ->waitForAllEnginesReturnValue = WaitStatus::GpuHang;
csr->setupContext(*pDevice->getDefaultEngine().osContext);
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
cmdQ->gpgpuEngine->commandStreamReceiver = csr.get();
csr->initializeTagAllocation();

View File

@ -2135,7 +2135,7 @@ HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenRe
void *pSrcSVM = ptrSVM;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
retVal = cmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
@ -2156,7 +2156,7 @@ HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreation
void *pSrcSVM = srcHostPtr;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
retVal = cmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
@ -2178,7 +2178,7 @@ HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenHostPtrAllocationCreati
void *pSrcSVM = srcHostPtr;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver;
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
retVal = cmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy

View File

@ -500,7 +500,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWithGpgpuSubmissionWhe
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver;
auto queueCsr = &cmdQ->getGpgpuCommandStreamReceiver();
auto initialTaskCount = queueCsr->peekTaskCount();
cl_int retVal = CL_SUCCESS;
@ -531,7 +531,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWithGpgpuSubmissionWhen
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver;
auto queueCsr = &cmdQ->getGpgpuCommandStreamReceiver();
auto initialTaskCount = queueCsr->peekTaskCount();
cl_int retVal = CL_SUCCESS;
@ -627,7 +627,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&cmdQ->getGpgpuCommandStreamReceiver());
queueCsr->stallingCommandsOnNextFlushRequired = true;
cl_int retVal = CL_SUCCESS;
@ -726,7 +726,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&cmdQ->getGpgpuCommandStreamReceiver());
queueCsr->stallingCommandsOnNextFlushRequired = true;
cl_int retVal = CL_SUCCESS;

View File

@ -282,7 +282,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
}
UltCommandStreamReceiver<GfxFamily> &getUltCommandStreamReceiver() {
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(*BaseClass::gpgpuEngine->commandStreamReceiver);
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(BaseClass::getGpgpuCommandStreamReceiver());
}
cl_int enqueueWriteImage(Image *dstImage,

View File

@ -382,6 +382,7 @@ ForceExtendedKernelIsaSize = -1
MakeIndirectAllocationsResidentAsPack = -1
MakeEachAllocationResident = -1
AssignBCSAtEnqueue = -1
DeferCmdQGpgpuInitialization = -1
ReuseKernelBinaries = -1
EnableChipsetUniqueUUID = -1
ForceSimdMessageSizeInWalker = -1

View File

@ -267,6 +267,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: def
DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.")
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
/*DIRECT SUBMISSION FLAGS*/