Revert "Assign BCS at first blit enqueue"

This reverts commit 0469034acb.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation 2022-05-04 12:46:37 +02:00 committed by Compute-Runtime-Automation
parent 85b47476b6
commit a3fe50c2e4
8 changed files with 22 additions and 91 deletions

View File

@ -73,22 +73,19 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) && bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS); hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS);
if (bcsAllowed || device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) { if (bcsAllowed || device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
timestampPacketContainer = std::make_unique<TimestampPacketContainer>(); timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>(); deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
} }
if (bcsAllowed) {
auto deferCmdQBcsInitialization = true; auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
if (DebugManager.flags.DeferCmdQBcsInitialization.get() != -1) { auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
deferCmdQBcsInitialization = DebugManager.flags.DeferCmdQBcsInitialization.get(); bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
} bcsEngineTypes.push_back(bcsEngineType);
if (!deferCmdQBcsInitialization) {
this->initializeBcsEngine(internalUsage);
} }
} }
@ -184,8 +181,7 @@ CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const {
return *gpgpuEngine->commandStreamReceiver; return *gpgpuEngine->commandStreamReceiver;
} }
CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) { CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const {
initializeBcsEngine(isSpecial());
const EngineControl *engine = this->bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]; const EngineControl *engine = this->bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)];
if (engine == nullptr) { if (engine == nullptr) {
return nullptr; return nullptr;
@ -194,8 +190,7 @@ CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::Eng
} }
} }
CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() { CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
initializeBcsEngine(isSpecial());
for (const EngineControl *engine : this->bcsEngines) { for (const EngineControl *engine : this->bcsEngines) {
if (engine != nullptr) { if (engine != nullptr) {
return engine->commandStreamReceiver; return engine->commandStreamReceiver;
@ -204,8 +199,7 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() {
return nullptr; return nullptr;
} }
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) { CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const {
initializeBcsEngine(isSpecial());
if (isCopyOnly) { if (isCopyOnly) {
return *getBcsCommandStreamReceiver(bcsEngineTypes[0]); return *getBcsCommandStreamReceiver(bcsEngineTypes[0]);
} }
@ -273,21 +267,6 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec
return *selectedCsr; return *selectedCsr;
} }
void CommandQueue::initializeBcsEngine(bool internalUsage) {
if (bcsAllowed && !bcsInitialized) {
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
auto bcsEngineType = EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
bcsEngineTypes.push_back(bcsEngineType);
bcsInitialized = true;
if (bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]) {
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->osContext->ensureContextInitialized();
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->commandStreamReceiver->initDirectSubmission();
}
}
}
Device &CommandQueue::getDevice() const noexcept { Device &CommandQueue::getDevice() const noexcept {
return device->getDevice(); return device->getDevice();
} }
@ -301,7 +280,7 @@ volatile uint32_t *CommandQueue::getHwTagAddress() const {
return getGpgpuCommandStreamReceiver().getTagAddress(); return getGpgpuCommandStreamReceiver().getTagAddress();
} }
bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) { bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const {
DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady); DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady);
if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) { if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) {
@ -1049,7 +1028,6 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
timestampPacketContainer = std::make_unique<TimestampPacketContainer>(); timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>(); deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
isCopyOnly = true; isCopyOnly = true;
bcsInitialized = true;
} else { } else {
gpgpuEngine = &device->getEngine(engineType, engineUsage); gpgpuEngine = &device->getEngine(engineType, engineUsage);
} }

View File

@ -202,7 +202,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
volatile uint32_t *getHwTagAddress() const; volatile uint32_t *getHwTagAddress() const;
bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState); bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const;
bool isWaitForTimestampsEnabled() const; bool isWaitForTimestampsEnabled() const;
virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount) = 0; virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount) = 0;
@ -225,10 +225,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
void initializeGpgpu() const; void initializeGpgpu() const;
void initializeGpgpuInternals() const; void initializeGpgpuInternals() const;
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType); MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const;
CommandStreamReceiver *getBcsForAuxTranslation(); CommandStreamReceiver *getBcsForAuxTranslation() const;
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args); MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const;
void initializeBcsEngine(bool internalUsage);
Device &getDevice() const noexcept; Device &getDevice() const noexcept;
ClDevice &getClDevice() const { return *device; } ClDevice &getClDevice() const { return *device; }
Context &getContext() const { return *context; } Context &getContext() const { return *context; }
@ -414,8 +413,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool perfCountersEnabled = false; bool perfCountersEnabled = false;
bool isCopyOnly = false; bool isCopyOnly = false;
bool bcsAllowed = false;
bool bcsInitialized = false;
LinearStream *commandStream = nullptr; LinearStream *commandStream = nullptr;

View File

@ -1067,7 +1067,7 @@ struct WaitUntilCompletionTests : public ::testing::Test {
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){}; MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) override { CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const override {
return bcsCsrToReturn; return bcsCsrToReturn;
} }

View File

@ -84,46 +84,6 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenAdditionalBcsWhenCreatingCommandQue
EXPECT_EQ(1u, queue->countBcsEngines()); EXPECT_EQ(1u, queue->countBcsEngines());
} }
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationEnabledWhenCreateCommandQueueThenBcsCountIsZero, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
hwInfo.capabilityTable.blitterOperationsSupported = true;
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
MockClDevice clDevice{device};
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
ClDeviceVector clDevices{&clDeviceId, 1u};
cl_int retVal{};
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
EXPECT_EQ(CL_SUCCESS, retVal);
auto queue = std::make_unique<MockCommandQueue>(*context);
EXPECT_EQ(0u, queue->countBcsEngines());
}
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationDisabledWhenCreateCommandQueueThenBcsIsInitialized, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.DeferCmdQBcsInitialization.set(0u);
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
hwInfo.capabilityTable.blitterOperationsSupported = true;
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
MockClDevice clDevice{device};
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
ClDeviceVector clDevices{&clDeviceId, 1u};
cl_int retVal{};
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
EXPECT_EQ(CL_SUCCESS, retVal);
auto queue = std::make_unique<MockCommandQueue>(*context);
EXPECT_NE(0u, queue->countBcsEngines());
}
HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQueueIsCreatedThenMainBcsCanBeUsedAgain, IsAtLeastXeHpcCore) { HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQueueIsCreatedThenMainBcsCanBeUsedAgain, IsAtLeastXeHpcCore) {
HardwareInfo hwInfo = *defaultHwInfo; HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
@ -142,9 +102,9 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu
auto queue4 = std::make_unique<MockCommandQueue>(*context); auto queue4 = std::make_unique<MockCommandQueue>(*context);
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue1->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue1->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getOsContext().getEngineType());
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType());
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType());
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType());
// Releasing main BCS. Next creation should be able to grab it // Releasing main BCS. Next creation should be able to grab it
queue1.reset(); queue1.reset();
@ -154,7 +114,7 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu
// Releasing link BCS. Shouldn't change anything // Releasing link BCS. Shouldn't change anything
queue2.reset(); queue2.reset();
queue2 = std::make_unique<MockCommandQueue>(*context); queue2 = std::make_unique<MockCommandQueue>(*context);
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType());
} }
HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhenCreatingCommandQueueThenCreateQueueWithCooperativeEngine, IsAtLeastXeHpcCore) { HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhenCreatingCommandQueueThenCreateQueueWithCooperativeEngine, IsAtLeastXeHpcCore) {
@ -531,10 +491,10 @@ HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenMultipleEnginesInQueueWhenSelec
aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS7,
aub_stream::ENGINE_BCS8, aub_stream::ENGINE_BCS8,
}); });
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args));
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args));
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args));
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args));
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args));
} }
} }

View File

@ -144,7 +144,6 @@ TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCou
new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockAlignedMallocManagerDevice>(&hwInfo)}}; new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockAlignedMallocManagerDevice>(&hwInfo)}};
MockContext context{device.get()}; MockContext context{device.get()};
MockCommandQueue queue{context}; MockCommandQueue queue{context};
queue.initializeBcsEngine(false);
queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19); queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19);
Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0}; Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0};

View File

@ -257,7 +257,6 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
MockCommandQueueHw(Context *context, MockCommandQueueHw(Context *context,
ClDevice *device, ClDevice *device,
cl_queue_properties *properties) : BaseClass(context, device, properties, false) { cl_queue_properties *properties) : BaseClass(context, device, properties, false) {
this->initializeBcsEngine(false);
} }
void clearBcsEngines() { void clearBcsEngines() {

View File

@ -383,7 +383,6 @@ MakeIndirectAllocationsResidentAsPack = -1
MakeEachAllocationResident = -1 MakeEachAllocationResident = -1
AssignBCSAtEnqueue = -1 AssignBCSAtEnqueue = -1
DeferCmdQGpgpuInitialization = -1 DeferCmdQGpgpuInitialization = -1
DeferCmdQBcsInitialization = -1
ReuseKernelBinaries = -1 ReuseKernelBinaries = -1
EnableChipsetUniqueUUID = -1 EnableChipsetUniqueUUID = -1
ForceSimdMessageSizeInWalker = -1 ForceSimdMessageSizeInWalker = -1

View File

@ -268,7 +268,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1:
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush") DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.") DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.") DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.") DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
/*DIRECT SUBMISSION FLAGS*/ /*DIRECT SUBMISSION FLAGS*/