Add initial enqueue bcs split infrastructure

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-07-20 15:05:27 +00:00
committed by Compute-Runtime-Automation
parent 61242205e0
commit b10b3ed9dd
24 changed files with 547 additions and 19 deletions

View File

@ -6,6 +6,7 @@
*/
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/utilities/base_object_utils.h"
@ -288,6 +289,191 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted
EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask);
}
// Toggles the SplitBcsCopy debug flag and expects isSplitEnqueueBlitSupported()
// to report true for 1 and false for 0.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenCheckIsSplitEnqueueBlitSupportedThenReturnProperValue) {
    // Restores the debug flags modified below once the test body ends.
    DebugManagerStateRestore restorer;
    auto *queue = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);

    // Flag enabled.
    DebugManager.flags.SplitBcsCopy.set(1);
    EXPECT_TRUE(queue->isSplitEnqueueBlitSupported());

    // Flag disabled.
    DebugManager.flags.SplitBcsCopy.set(0);
    EXPECT_FALSE(queue->isSplitEnqueueBlitSupported());
}
// Checks isSplitEnqueueBlitNeeded() for all four transfer directions against
// the ENGINE_BCS receiver and the GPGPU receiver, with SplitBcsCopy on and off.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenCheckIsSplitEnqueueBlitNeededThenReturnProperValue) {
    DebugManagerStateRestore restorer;
    auto *queue = static_cast<CommandQueueHw<FamilyType> *>(this->pCmdQ);
    DebugManager.flags.SplitBcsCopy.set(1);

    // The receiver is looked up anew on every invocation of these helpers.
    auto neededOnBcs = [queue](TransferDirection direction) {
        return queue->isSplitEnqueueBlitNeeded(direction, *queue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS));
    };
    auto neededOnGpgpu = [queue](TransferDirection direction) {
        return queue->isSplitEnqueueBlitNeeded(direction, queue->getGpgpuCommandStreamReceiver());
    };

    // SplitBcsCopy=1, ENGINE_BCS receiver: only host<->local transfers are expected to split.
    EXPECT_FALSE(neededOnBcs(TransferDirection::HostToHost));
    EXPECT_FALSE(neededOnBcs(TransferDirection::LocalToLocal));
    EXPECT_TRUE(neededOnBcs(TransferDirection::LocalToHost));
    EXPECT_TRUE(neededOnBcs(TransferDirection::HostToLocal));

    // SplitBcsCopy=1, GPGPU receiver: no direction is expected to split.
    EXPECT_FALSE(neededOnGpgpu(TransferDirection::HostToHost));
    EXPECT_FALSE(neededOnGpgpu(TransferDirection::LocalToLocal));
    EXPECT_FALSE(neededOnGpgpu(TransferDirection::LocalToHost));
    EXPECT_FALSE(neededOnGpgpu(TransferDirection::HostToLocal));

    // SplitBcsCopy=0, ENGINE_BCS receiver: no direction is expected to split.
    DebugManager.flags.SplitBcsCopy.set(0);
    EXPECT_FALSE(neededOnBcs(TransferDirection::HostToHost));
    EXPECT_FALSE(neededOnBcs(TransferDirection::LocalToLocal));
    EXPECT_FALSE(neededOnBcs(TransferDirection::LocalToHost));
    EXPECT_FALSE(neededOnBcs(TransferDirection::HostToLocal));
}
// Non-blocking enqueueReadBuffer from a buffer marked as local memory, with
// SplitBcsCopy=1: the two receivers installed in bcsEngines[2] and bcsEngines[4]
// are each expected to reach task count 1, while the GPGPU and ENGINE_BCS
// receivers stay at 0.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadThenEnqueueBlitSplit) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(3);

    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);

    // First extra engine: hardware CSR bound to an OS context created for ENGINE_BCS2.
    auto bcs2Csr = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment,
                                                                         pDevice->getRootDeviceIndex(),
                                                                         pDevice->getDeviceBitfield());
    std::unique_ptr<OsContext> bcs2OsContext(
        OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS2, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
    bcs2Csr->setupContext(*bcs2OsContext);
    bcs2Csr->initializeTagAllocation();
    EngineControl bcs2Engine(bcs2Csr.get(), bcs2OsContext.get());

    // Second extra engine: same setup for ENGINE_BCS4.
    auto bcs4Csr = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment,
                                                                         pDevice->getRootDeviceIndex(),
                                                                         pDevice->getDeviceBitfield());
    std::unique_ptr<OsContext> bcs4OsContext(
        OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS4, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
    bcs4Csr->setupContext(*bcs4OsContext);
    bcs4Csr->initializeTagAllocation();
    EngineControl bcs4Engine(bcs4Csr.get(), bcs4OsContext.get());

    // Hand the stack-allocated engines to the queue.
    queue->bcsEngines[2] = &bcs2Engine;
    queue->bcsEngines[4] = &bcs4Engine;

    BufferDefaults::context = context;
    auto srcBuffer = clUniquePtr(BufferHelper<>::create());
    static_cast<MockGraphicsAllocation *>(srcBuffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::LocalMemory;
    char hostDst[1] = {};

    // Compares the task counts of all four receivers involved in the test.
    auto expectTaskCounts = [&](auto expectedOnExtraEngines) {
        EXPECT_EQ(bcs2Csr->peekTaskCount(), expectedOnExtraEngines);
        EXPECT_EQ(bcs4Csr->peekTaskCount(), expectedOnExtraEngines);
        EXPECT_EQ(queue->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
        EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
    };

    expectTaskCounts(0u);
    EXPECT_EQ(CL_SUCCESS, queue->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, 1u, hostDst, nullptr, 0, nullptr, nullptr));
    expectTaskCounts(1u);

    // NOTE(review): presumably the queue is released here so that it is gone before
    // the stack-allocated EngineControl objects it points to are destroyed - confirm.
    pCmdQ->release();
    pCmdQ = nullptr;
}
// Blocking enqueueReadBuffer (CL_TRUE) from a buffer marked as local memory,
// with SplitBcsCopy=1 and UpdateTaskCountFromWait=3: the two receivers installed
// in bcsEngines[2] and bcsEngines[4] are each expected to reach task count 2,
// while the GPGPU and ENGINE_BCS receivers stay at 0.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueBlockingReadThenEnqueueBlitSplit) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(3);

    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);

    // First extra engine: hardware CSR bound to an OS context created for ENGINE_BCS2.
    auto bcs2Csr = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment,
                                                                         pDevice->getRootDeviceIndex(),
                                                                         pDevice->getDeviceBitfield());
    std::unique_ptr<OsContext> bcs2OsContext(
        OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS2, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
    bcs2Csr->setupContext(*bcs2OsContext);
    bcs2Csr->initializeTagAllocation();
    EngineControl bcs2Engine(bcs2Csr.get(), bcs2OsContext.get());

    // Second extra engine: same setup for ENGINE_BCS4.
    auto bcs4Csr = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment,
                                                                         pDevice->getRootDeviceIndex(),
                                                                         pDevice->getDeviceBitfield());
    std::unique_ptr<OsContext> bcs4OsContext(
        OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS4, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
    bcs4Csr->setupContext(*bcs4OsContext);
    bcs4Csr->initializeTagAllocation();
    EngineControl bcs4Engine(bcs4Csr.get(), bcs4OsContext.get());

    // Hand the stack-allocated engines to the queue.
    queue->bcsEngines[2] = &bcs2Engine;
    queue->bcsEngines[4] = &bcs4Engine;

    BufferDefaults::context = context;
    auto srcBuffer = clUniquePtr(BufferHelper<>::create());
    static_cast<MockGraphicsAllocation *>(srcBuffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::LocalMemory;
    char hostDst[1] = {};

    // Compares the task counts of all four receivers involved in the test.
    auto expectTaskCounts = [&](auto expectedOnExtraEngines) {
        EXPECT_EQ(bcs2Csr->peekTaskCount(), expectedOnExtraEngines);
        EXPECT_EQ(bcs4Csr->peekTaskCount(), expectedOnExtraEngines);
        EXPECT_EQ(queue->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
        EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
    };

    expectTaskCounts(0u);
    EXPECT_EQ(CL_SUCCESS, queue->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, 1u, hostDst, nullptr, 0, nullptr, nullptr));
    // NOTE(review): the extra increment compared to the non-blocking case presumably
    // comes from the wait path under UpdateTaskCountFromWait - confirm.
    expectTaskCounts(2u);

    // NOTE(review): presumably the queue is released here so that it is gone before
    // the stack-allocated EngineControl objects it points to are destroyed - confirm.
    pCmdQ->release();
    pCmdQ = nullptr;
}
// Non-blocking enqueueReadBuffer with an output event, from a buffer marked as
// local memory and with SplitBcsCopy=1. The two receivers installed in
// bcsEngines[2] and bcsEngines[4] are each expected to reach task count 1, and
// the returned event is expected to carry 3 timestamp packet nodes.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadWithEventThenEnqueueBlitSplitAndAddBothTimestampsToEvent) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.UpdateTaskCountFromWait.set(3);
    auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);

    // First extra engine: hardware CSR bound to an OS context created for ENGINE_BCS2.
    auto csr1 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    std::unique_ptr<OsContext> osContext1(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                                                            EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS2, EngineUsage::Regular},
                                                                                                         PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
    csr1->setupContext(*osContext1);
    csr1->initializeTagAllocation();
    EngineControl control1(csr1.get(), osContext1.get());

    // Second extra engine: same setup for ENGINE_BCS4.
    auto csr2 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    std::unique_ptr<OsContext> osContext2(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                                                            EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS4, EngineUsage::Regular},
                                                                                                         PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
    csr2->setupContext(*osContext2);
    csr2->initializeTagAllocation();
    EngineControl control2(csr2.get(), osContext2.get());

    // Hand the stack-allocated engines to the queue.
    cmdQHw->bcsEngines[2] = &control1;
    cmdQHw->bcsEngines[4] = &control2;

    BufferDefaults::context = context;
    auto buffer = clUniquePtr(BufferHelper<>::create());
    static_cast<MockGraphicsAllocation *>(buffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::LocalMemory;
    char ptr[1] = {};

    EXPECT_EQ(csr1->peekTaskCount(), 0u);
    EXPECT_EQ(csr2->peekTaskCount(), 0u);
    EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
    EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);

    // Start from nullptr so a failed enqueue leaves a well-defined value for the checks below
    // instead of an indeterminate pointer.
    cl_event event = nullptr;
    EXPECT_EQ(CL_SUCCESS, cmdQHw->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, &event));

    EXPECT_EQ(csr1->peekTaskCount(), 1u);
    EXPECT_EQ(csr2->peekTaskCount(), 1u);
    EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
    EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);

    // Non-fatal checks plus explicit guards: a missing event is reported as a failure without
    // dereferencing a null pointer, and the queue release at the end is still reached.
    EXPECT_NE(event, nullptr);
    if (event != nullptr) {
        auto pEvent = castToObject<Event>(event);
        EXPECT_NE(pEvent, nullptr);
        if (pEvent != nullptr) {
            EXPECT_EQ(pEvent->getTimestampPacketNodes()->peekNodes().size(), 3u);
        }
        clReleaseEvent(event);
    }

    // NOTE(review): presumably the queue is released here so that it is gone before
    // the stack-allocated EngineControl objects it points to are destroyed - confirm.
    pCmdQ->release();
    pCmdQ = nullptr;
}
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};

View File

@ -104,6 +104,109 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationEnabledWh
EXPECT_EQ(0u, queue->countBcsEngines());
}
// With deferred BCS initialization, a fresh queue is expected to hold no BCS
// engines; constructBcsEnginesForSplit() is expected to bring the count to 4,
// and a second call is expected to leave it at 4.
HWTEST2_F(CommandQueuePvcAndLaterTests, whenConstructBcsEnginesForSplitThenContainsMultipleBcsEngines, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQBcsInitialization.set(1u);

    // Device description with blitter operations enabled.
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    auto *mockDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
    MockClDevice clDevice{mockDevice};
    cl_device_id deviceHandle = static_cast<cl_device_id>(&clDevice);
    ClDeviceVector deviceVector{&deviceHandle, 1u};

    cl_int status{};
    std::unique_ptr<Context> context(Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, status));
    EXPECT_EQ(CL_SUCCESS, status);

    auto queue = std::make_unique<MockCommandQueue>(*context);
    EXPECT_EQ(0u, queue->countBcsEngines());

    // First call creates the engines.
    queue->constructBcsEnginesForSplit();
    EXPECT_EQ(4u, queue->countBcsEngines());

    // Repeating the call must not change the count.
    queue->constructBcsEnginesForSplit();
    EXPECT_EQ(4u, queue->countBcsEngines());
}
// selectCsrForHostPtrAllocation() is expected to return the GPGPU receiver when
// its first argument is true, and the receiver passed in when it is false.
HWTEST2_F(CommandQueuePvcAndLaterTests, whenSelectCsrForHostPtrAllocationThenReturnProperEngine, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQBcsInitialization.set(1u);

    // Device description with blitter operations enabled.
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    auto *mockDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
    MockClDevice clDevice{mockDevice};
    cl_device_id deviceHandle = static_cast<cl_device_id>(&clDevice);
    ClDeviceVector deviceVector{&deviceHandle, 1u};

    cl_int status{};
    std::unique_ptr<Context> context(Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, status));
    EXPECT_EQ(CL_SUCCESS, status);

    auto queue = std::make_unique<MockCommandQueue>(*context);
    EXPECT_EQ(0u, queue->countBcsEngines());
    queue->constructBcsEnginesForSplit();
    EXPECT_EQ(4u, queue->countBcsEngines());

    // First argument true: the GPGPU receiver is expected back.
    auto &selectedForTrue = queue->selectCsrForHostPtrAllocation(true, *queue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS2));
    EXPECT_EQ(&selectedForTrue, &queue->getGpgpuCommandStreamReceiver());

    // First argument false: the ENGINE_BCS2 receiver that was passed in is expected back.
    auto &selectedForFalse = queue->selectCsrForHostPtrAllocation(false, *queue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS2));
    EXPECT_EQ(&selectedForFalse, queue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS2));
}
// prepareHostPtrSurfaceForSplit(false, ...) is expected to leave the host-ptr
// allocation unused (objectNotUsed) on every available ENGINE_BCS1..ENGINE_BCS8
// context; prepareHostPtrSurfaceForSplit(true, ...) is expected to set the task
// count to 0 there, and to keep it at 0 when called again.
HWTEST2_F(CommandQueuePvcAndLaterTests, whenPrepareHostPtrSurfaceForSplitThenSetTaskCountsToZero, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DeferCmdQBcsInitialization.set(1u);

    // Device description with blitter operations enabled.
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    auto *mockDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
    MockClDevice clDevice{mockDevice};
    cl_device_id deviceHandle = static_cast<cl_device_id>(&clDevice);
    ClDeviceVector deviceVector{&deviceHandle, 1u};

    cl_int status{};
    std::unique_ptr<Context> context(Context::create<Context>(nullptr, deviceVector, nullptr, nullptr, status));
    EXPECT_EQ(CL_SUCCESS, status);

    auto queue = std::make_unique<MockCommandQueue>(*context);
    EXPECT_EQ(0u, queue->countBcsEngines());
    queue->constructBcsEnginesForSplit();
    EXPECT_EQ(4u, queue->countBcsEngines());

    // Host pointer surface whose allocation is created through the GPGPU receiver.
    auto ptr = reinterpret_cast<void *>(0x1234);
    auto ptrSize = MemoryConstants::pageSize;
    HostPtrSurface hostPtrSurf(ptr, ptrSize);
    queue->getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);

    // Walks ENGINE_BCS1..ENGINE_BCS8, skips engines for which the queue returns no receiver,
    // and compares the allocation's task count on each remaining context.
    const auto bcsRangeBegin = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS1);
    const auto bcsRangeEnd = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8);
    auto expectTaskCountOnAvailableBcs = [&](auto expectedTaskCount) {
        for (auto engine = bcsRangeBegin; engine <= bcsRangeEnd; engine++) {
            auto bcsCsr = queue->getBcsCommandStreamReceiver(static_cast<aub_stream::EngineType>(engine));
            if (!bcsCsr) {
                continue;
            }
            auto contextId = bcsCsr->getOsContext().getContextId();
            EXPECT_EQ(hostPtrSurf.getAllocation()->getTaskCount(contextId), expectedTaskCount);
        }
    };

    queue->prepareHostPtrSurfaceForSplit(false, *hostPtrSurf.getAllocation());
    expectTaskCountOnAvailableBcs(GraphicsAllocation::objectNotUsed);

    queue->prepareHostPtrSurfaceForSplit(true, *hostPtrSurf.getAllocation());
    expectTaskCountOnAvailableBcs(0u);

    // A repeated call must keep the task counts at zero.
    queue->prepareHostPtrSurfaceForSplit(true, *hostPtrSurf.getAllocation());
    expectTaskCountOnAvailableBcs(0u);
}
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationDisabledWhenCreateCommandQueueThenBcsIsInitialized, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.DeferCmdQBcsInitialization.set(0u);