Do not obtain command stream if it will not be needed

Change-Id: Id7fa1c6b78e71a085084f8fcb66a7b8e873ad2bc
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
Related-To: NEO-5120
This commit is contained in:
Maciej Dziuban
2020-09-30 16:58:20 +02:00
committed by sys_ocldev
parent 960860e4cb
commit 8fcd51c2c8
8 changed files with 133 additions and 25 deletions

View File

@ -381,7 +381,7 @@ class CommandQueueHw : public CommandQueue {
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
size_t surfaceCount,
LinearStream &commandStream,
LinearStream *commandStream,
size_t commandStreamStart,
bool &blocking,
const EnqueueProperties &enqueueProperties,
@ -396,7 +396,7 @@ class CommandQueueHw : public CommandQueue {
BlitProperties processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest,
LinearStream &commandStream,
LinearStream *commandStream,
uint32_t commandType, bool queueBlocked);
void submitCacheFlush(Surface **surfaces,
size_t numSurfaces,

View File

@ -287,7 +287,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
completionStamp = enqueueCommandWithoutKernel(
surfacesForResidency,
numSurfaceForResidency,
commandStream,
&commandStream,
commandStreamStart,
blocking,
enqueueProperties,
@ -430,7 +430,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
template <typename GfxFamily>
BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest, LinearStream &commandStream,
const EventsRequest &eventsRequest, LinearStream *commandStream,
uint32_t commandType, bool queueBlocked) {
auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType);
@ -450,12 +450,12 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
blitProperties.outputTimestampPacket = currentTimestampPacketNode;
if (isGpgpuSubmissionForBcsRequired(queueBlocked)) {
if (commandStream) {
if (timestampPacketDependencies.cacheFlushNodes.peekNodes().size() > 0) {
auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]);
PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
commandStream,
*commandStream,
GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
cacheFlushTimestampPacketGpuAddress,
0,
@ -463,7 +463,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
args);
}
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode,
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(*commandStream, *currentTimestampPacketNode,
getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
}
return blitProperties;
@ -908,7 +908,7 @@ template <typename GfxFamily>
CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
Surface **surfaces,
size_t surfaceCount,
LinearStream &commandStream,
LinearStream *commandStream,
size_t commandStreamStart,
bool &blocking,
const EnqueueProperties &enqueueProperties,
@ -973,7 +973,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
}
completionStamp = getGpgpuCommandStreamReceiver().flushTask(
commandStream,
*commandStream,
commandStreamStart,
getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
@ -1061,15 +1061,19 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, true);
csrDeps.push_back(&timestampPacketDependencies.previousEnqueueNodes);
auto &commandStream = *obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0);
auto commandStreamStart = commandStream.getUsed();
LinearStream *gpgpuCommandStream = {};
size_t gpgpuCommandStreamStart = {};
if (isGpgpuSubmissionForBcsRequired(blockQueue)) {
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0);
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
}
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies,
eventsRequest, commandStream, cmdType, blockQueue));
eventsRequest, gpgpuCommandStream, cmdType, blockQueue));
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
@ -1078,7 +1082,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
if (!blockQueue) {
csrDeps.makeResident(getGpgpuCommandStreamReceiver());
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, commandStream, commandStreamStart, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel);
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel);
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());

View File

@ -7,6 +7,7 @@
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/mocks/mock_os_library.h"
#include "shared/test/unit_test/test_macros/test_checks_shared.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
@ -1350,3 +1351,83 @@ HWTEST_F(CommandQueueHwTest, WhenForcePerDssBackedBufferProgrammingSetThenDispat
EXPECT_EQ(CL_SUCCESS, status);
EXPECT_TRUE(csr.recordedDispatchFlags.usePerDssBackedBuffer);
}
// Test fixture for blit enqueues on the hardware command queue. It combines the
// device, context and command-queue fixtures and forces blitter support and
// timestamp packets on before the device is created.
struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test {
// Keep the ContextFixture::SetUp overloads visible next to the SetUp() override below.
using ContextFixture::SetUp;
// Builds device -> context -> command queue, in that order.
void SetUp() override {
// Bail out (skip) before anything is created when the default hardware info fails the blitter check.
REQUIRE_BLITTER_OR_SKIP(defaultHwInfo.get());
// Flags are set before ClDeviceFixture::SetUp() so they are already in place when the device is created.
DebugManager.flags.EnableBlitterOperationsSupport.set(1);
DebugManager.flags.EnableTimestampPacket.set(1);
ClDeviceFixture::SetUp();
// Additionally mark the blitter as supported in the device's own (mutable) hardware info.
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
cl_device_id device = pClDevice;
ContextFixture::SetUp(1, &device);
CommandQueueHwFixture::SetUp(pClDevice, 0);
}
// Tears the fixtures down in the reverse order of SetUp().
// NOTE(review): googletest still calls TearDown() when SetUp() skipped, so these
// fixture TearDown() calls presumably must tolerate never having been set up - confirm.
void TearDown() override {
CommandQueueHwFixture::TearDown();
ContextFixture::TearDown();
ClDeviceFixture::TearDown();
}
// Presumably restores the DebugManager flags changed in SetUp() when the fixture is destroyed - confirm.
DebugManagerStateRestore state{};
};
// Two back-to-back one-byte buffer copies on a queue with no earlier kernel work:
// the GPGPU CSR's ensureCommandBufferAllocation counter must still be zero after each copy.
HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
    auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    std::unique_ptr<Buffer> sourceBuffer{BufferHelper<>::create(pContext)};
    std::unique_ptr<Buffer> destinationBuffer{BufferHelper<>::create(pContext)};

    // Same copy parameters for every enqueue: offsets 0, size 1, no wait list, no output event.
    const auto enqueueSingleByteCopy = [&]() -> cl_int {
        return pCmdQ->enqueueCopyBuffer(
            sourceBuffer.get(),
            destinationBuffer.get(),
            0,
            0,
            1,
            0,
            nullptr,
            nullptr);
    };

    for (int blitIndex = 0; blitIndex < 2; ++blitIndex) {
        const cl_int retVal = enqueueSingleByteCopy();
        ASSERT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled);
    }
}
// A kernel enqueue followed by a buffer copy: the GPGPU CSR's
// ensureCommandBufferAllocation counter must be non-zero after the kernel and
// must change again after the copy.
HWTEST_F(CommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenGpgpuCommandStreamIsObtained) {
    auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    std::unique_ptr<Buffer> sourceBuffer{BufferHelper<>::create(pContext)};
    std::unique_ptr<Buffer> destinationBuffer{BufferHelper<>::create(pContext)};

    // Step 1: enqueue a 1-D kernel with a single work item.
    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    size_t globalOffset = 0;
    size_t workSize = 1;
    cl_int retVal = pCmdQ->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &globalOffset, &workSize, &workSize, 0, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0, gpgpuCsr.ensureCommandBufferAllocationCalled);

    // Step 2: remember the counter, enqueue the copy, and expect the counter to differ.
    const auto allocationCallsAfterKernel = gpgpuCsr.ensureCommandBufferAllocationCalled;
    retVal = pCmdQ->enqueueCopyBuffer(
        sourceBuffer.get(),
        destinationBuffer.get(),
        0,
        0,
        1,
        0,
        nullptr,
        nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(allocationCallsAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled);
}

View File

@ -51,7 +51,7 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
TimestampPacketDependencies timestampPacketDependencies;
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u);
}
@ -91,7 +91,7 @@ HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEna
EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u);
@ -121,7 +121,7 @@ HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacke
EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u);
@ -198,7 +198,7 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
EventBuilder eventBuilder;
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking);
@ -225,7 +225,7 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrot
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
bool blocking = true;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_EQ(mockCmdQ->throttle, mockCsr->passedDispatchFlags.throttle);
@ -258,13 +258,13 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies,
eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush);
@ -301,18 +301,18 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO
mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, true);
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies,
eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer);
mockCsr->nTo1SubmissionModelEnabled = false;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
mockCsr->nTo1SubmissionModelEnabled = true;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies,
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
}

View File

@ -244,6 +244,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return blitterDirectSubmissionAvailable;
}
// Counts every call in ensureCommandBufferAllocationCalled, then forwards the
// unchanged arguments to the base-class implementation.
void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) override {
    ++ensureCommandBufferAllocationCalled;
    BaseClass::ensureCommandBufferAllocation(commandStream, minimumRequiredSize, additionalAllocationSize);
}
std::atomic<uint32_t> recursiveLockCounter;
bool createPageTableManagerCalled = false;
bool recordFlusheBatchBuffer = false;
@ -264,5 +269,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
bool multiOsContextCapable = false;
bool directSubmissionAvailable = false;
bool blitterDirectSubmissionAvailable = false;
int ensureCommandBufferAllocationCalled = 0;
};
} // namespace NEO

View File

@ -85,7 +85,7 @@ class CommandStreamReceiver {
virtual void processEviction();
void makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation);
void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize);
MOCKABLE_VIRTUAL void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize);
MemoryManager *getMemoryManager() const;

View File

@ -8,12 +8,23 @@
#include "shared/test/unit_test/test_macros/test_checks_shared.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/test/unit_test/helpers/default_hw_info.h"
#include "test.h"
using namespace NEO;
// Reports whether the GPGPU engine list obtained for the given hardware info
// contains an ENGINE_BCS entry.
// NOTE(review): the helper is looked up through the global ::renderCoreFamily
// rather than a family read from pHardwareInfo - confirm this is intended.
bool NEO::TestChecks::supportsBlitter(const HardwareInfo *pHardwareInfo) {
    auto availableEngines = HwHelper::get(::renderCoreFamily).getGpgpuEngineInstances(*pHardwareInfo);
    bool blitterEngineFound = false;
    for (const auto &engineEntry : availableEngines) {
        if (aub_stream::EngineType::ENGINE_BCS == engineEntry.first) {
            blitterEngineFound = true;
            break;
        }
    }
    return blitterEngineFound;
}
// Returns the ftrSvm flag from the capability table of the given hardware info.
// pHardwareInfo is dereferenced unconditionally, so it must not be null.
bool TestChecks::supportsSvm(const HardwareInfo *pHardwareInfo) {
return pHardwareInfo->capabilityTable.ftrSvm;
}
@ -46,4 +57,4 @@ HWTEST2_P(TestMacrosWithParamIfNotMatchTearDownCall, givenNotMatchPlatformWhenUs
}
INSTANTIATE_TEST_CASE_P(givenNotMatchPlatformWhenUseHwTest2PThenSetUpAndTearDownAreNotCalled,
TestMacrosWithParamIfNotMatchTearDownCall,
::testing::Values(0));
::testing::Values(0));

View File

@ -15,6 +15,7 @@ class Device;
struct HardwareInfo;
namespace TestChecks {
bool supportsBlitter(const HardwareInfo *pHardwareInfo);
bool supportsSvm(const HardwareInfo *pHardwareInfo);
bool supportsSvm(const std::unique_ptr<HardwareInfo> &pHardwareInfo);
bool supportsSvm(const Device *pDevice);
@ -26,3 +27,8 @@ bool supportsSvm(const Device *pDevice);
if (NEO::TestChecks::supportsSvm(param) == false) { \
GTEST_SKIP(); \
}
// Skips the current test (via GTEST_SKIP) when NEO::TestChecks::supportsBlitter(param)
// reports no blitter support. Expands to a plain `if` statement, so use it as a
// standalone statement.
#define REQUIRE_BLITTER_OR_SKIP(param)              \
    if (!NEO::TestChecks::supportsBlitter(param)) { \
        GTEST_SKIP();                               \
    }