Add profiling support for blitter

Resolves: NEO-4121

Change-Id: I29dfcf07d48100c578cbc432fee4d87dfa18e8f4
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2020-04-29 14:06:01 +02:00
parent f4ef256900
commit 02f2f22045
21 changed files with 198 additions and 42 deletions

View File

@ -809,7 +809,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
} }
if (enqueueProperties.blitPropertiesContainer->size() > 0) { if (enqueueProperties.blitPropertiesContainer->size() > 0) {
this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false); this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled());
dispatchFlags.implicitFlush = true; dispatchFlags.implicitFlush = true;
} }
@ -956,9 +956,16 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
surface->makeResident(getGpgpuCommandStreamReceiver()); surface->makeResident(getGpgpuCommandStreamReceiver());
} }
TimeStampData submitTimeStamp;
if (eventBuilder.getEvent() && isProfilingEnabled() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
eventBuilder.getEvent()->getTimestampPacketNodes()->makeResident(getGpgpuCommandStreamReceiver());
}
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) { if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer); UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer);
this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false); this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled());
} }
DispatchFlags dispatchFlags( DispatchFlags dispatchFlags(

View File

@ -165,7 +165,6 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &queueTimeStamp.GPUTimeStamp; src = &queueTimeStamp.GPUTimeStamp;
} }
srcSize = sizeof(cl_ulong); srcSize = sizeof(cl_ulong);
break; break;

View File

@ -204,7 +204,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
*currentTimestampPacketNodes, csrDeps, *currentTimestampPacketNodes, csrDeps,
commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr); commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr);
auto bcsTaskCount = bcsCsr.blitBuffer(kernelOperation->blitPropertiesContainer, false); auto bcsTaskCount = bcsCsr.blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled());
commandQueue.updateBcsTaskCount(bcsTaskCount); commandQueue.updateBcsTaskCount(bcsTaskCount);
} }
@ -290,7 +290,7 @@ void CommandWithoutKernel::dispatchBlitOperation() {
blitProperties.csrDependencies.push_back(&timestampPacketDependencies->barrierNodes); blitProperties.csrDependencies.push_back(&timestampPacketDependencies->barrierNodes);
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
auto bcsTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false); auto bcsTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled());
commandQueue.updateBcsTaskCount(bcsTaskCount); commandQueue.updateBcsTaskCount(bcsTaskCount);
} }

View File

@ -43,7 +43,7 @@ struct BlitEnqueueTests : public ::testing::Test {
BlitPropertiesContainer container; BlitPropertiesContainer container;
container.push_back(blitProperties); container.push_back(blitProperties);
bcsCsr->blitBuffer(container, true); bcsCsr->blitBuffer(container, true, false);
return BlitOperationResult::Success; return BlitOperationResult::Success;
} }

View File

@ -16,6 +16,7 @@
#include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_csr.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h"
@ -53,6 +54,80 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u); EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u);
} }
// Variant of the EnqueueHandlerTest fixture that writes the compile-time value
// `enabled` into the EnableTimestampPacket debug flag before the base fixture
// is set up.
template <bool enabled>
struct EnqueueHandlerTimestampTest : public EnqueueHandlerTest {
    // NOTE(review): presumably snapshots the debug flags and puts them back when
    // the fixture is destroyed - confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;

    void SetUp() override {
        // The flag is written first, then the base fixture is initialised.
        DebugManager.flags.EnableTimestampPacket.set(enabled);
        EnqueueHandlerTest::SetUp();
    }

    void TearDown() override {
        // Nothing extra to undo here; only the base fixture is torn down.
        EnqueueHandlerTest::TearDown();
    }
};
using EnqueueHandlerTimestampEnabledTest = EnqueueHandlerTimestampTest<true>;
// With a profiling-enabled queue and the EnableTimestampPacket flag set to true
// by the fixture, enqueueCommandWithoutKernel is expected to fill the event's
// submit timestamp (both CPU and GPU parts become non-zero).
HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEnabledWhenEnqueueCommandWithoutKernelThenSubmitTimeStampIsSet) {
    cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    std::unique_ptr<MockCommandQueueHw<FamilyType>> mockCmdQ(new MockCommandQueueHw<FamilyType>(context, pClDevice, properties));

    // Minimal surface backed by a stack buffer; it is only passed through to the enqueue call.
    char buffer[64];
    std::unique_ptr<MockGraphicsAllocation> allocation(new MockGraphicsAllocation(buffer, sizeof(buffer)));
    std::unique_ptr<GeneralSurface> surface(new GeneralSurface(allocation.get()));

    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    eventBuilder.create<MockEvent<Event>>(mockCmdQ.get(), CL_COMMAND_USER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);

    // The object was created as MockEvent<Event>, so the downcast must name that
    // exact type; casting to MockEvent<UserEvent> and then dereferencing/deleting
    // through it would be undefined behaviour.
    auto ev = static_cast<MockEvent<Event> *>(eventBuilder.getEvent());
    ASSERT_NE(nullptr, ev);

    Surface *surfaces[] = {surface.get()};
    auto blocking = true;
    TimestampPacketDependencies timestampPacketDependencies;
    EnqueueProperties enqueueProperties(false, false, false, true, nullptr);

    // Precondition: nothing has written the submit timestamp yet.
    EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u);
    EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);

    mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
                                          eventsRequest, eventBuilder, 0);

    EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u);
    EXPECT_NE(ev->submitTimeStamp.GPUTimeStamp, 0u);

    // The test releases the event itself once the checks are done.
    delete ev;
}
using EnqueueHandlerTimestampDisabledTest = EnqueueHandlerTimestampTest<false>;
// With a profiling-enabled queue but the EnableTimestampPacket flag set to false
// by the fixture, enqueueCommandWithoutKernel is expected to leave the event's
// submit timestamp untouched (both CPU and GPU parts stay zero).
HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacketsDisabledWhenEnqueueCommandWithoutKernelThenSubmitTimeStampIsNotSet) {
    cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    std::unique_ptr<MockCommandQueueHw<FamilyType>> mockCmdQ(new MockCommandQueueHw<FamilyType>(context, pClDevice, properties));

    // Minimal surface backed by a stack buffer; it is only passed through to the enqueue call.
    char buffer[64];
    std::unique_ptr<MockGraphicsAllocation> allocation(new MockGraphicsAllocation(buffer, sizeof(buffer)));
    std::unique_ptr<GeneralSurface> surface(new GeneralSurface(allocation.get()));

    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    eventBuilder.create<MockEvent<Event>>(mockCmdQ.get(), CL_COMMAND_USER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);

    // The object was created as MockEvent<Event>, so the downcast must name that
    // exact type; casting to MockEvent<UserEvent> and then dereferencing/deleting
    // through it would be undefined behaviour.
    auto ev = static_cast<MockEvent<Event> *>(eventBuilder.getEvent());
    ASSERT_NE(nullptr, ev);

    Surface *surfaces[] = {surface.get()};
    auto blocking = true;
    TimestampPacketDependencies timestampPacketDependencies;
    EnqueueProperties enqueueProperties(false, false, false, true, nullptr);

    // Precondition: nothing has written the submit timestamp yet.
    EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u);
    EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);

    mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
                                          eventsRequest, eventBuilder, 0);

    // Timestamp packets are disabled, so the submit timestamp must still be zero.
    EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u);
    EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);

    // The test releases the event itself once the checks are done.
    delete ev;
}
HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontRegisterBlitProperties) { HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontRegisterBlitProperties) {
std::unique_ptr<MockCommandQueueHw<FamilyType>> mockCmdQ(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0)); std::unique_ptr<MockCommandQueueHw<FamilyType>> mockCmdQ(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
auto &csr = mockCmdQ->getGpgpuCommandStreamReceiver(); auto &csr = mockCmdQ->getGpgpuCommandStreamReceiver();

View File

@ -1167,7 +1167,7 @@ HWTEST_F(AubCommandStreamReceiverTests, WhenBlitBufferIsCalledThenCounterIsCorre
BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0, 0, 0, 0, 0); BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0, 0, 0, 0, 0);
BlitPropertiesContainer blitPropertiesContainer; BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties); blitPropertiesContainer.push_back(blitProperties);
aubCsr->blitBuffer(blitPropertiesContainer, true); aubCsr->blitBuffer(blitPropertiesContainer, true, false);
EXPECT_EQ(1u, aubCsr->blitBufferCalled); EXPECT_EQ(1u, aubCsr->blitBufferCalled);
} }

View File

@ -331,7 +331,7 @@ struct BcsTests : public CommandStreamReceiverHwTest {
BlitPropertiesContainer container; BlitPropertiesContainer container;
container.push_back(blitProperties); container.push_back(blitProperties);
return bcsCsr->blitBuffer(container, blocking); return bcsCsr->blitBuffer(container, blocking, false);
} }
TimestampPacketContainer timestampPacketContainer; TimestampPacketContainer timestampPacketContainer;
@ -350,8 +350,8 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman
auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({alignedBltSize, 1, 1}, csrDependencies, false); auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({alignedBltSize, 1, 1}, csrDependencies, false, false);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({notAlignedBltSize, 1, 1}, csrDependencies, false); auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({notAlignedBltSize, 1, 1}, csrDependencies, false, false);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
@ -368,8 +368,8 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAdd
auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize, csrDependencies, false); auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize, csrDependencies, false, false);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies, false); auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies, false, false);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
@ -409,7 +409,7 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa
expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo()); auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo(), false);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
} }
@ -437,7 +437,7 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForWri
expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo()); auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo(), false);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
} }
@ -448,8 +448,8 @@ HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommands
auto expectedSizeWithTimestampPacketWrite = expectedBaseSize + EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite(); auto expectedSizeWithTimestampPacketWrite = expectedBaseSize + EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite();
auto expectedSizeWithoutTimestampPacketWrite = expectedBaseSize; auto expectedSizeWithoutTimestampPacketWrite = expectedBaseSize;
auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, true); auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, true, false);
auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false); auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false, false);
EXPECT_EQ(expectedSizeWithTimestampPacketWrite, estimatedSizeWithTimestampPacketWrite); EXPECT_EQ(expectedSizeWithTimestampPacketWrite, estimatedSizeWithTimestampPacketWrite);
EXPECT_EQ(expectedSizeWithoutTimestampPacketWrite, estimatedSizeWithoutTimestampPacketWrite); EXPECT_EQ(expectedSizeWithoutTimestampPacketWrite, estimatedSizeWithoutTimestampPacketWrite);
@ -469,7 +469,7 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd
size_t expectedSize = (cmdsSizePerBlit * numberOfBlts) + size_t expectedSize = (cmdsSizePerBlit * numberOfBlts) +
TimestampPacketHelper::getRequiredCmdStreamSize<FamilyType>(csrDependencies); TimestampPacketHelper::getRequiredCmdStreamSize<FamilyType>(csrDependencies);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false); auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false, false);
EXPECT_EQ(expectedSize, estimatedSize); EXPECT_EQ(expectedSize, estimatedSize);
} }
@ -972,7 +972,7 @@ HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommands
blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties1);
blitPropertiesContainer.push_back(blitProperties2); blitPropertiesContainer.push_back(blitProperties2);
csr.blitBuffer(blitPropertiesContainer, true); csr.blitBuffer(blitPropertiesContainer, true, false);
HardwareParse hwParser; HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(csr.commandStream); hwParser.parseCommands<FamilyType>(csr.commandStream);
@ -1001,6 +1001,46 @@ HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommands
EXPECT_EQ(2u, dependenciesFound); EXPECT_EQ(2u, dependenciesFound);
} }
// Dispatches a single blit with profiling enabled and an output timestamp packet,
// then checks that the command stream contains, in order: two
// MI_STORE_REGISTER_MEM commands, an XY_COPY_BLT, and two more
// MI_STORE_REGISTER_MEM commands.
HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
    auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled,
                                                                     false, false, false));
    auto bcsCsr = std::make_unique<UltCommandStreamReceiver<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex());
    bcsCsr->setupContext(*bcsOsContext);
    bcsCsr->initializeTagAllocation();

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // The buffer is dereferenced right below, so stop here if creation did not succeed.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());

    // Arbitrary host address; it is only handed to the blit-properties constructor.
    void *hostPtr = reinterpret_cast<void *>(0x12340000);
    auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
                                                                                *bcsCsr, buffer->getGraphicsAllocation(), nullptr, hostPtr,
                                                                                buffer->getGraphicsAllocation()->getGpuAddress(), 0,
                                                                                0, 0, {1, 1, 1}, 0, 0, 0, 0);

    // An output timestamp packet is attached so the blit has a node to write timestamps into.
    MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u);
    blitProperties.outputTimestampPacket = timestamp.getNode(0);

    BlitPropertiesContainer blitPropertiesContainer;
    blitPropertiesContainer.push_back(blitProperties);

    // Arguments: non-blocking, profiling enabled.
    bcsCsr->blitBuffer(blitPropertiesContainer, false, true);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
    auto &cmdList = hwParser.cmdList;

    // Two register stores are expected ahead of the copy command...
    auto cmdIterator = find<typename FamilyType::MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cmdIterator);
    cmdIterator = find<typename FamilyType::MI_STORE_REGISTER_MEM *>(++cmdIterator, cmdList.end());
    ASSERT_NE(cmdList.end(), cmdIterator);

    // ...followed by the copy itself...
    cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(++cmdIterator, cmdList.end());
    ASSERT_NE(cmdList.end(), cmdIterator);

    // ...and two more register stores after it.
    cmdIterator = find<typename FamilyType::MI_STORE_REGISTER_MEM *>(++cmdIterator, cmdList.end());
    ASSERT_NE(cmdList.end(), cmdIterator);
    cmdIterator = find<typename FamilyType::MI_STORE_REGISTER_MEM *>(++cmdIterator, cmdList.end());
    ASSERT_NE(cmdList.end(), cmdIterator);
}
HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocationsResident) { HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocationsResident) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>(); auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true; csr.storeMakeResidentAllocations = true;
@ -1027,7 +1067,7 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations
blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties1);
blitPropertiesContainer.push_back(blitProperties2); blitPropertiesContainer.push_back(blitProperties2);
csr.blitBuffer(blitPropertiesContainer, false); csr.blitBuffer(blitPropertiesContainer, false, false);
EXPECT_TRUE(csr.isMadeResident(buffer1->getGraphicsAllocation())); EXPECT_TRUE(csr.isMadeResident(buffer1->getGraphicsAllocation()));
EXPECT_TRUE(csr.isMadeResident(buffer2->getGraphicsAllocation())); EXPECT_TRUE(csr.isMadeResident(buffer2->getGraphicsAllocation()));
@ -1070,7 +1110,7 @@ HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAl
blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties1);
blitPropertiesContainer.push_back(blitProperties2); blitPropertiesContainer.push_back(blitProperties2);
bcsCsr->blitBuffer(blitPropertiesContainer, false); bcsCsr->blitBuffer(blitPropertiesContainer, false, false);
EXPECT_TRUE(bcsCsr->isMadeResident(buffer1->getGraphicsAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(buffer1->getGraphicsAllocation()));
EXPECT_TRUE(bcsCsr->isMadeResident(buffer2->getGraphicsAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(buffer2->getGraphicsAllocation()));

View File

@ -509,7 +509,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override { void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
} }
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { return taskCount; }; uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override { return taskCount; };
CompletionStamp flushTask( CompletionStamp flushTask(
LinearStream &commandStream, LinearStream &commandStream,

View File

@ -39,9 +39,9 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw<GfxFamily>
return csr; return csr;
} }
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override {
blitBufferCalled++; blitBufferCalled++;
return BaseClass::blitBuffer(blitPropertiesContainer, blocking); return BaseClass::blitBuffer(blitPropertiesContainer, blocking, profilingEnabled);
} }
uint32_t blitBufferCalled = 0; uint32_t blitBufferCalled = 0;

View File

@ -191,9 +191,9 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership(); return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
} }
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override {
blitBufferCalled++; blitBufferCalled++;
return CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitPropertiesContainer, blocking); return CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitPropertiesContainer, blocking, profilingEnabled);
} }
bool createPerDssBackedBuffer(Device &device) override { bool createPerDssBackedBuffer(Device &device) override {

View File

@ -689,7 +689,7 @@ struct BcsBufferTests : public ::testing::Test {
BlitPropertiesContainer container; BlitPropertiesContainer container;
container.push_back(blitProperties); container.push_back(blitProperties);
bcsCsr->blitBuffer(container, true); bcsCsr->blitBuffer(container, true, false);
return BlitOperationResult::Success; return BlitOperationResult::Success;
} }

View File

@ -148,6 +148,7 @@ struct EncodeStoreMMIO {
static const size_t size = sizeof(MI_STORE_REGISTER_MEM); static const size_t size = sizeof(MI_STORE_REGISTER_MEM);
static void encode(LinearStream &csr, uint32_t offset, uint64_t address); static void encode(LinearStream &csr, uint32_t offset, uint64_t address);
static void remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem);
}; };
template <typename GfxFamily> template <typename GfxFamily>

View File

@ -259,6 +259,7 @@ void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_
MI_STORE_REGISTER_MEM cmd = Family::cmdInitStoreRegisterMem; MI_STORE_REGISTER_MEM cmd = Family::cmdInitStoreRegisterMem;
cmd.setRegisterAddress(offset); cmd.setRegisterAddress(offset);
cmd.setMemoryAddress(address); cmd.setMemoryAddress(address);
remapOffset(&cmd);
auto buffer = csr.getSpaceForCmd<MI_STORE_REGISTER_MEM>(); auto buffer = csr.getSpaceForCmd<MI_STORE_REGISTER_MEM>();
*buffer = cmd; *buffer = cmd;
} }

View File

@ -13,4 +13,8 @@ namespace NEO {
template <typename Family> template <typename Family>
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) { void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) {
} }
// Hook invoked by EncodeStoreMMIO::encode on the MI_STORE_REGISTER_MEM command
// before it is copied into the command stream. This variant has an empty body,
// so the command is left exactly as the caller prepared it.
template <typename Family>
void EncodeStoreMMIO<Family>::remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem) {
}
} // namespace NEO } // namespace NEO

View File

@ -22,4 +22,9 @@ void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t nu
EncodeComputeMode<Family>::adjustComputeMode(csr, numGrfRequired, &stateComputeMode, isMultiOsContextCapable); EncodeComputeMode<Family>::adjustComputeMode(csr, numGrfRequired, &stateComputeMode, isMultiOsContextCapable);
} }
// Hook invoked by EncodeStoreMMIO::encode on the MI_STORE_REGISTER_MEM command
// before it is copied into the command stream. This variant turns on the
// command's MMIO remap enable field.
// pStoreRegMem: command to modify in place; it is dereferenced without a null check.
template <typename Family>
void EncodeStoreMMIO<Family>::remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem) {
pStoreRegMem->setMmioRemapEnable(true);
}
} // namespace NEO } // namespace NEO

View File

@ -194,7 +194,7 @@ class CommandStreamReceiver {
this->latestSentTaskCount = latestSentTaskCount; this->latestSentTaskCount = latestSentTaskCount;
} }
virtual uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) = 0; virtual uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) = 0;
ScratchSpaceController *getScratchSpaceController() const { ScratchSpaceController *getScratchSpaceController() const {
return scratchSpaceController.get(); return scratchSpaceController.get();

View File

@ -83,7 +83,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW; return CommandStreamReceiverType::CSR_HW;
} }
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override; uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override;
bool isMultiOsContextCapable() const override; bool isMultiOsContextCapable() const override;

View File

@ -854,13 +854,13 @@ bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(cons
} }
template <typename GfxFamily> template <typename GfxFamily>
uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) { uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
auto lock = obtainUniqueOwnership(); auto lock = obtainUniqueOwnership();
auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer, peekHwInfo())); auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer, peekHwInfo(), profilingEnabled));
auto commandStreamStart = commandStream.getUsed(); auto commandStreamStart = commandStream.getUsed();
auto newTaskCount = taskCount + 1; auto newTaskCount = taskCount + 1;
latestSentTaskCount = newTaskCount; latestSentTaskCount = newTaskCount;
@ -870,11 +870,27 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
for (auto &blitProperties : blitPropertiesContainer) { for (auto &blitProperties : blitPropertiesContainer) {
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices()); TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices());
if (blitProperties.outputTimestampPacket && profilingEnabled) {
auto timestampContextStartGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextStart);
auto timestampGlobalStartAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalStart);
EncodeStoreMMIO<GfxFamily>::encode(commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress);
EncodeStoreMMIO<GfxFamily>::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress);
}
BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]);
if (blitProperties.outputTimestampPacket) { if (blitProperties.outputTimestampPacket) {
auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket); if (profilingEnabled) {
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0, true, true); auto timestampContextEndGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto timestampGlobalEndAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalEnd);
EncodeStoreMMIO<GfxFamily>::encode(commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextEndGpuAddress);
EncodeStoreMMIO<GfxFamily>::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress);
} else {
auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket);
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0, true, true);
}
makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation()); makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation());
} }

View File

@ -83,8 +83,8 @@ struct BlitCommandsHelper {
static uint64_t getMaxBlitHeight(); static uint64_t getMaxBlitHeight();
static void dispatchPostBlitCommand(LinearStream &linearStream); static void dispatchPostBlitCommand(LinearStream &linearStream);
static size_t estimatePostBlitCommandSize(); static size_t estimatePostBlitCommandSize();
static size_t estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket); static size_t estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled);
static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo); static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo, bool profilingEnabled);
static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice);
static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice);
static void dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);

View File

@ -60,7 +60,7 @@ size_t BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize() {
} }
template <typename GfxFamily> template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket) { size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled) {
size_t numberOfBlits = 0; size_t numberOfBlits = 0;
uint64_t width = 1; uint64_t width = 1;
uint64_t height = 1; uint64_t height = 1;
@ -87,17 +87,25 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(Vec3<size_t> copy
const size_t cmdsSizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize()); const size_t cmdsSizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());
size_t timestampCmdSize = 0;
if (updateTimestampPacket) {
if (profilingEnabled) {
timestampCmdSize = 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
} else {
timestampCmdSize = EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
}
}
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) + return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
(cmdsSizePerBlit * numberOfBlits) + (cmdsSizePerBlit * numberOfBlits) + timestampCmdSize;
(EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() * static_cast<size_t>(updateTimestampPacket));
} }
template <typename GfxFamily> template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo) { size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo, bool profilingEnabled) {
size_t size = 0; size_t size = 0;
for (auto &blitProperties : blitPropertiesContainer) { for (auto &blitProperties : blitPropertiesContainer) {
size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies, size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies,
blitProperties.outputTimestampPacket != nullptr); blitProperties.outputTimestampPacket != nullptr, profilingEnabled);
} }
size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo); size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo);
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);

View File

@ -68,7 +68,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override { void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
} }
// Mock override of blitBuffer: ignores all of its arguments and returns taskCount.
// (Diff row: the left copy is the pre-change signature, the right copy adds `profilingEnabled`.)
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { return taskCount; }; uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override { return taskCount; };
CommandStreamReceiverType getType() override { CommandStreamReceiverType getType() override {
return CommandStreamReceiverType::CSR_HW; return CommandStreamReceiverType::CSR_HW;
@ -156,9 +156,9 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
return completionStamp; return completionStamp;
} }
// Mock override of blitBuffer that can bypass the base-class blit.
// Returns the result of CommandStreamReceiverHw<GfxFamily>::blitBuffer unless
// skipBlitCalls is set, in which case it returns taskCount.
// (Diff rows: the left copy is the pre-change code, the right copy threads `profilingEnabled` through.)
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override {
// Only delegate to the base-class implementation when blit calls are not being skipped.
if (!skipBlitCalls) { if (!skipBlitCalls) {
return CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitPropertiesContainer, blocking); return CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitPropertiesContainer, blocking, profilingEnabled);
} }
// Blit skipped: return taskCount without calling the base class.
return taskCount; return taskCount;
} }