Add events support for cache flushes

Related-To: NEO-2536

Change-Id: Iea9e9b08df0225ce5a126ab950621576b3880bbe
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2019-04-10 12:44:02 +02:00
committed by sys_ocldev
parent 282b0d49ee
commit da19e924f5
8 changed files with 143 additions and 63 deletions

View File

@ -356,7 +356,12 @@ class CommandQueueHw : public CommandQueue {
uint32_t taskLevel);
void processDispatchForCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream);
LinearStream *commandStream,
CsrDependencies &csrDeps);
void submitCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream,
uint64_t postSyncAddress);
bool isCacheFlushCommand(uint32_t commandType) override;

View File

@ -46,9 +46,10 @@ bool CommandQueueHw<Family>::requiresCacheFlushAfterWalkerBasedOnProperties(cons
return false;
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
void CommandQueueHw<GfxFamily>::submitCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream) {
LinearStream *commandStream,
uint64_t postSyncAddress) {
}
template <typename GfxFamily>
bool CommandQueueHw<GfxFamily>::isCacheFlushCommand(uint32_t commandType) {

View File

@ -202,25 +202,30 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (!multiDispatchInfo.empty()) {
obtainNewTimestampPacketNodes(estimateTimestampPacketNodesCount(multiDispatchInfo), previousTimestampPacketNodes);
csrDeps.push_back(&previousTimestampPacketNodes);
} else if (isCacheFlushCommand(commandType)) {
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes);
csrDeps.push_back(&previousTimestampPacketNodes);
}
}
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired, multiDispatchInfo, surfacesForResidency, numSurfaceForResidency);
auto commandStreamStart = commandStream.getUsed();
if (eventBuilder.getEvent() && getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
if (multiDispatchInfo.empty() == false) {
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
previousTimestampPacketNodes, preemption);
} else if (isCacheFlushCommand(commandType)) {
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream);
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (CL_COMMAND_BARRIER == commandType) {
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
}
if (eventBuilder.getEvent()) {
// Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
if (waitlistEvent->getTimestampPacketNodes()) {
@ -383,11 +388,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
}
}
if (event) {
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
event->addTimestampPacketNodes(*timestampPacketContainer);
}
if (this->isProfilingEnabled()) {
if (event && this->isProfilingEnabled()) {
// Get allocation for timestamps
hwTimeStamps = event->getHwTimeStampNode();
if (this->isPerfCountersEnabled()) {
@ -396,7 +397,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
event->copyPerfCounters(this->getPerfCountersConfigData());
}
}
}
if (parentKernel) {
parentKernel->createReflectionSurface();
@ -432,6 +432,24 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
}
// Emits a cache flush into the given command stream.
//
// The CSR dependencies in csrDeps are programmed into commandStream first, then the
// flush itself is delegated to submitCacheFlush().
//
// surfaces / numSurfaces - surfaces forwarded unchanged to submitCacheFlush().
// commandStream          - stream that receives the dependency programming and the flush.
// csrDeps                - dependencies programmed ahead of the flush.
//
// The post-sync address handed to submitCacheFlush() is 0 unless the command stream
// receiver reports timestamp packet writes as enabled; in that case it is the GPU
// address of the ContextStart data write of the first node in timestampPacketContainer.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
                                                             size_t numSurfaces,
                                                             LinearStream *commandStream,
                                                             CsrDependencies &csrDeps) {
    TimestampPacketHelper::programCsrDependencies<GfxFamily>(*commandStream, csrDeps);

    if (!getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
        // No timestamp packets in use: flush without a post-sync write target.
        submitCacheFlush(surfaces, numSurfaces, commandStream, uint64_t{0});
        return;
    }

    // at(0) is used on purpose: it is bounds-checked, unlike operator[].
    auto postSyncNode = timestampPacketContainer->peekNodes().at(0);
    const uint64_t postSyncGpuAddress =
        TimestampPacketHelper::getGpuAddressForDataWrite(*postSyncNode, TimestampPacket::DataIndex::ContextStart);
    submitCacheFlush(surfaces, numSurfaces, commandStream, postSyncGpuAddress);
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(Kernel *parentKernel,
DeviceQueueHw<GfxFamily> *devQueueHw,
@ -769,6 +787,11 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
EventBuilder &eventBuilder,
uint32_t taskLevel) {
if (timestampPacketContainer) {
timestampPacketContainer->makeResident(getCommandStreamReceiver());
previousTimestampPacketNodes->makeResident(getCommandStreamReceiver());
}
auto requiresCoherency = false;
for (auto surface : CreateRange(surfaces, surfaceCount)) {
surface->makeResident(getCommandStreamReceiver());
@ -776,7 +799,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
}
DispatchFlags dispatchFlags = {};
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
dispatchFlags.csrDependencies.fillFromEventsRequestAndMakeResident(eventsRequest, getCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
}
CompletionStamp completionStamp = getCommandStreamReceiver().flushTask(
commandStream,
commandStreamStart,

View File

@ -7,12 +7,14 @@
#include "runtime/event/event_builder.h"
#include "runtime/event/user_event.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/memory_manager/surface.h"
#include "runtime/os_interface/os_context.h"
#include "test.h"
#include "unit_tests/fixtures/enqueue_handler_fixture.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_graphics_allocation.h"
#include "unit_tests/mocks/mock_timestamp_container.h"
namespace NEO {
@ -38,7 +40,8 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
EventBuilder eventBuilder;
Surface *surfaces[] = {surface.get()};
auto blocking = true;
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, nullptr, eventsRequest, eventBuilder, 0);
TimestampPacketContainer previousTimestampPacketNodes;
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getCommandStreamReceiver().getOsContext().getContextId()), 1u);
}
HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenUsedSizeEqualZero) {
@ -49,4 +52,36 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), 0u);
}
// With timestamp packet writes enabled on the CSR, enqueueing a command that the mock
// queue reports as requiring a cache flush must leave a non-null first node in the
// queue's timestamp packet container.
HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteEnabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenObtainNewStamp) {
    using CacheFlushQueue = MockCommandQueueWithCacheFlush<FamilyType>;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = true;
    // The mock allocator is handed over to the CSR via reset().
    ultCsr.timestampPacketAllocator.reset(new MockTagAllocator<>(pDevice->getMemoryManager()));

    std::unique_ptr<CacheFlushQueue> cmdQueue(new CacheFlushQueue(context, pDevice, 0));
    cmdQueue->commandRequireCacheFlush = true;

    cl_event outEvent;
    cmdQueue->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, &outEvent);

    auto firstNode = cmdQueue->timestampPacketContainer->peekNodes().at(0);
    EXPECT_NE(nullptr, firstNode);

    clReleaseEvent(outEvent);
}
// With timestamp packet writes disabled on the CSR, enqueueing a command that the mock
// queue reports as requiring a cache flush must not create a timestamp packet container
// on the queue.
HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteDisabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenTimeStampContainerIsNotCreated) {
    using CacheFlushQueue = MockCommandQueueWithCacheFlush<FamilyType>;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = false;
    // An allocator is still installed, although timestamp packet writes are switched off.
    ultCsr.timestampPacketAllocator.reset(new MockTagAllocator<>(pDevice->getMemoryManager()));

    std::unique_ptr<CacheFlushQueue> cmdQueue(new CacheFlushQueue(context, pDevice, 0));
    cmdQueue->commandRequireCacheFlush = true;

    cl_event outEvent;
    cmdQueue->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, &outEvent);

    EXPECT_EQ(nullptr, cmdQueue->timestampPacketContainer.get());

    clReleaseEvent(outEvent);
}
} // namespace NEO

View File

@ -22,6 +22,7 @@
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_mdi.h"
#include "unit_tests/mocks/mock_memory_manager.h"
#include "unit_tests/mocks/mock_timestamp_container.h"
#include "unit_tests/utilities/base_object_utils.h"
#include "gmock/gmock.h"
@ -35,45 +36,6 @@ struct TimestampPacketSimpleTests : public ::testing::Test {
using TimestampPacket::implicitDependenciesCount;
};
// Test double for TagAllocator. Every node passed to returnTag() or
// returnTagToFreePool() is recorded in a public vector before the call is forwarded
// to the base-class implementation, so tests can inspect which nodes were returned.
template <typename TagType = TimestampPacket>
class MockTagAllocator : public TagAllocator<TagType> {
  public:
    using BaseClass = TagAllocator<TagType>;
    using NodeType = typename BaseClass::NodeType;

    // Re-exported from the base class so tests can read them directly.
    using BaseClass::freeTags;
    using BaseClass::usedTags;

    // Nodes seen by returnTag(), in call order.
    std::vector<NodeType *> releaseReferenceNodes;
    // Nodes seen by returnTagToFreePool(), in call order.
    std::vector<NodeType *> returnedToFreePoolNodes;

    // Forwards memoryManager and tagCount to the base allocator; the third base
    // argument is fixed at 10 (its meaning is defined by TagAllocator, not shown here).
    MockTagAllocator(MemoryManager *memoryManager, size_t tagCount = 10)
        : BaseClass(memoryManager, tagCount, 10) {}

    void returnTag(NodeType *node) override {
        releaseReferenceNodes.emplace_back(node);
        BaseClass::returnTag(node);
    }

    void returnTagToFreePool(NodeType *node) override {
        returnedToFreePoolNodes.emplace_back(node);
        BaseClass::returnTagToFreePool(node);
    }
};
// TimestampPacketContainer test double that is filled with tags at construction time
// and exposes its node storage to tests.
class MockTimestampPacketContainer : public TimestampPacketContainer {
  public:
    using TimestampPacketContainer::timestampPacketNodes;

    // Requests numberOfPreallocatedTags tags from tagAllocator, one getTag() call per
    // tag, and adds each of them to this container.
    MockTimestampPacketContainer(TagAllocator<TimestampPacket> &tagAllocator, size_t numberOfPreallocatedTags) {
        for (size_t remaining = numberOfPreallocatedTags; remaining > 0; --remaining) {
            add(tagAllocator.getTag());
        }
    }

    // Returns the node stored at the given position (bounds-checked through at()).
    TagNode<TimestampPacket> *getNode(size_t position) {
        auto *node = timestampPacketNodes.at(position);
        return node;
    }
};
void setTagToReadyState(TagNode<TimestampPacket> *tagNode) {
auto &data = tagNode->tagForCpuAccess->data;
std::fill(data.begin(), data.end(), 0u);

View File

@ -70,6 +70,7 @@ set(IGDRCL_SRCS_tests_mocks
${CMAKE_CURRENT_SOURCE_DIR}/mock_svm_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_stream.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h
)
if(WIN32)

View File

@ -0,0 +1,51 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "runtime/helpers/timestamp_packet.h"
namespace NEO {
// Test double for TagAllocator. Every node passed to returnTag() or
// returnTagToFreePool() is recorded in a public vector before the call is forwarded
// to the base-class implementation, so tests can inspect which nodes were returned.
template <typename TagType = TimestampPacket>
class MockTagAllocator : public TagAllocator<TagType> {
  public:
    using BaseClass = TagAllocator<TagType>;
    using NodeType = typename BaseClass::NodeType;

    // Re-exported from the base class so tests can read them directly.
    using BaseClass::freeTags;
    using BaseClass::usedTags;

    // Nodes seen by returnTag(), in call order.
    std::vector<NodeType *> releaseReferenceNodes;
    // Nodes seen by returnTagToFreePool(), in call order.
    std::vector<NodeType *> returnedToFreePoolNodes;

    // Forwards memoryManager and tagCount to the base allocator; the third base
    // argument is fixed at 10 (its meaning is defined by TagAllocator, not shown here).
    MockTagAllocator(MemoryManager *memoryManager, size_t tagCount = 10)
        : BaseClass(memoryManager, tagCount, 10) {}

    void returnTag(NodeType *node) override {
        releaseReferenceNodes.emplace_back(node);
        BaseClass::returnTag(node);
    }

    void returnTagToFreePool(NodeType *node) override {
        returnedToFreePoolNodes.emplace_back(node);
        BaseClass::returnTagToFreePool(node);
    }
};
// TimestampPacketContainer test double that is filled with tags at construction time
// and exposes its node storage to tests.
class MockTimestampPacketContainer : public TimestampPacketContainer {
  public:
    using TimestampPacketContainer::timestampPacketNodes;

    // Requests numberOfPreallocatedTags tags from tagAllocator, one getTag() call per
    // tag, and adds each of them to this container.
    MockTimestampPacketContainer(TagAllocator<TimestampPacket> &tagAllocator, size_t numberOfPreallocatedTags) {
        for (size_t remaining = numberOfPreallocatedTags; remaining > 0; --remaining) {
            add(tagAllocator.getTag());
        }
    }

    // Returns the node stored at the given position (bounds-checked through at()).
    TagNode<TimestampPacket> *getNode(size_t position) {
        auto *node = timestampPacketNodes.at(position);
        return node;
    }
};
} // namespace NEO

View File

@ -859,8 +859,8 @@ HWTEST_F(ProfilingWithPerfCountersTests,
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
}
struct MockTimestampPacketContainer : public TimestampPacketContainer {
~MockTimestampPacketContainer() override {
struct MockTimestampContainer : public TimestampPacketContainer {
~MockTimestampContainer() override {
for (const auto &node : timestampPacketNodes) {
delete node->tagForCpuAccess;
delete node;
@ -873,7 +873,7 @@ struct ProfilingTimestampPacketsTest : public ::testing::Test {
void SetUp() override {
DebugManager.flags.ReturnRawGpuTimestamps.set(true);
cmdQ->setProfilingEnabled();
ev->timestampPacketContainer = std::make_unique<MockTimestampPacketContainer>();
ev->timestampPacketContainer = std::make_unique<MockTimestampContainer>();
}
void addTimestampNode(int contextStart, int contextEnd, int globalStart) {