mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add events support for cache flushes
Related-To: NEO-2536
Change-Id: Iea9e9b08df0225ce5a126ab950621576b3880bbe
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
282b0d49ee
commit
da19e924f5
@ -356,7 +356,12 @@ class CommandQueueHw : public CommandQueue {
|
||||
uint32_t taskLevel);
|
||||
void processDispatchForCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream);
|
||||
LinearStream *commandStream,
|
||||
CsrDependencies &csrDeps);
|
||||
void submitCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream,
|
||||
uint64_t postSyncAddress);
|
||||
|
||||
bool isCacheFlushCommand(uint32_t commandType) override;
|
||||
|
||||
|
@ -46,9 +46,10 @@ bool CommandQueueHw<Family>::requiresCacheFlushAfterWalkerBasedOnProperties(cons
|
||||
return false;
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
||||
void CommandQueueHw<GfxFamily>::submitCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream) {
|
||||
LinearStream *commandStream,
|
||||
uint64_t postSyncAddress) {
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isCacheFlushCommand(uint32_t commandType) {
|
||||
|
@ -202,25 +202,30 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
if (!multiDispatchInfo.empty()) {
|
||||
obtainNewTimestampPacketNodes(estimateTimestampPacketNodesCount(multiDispatchInfo), previousTimestampPacketNodes);
|
||||
csrDeps.push_back(&previousTimestampPacketNodes);
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes);
|
||||
csrDeps.push_back(&previousTimestampPacketNodes);
|
||||
}
|
||||
}
|
||||
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired, multiDispatchInfo, surfacesForResidency, numSurfaceForResidency);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
if (eventBuilder.getEvent() && getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
}
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
|
||||
previousTimestampPacketNodes, preemption);
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream);
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
|
||||
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
if (CL_COMMAND_BARRIER == commandType) {
|
||||
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
|
||||
}
|
||||
if (eventBuilder.getEvent()) {
|
||||
// Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
if (waitlistEvent->getTimestampPacketNodes()) {
|
||||
@ -383,11 +388,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
}
|
||||
}
|
||||
|
||||
if (event) {
|
||||
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
event->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
}
|
||||
if (this->isProfilingEnabled()) {
|
||||
if (event && this->isProfilingEnabled()) {
|
||||
// Get allocation for timestamps
|
||||
hwTimeStamps = event->getHwTimeStampNode();
|
||||
if (this->isPerfCountersEnabled()) {
|
||||
@ -396,7 +397,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
event->copyPerfCounters(this->getPerfCountersConfigData());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (parentKernel) {
|
||||
parentKernel->createReflectionSurface();
|
||||
@ -432,6 +432,24 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
|
||||
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream,
|
||||
CsrDependencies &csrDeps) {
|
||||
|
||||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(*commandStream, csrDeps);
|
||||
|
||||
uint64_t postSyncAddress = 0;
|
||||
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacketNodeForPostSync = timestampPacketContainer->peekNodes().at(0);
|
||||
postSyncAddress = TimestampPacketHelper::getGpuAddressForDataWrite(*timestampPacketNodeForPostSync, TimestampPacket::DataIndex::ContextStart);
|
||||
}
|
||||
|
||||
submitCacheFlush(surfaces, numSurfaces, commandStream, postSyncAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(Kernel *parentKernel,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
@ -769,6 +787,11 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel) {
|
||||
|
||||
if (timestampPacketContainer) {
|
||||
timestampPacketContainer->makeResident(getCommandStreamReceiver());
|
||||
previousTimestampPacketNodes->makeResident(getCommandStreamReceiver());
|
||||
}
|
||||
|
||||
auto requiresCoherency = false;
|
||||
for (auto surface : CreateRange(surfaces, surfaceCount)) {
|
||||
surface->makeResident(getCommandStreamReceiver());
|
||||
@ -776,7 +799,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags = {};
|
||||
|
||||
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
dispatchFlags.csrDependencies.fillFromEventsRequestAndMakeResident(eventsRequest, getCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
}
|
||||
CompletionStamp completionStamp = getCommandStreamReceiver().flushTask(
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
|
@ -7,12 +7,14 @@
|
||||
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/os_interface/os_context.h"
|
||||
#include "test.h"
|
||||
#include "unit_tests/fixtures/enqueue_handler_fixture.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
#include "unit_tests/mocks/mock_graphics_allocation.h"
|
||||
#include "unit_tests/mocks/mock_timestamp_container.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@ -38,7 +40,8 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
|
||||
EventBuilder eventBuilder;
|
||||
Surface *surfaces[] = {surface.get()};
|
||||
auto blocking = true;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, nullptr, eventsRequest, eventBuilder, 0);
|
||||
TimestampPacketContainer previousTimestampPacketNodes;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getCommandStreamReceiver().getOsContext().getContextId()), 1u);
|
||||
}
|
||||
HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenUsedSizeEqualZero) {
|
||||
@ -49,4 +52,36 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE
|
||||
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), 0u);
|
||||
}
|
||||
// With timestamp packet writes enabled on the CSR, a marker enqueue on a queue whose
// commandRequireCacheFlush flag is set must leave a non-null node at index 0 of the
// queue's timestamp packet container.
HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteEnabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenObtainNewStamp) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = true;

    // The allocator is handed to the CSR immediately via reset().
    auto mockTagAllocator = new MockTagAllocator<>(pDevice->getMemoryManager());
    csr.timestampPacketAllocator.reset(mockTagAllocator);
    std::unique_ptr<MockCommandQueueWithCacheFlush<FamilyType>> mockCmdQ(new MockCommandQueueWithCacheFlush<FamilyType>(context, pDevice, 0));
    mockCmdQ->commandRequireCacheFlush = true;

    // Start from a null handle so clReleaseEvent below never receives an indeterminate value.
    cl_event event = nullptr;

    mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, &event);

    // Report a missing container as a test failure instead of dereferencing a null pointer.
    auto container = mockCmdQ->timestampPacketContainer.get();
    EXPECT_NE(nullptr, container);
    if (container != nullptr) {
        auto node1 = container->peekNodes().at(0);
        EXPECT_NE(nullptr, node1);
    }
    clReleaseEvent(event);
}
|
||||
// With timestamp packet writes disabled on the CSR, a marker enqueue on a queue whose
// commandRequireCacheFlush flag is set must leave the queue without a timestamp packet
// container.
HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteDisabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenTimeStampContainerIsNotCreated) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = false;

    // The allocator is handed to the CSR immediately via reset().
    auto mockTagAllocator = new MockTagAllocator<>(pDevice->getMemoryManager());
    csr.timestampPacketAllocator.reset(mockTagAllocator);
    std::unique_ptr<MockCommandQueueWithCacheFlush<FamilyType>> mockCmdQ(new MockCommandQueueWithCacheFlush<FamilyType>(context, pDevice, 0));
    mockCmdQ->commandRequireCacheFlush = true;

    // Start from a null handle so clReleaseEvent below never receives an indeterminate value.
    cl_event event = nullptr;

    mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, &event);
    auto container = mockCmdQ->timestampPacketContainer.get();
    EXPECT_EQ(nullptr, container);
    clReleaseEvent(event);
}
|
||||
} // namespace NEO
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
#include "unit_tests/mocks/mock_mdi.h"
|
||||
#include "unit_tests/mocks/mock_memory_manager.h"
|
||||
#include "unit_tests/mocks/mock_timestamp_container.h"
|
||||
#include "unit_tests/utilities/base_object_utils.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
@ -35,45 +36,6 @@ struct TimestampPacketSimpleTests : public ::testing::Test {
|
||||
using TimestampPacket::implicitDependenciesCount;
|
||||
};
|
||||
|
||||
template <typename TagType = TimestampPacket>
|
||||
class MockTagAllocator : public TagAllocator<TagType> {
|
||||
public:
|
||||
using BaseClass = TagAllocator<TagType>;
|
||||
using BaseClass::freeTags;
|
||||
using BaseClass::usedTags;
|
||||
using NodeType = typename BaseClass::NodeType;
|
||||
|
||||
MockTagAllocator(MemoryManager *memoryManager, size_t tagCount = 10) : BaseClass(memoryManager, tagCount, 10) {}
|
||||
|
||||
void returnTag(NodeType *node) override {
|
||||
releaseReferenceNodes.push_back(node);
|
||||
BaseClass::returnTag(node);
|
||||
}
|
||||
|
||||
void returnTagToFreePool(NodeType *node) override {
|
||||
returnedToFreePoolNodes.push_back(node);
|
||||
BaseClass::returnTagToFreePool(node);
|
||||
}
|
||||
|
||||
std::vector<NodeType *> releaseReferenceNodes;
|
||||
std::vector<NodeType *> returnedToFreePoolNodes;
|
||||
};
|
||||
|
||||
class MockTimestampPacketContainer : public TimestampPacketContainer {
|
||||
public:
|
||||
using TimestampPacketContainer::timestampPacketNodes;
|
||||
|
||||
MockTimestampPacketContainer(TagAllocator<TimestampPacket> &tagAllocator, size_t numberOfPreallocatedTags) {
|
||||
for (size_t i = 0; i < numberOfPreallocatedTags; i++) {
|
||||
add(tagAllocator.getTag());
|
||||
}
|
||||
}
|
||||
|
||||
TagNode<TimestampPacket> *getNode(size_t position) {
|
||||
return timestampPacketNodes.at(position);
|
||||
}
|
||||
};
|
||||
|
||||
void setTagToReadyState(TagNode<TimestampPacket> *tagNode) {
|
||||
auto &data = tagNode->tagForCpuAccess->data;
|
||||
std::fill(data.begin(), data.end(), 0u);
|
||||
|
@ -70,6 +70,7 @@ set(IGDRCL_SRCS_tests_mocks
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_svm_manager.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_stream.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
|
51
unit_tests/mocks/mock_timestamp_container.h
Normal file
51
unit_tests/mocks/mock_timestamp_container.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename TagType = TimestampPacket>
|
||||
class MockTagAllocator : public TagAllocator<TagType> {
|
||||
public:
|
||||
using BaseClass = TagAllocator<TagType>;
|
||||
using BaseClass::freeTags;
|
||||
using BaseClass::usedTags;
|
||||
using NodeType = typename BaseClass::NodeType;
|
||||
|
||||
MockTagAllocator(MemoryManager *memoryManager, size_t tagCount = 10) : BaseClass(memoryManager, tagCount, 10) {}
|
||||
|
||||
void returnTag(NodeType *node) override {
|
||||
releaseReferenceNodes.push_back(node);
|
||||
BaseClass::returnTag(node);
|
||||
}
|
||||
|
||||
void returnTagToFreePool(NodeType *node) override {
|
||||
returnedToFreePoolNodes.push_back(node);
|
||||
BaseClass::returnTagToFreePool(node);
|
||||
}
|
||||
|
||||
std::vector<NodeType *> releaseReferenceNodes;
|
||||
std::vector<NodeType *> returnedToFreePoolNodes;
|
||||
};
|
||||
|
||||
class MockTimestampPacketContainer : public TimestampPacketContainer {
|
||||
public:
|
||||
using TimestampPacketContainer::timestampPacketNodes;
|
||||
|
||||
MockTimestampPacketContainer(TagAllocator<TimestampPacket> &tagAllocator, size_t numberOfPreallocatedTags) {
|
||||
for (size_t i = 0; i < numberOfPreallocatedTags; i++) {
|
||||
add(tagAllocator.getTag());
|
||||
}
|
||||
}
|
||||
|
||||
TagNode<TimestampPacket> *getNode(size_t position) {
|
||||
return timestampPacketNodes.at(position);
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
@ -859,8 +859,8 @@ HWTEST_F(ProfilingWithPerfCountersTests,
|
||||
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
|
||||
}
|
||||
|
||||
struct MockTimestampPacketContainer : public TimestampPacketContainer {
|
||||
~MockTimestampPacketContainer() override {
|
||||
struct MockTimestampContainer : public TimestampPacketContainer {
|
||||
~MockTimestampContainer() override {
|
||||
for (const auto &node : timestampPacketNodes) {
|
||||
delete node->tagForCpuAccess;
|
||||
delete node;
|
||||
@ -873,7 +873,7 @@ struct ProfilingTimestampPacketsTest : public ::testing::Test {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.ReturnRawGpuTimestamps.set(true);
|
||||
cmdQ->setProfilingEnabled();
|
||||
ev->timestampPacketContainer = std::make_unique<MockTimestampPacketContainer>();
|
||||
ev->timestampPacketContainer = std::make_unique<MockTimestampContainer>();
|
||||
}
|
||||
|
||||
void addTimestampNode(int contextStart, int contextEnd, int globalStart) {
|
||||
|
Reference in New Issue
Block a user