mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Refactor timestamp wait mechanism
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
9d063f29d2
commit
1f0c58d0bf
@ -230,28 +230,30 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState
|
||||
return false;
|
||||
}
|
||||
|
||||
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) {
|
||||
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) {
|
||||
WAIT_ENTER()
|
||||
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
|
||||
|
||||
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
|
||||
if (!skipWait) {
|
||||
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
|
||||
|
||||
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
|
||||
flushStampToWait,
|
||||
useQuickKmdSleep,
|
||||
forcePowerSavingMode);
|
||||
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
|
||||
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
|
||||
flushStampToWait,
|
||||
useQuickKmdSleep,
|
||||
forcePowerSavingMode);
|
||||
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
|
||||
}
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
|
||||
}
|
||||
|
||||
for (const CopyEngineState &copyEngine : copyEnginesToWait) {
|
||||
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
|
||||
bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
|
||||
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
|
||||
for (const CopyEngineState &copyEngine : copyEnginesToWait) {
|
||||
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
|
||||
bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
|
||||
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
|
||||
}
|
||||
}
|
||||
|
||||
if (cleanTemporaryAllocationList) {
|
||||
@ -957,8 +959,10 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandQueue::isTimestampWaitEnabled() {
|
||||
auto enabled = false;
|
||||
bool CommandQueue::isWaitForTimestampsEnabled() {
|
||||
auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto enabled = CommandQueue::isTimestampWaitEnabled();
|
||||
enabled &= hwHelper.isTimestampWaitSupported();
|
||||
|
||||
switch (DebugManager.flags.EnableTimestampWait.get()) {
|
||||
case 0:
|
||||
@ -987,9 +991,10 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
|
||||
}
|
||||
}
|
||||
|
||||
auto waitedOnTimestamps = waitForTimestamps(taskCount);
|
||||
|
||||
TimestampPacketContainer nodesToRelease;
|
||||
if (deferredTimestampPackets) {
|
||||
waitForTimestamps(taskCount);
|
||||
deferredTimestampPackets->swapNodes(nodesToRelease);
|
||||
}
|
||||
|
||||
@ -999,7 +1004,7 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
|
||||
activeBcsStates.push_back(state);
|
||||
}
|
||||
}
|
||||
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList);
|
||||
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
|
||||
|
||||
if (printfHandler) {
|
||||
printfHandler->printEnqueueOutput();
|
||||
|
@ -204,14 +204,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const;
|
||||
|
||||
bool isTimestampWaitEnabled();
|
||||
virtual void waitForTimestamps(uint32_t taskCount) = 0;
|
||||
bool isWaitForTimestampsEnabled();
|
||||
virtual bool waitForTimestamps(uint32_t taskCount) = 0;
|
||||
|
||||
MOCKABLE_VIRTUAL bool isQueueBlocked();
|
||||
|
||||
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList);
|
||||
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait);
|
||||
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
|
||||
this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true);
|
||||
this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false);
|
||||
}
|
||||
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList);
|
||||
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) {
|
||||
@ -240,6 +240,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
size_t minRequiredSize, IndirectHeap *&indirectHeap);
|
||||
|
||||
static bool isAssignEngineRoundRobinEnabled();
|
||||
static bool isTimestampWaitEnabled();
|
||||
|
||||
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
||||
|
||||
|
@ -446,7 +446,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
bool isCacheFlushCommand(uint32_t commandType) const override;
|
||||
|
||||
void waitForTimestamps(uint32_t taskCount) override;
|
||||
bool waitForTimestamps(uint32_t taskCount) override;
|
||||
|
||||
MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const;
|
||||
|
||||
|
@ -139,12 +139,14 @@ template <typename TSPacketType>
|
||||
inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container) {
|
||||
bool waited = false;
|
||||
|
||||
for (const auto &timestamp : container->peekNodes()) {
|
||||
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
|
||||
while (timestamp->getContextEndValue(i) == 1) {
|
||||
WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
|
||||
if (container) {
|
||||
for (const auto &timestamp : container->peekNodes()) {
|
||||
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
|
||||
while (timestamp->getContextEndValue(i) == 1) {
|
||||
WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
|
||||
}
|
||||
waited = true;
|
||||
}
|
||||
waited = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,20 +154,19 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::waitForTimestamps(uint32_t taskCount) {
|
||||
bool CommandQueueHw<Family>::waitForTimestamps(uint32_t taskCount) {
|
||||
using TSPacketType = typename Family::TimestampPacketType;
|
||||
bool waited = false;
|
||||
|
||||
if (isTimestampWaitEnabled()) {
|
||||
bool waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get());
|
||||
if (isWaitForTimestampsEnabled()) {
|
||||
waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get());
|
||||
|
||||
if (isOOQEnabled()) {
|
||||
waited |= waitForTimestampsWithinContainer<TSPacketType>(deferredTimestampPackets.get());
|
||||
}
|
||||
|
||||
if (waited) {
|
||||
getGpgpuCommandStreamReceiver().updateTagFromCpu(taskCount);
|
||||
}
|
||||
}
|
||||
|
||||
return waited;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
@ -12,6 +12,7 @@ endif()
|
||||
|
||||
set(RUNTIME_SRCS_DLL_BASE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_dll.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp
|
||||
@ -42,7 +43,6 @@ set(RUNTIME_SRCS_DLL_BASE
|
||||
append_sources_from_properties(RUNTIME_SRCS_DLL_BASE NEO_CORE_SRCS_LINK)
|
||||
|
||||
set(RUNTIME_SRCS_DLL_LINUX
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linux/command_queue_linux_dll.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp
|
||||
@ -54,7 +54,6 @@ set(RUNTIME_SRCS_DLL_LINUX
|
||||
)
|
||||
|
||||
set(RUNTIME_SRCS_DLL_WINDOWS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/windows/command_queue_windows_dll.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/windows/options_windows.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/windows/os_interface.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/windows/environment_variables.cpp
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||
auto assignEngineRoundRobin = false;
|
||||
|
||||
@ -17,4 +18,9 @@ bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||
|
||||
return assignEngineRoundRobin;
|
||||
}
|
||||
|
||||
bool CommandQueue::isTimestampWaitEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
@ -1,20 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
|
||||
namespace NEO {
|
||||
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||
auto assignEngineRoundRobin = false;
|
||||
|
||||
if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) {
|
||||
assignEngineRoundRobin = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
|
||||
}
|
||||
|
||||
return assignEngineRoundRobin;
|
||||
}
|
||||
} // namespace NEO
|
@ -100,6 +100,21 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur
|
||||
EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoWaitAndTagIsNotUpdated) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableTimestampPacket.set(0);
|
||||
DebugManager.flags.EnableTimestampWait.set(4);
|
||||
ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDeviceWithDebuggerActive>(executionEnvironment, 0u));
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
|
||||
auto taskCount = device->getUltCommandStreamReceiver<FamilyType>().peekLatestFlushedTaskCount();
|
||||
|
||||
cmdQ.waitForTimestamps(101u);
|
||||
|
||||
EXPECT_EQ(device->getUltCommandStreamReceiver<FamilyType>().peekLatestFlushedTaskCount(), taskCount);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueHwTest, WhenDebugSurfaceIsAllocatedThenBufferIsZeroed) {
|
||||
ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
|
||||
executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary));
|
||||
|
@ -140,32 +140,34 @@ TEST(CommandQueue, WhenConstructingCommandQueueThenQueueFamilyIsNotSelected) {
|
||||
|
||||
TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenReturnProperValue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||
ultHwConfig.useWaitForTimestamps = true;
|
||||
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);
|
||||
|
||||
{
|
||||
DebugManager.flags.EnableTimestampWait.set(0);
|
||||
EXPECT_FALSE(cmdQ.isTimestampWaitEnabled());
|
||||
EXPECT_FALSE(cmdQ.isWaitForTimestampsEnabled());
|
||||
}
|
||||
|
||||
{
|
||||
DebugManager.flags.EnableTimestampWait.set(1);
|
||||
EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());
|
||||
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());
|
||||
}
|
||||
|
||||
{
|
||||
DebugManager.flags.EnableTimestampWait.set(2);
|
||||
EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());
|
||||
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());
|
||||
}
|
||||
|
||||
{
|
||||
DebugManager.flags.EnableTimestampWait.set(3);
|
||||
EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());
|
||||
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());
|
||||
}
|
||||
|
||||
{
|
||||
DebugManager.flags.EnableTimestampWait.set(4);
|
||||
EXPECT_TRUE(cmdQ.isTimestampWaitEnabled());
|
||||
EXPECT_TRUE(cmdQ.isWaitForTimestampsEnabled());
|
||||
}
|
||||
}
|
||||
|
||||
@ -884,7 +886,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test {
|
||||
template <typename Family>
|
||||
struct MyCmdQueue : public CommandQueueHw<Family> {
|
||||
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
|
||||
requestedUseQuickKmdSleep = useQuickKmdSleep;
|
||||
waitUntilCompleteCounter++;
|
||||
}
|
||||
@ -977,7 +979,7 @@ HWTEST_F(WaitUntilCompletionTests, givenCommandQueueAndCleanTemporaryAllocationL
|
||||
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
|
||||
uint32_t taskCount = 0u;
|
||||
StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
|
||||
cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false);
|
||||
cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, false);
|
||||
|
||||
auto cmdStreamPtr = &device->getGpgpuCommandStreamReceiver();
|
||||
|
||||
|
@ -111,9 +111,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest {
|
||||
public:
|
||||
MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
|
||||
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
|
||||
waitUntilCompleteCalled = true;
|
||||
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
|
||||
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
|
||||
}
|
||||
|
||||
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override {
|
||||
|
@ -803,9 +803,9 @@ class MyCmdQ : public MockCommandQueueHw<FamilyType> {
|
||||
auxTranslationDirection);
|
||||
}
|
||||
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
|
||||
waitCalled++;
|
||||
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
|
||||
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
|
||||
}
|
||||
|
||||
std::vector<AuxTranslationDirection> auxTranslationDirections;
|
||||
|
@ -775,18 +775,16 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
|
||||
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenWaitOnTimestampAndDoNotUpdateTagFromCpu) {
|
||||
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.EnableTimestampWait.set(1);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
csr.callBaseWaitForCompletionWithTimeout = false;
|
||||
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
|
||||
|
||||
const auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
|
||||
auto taskCount = *csr.getTagAddress();
|
||||
auto latestFlushedTaskCount = csr.peekLatestFlushedTaskCount();
|
||||
|
||||
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
|
||||
TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();
|
||||
|
||||
@ -795,16 +793,17 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueT
|
||||
|
||||
cmdQ->finish();
|
||||
|
||||
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), latestFlushedTaskCount);
|
||||
EXPECT_EQ(*csr.getTagAddress(), taskCount);
|
||||
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestampAndUpdateTagFromCpu) {
|
||||
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestamp) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.EnableTimestampWait.set(1);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
csr.callBaseWaitForCompletionWithTimeout = false;
|
||||
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
|
||||
|
||||
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
|
||||
@ -819,23 +818,22 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimes
|
||||
|
||||
typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2};
|
||||
for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) {
|
||||
deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
|
||||
timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
|
||||
}
|
||||
|
||||
cmdQ->finish();
|
||||
|
||||
const auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
|
||||
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 2u);
|
||||
EXPECT_EQ(*csr.getTagAddress(), 2u);
|
||||
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestampAndUpdateTagFromCpu) {
|
||||
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestamp) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.EnableTimestampWait.set(1);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
csr.callBaseWaitForCompletionWithTimeout = false;
|
||||
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
|
||||
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), props);
|
||||
|
||||
@ -857,9 +855,7 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitO
|
||||
|
||||
cmdQ->finish();
|
||||
|
||||
const auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
|
||||
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 2u);
|
||||
EXPECT_EQ(*csr.getTagAddress(), 2u);
|
||||
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
|
||||
|
||||
cmdQ.reset();
|
||||
}
|
||||
|
@ -5,9 +5,12 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||
auto assignEngineRoundRobin = false;
|
||||
|
||||
@ -17,4 +20,9 @@ bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||
|
||||
return assignEngineRoundRobin;
|
||||
}
|
||||
|
||||
bool CommandQueue::isTimestampWaitEnabled() {
|
||||
return ultHwConfig.useWaitForTimestamps;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
@ -27,7 +27,7 @@ add_executable(igdrcl_${target_name}
|
||||
${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp
|
||||
${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp
|
||||
${NEO_SOURCE_DIR}/opencl/source/dll/linux/command_queue_linux_dll.cpp
|
||||
${NEO_SOURCE_DIR}/opencl/source/dll/command_queue_dll.cpp
|
||||
${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp
|
||||
${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}drm_other_requests.cpp
|
||||
)
|
||||
|
@ -803,10 +803,14 @@ TEST(DirectSubmissionControllerTest, whenCheckDirectSubmissionControllerSupportT
|
||||
EXPECT_TRUE(DirectSubmissionController::isSupported());
|
||||
}
|
||||
|
||||
TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsTrue) {
|
||||
TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsFalse) {
|
||||
EXPECT_FALSE(CommandQueue::isAssignEngineRoundRobinEnabled());
|
||||
}
|
||||
|
||||
TEST(CommandQueueTest, whenCheckEngineTimestampWaitEnabledThenReturnsFalse) {
|
||||
EXPECT_FALSE(CommandQueue::isTimestampWaitEnabled());
|
||||
}
|
||||
|
||||
TEST(CommandQueueTest, givenEnableCmdQRoundRobindEngineAssignSetWhenCheckEngineRoundRobinAssignThenReturnsTrue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
|
||||
|
@ -90,9 +90,9 @@ class MockCommandQueue : public CommandQueue {
|
||||
return writeBufferRetValue;
|
||||
}
|
||||
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
|
||||
latestTaskCountWaited = gpgpuTaskCountToWait;
|
||||
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
|
||||
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
|
||||
}
|
||||
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
|
||||
@ -202,7 +202,7 @@ class MockCommandQueue : public CommandQueue {
|
||||
|
||||
bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; }
|
||||
|
||||
void waitForTimestamps(uint32_t taskCount) override{};
|
||||
bool waitForTimestamps(uint32_t taskCount) override { return false; };
|
||||
|
||||
bool releaseIndirectHeapCalled = false;
|
||||
|
||||
@ -333,9 +333,9 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
useBcsCsrOnNotifyEnabled = notifyBcsCsr;
|
||||
}
|
||||
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
|
||||
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
|
||||
latestTaskCountWaited = gpgpuTaskCountToWait;
|
||||
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
|
||||
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
|
||||
}
|
||||
|
||||
bool isCacheFlushForBcsRequired() const override {
|
||||
|
@ -674,16 +674,6 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
|
||||
return true;
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::updateTagFromCpu(uint32_t taskCount) {
|
||||
this->latestFlushedTaskCount.store(taskCount);
|
||||
|
||||
auto partitionAddress = getTagAddress();
|
||||
for (uint32_t i = 0; i < activePartitions; i++) {
|
||||
*partitionAddress = taskCount;
|
||||
partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset);
|
||||
}
|
||||
}
|
||||
|
||||
TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() {
|
||||
if (profilingTimeStampAllocator.get() == nullptr) {
|
||||
std::vector<uint32_t> rootDeviceIndices = {rootDeviceIndex};
|
||||
|
@ -225,8 +225,6 @@ class CommandStreamReceiver {
|
||||
virtual void updateTagFromWait() = 0;
|
||||
virtual bool isUpdateTagFromWaitEnabled() = 0;
|
||||
|
||||
void updateTagFromCpu(uint32_t taskCount);
|
||||
|
||||
ScratchSpaceController *getScratchSpaceController() const {
|
||||
return scratchSpaceController.get();
|
||||
}
|
||||
|
@ -64,6 +64,7 @@ class HwHelper {
|
||||
static bool compressedImagesSupported(const HardwareInfo &hwInfo);
|
||||
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
|
||||
virtual bool timestampPacketWriteSupported() const = 0;
|
||||
virtual bool isTimestampWaitSupported() const = 0;
|
||||
virtual size_t getRenderSurfaceStateSize() const = 0;
|
||||
virtual void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
@ -239,6 +240,8 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool timestampPacketWriteSupported() const override;
|
||||
|
||||
bool isTimestampWaitSupported() const override;
|
||||
|
||||
bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override;
|
||||
|
||||
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
|
||||
|
@ -40,6 +40,11 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isAssignEngineRoundRobinSupported() const {
|
||||
return false;
|
||||
|
@ -36,6 +36,11 @@ bool HwHelperHw<Family>::isCooperativeDispatchSupported(const EngineGroupType en
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isTimestampWaitSupported() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||
const HardwareInfo &hwInfo, bool isEngineInstanced) const {
|
||||
|
@ -54,6 +54,11 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const EngineInstancesContainer HwHelperHw<GfxFamily>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
|
||||
auto defaultEngine = getChosenEngineType(hwInfo);
|
||||
|
@ -12,6 +12,7 @@ struct UltHwConfig {
|
||||
bool useHwCsr = false;
|
||||
bool useMockedPrepareDeviceEnvironmentsFunc = true;
|
||||
bool forceOsAgnosticMemoryManager = true;
|
||||
bool useWaitForTimestamps = false;
|
||||
|
||||
bool csrFailInitDirectSubmission = false;
|
||||
bool csrBaseCallDirectSubmissionAvailable = false;
|
||||
|
@ -32,7 +32,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
|
||||
|
||||
// Ensure that global state is restored
|
||||
UltHwConfig expectedState{};
|
||||
static_assert(sizeof(UltHwConfig) == 11 * sizeof(bool), ""); // Ensure that there is no internal padding
|
||||
static_assert(sizeof(UltHwConfig) == 12 * sizeof(bool), ""); // Ensure that there is no internal padding
|
||||
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
|
||||
|
||||
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));
|
||||
|
Reference in New Issue
Block a user