Refactor timestamp wait mechanism

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2021-12-09 11:59:52 +00:00
committed by Compute-Runtime-Automation
parent 9d063f29d2
commit 1f0c58d0bf
24 changed files with 131 additions and 107 deletions

View File

@ -230,28 +230,30 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState
return false;
}
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) {
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) {
WAIT_ENTER()
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
if (!skipWait) {
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
flushStampToWait,
useQuickKmdSleep,
forcePowerSavingMode);
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
flushStampToWait,
useQuickKmdSleep,
forcePowerSavingMode);
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
if (gtpinIsGTPinInitialized()) {
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
}
if (gtpinIsGTPinInitialized()) {
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
}
for (const CopyEngineState &copyEngine : copyEnginesToWait) {
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
for (const CopyEngineState &copyEngine : copyEnginesToWait) {
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
}
}
if (cleanTemporaryAllocationList) {
@ -957,8 +959,10 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co
}
}
bool CommandQueue::isTimestampWaitEnabled() {
auto enabled = false;
bool CommandQueue::isWaitForTimestampsEnabled() {
auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily);
auto enabled = CommandQueue::isTimestampWaitEnabled();
enabled &= hwHelper.isTimestampWaitSupported();
switch (DebugManager.flags.EnableTimestampWait.get()) {
case 0:
@ -987,9 +991,10 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
}
}
auto waitedOnTimestamps = waitForTimestamps(taskCount);
TimestampPacketContainer nodesToRelease;
if (deferredTimestampPackets) {
waitForTimestamps(taskCount);
deferredTimestampPackets->swapNodes(nodesToRelease);
}
@ -999,7 +1004,7 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
activeBcsStates.push_back(state);
}
}
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList);
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
if (printfHandler) {
printfHandler->printEnqueueOutput();

View File

@ -204,14 +204,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const;
bool isTimestampWaitEnabled();
virtual void waitForTimestamps(uint32_t taskCount) = 0;
bool isWaitForTimestampsEnabled();
virtual bool waitForTimestamps(uint32_t taskCount) = 0;
MOCKABLE_VIRTUAL bool isQueueBlocked();
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList);
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait);
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true);
this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false);
}
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList);
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) {
@ -240,6 +240,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
size_t minRequiredSize, IndirectHeap *&indirectHeap);
static bool isAssignEngineRoundRobinEnabled();
static bool isTimestampWaitEnabled();
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);

View File

@ -446,7 +446,7 @@ class CommandQueueHw : public CommandQueue {
bool isCacheFlushCommand(uint32_t commandType) const override;
void waitForTimestamps(uint32_t taskCount) override;
bool waitForTimestamps(uint32_t taskCount) override;
MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const;

View File

@ -139,12 +139,14 @@ template <typename TSPacketType>
inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container) {
bool waited = false;
for (const auto &timestamp : container->peekNodes()) {
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
while (timestamp->getContextEndValue(i) == 1) {
WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
if (container) {
for (const auto &timestamp : container->peekNodes()) {
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
while (timestamp->getContextEndValue(i) == 1) {
WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
}
waited = true;
}
waited = true;
}
}
@ -152,20 +154,19 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container
}
template <typename Family>
void CommandQueueHw<Family>::waitForTimestamps(uint32_t taskCount) {
bool CommandQueueHw<Family>::waitForTimestamps(uint32_t taskCount) {
using TSPacketType = typename Family::TimestampPacketType;
bool waited = false;
if (isTimestampWaitEnabled()) {
bool waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get());
if (isWaitForTimestampsEnabled()) {
waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get());
if (isOOQEnabled()) {
waited |= waitForTimestampsWithinContainer<TSPacketType>(deferredTimestampPackets.get());
}
if (waited) {
getGpgpuCommandStreamReceiver().updateTagFromCpu(taskCount);
}
}
return waited;
}
template <typename Family>

View File

@ -12,6 +12,7 @@ endif()
set(RUNTIME_SRCS_DLL_BASE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_dll.cpp
${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp
${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp
${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp
@ -42,7 +43,6 @@ set(RUNTIME_SRCS_DLL_BASE
append_sources_from_properties(RUNTIME_SRCS_DLL_BASE NEO_CORE_SRCS_LINK)
set(RUNTIME_SRCS_DLL_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/linux/command_queue_linux_dll.cpp
${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp
${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp
${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp
@ -54,7 +54,6 @@ set(RUNTIME_SRCS_DLL_LINUX
)
set(RUNTIME_SRCS_DLL_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/windows/command_queue_windows_dll.cpp
${NEO_SHARED_DIRECTORY}/dll/windows/options_windows.cpp
${NEO_SHARED_DIRECTORY}/dll/windows/os_interface.cpp
${NEO_SHARED_DIRECTORY}/dll/windows/environment_variables.cpp

View File

@ -8,6 +8,7 @@
#include "opencl/source/command_queue/command_queue.h"
namespace NEO {
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
auto assignEngineRoundRobin = false;
@ -17,4 +18,9 @@ bool CommandQueue::isAssignEngineRoundRobinEnabled() {
return assignEngineRoundRobin;
}
// Baseline switch for waiting on timestamps.
// This definition always reports the feature as off (returns false).
// CommandQueue::isWaitForTimestampsEnabled() uses this value as its starting point before
// applying the HW-support check and the EnableTimestampWait debug flag.
bool CommandQueue::isTimestampWaitEnabled() {
return false;
}
} // namespace NEO

View File

@ -1,20 +0,0 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/command_queue.h"
namespace NEO {
// Tells whether command queues should be assigned engines in round-robin fashion.
// The answer is "no" while the EnableCmdQRoundRobindEngineAssign debug flag is left at -1;
// any other flag value is converted to bool and returned.
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
    if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() == -1) {
        // Flag not overridden: keep the built-in default.
        return false;
    }
    const bool requestedByFlag = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
    return requestedByFlag;
}
} // namespace NEO

View File

@ -100,6 +100,21 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur
EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
}
// Verifies waitForTimestamps() on a queue created with timestamp packets turned off:
//  - it must report that no wait happened (returns false), and
//  - it must leave the CSR's latest flushed task count unchanged.
// EnableTimestampWait is set to 4 so the debug flag itself does not disable the wait path.
// NOTE(review): presumably no timestamp packet nodes exist for this queue because nothing was
// enqueued and timestampPacketWriteEnabled is false - confirm against CommandQueue construction.
HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoWaitAndTagIsNotUpdated) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableTimestampPacket.set(0);
    DebugManager.flags.EnableTimestampWait.set(4);

    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDeviceWithDebuggerActive>(executionEnvironment, 0u));
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    // Must be cleared before the queue is constructed so the queue sees timestamp packets as disabled.
    csr.timestampPacketWriteEnabled = false;
    MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);

    const auto taskCountBefore = csr.peekLatestFlushedTaskCount();

    // The "NoWait" half of the test name: the call must say it did not wait on any timestamp.
    EXPECT_FALSE(cmdQ.waitForTimestamps(101u));

    // The "TagIsNotUpdated" half: the requested task count (101) must not leak into the CSR.
    EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCountBefore);
}
HWTEST_F(CommandQueueHwTest, WhenDebugSurfaceIsAllocatedThenBufferIsZeroed) {
ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary));

View File

@ -140,32 +140,34 @@ TEST(CommandQueue, WhenConstructingCommandQueueThenQueueFamilyIsNotSelected) {
TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenReturnProperValue) {
DebugManagerStateRestore restorer;
VariableBackup<UltHwConfig> backup(&ultHwConfig);
ultHwConfig.useWaitForTimestamps = true;
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);
{
DebugManager.flags.EnableTimestampWait.set(0);
EXPECT_FALSE(cmdQ.isTimestampWaitEnabled());
EXPECT_FALSE(cmdQ.isWaitForTimestampsEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(1);
EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(2);
EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(3);
EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(4);
EXPECT_TRUE(cmdQ.isTimestampWaitEnabled());
EXPECT_TRUE(cmdQ.isWaitForTimestampsEnabled());
}
}
@ -884,7 +886,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test {
template <typename Family>
struct MyCmdQueue : public CommandQueueHw<Family> {
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
requestedUseQuickKmdSleep = useQuickKmdSleep;
waitUntilCompleteCounter++;
}
@ -977,7 +979,7 @@ HWTEST_F(WaitUntilCompletionTests, givenCommandQueueAndCleanTemporaryAllocationL
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
uint32_t taskCount = 0u;
StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false);
cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, false);
auto cmdStreamPtr = &device->getGpgpuCommandStreamReceiver();

View File

@ -111,9 +111,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest {
public:
MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
waitUntilCompleteCalled = true;
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
}
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override {

View File

@ -803,9 +803,9 @@ class MyCmdQ : public MockCommandQueueHw<FamilyType> {
auxTranslationDirection);
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
waitCalled++;
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
}
std::vector<AuxTranslationDirection> auxTranslationDirections;

View File

@ -775,18 +775,16 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
}
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenWaitOnTimestampAndDoNotUpdateTagFromCpu) {
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
csr.callBaseWaitForCompletionWithTimeout = false;
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
const auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
auto taskCount = *csr.getTagAddress();
auto latestFlushedTaskCount = csr.peekLatestFlushedTaskCount();
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();
@ -795,16 +793,17 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueT
cmdQ->finish();
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), latestFlushedTaskCount);
EXPECT_EQ(*csr.getTagAddress(), taskCount);
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u);
}
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestampAndUpdateTagFromCpu) {
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
csr.callBaseWaitForCompletionWithTimeout = false;
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
@ -819,23 +818,22 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimes
typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2};
for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) {
deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
}
cmdQ->finish();
const auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 2u);
EXPECT_EQ(*csr.getTagAddress(), 2u);
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
}
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestampAndUpdateTagFromCpu) {
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
csr.callBaseWaitForCompletionWithTimeout = false;
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), props);
@ -857,9 +855,7 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitO
cmdQ->finish();
const auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 2u);
EXPECT_EQ(*csr.getTagAddress(), 2u);
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
cmdQ.reset();
}

View File

@ -5,9 +5,12 @@
*
*/
#include "shared/test/common/helpers/ult_hw_config.h"
#include "opencl/source/command_queue/command_queue.h"
namespace NEO {
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
auto assignEngineRoundRobin = false;
@ -17,4 +20,9 @@ bool CommandQueue::isAssignEngineRoundRobinEnabled() {
return assignEngineRoundRobin;
}
// Baseline switch for waiting on timestamps, driven by the global ultHwConfig.
// Returns ultHwConfig.useWaitForTimestamps, so a test can turn the feature on by setting that
// field (it defaults to false in UltHwConfig).
bool CommandQueue::isTimestampWaitEnabled() {
return ultHwConfig.useWaitForTimestamps;
}
} // namespace NEO

View File

@ -27,7 +27,7 @@ add_executable(igdrcl_${target_name}
${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp
${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp
${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp
${NEO_SOURCE_DIR}/opencl/source/dll/linux/command_queue_linux_dll.cpp
${NEO_SOURCE_DIR}/opencl/source/dll/command_queue_dll.cpp
${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp
${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}drm_other_requests.cpp
)

View File

@ -803,10 +803,14 @@ TEST(DirectSubmissionControllerTest, whenCheckDirectSubmissionControllerSupportT
EXPECT_TRUE(DirectSubmissionController::isSupported());
}
TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsTrue) {
TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsFalse) {
EXPECT_FALSE(CommandQueue::isAssignEngineRoundRobinEnabled());
}
// Checks that the CommandQueue::isTimestampWaitEnabled() definition linked into this test binary
// reports the feature as disabled.
TEST(CommandQueueTest, whenCheckEngineTimestampWaitEnabledThenReturnsFalse) {
EXPECT_FALSE(CommandQueue::isTimestampWaitEnabled());
}
TEST(CommandQueueTest, givenEnableCmdQRoundRobindEngineAssignSetWhenCheckEngineRoundRobinAssignThenReturnsTrue) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);

View File

@ -90,9 +90,9 @@ class MockCommandQueue : public CommandQueue {
return writeBufferRetValue;
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
latestTaskCountWaited = gpgpuTaskCountToWait;
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
@ -202,7 +202,7 @@ class MockCommandQueue : public CommandQueue {
bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; }
void waitForTimestamps(uint32_t taskCount) override{};
bool waitForTimestamps(uint32_t taskCount) override { return false; };
bool releaseIndirectHeapCalled = false;
@ -333,9 +333,9 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
useBcsCsrOnNotifyEnabled = notifyBcsCsr;
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
latestTaskCountWaited = gpgpuTaskCountToWait;
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
}
bool isCacheFlushForBcsRequired() const override {

View File

@ -674,16 +674,6 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
return true;
}
// Marks taskCount as completed from the CPU side, without involving the GPU.
// Records taskCount as the latest flushed task count, then writes it into the tag location of
// every active partition.
// @param taskCount value stored into latestFlushedTaskCount and into each partition's tag.
void CommandStreamReceiver::updateTagFromCpu(uint32_t taskCount) {
this->latestFlushedTaskCount.store(taskCount);
// Start at the first tag location; one location is written per active partition.
auto partitionAddress = getTagAddress();
for (uint32_t i = 0; i < activePartitions; i++) {
*partitionAddress = taskCount;
// Advance to the next partition's tag.
// NOTE(review): presumably postSyncWriteOffset is a byte distance applied by ptrOffset - confirm.
partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset);
}
}
TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() {
if (profilingTimeStampAllocator.get() == nullptr) {
std::vector<uint32_t> rootDeviceIndices = {rootDeviceIndex};

View File

@ -225,8 +225,6 @@ class CommandStreamReceiver {
virtual void updateTagFromWait() = 0;
virtual bool isUpdateTagFromWaitEnabled() = 0;
void updateTagFromCpu(uint32_t taskCount);
ScratchSpaceController *getScratchSpaceController() const {
return scratchSpaceController.get();
}

View File

@ -64,6 +64,7 @@ class HwHelper {
static bool compressedImagesSupported(const HardwareInfo &hwInfo);
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
virtual bool timestampPacketWriteSupported() const = 0;
virtual bool isTimestampWaitSupported() const = 0;
virtual size_t getRenderSurfaceStateSize() const = 0;
virtual void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
void *surfaceStateBuffer,
@ -239,6 +240,8 @@ class HwHelperHw : public HwHelper {
bool timestampPacketWriteSupported() const override;
bool isTimestampWaitSupported() const override;
bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override;
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;

View File

@ -40,6 +40,11 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
return false;
}
// Reports whether waiting on timestamps is supported for GfxFamily.
// This generic definition always answers false.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isAssignEngineRoundRobinSupported() const {
return false;

View File

@ -36,6 +36,11 @@ bool HwHelperHw<Family>::isCooperativeDispatchSupported(const EngineGroupType en
return true;
}
// Explicit specialization for Family: waiting on timestamps is reported as supported (true).
template <>
bool HwHelperHw<Family>::isTimestampWaitSupported() const {
return true;
}
template <>
uint32_t HwHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
const HardwareInfo &hwInfo, bool isEngineInstanced) const {

View File

@ -54,6 +54,11 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
return true;
}
// Reports whether waiting on timestamps is supported for GfxFamily.
// This generic definition always answers false.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
return false;
}
template <typename GfxFamily>
const EngineInstancesContainer HwHelperHw<GfxFamily>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
auto defaultEngine = getChosenEngineType(hwInfo);

View File

@ -12,6 +12,7 @@ struct UltHwConfig {
bool useHwCsr = false;
bool useMockedPrepareDeviceEnvironmentsFunc = true;
bool forceOsAgnosticMemoryManager = true;
bool useWaitForTimestamps = false;
bool csrFailInitDirectSubmission = false;
bool csrBaseCallDirectSubmissionAvailable = false;

View File

@ -32,7 +32,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
// Ensure that global state is restored
UltHwConfig expectedState{};
static_assert(sizeof(UltHwConfig) == 11 * sizeof(bool), ""); // Ensure that there is no internal padding
static_assert(sizeof(UltHwConfig) == 12 * sizeof(bool), ""); // Ensure that there is no internal padding
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));