Multiply wait timeout by task count difference

- Linux specific
- Use only for non-quickSleep requests

Change-Id: I245546f83672d128377e51d92b6c7708a7448f05
This commit is contained in:
Dunajski, Bartosz
2018-03-29 10:41:39 +02:00
committed by sys_ocldev
parent f4af035ab7
commit 8505658cab
8 changed files with 68 additions and 4 deletions

View File

@@ -85,6 +85,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){};
// Factor by which waitForTaskCountWithKmdNotifyFallback multiplies the selected KMD-notify delay.
// This default ignores both arguments and leaves the delay unscaled.
virtual int64_t computeTimeoutMultiplier(bool useQuickKmdSleep, uint32_t taskCountToWait) const {
    return 1u;
}
void addPipeControlWA(LinearStream &commandStream, bool flushDC);
void addDcFlushToPipeControl(typename GfxFamily::PIPE_CONTROL *pCmd, bool flushDC);
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);

View File

@@ -568,10 +568,9 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
const auto &kmdNotifyProperties = this->hwInfo.capabilityTable.kmdNotifyProperties;
useQuickKmdSleep |= kmdNotifyProperties.applyQuickKmdSleepForSporadicWait(lastWaitForCompletionTimestamp);
const auto &kmdNotifyDelay = kmdNotifyProperties.selectDelay(useQuickKmdSleep);
int64_t kmdNotifyDelay = kmdNotifyProperties.selectDelay(useQuickKmdSleep) * computeTimeoutMultiplier(useQuickKmdSleep, taskCountToWait);
auto status = waitForCompletionWithTimeout(kmdNotifyProperties.enableKmdNotify && flushStampToWait != 0,
kmdNotifyDelay, taskCountToWait);

View File

@@ -36,6 +36,7 @@ template <typename GfxFamily>
class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
protected:
typedef DeviceCommandStreamReceiver<GfxFamily> BaseClass;
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiver::getTagAddress;
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiver::memoryManager;
using BaseClass::getScratchPatchAddress;
using BaseClass::hwInfo;
@@ -66,6 +67,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
protected:
void makeResident(BufferObject *bo);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags) override;
int64_t computeTimeoutMultiplier(bool useQuickKmdSleep, uint32_t taskCountToWait) const override;
std::vector<BufferObject *> residency;
std::vector<drm_i915_gem_exec_object2> execObjectsStorage;

View File

@@ -190,4 +190,13 @@ inline void DrmCommandStreamReceiver<GfxFamily>::programVFEState(LinearStream &c
currentContextDirtyFlag = false;
}
}
// Returns the factor used to stretch the wait timeout: the distance between the requested
// task count and the value currently stored at the tag address, or 1 when no stretching applies.
template <typename GfxFamily>
inline int64_t DrmCommandStreamReceiver<GfxFamily>::computeTimeoutMultiplier(bool useQuickKmdSleep, uint32_t taskCountToWait) const {
    // Single read of the tag, so the comparison and the subtraction below use the same value.
    const auto completedTaskCount = *getTagAddress();

    // Scale only for regular (non-quick-sleep) waits that still have task counts outstanding.
    const bool scaleTimeout = !useQuickKmdSleep && completedTaskCount < taskCountToWait;
    if (!scaleTimeout) {
        return 1u;
    }

    // completedTaskCount < taskCountToWait here, so the unsigned difference cannot wrap.
    return static_cast<int64_t>(taskCountToWait - completedTaskCount);
}
} // namespace OCLRT

View File

@@ -56,8 +56,8 @@
using namespace OCLRT;
using ::testing::Invoke;
using ::testing::_;
using ::testing::Invoke;
struct UltCommandStreamReceiverTest
: public DeviceFixture,
@@ -1500,6 +1500,12 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverT
EXPECT_EQ(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy());
}
// The default receiver must report a multiplier of 1 for both regular and quick-sleep requests.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverWhenAskedForTimeoutMultiplierThenAlwaysReturnOne) {
    UltCommandStreamReceiver<FamilyType> csr(*platformDevices[0]);
    const uint32_t pendingTaskCount = 5;

    // Regular wait request.
    EXPECT_EQ(1u, csr.computeTimeoutMultiplier(false, pendingTaskCount));
    // Quick KMD sleep request.
    EXPECT_EQ(1u, csr.computeTimeoutMultiplier(true, pendingTaskCount));
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) {
typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
size_t GWS = 1;

View File

@@ -26,6 +26,11 @@
#include "unit_tests/mocks/mock_context.h"
#include "test.h"
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
#endif
using namespace OCLRT;
struct KmdNotifyTests : public ::testing::Test {
@@ -50,10 +55,15 @@ struct KmdNotifyTests : public ::testing::Test {
}
template <typename Family>
struct MyCsr : public UltCommandStreamReceiver<Family> {
class MyCsr : public UltCommandStreamReceiver<Family> {
public:
MyCsr(const HardwareInfo &hwInfo) : UltCommandStreamReceiver<Family>(hwInfo) {}
MOCK_METHOD1(waitForFlushStamp, bool(FlushStamp &flushStampToWait));
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
int64_t computeTimeoutMultiplierRetValue = 1u;
protected:
// Test hook: ignores both arguments and returns computeTimeoutMultiplierRetValue.
// The second parameter is the absolute task count to wait for (same name as in the overridden declaration), not a difference.
int64_t computeTimeoutMultiplier(bool useQuickKmdSleep, uint32_t taskCountToWait) const override { return computeTimeoutMultiplierRetValue; }
};
HardwareInfo localHwInfo = **platformDevices;
@@ -192,6 +202,19 @@ HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIs
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, true);
}
// Verifies that the multiplier reported by the receiver scales the timeout passed to
// waitForCompletionWithTimeout for a regular (non-quick-sleep) wait.
HWTEST_F(KmdNotifyTests, givenComputeTimeoutMultiplierWhenWaitCalledThenUseNewTimeout) {
    auto mockCsr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo());
    mockCsr->computeTimeoutMultiplierRetValue = 3;
    // NOTE(review): presumably the device takes ownership of the receiver here - confirm.
    device->resetCommandStreamReceiver(mockCsr);

    // Expected timeout = regular KMD-notify delay times the mocked multiplier.
    const auto &kmdNotifyProperties = device->getHardwareInfo().capabilityTable.kmdNotifyProperties;
    auto scaledTimeout = kmdNotifyProperties.delayKmdNotifyMicroseconds * mockCsr->computeTimeoutMultiplierRetValue;

    EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(true, scaledTimeout, ::testing::_))
        .Times(1)
        .WillOnce(::testing::Return(true));

    // Non-zero flush stamp, quick sleep not requested.
    mockCsr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false);
}
template <typename Family>
struct MyCsrWithTimestampCheck : public UltCommandStreamReceiver<Family> {
MyCsrWithTimestampCheck(const HardwareInfo &hwInfo) : UltCommandStreamReceiver<Family>(hwInfo) {}
@@ -222,3 +245,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWithDisabledSporadicWa
csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false);
EXPECT_EQ(0u, csr->updateLastWaitForCompletionTimestampCalled);
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

View File

@@ -33,6 +33,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
using BaseClass = CommandStreamReceiverHw<GfxFamily>;
public:
using BaseClass::computeTimeoutMultiplier;
using BaseClass::dshState;
using BaseClass::hwInfo;
using BaseClass::iohState;

View File

@@ -536,6 +536,24 @@ TEST_F(DrmCommandStreamTest, CheckDrmFreeCloseFailed) {
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
// Checks DrmCommandStreamReceiver::computeTimeoutMultiplier against a locally controlled tag value:
// the multiplier is the task-count distance for regular waits with outstanding work and 1 otherwise.
HWTEST_F(DrmCommandStreamTest, givenDrmCsrWhenAskedForTimeoutMultiplierThenReturnCorrectValueDependingOnRequest) {
    // Exposes the protected multiplier and redirects the tag address to hwTag.
    struct MyDrmCsr : public DrmCommandStreamReceiver<FamilyType> {
        using DrmCommandStreamReceiver<FamilyType>::computeTimeoutMultiplier;
        using DrmCommandStreamReceiver<FamilyType>::tagAddress;
        MyDrmCsr(const HardwareInfo &hwInfoIn) : DrmCommandStreamReceiver<FamilyType>(hwInfoIn, nullptr) {
            tagAddress = &hwTag;
        }
        uint32_t hwTag = 2;
    } myDrmCsr(**platformDevices);

    uint32_t taskCountToWait = 5;

    // Regular wait with outstanding task counts: multiplier equals the distance to the tag.
    EXPECT_EQ(taskCountToWait - myDrmCsr.hwTag, myDrmCsr.computeTimeoutMultiplier(false, taskCountToWait));

    // Regular wait when the tag has already reached or passed the requested count: no scaling.
    // These cases cover the tag comparison on its own, without the quick-sleep shortcut.
    EXPECT_EQ(1u, myDrmCsr.computeTimeoutMultiplier(false, 1));
    EXPECT_EQ(1u, myDrmCsr.computeTimeoutMultiplier(false, myDrmCsr.hwTag));

    // Quick-sleep requests are never scaled, regardless of the tag value.
    EXPECT_EQ(1u, myDrmCsr.computeTimeoutMultiplier(true, taskCountToWait));
    EXPECT_EQ(1u, myDrmCsr.computeTimeoutMultiplier(true, 1));
}
struct DrmCsrVfeTests : ::testing::Test {
template <typename FamilyType>
struct MyCsr : public DrmCommandStreamReceiver<FamilyType> {