Fix for Execution model PageFaults

- add a PIPE_CONTROL with GenericMediaStateClear, followed by MEDIA_VFE_STATE,
  to the execution model cleanup section (EMCleanupSection)

Change-Id: I0ee0e121bc2fcc09ac79cb3b601591247326482a
This commit is contained in:
Hoppe, Mateusz 2017-12-20 13:24:19 +01:00 committed by sys_ocldev
parent 03646887bf
commit a9f30a5059
12 changed files with 261 additions and 25 deletions

View File

@ -24,6 +24,8 @@
namespace OCLRT {
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
#define L3SQC_BIT_LQSC_RO_PERF_DIS 0x08000000
#define L3SQC_REG4 0xB118
@ -32,4 +34,4 @@ void applyWADisableLSQCROPERFforOCL(OCLRT::LinearStream *pCommandStream, const K
template <typename GfxFamily>
size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
}
} // namespace OCLRT

View File

@ -25,7 +25,6 @@ namespace OCLRT {
#define CS_GPR_R0 0x2600
#define CS_GPR_R1 0x2608
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
#define ALU_OPCODE_LOAD 0x080
#define ALU_OPCODE_STORE 0x180
#define ALU_OPCODE_OR 0x103

View File

@ -324,7 +324,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
}
if (executionModelKernel && devQueueHw->getSchedulerReturnInstance() > 0) {
if (executionModelKernel) {
commandStreamReceiver.overrideMediaVFEStateDirty(true);
if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
@ -338,6 +341,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
devQueueHw->getDebugQueue());
}
}
} else {
auto maxTaskCount = this->taskCount;
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {

View File

@ -44,6 +44,11 @@ class DeviceQueueHw : public DeviceQueue {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using MI_MATH = typename GfxFamily::MI_MATH;
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
public:
DeviceQueueHw(Context *context,
@ -92,6 +97,12 @@ class DeviceQueueHw : public DeviceQueue {
void buildSlbDummyCommands();
void addProfilingEndCmds(uint64_t timestampAddress);
static size_t getProfilingEndCmdsSize();
MOCKABLE_VIRTUAL void addMediaStateClearCmds();
static size_t getMediaStateClearCmdsSize();
static size_t getExecutionModelCleanupSectionSize();
LinearStream slbCS;
IGIL_CommandQueue *igilQueue = nullptr;

View File

@ -25,6 +25,7 @@
#include "runtime/command_queue/dispatch_walker.h"
#include "runtime/command_queue/dispatch_walker_helper.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/string.h"
#include "runtime/memory_manager/memory_manager.h"
@ -33,8 +34,10 @@ template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::allocateSlbBuffer() {
auto slbSize = getMinimumSlbSize() + getWaCommandsSize();
slbSize *= 128; //num of enqueues
slbSize += sizeof(MI_BATCH_BUFFER_START) +
(4 * MemoryConstants::pageSize); // +4 pages spec restriction
slbSize += sizeof(MI_BATCH_BUFFER_START);
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
slbSize += DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize();
slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
slbBuffer = device->getMemoryManager()->allocateGraphicsMemory(slbSize);
@ -253,6 +256,8 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
pipeControl2->setAddress(tagAddress & (0xffffffff));
pipeControl2->setImmediateData(taskCount);
addMediaStateClearCmds();
auto pBBE = slbCS.getSpaceForCmd<MI_BATCH_BUFFER_END>();
*pBBE = MI_BATCH_BUFFER_END::sInit();
@ -404,4 +409,55 @@ void DeviceQueueHw<GfxFamily>::addLriCmd(bool setArbCheck) {
lri->setDataDword(0x0);
}
// Appends the media-state-clear sequence to the SLB command stream (slbCS):
//   1. whatever addPipeControlCmdWa() emits (presumably a workaround
//      PIPE_CONTROL on platforms that need one - confirm in that helper),
//   2. a PIPE_CONTROL with Generic Media State Clear and command streamer
//      stall enabled,
//   3. the VFE state commands written by PreambleHelper::programVFEState().
// getMediaStateClearCmdsSize() reports the space budgeted for this sequence.
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::addMediaStateClearCmds() {
    addPipeControlCmdWa();

    auto pipeControl = slbCS.getSpaceForCmd<PIPE_CONTROL>();
    *pipeControl = PIPE_CONTROL::sInit();
    pipeControl->setGenericMediaStateClear(true);
    pipeControl->setCommandStreamerStallEnable(true);

    // NOTE(review): the meaning of the two trailing zero arguments is defined
    // by programVFEState and is not visible here - confirm against the helper.
    PreambleHelper<GfxFamily>::programVFEState(&slbCS, device->getHardwareInfo(), 0, 0);
}
// Returns the number of bytes budgeted for the media-state-clear sequence.
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getMediaStateClearCmdsSize() {
    // PIPE_CONTROL carrying GenericMediaStateClear plus the WA PIPE_CONTROL
    const size_t pipeControlsSize = 2 * sizeof(PIPE_CONTROL);
    // VFE state commands: one PIPE_CONTROL and one MEDIA_VFE_STATE
    const size_t vfeStateCmdsSize = sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);
    return pipeControlsSize + vfeStateCmdsSize;
}
// Returns the number of bytes budgeted for the execution model cleanup
// section. allocateSlbBuffer() adds this value to the SLB allocation size.
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize() {
    // Fixed-size commands at the start of the section.
    const size_t leadingCmdsSize = 2 * sizeof(PIPE_CONTROL) +
                                   2 * sizeof(MI_LOAD_REGISTER_REG) +
                                   sizeof(MI_LOAD_REGISTER_IMM) +
                                   sizeof(MI_MATH) +
                                   NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);

    // Parts whose size is reported by dedicated helpers.
    const size_t helperCmdsSize = getProfilingEndCmdsSize() + getMediaStateClearCmdsSize();

    // Remaining PIPE_CONTROLs and the terminating MI_BATCH_BUFFER_END.
    const size_t trailingCmdsSize = 4 * sizeof(PIPE_CONTROL) + sizeof(MI_BATCH_BUFFER_END);

    return leadingCmdsSize + helperCmdsSize + trailingCmdsSize;
}
// Returns the number of bytes budgeted for the profiling-end commands:
// one PIPE_CONTROL, two MI_STORE_REGISTER_MEM and one MI_LOAD_REGISTER_IMM.
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
    return sizeof(PIPE_CONTROL) +
           2 * sizeof(MI_STORE_REGISTER_MEM) +
           sizeof(MI_LOAD_REGISTER_IMM);
}
} // namespace OCLRT

View File

@ -858,8 +858,9 @@ bool Wddm::submit(void *commandBuffer, size_t size, void *commandHeader) {
monitoredFence.currentFenceValue++;
}
}
UNRECOVERABLE_IF(!success);
getDeviceState();
UNRECOVERABLE_IF(!success);
return success;
}

View File

@ -34,6 +34,8 @@
#include "runtime/command_queue/dispatch_walker_helper.h"
#include "runtime/helpers/kernel_commands.h"
#include <memory>
using namespace OCLRT;
using namespace DeviceHostQueue;
@ -163,6 +165,20 @@ class DeviceQueueSlb : public DeviceQueueHwTest {
}
};
// Checks that the SLB command stream offers at least as much space as the
// enqueue commands, the cleanup section and the 4-page padding require.
HWTEST_F(DeviceQueueSlb, allocateSlbBufferAllocatesCorrectSize) {
    std::unique_ptr<MockDeviceQueueHw<FamilyType>> deviceQueue(
        new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]));

    LinearStream *slbStream = deviceQueue->getSlbCS();

    // Per-enqueue commands (including WA commands), repeated for 128 enqueues.
    const auto perEnqueueSize = deviceQueue->getMinimumSlbSize() + deviceQueue->getWaCommandsSize();
    size_t minimumSize = perEnqueueSize * 128;
    minimumSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START);

    // The cleanup section is added after aligning up to a page boundary.
    minimumSize = alignUp(minimumSize, MemoryConstants::pageSize);
    minimumSize += MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize();
    minimumSize += (4 * MemoryConstants::pageSize);

    EXPECT_LE(minimumSize, slbStream->getAvailableSpace());
}
HWTEST_F(DeviceQueueSlb, buildSlbAfterReset) {
auto mockDeviceQueueHw =
new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]);
@ -305,6 +321,13 @@ HWTEST_F(DeviceQueueSlb, cleanupSection) {
size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize);
size_t cleanupSectionOffsetToParse = cleanupSectionOffset;
size_t slbUsed = slbCS->getUsed();
slbUsed = alignUp(slbUsed, MemoryConstants::pageSize);
size_t slbMax = slbCS->getMaxAvailableSpace();
// 4 pages padding expected after cleanup section
EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed);
if (mockParentKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
cleanupSectionOffsetToParse += getSizeForWADisableLSQCROPERFforOCL<FamilyType>(mockParentKernel) / 2;
@ -618,15 +641,127 @@ HWTEST_F(TheSimplestDeviceQueueFixture, resetDeviceQueueSetEarlyReturnValues) {
DebugManager.flags.SchedulerSimulationReturnInstance.set(3);
MockDevice *device = Device::create<MockDevice>(platformDevices[0]);
std::unique_ptr<MockDevice> device(Device::create<MockDevice>(platformDevices[0]));
MockContext context;
MockDeviceQueueHw<FamilyType> *mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(&context, device, deviceQueueProperties::minimumProperties[0]);
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
mockDeviceQueueHw->resetDeviceQueue();
EXPECT_EQ(3u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturn);
EXPECT_EQ(0u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturnCounter);
delete mockDeviceQueueHw;
delete device;
}
// Checks that addMediaStateClearCmds() writes a PIPE_CONTROL with
// GenericMediaStateClear set, followed later in the stream by a
// MEDIA_VFE_STATE command.
HWTEST_F(TheSimplestDeviceQueueFixture, addMediaStateClearCmds) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    std::unique_ptr<MockDevice> device(Device::create<MockDevice>(platformDevices[0]));
    MockContext context;
    std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));

    HardwareParse hwParser;
    auto *slbCS = mockDeviceQueueHw->getSlbCS();

    mockDeviceQueueHw->addMediaStateClearCmds();

    hwParser.parseCommands<FamilyType>(*slbCS, 0);
    hwParser.findHardwareCommands<FamilyType>();

    auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    // Fatal assertion: the iterator is dereferenced below, so the test must
    // stop here instead of reading through end() when nothing was found.
    ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);

    if (mockDeviceQueueHw->pipeControlWa) {
        // With the WA enabled, step past the first PIPE_CONTROL to the next command.
        ++pipeControlItor;
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
    }

    PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor;
    EXPECT_TRUE(pipeControl->getGenericMediaStateClear());

    auto mediaVfeStateItor = find<MEDIA_VFE_STATE *>(pipeControlItor, hwParser.cmdList.end());
    EXPECT_NE(hwParser.cmdList.end(), mediaVfeStateItor);
}
// Checks that addExecutionModelCleanUpSection() calls addMediaStateClearCmds(),
// using a local mock that records the call instead of emitting commands.
HWTEST_F(TheSimplestDeviceQueueFixture, addExecutionModelCleanupSectionClearsMediaState) {
    class MockDeviceQueueWithMediaStateClearRegistering : public MockDeviceQueueHw<FamilyType> {
      public:
        MockDeviceQueueWithMediaStateClearRegistering(Context *context,
                                                      Device *device,
                                                      cl_queue_properties &properties) : MockDeviceQueueHw<FamilyType>(context, device, properties) {
        }

        // Set once the overridden addMediaStateClearCmds() has been invoked.
        bool addMediaStateClearCmdsCalled = false;

        void addMediaStateClearCmds() override {
            addMediaStateClearCmdsCalled = true;
        }
    };

    std::unique_ptr<MockDevice> device(Device::create<MockDevice>(platformDevices[0]));
    MockContext context;
    std::unique_ptr<MockDeviceQueueWithMediaStateClearRegistering> mockDeviceQueueHw(new MockDeviceQueueWithMediaStateClearRegistering(&context, device.get(), deviceQueueProperties::minimumProperties[0]));

    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*device));
    uint32_t taskCount = 7;

    mockDeviceQueueHw->buildSlbDummyCommands();

    // Building the dummy commands alone must not trigger the media state clear.
    EXPECT_FALSE(mockDeviceQueueHw->addMediaStateClearCmdsCalled);
    mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel.get(), nullptr, taskCount);
    EXPECT_TRUE(mockDeviceQueueHw->addMediaStateClearCmdsCalled);
}
// Checks getMediaStateClearCmdsSize() against a sum computed in the test:
// three PIPE_CONTROLs and one MEDIA_VFE_STATE.
HWTEST_F(TheSimplestDeviceQueueFixture, getMediaStateClearCmdsSize) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    std::unique_ptr<MockDevice> device(Device::create<MockDevice>(platformDevices[0]));
    MockContext context;
    std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(
        new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));

    const size_t pipeControlsSize = 2 * sizeof(PIPE_CONTROL);
    const size_t vfeStateCmdsSize = sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);
    const size_t expectedSize = pipeControlsSize + vfeStateCmdsSize;

    EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getMediaStateClearCmdsSize());
}
// Checks getExecutionModelCleanupSectionSize() against a sum computed in the
// test from the individual command sizes and the two size helpers.
HWTEST_F(TheSimplestDeviceQueueFixture, getExecutionModelCleanupSectionSize) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    std::unique_ptr<MockDevice> device(Device::create<MockDevice>(platformDevices[0]));
    MockContext context;
    std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(
        new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));

    // Fixed-size commands at the start of the section.
    const size_t leadingCmdsSize = 2 * sizeof(PIPE_CONTROL) +
                                   2 * sizeof(MI_LOAD_REGISTER_REG) +
                                   sizeof(MI_LOAD_REGISTER_IMM) +
                                   sizeof(MI_MATH) +
                                   NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);

    // Parts whose size comes from the dedicated helpers.
    const size_t helperCmdsSize = MockDeviceQueueHw<FamilyType>::getProfilingEndCmdsSize() +
                                  MockDeviceQueueHw<FamilyType>::getMediaStateClearCmdsSize();

    // Remaining PIPE_CONTROLs and the terminating MI_BATCH_BUFFER_END.
    const size_t trailingCmdsSize = 4 * sizeof(PIPE_CONTROL) + sizeof(MI_BATCH_BUFFER_END);

    const size_t expectedSize = leadingCmdsSize + helperCmdsSize + trailingCmdsSize;
    EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize());
}
// Checks getProfilingEndCmdsSize() against a sum computed in the test: one
// PIPE_CONTROL, two MI_STORE_REGISTER_MEM and one MI_LOAD_REGISTER_IMM.
HWTEST_F(TheSimplestDeviceQueueFixture, getProfilingEndCmdsSize) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    std::unique_ptr<MockDevice> device(Device::create<MockDevice>(platformDevices[0]));
    MockContext context;
    std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(
        new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));

    const size_t expectedSize = sizeof(PIPE_CONTROL) +
                                2 * sizeof(MI_STORE_REGISTER_MEM) +
                                sizeof(MI_LOAD_REGISTER_IMM);

    EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getProfilingEndCmdsSize());
}

View File

@ -506,3 +506,19 @@ HWTEST_F(ParentKernelEnqueueFixture, givenCsrInBatchingModeWhenExecutionModelKer
EXPECT_EQ(1, mockCsr->flushCalledCount);
}
}
// Checks that enqueuing a parent kernel leaves the command stream receiver's
// media VFE state flagged as dirty. Only runs when the device reports a
// supported CL version of at least 20.
HWTEST_F(ParentKernelEnqueueFixture, ParentKernelEnqueueMarksCSRMediaVFEStateDirty) {
    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    size_t offset[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    int32_t execStamp;

    auto mockCsr = new MockCsr<FamilyType>(execStamp);
    pDevice->resetCommandStreamReceiver(mockCsr);

    // Start from a clean flag so the enqueue itself has to set it.
    mockCsr->overrideMediaVFEStateDirty(false);
    pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);

    EXPECT_TRUE(mockCsr->peekMediaVfeStateDirty());
}

View File

@ -39,9 +39,11 @@ GEN8TEST_F(Gen8DeviceQueueSlb, expectedAllocationSize) {
sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM);
expectedSize *= 128; //num of enqueues
expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START) + (4 * MemoryConstants::pageSize);
expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START);
expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);
expectedSize += MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize();
expectedSize += (4 * MemoryConstants::pageSize);
expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);
ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr);
EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize);

View File

@ -42,7 +42,10 @@ GEN9TEST_F(Gen9DeviceQueueSlb, expectedAllocationSize) {
sizeof(typename FamilyType::PIPE_CONTROL) +
sizeof(typename FamilyType::PIPE_CONTROL);
expectedSize *= 128; //num of enqueues
expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START) + (4 * MemoryConstants::pageSize);
expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START);
expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);
expectedSize += MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize();
expectedSize += (4 * MemoryConstants::pageSize);
expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);
ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr);

View File

@ -96,6 +96,7 @@ template <typename GfxFamily>
class MockCsr : public MockCsrBase<GfxFamily> {
public:
using BaseClass = MockCsrBase<GfxFamily>;
using CommandStreamReceiver::mediaVfeStateDirty;
MockCsr() = delete;
MockCsr(const HardwareInfo &hwInfoIn) = delete;
@ -132,6 +133,8 @@ class MockCsr : public MockCsrBase<GfxFamily> {
dispatchFlags);
}
bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; }
bool slmUsedInLastFlushTask = false;
uint32_t lastTaskLevelToFlushTask = 0;
};

View File

@ -51,6 +51,10 @@ class MockDeviceQueueHw : public DeviceQueueHw<GfxFamily> {
using BaseClass::getSlbCS;
using BaseClass::getWaCommandsSize;
using BaseClass::offsetDsh;
using BaseClass::addMediaStateClearCmds;
using BaseClass::getMediaStateClearCmdsSize;
using BaseClass::getProfilingEndCmdsSize;
using BaseClass::getExecutionModelCleanupSectionSize;
bool arbCheckWa;
bool miAtomicWa;