Fix for Execution model PageFaults

- add a PIPE_CONTROL (PC) with GenericMediaStateClear, followed by MEDIA_VFE_STATE,
to the execution model cleanup section (EMCleanupSection)

Change-Id: I0ee0e121bc2fcc09ac79cb3b601591247326482a
This commit is contained in:
Hoppe, Mateusz
2017-12-20 13:24:19 +01:00
committed by sys_ocldev
parent 03646887bf
commit a9f30a5059
12 changed files with 261 additions and 25 deletions

View File

@@ -24,6 +24,8 @@
namespace OCLRT {
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
#define L3SQC_BIT_LQSC_RO_PERF_DIS 0x08000000
#define L3SQC_REG4 0xB118
@@ -32,4 +34,4 @@ void applyWADisableLSQCROPERFforOCL(OCLRT::LinearStream *pCommandStream, const K
template <typename GfxFamily>
size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
}
} // namespace OCLRT

View File

@@ -25,7 +25,6 @@ namespace OCLRT {
#define CS_GPR_R0 0x2600
#define CS_GPR_R1 0x2608
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
#define ALU_OPCODE_LOAD 0x080
#define ALU_OPCODE_STORE 0x180
#define ALU_OPCODE_OR 0x103

View File

@@ -324,19 +324,23 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
}
if (executionModelKernel && devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
if (executionModelKernel) {
commandStreamReceiver.overrideMediaVFEStateDirty(true);
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
devQueueHw->getStackBuffer(),
devQueueHw->getEventPoolBuffer(),
devQueueHw->getSlbBuffer(),
devQueueHw->getDshBuffer(),
multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
devQueueHw->getQueueStorageBuffer(),
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
devQueueHw->getDebugQueue());
if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
devQueueHw->getStackBuffer(),
devQueueHw->getEventPoolBuffer(),
devQueueHw->getSlbBuffer(),
devQueueHw->getDshBuffer(),
multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
devQueueHw->getQueueStorageBuffer(),
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
devQueueHw->getDebugQueue());
}
}
} else {
auto maxTaskCount = this->taskCount;

View File

@@ -44,6 +44,11 @@ class DeviceQueueHw : public DeviceQueue {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using MI_MATH = typename GfxFamily::MI_MATH;
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
public:
DeviceQueueHw(Context *context,
@@ -92,6 +97,12 @@ class DeviceQueueHw : public DeviceQueue {
void buildSlbDummyCommands();
void addProfilingEndCmds(uint64_t timestampAddress);
static size_t getProfilingEndCmdsSize();
MOCKABLE_VIRTUAL void addMediaStateClearCmds();
static size_t getMediaStateClearCmdsSize();
static size_t getExecutionModelCleanupSectionSize();
LinearStream slbCS;
IGIL_CommandQueue *igilQueue = nullptr;

View File

@@ -25,6 +25,7 @@
#include "runtime/command_queue/dispatch_walker.h"
#include "runtime/command_queue/dispatch_walker_helper.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/string.h"
#include "runtime/memory_manager/memory_manager.h"
@@ -33,8 +34,10 @@ template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::allocateSlbBuffer() {
auto slbSize = getMinimumSlbSize() + getWaCommandsSize();
slbSize *= 128; //num of enqueues
slbSize += sizeof(MI_BATCH_BUFFER_START) +
(4 * MemoryConstants::pageSize); // +4 pages spec restriction
slbSize += sizeof(MI_BATCH_BUFFER_START);
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
slbSize += DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize();
slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
slbBuffer = device->getMemoryManager()->allocateGraphicsMemory(slbSize);
@@ -253,6 +256,8 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
pipeControl2->setAddress(tagAddress & (0xffffffff));
pipeControl2->setImmediateData(taskCount);
addMediaStateClearCmds();
auto pBBE = slbCS.getSpaceForCmd<MI_BATCH_BUFFER_END>();
*pBBE = MI_BATCH_BUFFER_END::sInit();
@@ -404,4 +409,55 @@ void DeviceQueueHw<GfxFamily>::addLriCmd(bool setArbCheck) {
lri->setDataDword(0x0);
}
// Appends the media-state-clear command sequence to the SLB command stream (slbCS).
//
// Commands are written in call order, so the sequence below must stay intact:
//   1. whatever addPipeControlCmdWa() programs ahead of the PIPE_CONTROL,
//   2. a PIPE_CONTROL with Generic Media State Clear and Command Streamer Stall enabled,
//   3. the commands written by PreambleHelper<GfxFamily>::programVFEState().
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::addMediaStateClearCmds() {
    addPipeControlCmdWa();

    auto pipeControl = slbCS.getSpaceForCmd<PIPE_CONTROL>();
    *pipeControl = PIPE_CONTROL::sInit();
    pipeControl->setGenericMediaStateClear(true);
    pipeControl->setCommandStreamerStallEnable(true);

    // NOTE(review): the two trailing zero arguments are passed through to programVFEState();
    // their meaning is defined by PreambleHelper - confirm against its declaration.
    PreambleHelper<GfxFamily>::programVFEState(&slbCS, device->getHardwareInfo(), 0, 0);
}
// Size, in bytes, accounted for the media-state-clear command sequence:
// three PIPE_CONTROLs and one MEDIA_VFE_STATE.
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getMediaStateClearCmdsSize() {
    // PIPE_CONTROL with GenericMediaStateClear + workaround PIPE_CONTROL
    const size_t pipeControlsSize = 2 * sizeof(PIPE_CONTROL);

    // VFE state commands: PIPE_CONTROL + MEDIA_VFE_STATE
    const size_t vfeStateCmdsSize = sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);

    return pipeControlsSize + vfeStateCmdsSize;
}
// Total size, in bytes, accounted for the execution model cleanup section.
// The result is the sum of fixed-size command structures plus the sizes reported by
// getProfilingEndCmdsSize() and getMediaStateClearCmdsSize().
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize() {
    // Leading group: two PIPE_CONTROLs, register loads and an MI_MATH with its ALU instructions.
    const size_t leadingCmdsSize = sizeof(PIPE_CONTROL) +
                                   2 * sizeof(MI_LOAD_REGISTER_REG) +
                                   sizeof(MI_LOAD_REGISTER_IMM) +
                                   sizeof(PIPE_CONTROL) +
                                   sizeof(MI_MATH) +
                                   NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);

    const size_t profilingEndSize = getProfilingEndCmdsSize();
    const size_t mediaStateClearSize = getMediaStateClearCmdsSize();

    // Four more PIPE_CONTROLs and the terminating MI_BATCH_BUFFER_END.
    const size_t trailingCmdsSize = 4 * sizeof(PIPE_CONTROL) + sizeof(MI_BATCH_BUFFER_END);

    return leadingCmdsSize + profilingEndSize + mediaStateClearSize + trailingCmdsSize;
}
// Size, in bytes, accounted for the profiling-end commands:
// one PIPE_CONTROL, two MI_STORE_REGISTER_MEM and one MI_LOAD_REGISTER_IMM.
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
    return sizeof(PIPE_CONTROL) +
           2 * sizeof(MI_STORE_REGISTER_MEM) +
           sizeof(MI_LOAD_REGISTER_IMM);
}
} // namespace OCLRT

View File

@@ -858,8 +858,9 @@ bool Wddm::submit(void *commandBuffer, size_t size, void *commandHeader) {
monitoredFence.currentFenceValue++;
}
}
UNRECOVERABLE_IF(!success);
getDeviceState();
UNRECOVERABLE_IF(!success);
return success;
}