mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 17:20:26 +08:00
Fix for Execution model PageFaults
- Add a PIPE_CONTROL (PC) with MediaStateClear, followed by MEDIA_VFE_STATE, to the execution model cleanup section (EMCleanupSection). Change-Id: I0ee0e121bc2fcc09ac79cb3b601591247326482a
This commit is contained in:
committed by
sys_ocldev
parent
03646887bf
commit
a9f30a5059
@@ -24,6 +24,8 @@
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
|
||||
|
||||
#define L3SQC_BIT_LQSC_RO_PERF_DIS 0x08000000
|
||||
#define L3SQC_REG4 0xB118
|
||||
|
||||
@@ -32,4 +34,4 @@ void applyWADisableLSQCROPERFforOCL(OCLRT::LinearStream *pCommandStream, const K
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -25,7 +25,6 @@ namespace OCLRT {
|
||||
#define CS_GPR_R0 0x2600
|
||||
#define CS_GPR_R1 0x2608
|
||||
|
||||
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
|
||||
#define ALU_OPCODE_LOAD 0x080
|
||||
#define ALU_OPCODE_STORE 0x180
|
||||
#define ALU_OPCODE_OR 0x103
|
||||
|
||||
@@ -324,19 +324,23 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
||||
}
|
||||
|
||||
if (executionModelKernel && devQueueHw->getSchedulerReturnInstance() > 0) {
|
||||
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
|
||||
if (executionModelKernel) {
|
||||
commandStreamReceiver.overrideMediaVFEStateDirty(true);
|
||||
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
|
||||
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
|
||||
devQueueHw->getDebugQueue());
|
||||
if (devQueueHw->getSchedulerReturnInstance() > 0) {
|
||||
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
|
||||
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
|
||||
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
|
||||
devQueueHw->getDebugQueue());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto maxTaskCount = this->taskCount;
|
||||
|
||||
@@ -44,6 +44,11 @@ class DeviceQueueHw : public DeviceQueue {
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
|
||||
|
||||
public:
|
||||
DeviceQueueHw(Context *context,
|
||||
@@ -92,6 +97,12 @@ class DeviceQueueHw : public DeviceQueue {
|
||||
void buildSlbDummyCommands();
|
||||
|
||||
void addProfilingEndCmds(uint64_t timestampAddress);
|
||||
static size_t getProfilingEndCmdsSize();
|
||||
|
||||
MOCKABLE_VIRTUAL void addMediaStateClearCmds();
|
||||
static size_t getMediaStateClearCmdsSize();
|
||||
|
||||
static size_t getExecutionModelCleanupSectionSize();
|
||||
|
||||
LinearStream slbCS;
|
||||
IGIL_CommandQueue *igilQueue = nullptr;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "runtime/command_queue/dispatch_walker.h"
|
||||
#include "runtime/command_queue/dispatch_walker_helper.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
||||
@@ -33,8 +34,10 @@ template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::allocateSlbBuffer() {
|
||||
auto slbSize = getMinimumSlbSize() + getWaCommandsSize();
|
||||
slbSize *= 128; //num of enqueues
|
||||
slbSize += sizeof(MI_BATCH_BUFFER_START) +
|
||||
(4 * MemoryConstants::pageSize); // +4 pages spec restriction
|
||||
slbSize += sizeof(MI_BATCH_BUFFER_START);
|
||||
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
|
||||
slbSize += DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize();
|
||||
slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction
|
||||
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
|
||||
|
||||
slbBuffer = device->getMemoryManager()->allocateGraphicsMemory(slbSize);
|
||||
@@ -253,6 +256,8 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
|
||||
pipeControl2->setAddress(tagAddress & (0xffffffff));
|
||||
pipeControl2->setImmediateData(taskCount);
|
||||
|
||||
addMediaStateClearCmds();
|
||||
|
||||
auto pBBE = slbCS.getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*pBBE = MI_BATCH_BUFFER_END::sInit();
|
||||
|
||||
@@ -404,4 +409,55 @@ void DeviceQueueHw<GfxFamily>::addLriCmd(bool setArbCheck) {
|
||||
lri->setDataDword(0x0);
|
||||
}
|
||||
|
||||
// Appends the media-state-clear sequence to the SLB command stream (slbCS).
// Emission order: the commands produced by addPipeControlCmdWa(), one
// PIPE_CONTROL with GenericMediaStateClear and CommandStreamerStallEnable set,
// then whatever PreambleHelper<GfxFamily>::programVFEState() writes into slbCS.
// NOTE(review): getMediaStateClearCmdsSize() appears to be the size budget for
// this sequence - keep the two in sync; confirm.
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addMediaStateClearCmds() {
|
||||
// NOTE(review): this alias is not referenced anywhere in this body.
typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE;
|
||||
|
||||
// Workaround commands are emitted before the state-clearing PIPE_CONTROL.
addPipeControlCmdWa();
|
||||
|
||||
// Reserve room for one PIPE_CONTROL in slbCS and start from its sInit() value.
auto pipeControl = slbCS.getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pipeControl = PIPE_CONTROL::sInit();
|
||||
pipeControl->setGenericMediaStateClear(true);
|
||||
pipeControl->setCommandStreamerStallEnable(true);
|
||||
|
||||
// VFE state is programmed into slbCS after the clear; the two trailing
// arguments are passed as 0 (their meaning is defined by PreambleHelper,
// which is not visible here).
PreambleHelper<GfxFamily>::programVFEState(&slbCS, device->getHardwareInfo(), 0, 0);
|
||||
}
|
||||
|
||||
// Returns the number of bytes accounted for the media-state-clear sequence:
// three PIPE_CONTROL commands plus one MEDIA_VFE_STATE command.
// NOTE(review): presumably mirrors what addMediaStateClearCmds() emits; the
// workaround and VFE portions come from helpers not visible here - confirm.
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getMediaStateClearCmdsSize() {
|
||||
// PC with GenericMediaStateClear + WA PC
|
||||
size_t size = 2 * sizeof(PIPE_CONTROL);
|
||||
|
||||
// VFE state cmds
|
||||
size += sizeof(PIPE_CONTROL);
|
||||
size += sizeof(MEDIA_VFE_STATE);
|
||||
return size;
|
||||
}
|
||||
|
||||
// Returns the byte size accounted for the execution-model cleanup section.
// The total is the sum of the fixed command sizes listed below, the
// profiling-end commands (getProfilingEndCmdsSize()), the media-state-clear
// commands (getMediaStateClearCmdsSize()), four further PIPE_CONTROLs and the
// terminating MI_BATCH_BUFFER_END.
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize() {
|
||||
size_t totalSize = 0;
|
||||
// NOTE(review): presumably the register read-modify-write sequence (cf. the
// NUM_ALU_INST_FOR_READ_MODIFY_WRITE multiplier) - confirm against the code
// that emits it.
totalSize += sizeof(PIPE_CONTROL) +
|
||||
2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_MATH) +
|
||||
NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);
|
||||
|
||||
totalSize += getProfilingEndCmdsSize();
|
||||
totalSize += getMediaStateClearCmdsSize();
|
||||
|
||||
// NOTE(review): four extra PIPE_CONTROLs; the commands they account for are
// not visible here - confirm against addExecutionModelCleanUpSection().
totalSize += 4 * sizeof(PIPE_CONTROL);
|
||||
totalSize += sizeof(MI_BATCH_BUFFER_END);
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
// Returns the number of bytes accounted for the profiling-end commands: one
// PIPE_CONTROL, two MI_STORE_REGISTER_MEM and one MI_LOAD_REGISTER_IMM.
// NOTE(review): presumably matches what addProfilingEndCmds() emits - that
// body is not visible here; confirm.
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
|
||||
size_t size = 0;
|
||||
size += sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM);
|
||||
size += sizeof(MI_LOAD_REGISTER_IMM);
|
||||
return size;
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -858,8 +858,9 @@ bool Wddm::submit(void *commandBuffer, size_t size, void *commandHeader) {
|
||||
monitoredFence.currentFenceValue++;
|
||||
}
|
||||
}
|
||||
UNRECOVERABLE_IF(!success);
|
||||
|
||||
getDeviceState();
|
||||
UNRECOVERABLE_IF(!success);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user