Add Preemption WA waModifyVFEStateAfterGPGPUPreemption for WALKER replay issue

Change-Id: I046e7296157b4b527ad65733ea417fbc364aac9c
This commit is contained in:
Zdanowicz, Zbigniew
2018-01-02 12:10:34 +01:00
parent ca45fce7b3
commit 5cfb102359
17 changed files with 481 additions and 49 deletions

View File

@@ -194,6 +194,7 @@ set (RUNTIME_SRCS_COMMAND_STREAM
command_stream/thread_arbitration_policy.h
command_stream/preemption.h
command_stream/preemption.cpp
command_stream/preemption.inl
)
set (RUNTIME_SRCS_COMPILER_INTERFACE

View File

@@ -27,6 +27,7 @@
#include "runtime/command_queue/command_queue.h"
#include "runtime/command_queue/dispatch_walker_helper.h"
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/device/device_info.h"
#include "runtime/device_queue/device_queue_hw.h"
#include "runtime/event/perf_counter.h"
@@ -599,6 +600,8 @@ void dispatchWalker(
}
}
PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(commandStream, commandQueue.getDevice());
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, kernel, true);
@@ -632,6 +635,8 @@ void dispatchWalker(
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, kernel, false);
PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(commandStream, commandQueue.getDevice());
}
// If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled
@@ -836,10 +841,12 @@ struct EnqueueOperation {
//user registers
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
}
Device &device = commandQueue.getDevice();
for (auto &dispatchInfo : multiDispatchInfo) {
auto &kernel = *dispatchInfo.getKernel();
size += sizeof(typename GfxFamily::GPGPU_WALKER);
size += getSizeForWADisableLSQCROPERFforOCL<GfxFamily>(&kernel);
size += PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(device);
}
return size;
}
@@ -847,7 +854,7 @@ struct EnqueueOperation {
static size_t getSizeRequiredCS(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() +
sizeof(typename GfxFamily::PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
size += PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(commandQueue.getDevice());
if (reserveProfilingCmdsSpace) {
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
}

View File

@@ -24,10 +24,27 @@
namespace OCLRT {
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
constexpr int NUM_ALU_INST_FOR_READ_MODIFY_WRITE = 4;
#define L3SQC_BIT_LQSC_RO_PERF_DIS 0x08000000
#define L3SQC_REG4 0xB118
constexpr int L3SQC_BIT_LQSC_RO_PERF_DIS = 0x08000000;
constexpr int L3SQC_REG4 = 0xB118;
constexpr int GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER = 0xFFFFFFFF;
constexpr int GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER = 0x00000000;
constexpr int CS_GPR_R0 = 0x2600;
constexpr int CS_GPR_R1 = 0x2608;
constexpr int ALU_OPCODE_LOAD = 0x080;
constexpr int ALU_OPCODE_STORE = 0x180;
constexpr int ALU_OPCODE_OR = 0x103;
constexpr int ALU_OPCODE_AND = 0x102;
constexpr int ALU_REGISTER_R_0 = 0x0;
constexpr int ALU_REGISTER_R_1 = 0x1;
constexpr int ALU_REGISTER_R_SRCA = 0x20;
constexpr int ALU_REGISTER_R_SRCB = 0x21;
constexpr int ALU_REGISTER_R_ACCU = 0x31;
template <typename GfxFamily>
void applyWADisableLSQCROPERFforOCL(OCLRT::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode);

View File

@@ -20,22 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_queue/dispatch_walker_helper.h"
namespace OCLRT {
#define CS_GPR_R0 0x2600
#define CS_GPR_R1 0x2608
#define ALU_OPCODE_LOAD 0x080
#define ALU_OPCODE_STORE 0x180
#define ALU_OPCODE_OR 0x103
#define ALU_OPCODE_AND 0x102
#define ALU_REGISTER_R_0 0x0
#define ALU_REGISTER_R_1 0x1
#define ALU_REGISTER_R_SRCA 0x20
#define ALU_REGISTER_R_SRCB 0x21
#define ALU_REGISTER_R_ACCU 0x31
// Performs ReadModifyWrite operation on value of a register: Register = Register Operation Mask
template <typename GfxFamily>
void addAluReadModifyWriteRegister(
@@ -108,4 +96,4 @@ void addAluReadModifyWriteRegister(
pCmd5->setPipeControlFlushEnable(true);
pCmd5->setStateCacheInvalidationEnable(true);
}
}
} // namespace OCLRT

View File

@@ -38,7 +38,7 @@ struct EnqueueOperation<GfxFamily, CL_COMMAND_NDRANGE_KERNEL> {
static size_t getSizeRequiredCS(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() +
sizeof(typename GfxFamily::PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
size += PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(commandQueue.getDevice());
if (reserveProfilingCmdsSpace) {
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
}

View File

@@ -43,5 +43,14 @@ class PreemptionHelper {
template <typename GfxFamily>
static size_t getRequiredCsrSize(PreemptionMode preemptionMode);
// Returns the number of command-stream bytes the waModifyVFEStateAfterGPGPUPreemption
// workaround needs for one walker on the given device (0 when the workaround does not apply).
template <typename GfxFamily>
static size_t getPreemptionWaCsSize(const Device &device);
// Emits the "before walker" part of the waModifyVFEStateAfterGPGPUPreemption workaround
// into pCommandStream; emits nothing when the workaround does not apply to the device.
template <typename GfxFamily>
static void applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device);
// Emits the "after walker" part of the waModifyVFEStateAfterGPGPUPreemption workaround
// into pCommandStream; emits nothing when the workaround does not apply to the device.
template <typename GfxFamily>
static void applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device);
};
} // namespace OCLRT

View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_stream/preemption.h"
#include "runtime/device/device.h"
#include "runtime/command_queue/dispatch_walker_helper.h"
namespace OCLRT {
template <typename GfxFamily>
size_t PreemptionHelper::getPreemptionWaCsSize(const Device &device) {
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
size_t size = 0;
PreemptionMode preemptionMode = device.getPreemptionMode();
if (preemptionMode == PreemptionMode::ThreadGroup ||
preemptionMode == PreemptionMode::MidThread) {
if (device.getWaTable()->waModifyVFEStateAfterGPGPUPreemption) {
size += 2 * sizeof(MI_LOAD_REGISTER_IMM);
}
}
return size;
}
// Programs the opening half of the waModifyVFEStateAfterGPGPUPreemption workaround.
//
// When the device preemption mode is ThreadGroup or MidThread and the workaround flag
// is set, one MI_LOAD_REGISTER_IMM is appended to pCommandStream that loads CS_GPR_R0
// with GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER. Otherwise the stream is left untouched.
template <typename GfxFamily>
void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device) {
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    const PreemptionMode mode = device.getPreemptionMode();
    if (mode != PreemptionMode::ThreadGroup && mode != PreemptionMode::MidThread) {
        return;
    }
    if (!device.getWaTable()->waModifyVFEStateAfterGPGPUPreemption) {
        return;
    }

    // Reserve room for exactly one LRI and fill it from the family's default template.
    auto space = pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
    auto lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(space);
    *lri = MI_LOAD_REGISTER_IMM::sInit();
    lri->setRegisterOffset(CS_GPR_R0);
    lri->setDataDword(GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER);
}
// Programs the closing half of the waModifyVFEStateAfterGPGPUPreemption workaround.
//
// When the device preemption mode is ThreadGroup or MidThread and the workaround flag
// is set, one MI_LOAD_REGISTER_IMM is appended to pCommandStream that loads CS_GPR_R0
// with GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER. Otherwise the stream is left untouched.
template <typename GfxFamily>
void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device) {
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    const PreemptionMode mode = device.getPreemptionMode();
    if (mode != PreemptionMode::ThreadGroup && mode != PreemptionMode::MidThread) {
        return;
    }
    if (!device.getWaTable()->waModifyVFEStateAfterGPGPUPreemption) {
        return;
    }

    // Reserve room for exactly one LRI and fill it from the family's default template.
    auto space = pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
    auto lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(space);
    *lri = MI_LOAD_REGISTER_IMM::sInit();
    lri->setRegisterOffset(CS_GPR_R0);
    lri->setDataDword(GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER);
}
} // namespace OCLRT

View File

@@ -121,7 +121,7 @@ class Device : public BaseObject<_cl_device_id> {
GFXCORE_FAMILY getRenderCoreFamily() const;
PerformanceCounters *getPerformanceCounters() { return performanceCounters.get(); }
static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
PreemptionMode getPreemptionMode() { return preemptionMode; }
PreemptionMode getPreemptionMode() const { return preemptionMode; }
MOCKABLE_VIRTUAL const WhitelistedRegisters &getWhitelistedRegisters() { return hwInfo.capabilityTable.whitelistedRegisters; }
std::vector<unsigned int> simultaneousInterops;
std::string deviceExtensions;

View File

@@ -53,6 +53,7 @@ int HwInfoConfigHw<IGFX_BROADWELL>::configureHardwareCustom(HardwareInfo *hwInfo
pWaTable->waDisableLSQCROPERFforOCL = 1;
pWaTable->waReportPerfCountUseGlobalContextID = 1;
pWaTable->waUseVAlign16OnTileXYBpp816 = 1;
pWaTable->waModifyVFEStateAfterGPGPUPreemption = 1;
if (hwInfo->pPlatform->usDeviceID == IBDW_GT3_HALO_MOBL_DEVICE_F0_ID ||
hwInfo->pPlatform->usDeviceID == IBDW_GT3_SERV_DEVICE_F0_ID) {

View File

@@ -21,6 +21,7 @@
*/
#include "runtime/command_stream/preemption.h"
#include "runtime/command_stream/preemption.inl"
namespace OCLRT {
@@ -50,4 +51,7 @@ size_t PreemptionHelper::getRequiredCsrSize<GfxFamily>(PreemptionMode preemption
return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
}
// Explicit instantiations of the preemption workaround helpers whose generic
// definitions come from the included preemption.inl, for this file's GfxFamily.
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
} // namespace OCLRT

View File

@@ -87,6 +87,7 @@ int HwInfoConfigHw<IGFX_SKYLAKE>::configureHardwareCustom(HardwareInfo *hwInfo,
}
if ((1 << hwInfo->pPlatform->usRevId) & 0x0fu) {
pWaTable->waDisablePerCtxtPreemptionGranularityControl = 1;
pWaTable->waModifyVFEStateAfterGPGPUPreemption = 1;
}
if (hwInfo->pPlatform->usDeviceID == ISKL_GT3e_ULT_DEVICE_F0_ID_540 ||

View File

@@ -21,6 +21,7 @@
*/
#include "runtime/command_stream/preemption.h"
#include "runtime/command_stream/preemption.inl"
#include "runtime/memory_manager/graphics_allocation.h"
namespace OCLRT {
@@ -68,4 +69,7 @@ size_t PreemptionHelper::getRequiredCsrSize<GfxFamily>(PreemptionMode preemption
return size;
}
// Explicit instantiations of the preemption workaround helpers whose generic
// definitions come from the included preemption.inl, for this file's GfxFamily.
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
} // namespace OCLRT

View File

@@ -155,6 +155,7 @@ struct WorkaroundTable {
bool waDisablePerCtxtPreemptionGranularityControl = false;
bool waLLCCachingUnsupported = false;
bool waUseVAlign16OnTileXYBpp816 = false;
bool waModifyVFEStateAfterGPGPUPreemption = false;
};
struct HardwareInfo {

View File

@@ -202,6 +202,7 @@ void Wddm::setupWorkaroundTableFromAdapterInfo(WorkaroundTable *table, ADAPTER_I
COPY_WA(waDisablePerCtxtPreemptionGranularityControl, WaDisablePerCtxtPreemptionGranularityControl);
COPY_WA(waLLCCachingUnsupported, WaLLCCachingUnsupported);
COPY_WA(waUseVAlign16OnTileXYBpp816, WaUseVAlign16OnTileXYBpp816);
COPY_WA(waModifyVFEStateAfterGPGPUPreemption, WaModifyVFEStateAfterGPGPUPreemption);
#undef COPY_WA
}

View File

@@ -183,3 +183,44 @@ GEN8TEST_F(Gen8PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKe
EXPECT_EQ(1, mockCsr->flushCalledCount);
EXPECT_EQ(PreemptionMode::Disabled, mockCsr->passedDispatchFlags.preemptionMode);
}
// MidBatch preemption never needs the workaround, so no command-stream space is reported.
GEN8TEST_F(Gen8PreemptionTests, getPreemptionWaCsSizeMidBatch) {
    device->setPreemptionMode(PreemptionMode::MidBatch);

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(0u, waCsSize);
}
// ThreadGroup preemption with the workaround flag cleared reports no extra space.
GEN8TEST_F(Gen8PreemptionTests, getPreemptionWaCsSizeThreadGroupNoWa) {
    device->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = false;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(0u, waCsSize);
}
// ThreadGroup preemption with the workaround flag set reports space for two LRI commands.
GEN8TEST_F(Gen8PreemptionTests, getPreemptionWaCsSizeThreadGroupWa) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    device->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = true;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(2 * sizeof(MI_LOAD_REGISTER_IMM), waCsSize);
}
// MidThread preemption with the workaround flag cleared reports no extra space.
GEN8TEST_F(Gen8PreemptionTests, getPreemptionWaCsSizeMidThreadNoWa) {
    device->setPreemptionMode(PreemptionMode::MidThread);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = false;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(0u, waCsSize);
}
// MidThread preemption with the workaround flag set reports space for two LRI commands.
GEN8TEST_F(Gen8PreemptionTests, getPreemptionWaCsSizeMidThreadWa) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    device->setPreemptionMode(PreemptionMode::MidThread);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = true;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(2 * sizeof(MI_LOAD_REGISTER_IMM), waCsSize);
}

View File

@@ -184,6 +184,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
int ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);
EXPECT_EQ(0, ret);
EXPECT_EQ(1u, outHwInfo.pWaTable->waCompressedResourceRequiresConstVA21);
EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
ReleaseOutHwInfoStructs();
@@ -192,6 +193,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);
EXPECT_EQ(0, ret);
EXPECT_EQ(0u, outHwInfo.pWaTable->waCompressedResourceRequiresConstVA21);
EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
ReleaseOutHwInfoStructs();
@@ -200,6 +202,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);
EXPECT_EQ(0, ret);
EXPECT_EQ(0u, outHwInfo.pWaTable->waCompressedResourceRequiresConstVA21);
EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
}

View File

@@ -111,29 +111,34 @@ GEN9TEST_F(Gen9PreemptionTests, programMidThreadPreemptionLri) {
EXPECT_EQ(minAlignment, gpgpuCsr->getGpgpuCsrBaseAddress());
}
GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramThreadGroup) {
GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramThreadGroupNoWa) {
pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
WhitelistedRegisters regs = {};
regs.csChicken1_0x2580 = true;
pDevice->setForceWhitelistedRegs(true, &regs);
const_cast<WorkaroundTable *>(pDevice->getWaTable())->waModifyVFEStateAfterGPGPUPreemption = false;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.getMemoryManager()->setForce32BitAllocations(false);
csr.overrideMediaVFEStateDirty(false);
auto csrSurface = csr.getPreemptionCsrAllocation();
EXPECT_EQ(nullptr, csrSurface);
HardwareParse hwParser;
HardwareParse hwCsrParser;
HardwareParse hwCmdQParser;
size_t off[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
MockKernelWithInternals mockKernel(*pDevice);
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
hwParser.parseCommands<FamilyType>(csr.commandStream);
hwParser.findHardwareCommands<FamilyType>();
auto offset = csr.commandStream.getUsed();
hwCsrParser.parseCommands<FamilyType>(csr.commandStream);
hwCsrParser.findHardwareCommands<FamilyType>();
hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS());
hwCmdQParser.findHardwareCommands<FamilyType>();
auto offsetCsr = csr.commandStream.getUsed();
auto offsetCmdQ = pCmdQ->getCS().getUsed();
bool foundOne = false;
for (auto it : hwParser.lriList) {
for (auto it : hwCsrParser.lriList) {
auto cmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_IMM *>(it);
if (cmd->getRegisterOffset() == 0x2580u) {
EXPECT_FALSE(foundOne);
@@ -141,18 +146,125 @@ GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithThe
}
}
EXPECT_TRUE(foundOne);
hwCsrParser.cmdList.clear();
hwCsrParser.lriList.clear();
hwParser.cmdList.clear();
hwParser.lriList.clear();
bool foundWaLri = false;
for (auto it : hwCmdQParser.lriList) {
auto cmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_IMM *>(it);
if (cmd->getRegisterOffset() == 0x2600u) {
foundWaLri = true;
}
}
EXPECT_FALSE(foundWaLri);
hwCmdQParser.cmdList.clear();
hwCmdQParser.lriList.clear();
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
hwParser.findHardwareCommands<FamilyType>();
hwCsrParser.parseCommands<FamilyType>(csr.commandStream, offsetCsr);
hwCsrParser.findHardwareCommands<FamilyType>();
hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS(), offsetCmdQ);
hwCmdQParser.findHardwareCommands<FamilyType>();
for (auto it : hwParser.lriList) {
for (auto it : hwCsrParser.lriList) {
auto cmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_IMM *>(it);
EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u);
}
for (auto it : hwCmdQParser.lriList) {
auto cmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_IMM *>(it);
EXPECT_FALSE(cmd->getRegisterOffset() == 0x2600u);
}
}
// With ThreadGroup preemption and the waModifyVFEStateAfterGPGPUPreemption flag set:
//  - the CSR stream programs register 0x2580 exactly once on the first enqueue and
//    not at all on the second one,
//  - the command-queue stream of every enqueue carries exactly two LRIs to register
//    0x2600: one with data 0xFFFFFFFF and one with data 0x0.
GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramThreadGroupWa) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    // Counts LRIs targeting register 0x2600 in an already parsed stream and, among
    // them, those carrying the 0xFFFFFFFF and the 0x0 payload.
    auto countWaLri = [](HardwareParse &parser, int &total, int &begin, int &end) {
        total = 0;
        begin = 0;
        end = 0;
        for (auto entry : parser.lriList) {
            auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(entry);
            if (lri->getRegisterOffset() == 0x2600u) {
                total++;
                if (lri->getDataDword() == 0xFFFFFFFF) {
                    begin++;
                }
                if (lri->getDataDword() == 0x0) {
                    end++;
                }
            }
        }
    };

    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);
    auto waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = true;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    csr.overrideMediaVFEStateDirty(false);
    auto csrSurface = csr.getPreemptionCsrAllocation();
    EXPECT_EQ(nullptr, csrSurface);

    HardwareParse hwCsrParser;
    HardwareParse hwCmdQParser;
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    MockKernelWithInternals mockKernel(*pDevice);

    // First enqueue: parse both streams from their beginning.
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwCsrParser.parseCommands<FamilyType>(csr.commandStream);
    hwCsrParser.findHardwareCommands<FamilyType>();
    hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS());
    hwCmdQParser.findHardwareCommands<FamilyType>();
    // Remember where the first enqueue ended so the second pass parses only new commands.
    auto offsetCsr = csr.commandStream.getUsed();
    auto offsetCmdQ = pCmdQ->getCS().getUsed();

    bool preemptionLriSeen = false;
    for (auto entry : hwCsrParser.lriList) {
        auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(entry);
        if (lri->getRegisterOffset() == 0x2580u) {
            EXPECT_FALSE(preemptionLriSeen);
            preemptionLriSeen = true;
        }
    }
    EXPECT_TRUE(preemptionLriSeen);
    hwCsrParser.cmdList.clear();
    hwCsrParser.lriList.clear();

    int waLriTotal = 0;
    int waLriBegin = 0;
    int waLriEnd = 0;
    countWaLri(hwCmdQParser, waLriTotal, waLriBegin, waLriEnd);
    EXPECT_EQ(2, waLriTotal);
    EXPECT_EQ(1, waLriBegin);
    EXPECT_EQ(1, waLriEnd);
    hwCmdQParser.cmdList.clear();
    hwCmdQParser.lriList.clear();

    // Second enqueue: parse only what was appended after the recorded offsets.
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwCsrParser.parseCommands<FamilyType>(csr.commandStream, offsetCsr);
    hwCsrParser.findHardwareCommands<FamilyType>();
    hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS(), offsetCmdQ);
    hwCmdQParser.findHardwareCommands<FamilyType>();

    for (auto entry : hwCsrParser.lriList) {
        auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(entry);
        EXPECT_FALSE(lri->getRegisterOffset() == 0x2580u);
    }

    countWaLri(hwCmdQParser, waLriTotal, waLriBegin, waLriEnd);
    EXPECT_EQ(2, waLriTotal);
    EXPECT_EQ(1, waLriBegin);
    EXPECT_EQ(1, waLriEnd);
}
GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledThenPassDevicePreemptionModeThreadGroup) {
@@ -198,32 +310,37 @@ GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnq
EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode);
}
GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThread) {
GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThreadNoWa) {
typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
typedef typename FamilyType::GPGPU_CSR_BASE_ADDRESS GPGPU_CSR_BASE_ADDRESS;
WhitelistedRegisters regs = {};
regs.csChicken1_0x2580 = true;
pDevice->setForceWhitelistedRegs(true, &regs);
const_cast<WorkaroundTable *>(pDevice->getWaTable())->waModifyVFEStateAfterGPGPUPreemption = false;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.getMemoryManager()->setForce32BitAllocations(false);
csr.overrideMediaVFEStateDirty(false);
auto csrSurface = csr.getPreemptionCsrAllocation();
ASSERT_NE(nullptr, csrSurface);
HardwareParse hwParser;
HardwareParse hwCsrParser;
HardwareParse hwCmdQParser;
size_t off[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
MockKernelWithInternals mockKernel(*pDevice);
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
hwParser.parseCommands<FamilyType>(csr.commandStream);
hwParser.findHardwareCommands<FamilyType>();
auto offset = csr.commandStream.getUsed();
hwCsrParser.parseCommands<FamilyType>(csr.commandStream);
hwCsrParser.findHardwareCommands<FamilyType>();
hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS());
hwCmdQParser.findHardwareCommands<FamilyType>();
auto offsetCsr = csr.commandStream.getUsed();
auto offsetCmdQ = pCmdQ->getCS().getUsed();
bool foundOneLri = false;
for (auto it : hwParser.lriList) {
for (auto it : hwCsrParser.lriList) {
auto cmdLri = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
if (cmdLri->getRegisterOffset() == 0x2580u) {
EXPECT_FALSE(foundOneLri);
@@ -231,27 +348,150 @@ GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSa
}
}
EXPECT_TRUE(foundOneLri);
hwParser.findCsrBaseAddress<FamilyType>();
ASSERT_NE(nullptr, hwParser.cmdGpgpuCsrBaseAddress);
auto cmdCsr = genCmdCast<GPGPU_CSR_BASE_ADDRESS *>(hwParser.cmdGpgpuCsrBaseAddress);
bool foundWaLri = false;
for (auto it : hwCmdQParser.lriList) {
auto cmdLri = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
if (cmdLri->getRegisterOffset() == 0x2600u) {
foundWaLri = true;
}
}
EXPECT_FALSE(foundWaLri);
hwCsrParser.findCsrBaseAddress<FamilyType>();
ASSERT_NE(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress);
auto cmdCsr = genCmdCast<GPGPU_CSR_BASE_ADDRESS *>(hwCsrParser.cmdGpgpuCsrBaseAddress);
ASSERT_NE(nullptr, cmdCsr);
EXPECT_EQ(csrSurface->getGpuAddressToPatch(), cmdCsr->getGpgpuCsrBaseAddress());
hwParser.cmdList.clear();
hwParser.lriList.clear();
hwParser.cmdGpgpuCsrBaseAddress = nullptr;
hwCsrParser.cmdList.clear();
hwCsrParser.lriList.clear();
hwCsrParser.cmdGpgpuCsrBaseAddress = nullptr;
hwCmdQParser.cmdList.clear();
hwCmdQParser.lriList.clear();
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
hwParser.findHardwareCommands<FamilyType>();
hwCsrParser.parseCommands<FamilyType>(csr.commandStream, offsetCsr);
hwCsrParser.findHardwareCommands<FamilyType>();
// offsetCmdQ was taken from pCmdQ->getCS(), so the second pass must parse that same
// command-queue stream; parsing the CSR stream here would check the wrong commands.
hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS(), offsetCmdQ);
hwCmdQParser.findHardwareCommands<FamilyType>();
for (auto it : hwParser.lriList) {
for (auto it : hwCsrParser.lriList) {
auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u);
}
hwParser.findCsrBaseAddress<FamilyType>();
EXPECT_EQ(nullptr, hwParser.cmdGpgpuCsrBaseAddress);
hwCsrParser.findCsrBaseAddress<FamilyType>();
EXPECT_EQ(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress);
for (auto it : hwCmdQParser.lriList) {
auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
EXPECT_FALSE(cmd->getRegisterOffset() == 0x2600u);
}
}
// With the waModifyVFEStateAfterGPGPUPreemption flag set on the mid-thread fixture:
//  - the CSR stream programs register 0x2580 and GPGPU_CSR_BASE_ADDRESS on the first
//    enqueue only,
//  - the command-queue stream of every enqueue carries exactly two LRIs to register
//    0x2600: one with data 0xFFFFFFFF and one with data 0x0.
GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThreadWa) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS;

    // Counts LRIs targeting register 0x2600 in an already parsed stream and, among
    // them, those carrying the 0xFFFFFFFF and the 0x0 payload.
    auto countWaLri = [](HardwareParse &parser, int &total, int &begin, int &end) {
        total = 0;
        begin = 0;
        end = 0;
        for (auto entry : parser.lriList) {
            auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(entry);
            if (lri->getRegisterOffset() == 0x2600u) {
                total++;
                if (lri->getDataDword() == 0xFFFFFFFF) {
                    begin++;
                }
                if (lri->getDataDword() == 0x0) {
                    end++;
                }
            }
        }
    };

    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);
    auto waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = true;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    csr.overrideMediaVFEStateDirty(false);
    auto csrSurface = csr.getPreemptionCsrAllocation();
    ASSERT_NE(nullptr, csrSurface);

    HardwareParse hwCsrParser;
    HardwareParse hwCmdQParser;
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    MockKernelWithInternals mockKernel(*pDevice);

    // First enqueue: parse both streams from their beginning.
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwCsrParser.parseCommands<FamilyType>(csr.commandStream);
    hwCsrParser.findHardwareCommands<FamilyType>();
    hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS());
    hwCmdQParser.findHardwareCommands<FamilyType>();
    // Remember where the first enqueue ended so the second pass parses only new commands.
    auto offsetCsr = csr.commandStream.getUsed();
    auto offsetCmdQ = pCmdQ->getCS().getUsed();

    bool preemptionLriSeen = false;
    for (auto entry : hwCsrParser.lriList) {
        auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(entry);
        if (lri->getRegisterOffset() == 0x2580u) {
            EXPECT_FALSE(preemptionLriSeen);
            preemptionLriSeen = true;
        }
    }
    EXPECT_TRUE(preemptionLriSeen);

    int waLriTotal = 0;
    int waLriBegin = 0;
    int waLriEnd = 0;
    countWaLri(hwCmdQParser, waLriTotal, waLriBegin, waLriEnd);
    EXPECT_EQ(2, waLriTotal);
    EXPECT_EQ(1, waLriBegin);
    EXPECT_EQ(1, waLriEnd);

    hwCsrParser.findCsrBaseAddress<FamilyType>();
    ASSERT_NE(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress);
    auto csrBaseCmd = genCmdCast<GPGPU_CSR_BASE_ADDRESS *>(hwCsrParser.cmdGpgpuCsrBaseAddress);
    ASSERT_NE(nullptr, csrBaseCmd);
    EXPECT_EQ(csrSurface->getGpuAddressToPatch(), csrBaseCmd->getGpgpuCsrBaseAddress());

    hwCsrParser.cmdList.clear();
    hwCsrParser.lriList.clear();
    hwCsrParser.cmdGpgpuCsrBaseAddress = nullptr;
    hwCmdQParser.cmdList.clear();
    hwCmdQParser.lriList.clear();

    // Second enqueue: parse only what was appended after the recorded offsets.
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwCsrParser.parseCommands<FamilyType>(csr.commandStream, offsetCsr);
    hwCsrParser.findHardwareCommands<FamilyType>();
    hwCmdQParser.parseCommands<FamilyType>(pCmdQ->getCS(), offsetCmdQ);
    hwCmdQParser.findHardwareCommands<FamilyType>();

    for (auto entry : hwCsrParser.lriList) {
        auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(entry);
        EXPECT_FALSE(lri->getRegisterOffset() == 0x2580u);
    }

    hwCsrParser.findCsrBaseAddress<FamilyType>();
    EXPECT_EQ(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress);

    countWaLri(hwCmdQParser, waLriTotal, waLriBegin, waLriEnd);
    EXPECT_EQ(2, waLriTotal);
    EXPECT_EQ(1, waLriBegin);
    EXPECT_EQ(1, waLriEnd);
}
GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKernelCalledThenPassDisabledPreemptionMode) {
@@ -271,3 +511,44 @@ GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKe
EXPECT_EQ(1, mockCsr->flushCalledCount);
EXPECT_EQ(PreemptionMode::Disabled, mockCsr->passedDispatchFlags.preemptionMode);
}
// MidBatch preemption never needs the workaround, so no command-stream space is reported.
GEN9TEST_F(Gen9PreemptionTests, getPreemptionWaCsSizeMidBatch) {
    device->setPreemptionMode(PreemptionMode::MidBatch);

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(0u, waCsSize);
}
// ThreadGroup preemption with the workaround flag cleared reports no extra space.
GEN9TEST_F(Gen9PreemptionTests, getPreemptionWaCsSizeThreadGroupNoWa) {
    device->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = false;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(0u, waCsSize);
}
// ThreadGroup preemption with the workaround flag set reports space for two LRI commands.
GEN9TEST_F(Gen9PreemptionTests, getPreemptionWaCsSizeThreadGroupWa) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    device->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = true;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(2 * sizeof(MI_LOAD_REGISTER_IMM), waCsSize);
}
// MidThread preemption with the workaround flag cleared reports no extra space.
GEN9TEST_F(Gen9PreemptionTests, getPreemptionWaCsSizeMidThreadNoWa) {
    device->setPreemptionMode(PreemptionMode::MidThread);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = false;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(0u, waCsSize);
}
// MidThread preemption with the workaround flag set reports space for two LRI commands.
GEN9TEST_F(Gen9PreemptionTests, getPreemptionWaCsSizeMidThreadWa) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    device->setPreemptionMode(PreemptionMode::MidThread);
    auto waTable = const_cast<WorkaroundTable *>(device->getWaTable());
    waTable->waModifyVFEStateAfterGPGPUPreemption = true;

    const size_t waCsSize = PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device);

    EXPECT_EQ(2 * sizeof(MI_LOAD_REGISTER_IMM), waCsSize);
}