Add patchToken OffsetToSkipSetFFIDGP in device execution

Change-Id: I0634836b787fa371f0b64779732941396a6ba804
Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
Related-To: NEO-3892
This commit is contained in:
Pawel Wilma 2019-11-13 15:37:52 +01:00 committed by sys_ocldev
parent 5ecb9905c9
commit ae0cefc834
21 changed files with 117 additions and 48 deletions

View File

@ -118,5 +118,6 @@ struct WorkaroundTableBase {
bool waUntypedBufferCompression = false;
bool waAuxTable16KGranular = false;
bool waDisableFusedThreadScheduling = false;
bool waUseOffsetToSkipSetFFIDGP = false;
};
} // namespace NEO

View File

@ -19,6 +19,7 @@
#include "runtime/gtpin/gtpin_notify.h"
#include "runtime/helpers/array_count.h"
#include "runtime/helpers/dispatch_info_builder.h"
#include "runtime/helpers/engine_node_helper.h"
#include "runtime/helpers/enqueue_properties.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include "runtime/helpers/options.h"
@ -530,6 +531,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
bool &blocking) {
auto parentKernel = multiDispatchInfo.peekParentKernel();
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
bool isCcsUsed = isCcs(gpgpuEngine->osContext->getEngineType());
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
@ -538,7 +540,8 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
(uint32_t)multiDispatchInfo.size(),
getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(),
taskCount,
hwTimeStamps);
hwTimeStamps,
isCcsUsed);
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
@ -560,7 +563,8 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
preemptionMode,
scheduler,
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
isCcsUsed);
scheduler.makeResident(getGpgpuCommandStreamReceiver());

View File

@ -142,7 +142,8 @@ class GpgpuWalkerHelper {
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh);
IndirectHeap *dsh,
bool isCcsUsed);
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);

View File

@ -8,7 +8,6 @@
#pragma once
#include "core/helpers/simd_helper.h"
#include "runtime/command_queue/gpgpu_walker_base.inl"
#include "runtime/helpers/engine_node_helper.h"
namespace NEO {
@ -60,7 +59,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh) {
IndirectHeap *dsh,
bool isCcsUsed) {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
@ -125,7 +125,6 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
// Program the walker. Invokes execution so all state should already be programmed
auto pGpGpuWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
auto isCcsUsed = isCcs(devQueueHw.getDevice().getDefaultEngine().osContext->getEngineType());
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,

View File

@ -7,6 +7,7 @@
#pragma once
#include "runtime/command_queue/hardware_interface_base.inl"
#include "runtime/helpers/engine_node_helper.h"
#include "runtime/os_interface/os_context.h"
namespace NEO {

View File

@ -147,12 +147,12 @@ void DeviceQueue::initDeviceQueue() {
}
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel,
uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) {
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp, bool isCcsUsed) {
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount, isCcsUsed);
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount);
}
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) {
return;
}
@ -164,7 +164,7 @@ void DeviceQueue::resetDeviceQueue() {
return;
}
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
return;
}

View File

@ -68,9 +68,9 @@ class DeviceQueue : public BaseObject<_device_queue> {
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet);
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp);
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp, bool isCcsUsed);
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed);
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount);
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
@ -80,7 +80,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
}
virtual void resetDeviceQueue();
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh);
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
void acquireEMCriticalSection() {

View File

@ -54,11 +54,11 @@ class DeviceQueueHw : public DeviceQueue {
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override;
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override;
void resetDeviceQueue() override;
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override;
uint32_t getSchedulerReturnInstance() {
return igilQueue->m_controls.m_SchedulerEarlyReturn;
@ -86,6 +86,7 @@ class DeviceQueueHw : public DeviceQueue {
static size_t getMediaStateClearCmdsSize();
static size_t getExecutionModelCleanupSectionSize();
static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed);
LinearStream slbCS;
IGIL_CommandQueue *igilQueue = nullptr;

View File

@ -179,13 +179,14 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
}
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(commandStream,
*this,
preemptionMode,
scheduler,
ssh,
dsh);
dsh,
isCcsUsed);
return;
}
@ -235,4 +236,17 @@ size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {}
/// Computes the kernel start pointer programmed for a block kernel.
///
/// @param device    Device whose hardware info supplies the workaround table.
/// @param blockInfo Kernel info of the block kernel; its graphics allocation
///                  provides the base GPU address.
/// @param isCcsUsed Whether the caller reports that the CCS engine is in use.
/// @return The allocation's GPU address to patch, increased by
///         OffsetToSkipSetFFIDGP from the thread payload patch token when
///         both isCcsUsed and waUseOffsetToSkipSetFFIDGP are set. Returns 0
///         when the block has no graphics allocation.
template <typename GfxFamily>
uint64_t DeviceQueueHw<GfxFamily>::getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed) {
    auto allocation = blockInfo->getGraphicsAllocation();
    DEBUG_BREAK_IF(!allocation);

    // Without an allocation there is no address to report.
    if (!allocation) {
        return 0llu;
    }

    uint64_t startPointer = allocation->getGpuAddressToPatch();

    // The extra offset applies only when the workaround is enabled and the caller is on CCS.
    const bool applySkipOffset = isCcsUsed && device.getHardwareInfo().workaroundTable.waUseOffsetToSkipSetFFIDGP;
    if (applySkipOffset) {
        startPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
    }

    return startPointer;
}
} // namespace NEO

View File

@ -142,7 +142,7 @@ size_t DeviceQueueHw<GfxFamily>::getMediaStateClearCmdsSize() {
}
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) {
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
void *pDSH = dynamicStateHeap.getCpuBase();
@ -174,10 +174,7 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
auto blockAllocation = pBlockInfo->getGraphicsAllocation();
DEBUG_BREAK_IF(!blockAllocation);
auto gpuAddress = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
auto blockKernelStartPointer = getBlockKernelStartPointer(getDevice(), pBlockInfo, isCcsUsed);
auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count;
maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount);
@ -196,8 +193,8 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast<const INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset));
pIDDestination[blockIndex + i] = *pBlockID;
pIDDestination[blockIndex + i].setKernelStartPointerHigh(gpuAddress >> 32);
pIDDestination[blockIndex + i].setKernelStartPointer((uint32_t)gpuAddress);
pIDDestination[blockIndex + i].setKernelStartPointerHigh(blockKernelStartPointer >> 32);
pIDDestination[blockIndex + i].setKernelStartPointer(static_cast<uint32_t>(blockKernelStartPointer));
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
HardwareCommandsHelper<GfxFamily>::programBarrierEnable(&pIDDestination[blockIndex + i],
pBlockInfo->patchInfo.executionEnvironment->HasBarriers,

View File

@ -100,6 +100,9 @@ void TGLLP::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) {
workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true;
workaroundTable->waEnablePreemptionGranularityControlByUMD = true;
workaroundTable->waUntypedBufferCompression = true;
if (hwInfo->platform.usRevId == REVISION_A0) {
workaroundTable->waUseOffsetToSkipSetFFIDGP = true;
}
};
const HardwareInfo TGLLP_1x6x16::hwInfo = {

View File

@ -103,9 +103,7 @@ void HardwareCommandsHelper<GfxFamily>::setKernelStartOffset(
}
kernelStartOffset += kernel.getStartOffset();
if ((kernel.getDevice().getHardwareInfo().platform.eProductFamily == IGFX_TIGERLAKE_LP) &&
(kernel.getDevice().getHardwareInfo().platform.usRevId == REVISION_A0) &&
isCssUsed) {
if (isCssUsed && kernel.getDevice().getHardwareInfo().workaroundTable.waUseOffsetToSkipSetFFIDGP) {
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
}

View File

@ -18,6 +18,7 @@
#include "runtime/device_queue/device_queue.h"
#include "runtime/gtpin/gtpin_notify.h"
#include "runtime/helpers/csr_deps.h"
#include "runtime/helpers/engine_node_helper.h"
#include "runtime/helpers/enqueue_properties.h"
#include "runtime/helpers/task_information.inl"
#include "runtime/mem_obj/mem_obj.h"
@ -126,6 +127,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
auto devQueue = commandQueue.getContext().getDefaultDeviceQueue();
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
bool isCcsUsed = isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
if (executionModelKernel) {
while (!devQueue->isEMCriticalSectionFree())
@ -158,7 +160,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
if (executionModelKernel) {
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp);
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
BuiltIns &builtIns = *this->kernel->getDevice().getExecutionEnvironment()->getBuiltIns();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
@ -178,7 +180,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
scheduler,
preemptionMode,
ssh,
dsh);
dsh,
isCcsUsed);
scheduler.makeResident(commandStreamReceiver);

View File

@ -541,7 +541,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectState) {
auto usedBeforeSSH = ssh->getUsed();
auto usedBeforeDSH = dsh->getUsed();
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
auto usedAfterSSH = ssh->getUsed();
auto usedAfterDSH = dsh->getUsed();
@ -571,7 +571,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectStateSetsCorre
uint32_t parentCount = 4;
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
@ -601,7 +601,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectStateSetsCorre
uint32_t parentCount = 1;
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
@ -639,7 +639,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall
const_cast<const Kernel &>(*pKernel));
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
auto iddStartIndex = parentCount;
@ -792,3 +792,30 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, getProfilingEndCmdsSi
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getProfilingEndCmdsSize());
}
// Verifies that getBlockKernelStartPointer adds OffsetToSkipSetFFIDGP to the block
// allocation address only when the workaround flag is set AND isCcsUsed is true.
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOffsetIsAddedToBlockKernelStartPointer) {
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    auto parentKernel = std::unique_ptr<MockParentKernel>(MockParentKernel::create(*pContext));

    // Give the first block kernel a real allocation so it has a GPU address.
    auto blockInfo = const_cast<KernelInfo *>(parentKernel->mockProgram->getBlockKernelInfo(0));
    blockInfo->createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager());
    ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation());

    auto &threadPayload = *const_cast<SPatchThreadPayload *>(blockInfo->patchInfo.threadPayload);
    threadPayload.OffsetToSkipSetFFIDGP = 0x1234;
    auto &workaroundTable = const_cast<HardwareInfo &>(device->getHardwareInfo()).workaroundTable;

    // Workaround enabled: the offset is added for CCS only.
    workaroundTable.waUseOffsetToSkipSetFFIDGP = true;

    uint64_t expectedPointer = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + threadPayload.OffsetToSkipSetFFIDGP;
    uint64_t actualPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, true);
    EXPECT_EQ(expectedPointer, actualPointer);

    expectedPointer = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
    actualPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, false);
    EXPECT_EQ(expectedPointer, actualPointer);

    // Workaround disabled: the plain allocation address is returned regardless of engine.
    workaroundTable.waUseOffsetToSkipSetFFIDGP = false;

    expectedPointer = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
    actualPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, true);
    EXPECT_EQ(expectedPointer, actualPointer);

    actualPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, false);
    EXPECT_EQ(expectedPointer, actualPointer);
}

View File

@ -27,7 +27,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, setupExecutionModelDispatchDo
size_t size = 20;
IndirectHeap ssh(buffer, size);
IndirectHeap dsh(buffer, size);
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0);
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0, false);
EXPECT_EQ(0u, ssh.getUsed());
@ -325,6 +325,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, dispatchScheduler) {
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
LinearStream cmdStream;
devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr);
devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false);
delete kernel;
}

View File

@ -67,7 +67,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) {
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
@ -188,7 +189,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoe
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
@ -224,7 +226,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, dispatchSchedulerWi
device->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);

View File

@ -52,18 +52,18 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
}
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override {
indirectStateSetup = true;
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount, isCcsUsed);
}
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override {
cleanupSectionAdded = true;
timestampAddedInCleanupSection = hwTimeStamp ? hwTimeStamp->tagForCpuAccess : nullptr;
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount);
}
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override {
schedulerDispatched = true;
return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh);
return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh, isCcsUsed);
}
uint32_t criticalSectioncheckCounter = 0;

View File

@ -14,16 +14,15 @@ using namespace NEO;
using HardwareCommandsGen12LpTests = ::testing::Test;
TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenTgllpA0WhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
const uint64_t defaultKernelStartOffset = 0;
const uint64_t additionalOffsetDueToFfid = 0x1234;
SPatchThreadPayload threadPayload{};
threadPayload.OffsetToSkipSetFFIDGP = additionalOffsetDueToFfid;
auto hwInfo = *platformDevices[0];
__REVID revIds[] = {REVISION_A0, REVISION_A1};
for (auto revId : revIds) {
hwInfo.platform.usRevId = revId;
for (auto workaround : ::testing::Bool()) {
hwInfo.workaroundTable.waUseOffsetToSkipSetFFIDGP = workaround;
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
MockKernelWithInternals mockKernelWithInternals{*device};
mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload;
@ -33,7 +32,7 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenTgllpA0WhenSettingKernelStartOffs
HardwareCommandsHelper<FamilyType>::setKernelStartOffset(kernelStartOffset, false, mockKernelWithInternals.kernelInfo, false,
false, *mockKernelWithInternals.mockKernel, isCcsUsed);
if ((revId == REVISION_A0) && isCcsUsed) {
if (workaround && isCcsUsed) {
EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
} else {
EXPECT_EQ(defaultKernelStartOffset, kernelStartOffset);

View File

@ -98,3 +98,19 @@ TGLLPTEST_F(TgllpHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisa
EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption);
EXPECT_FALSE(hwInfo.capabilityTable.supportsVme);
}
// With the revision id set to REVISION_A0, the TGLLP table setup must enable
// waUseOffsetToSkipSetFFIDGP.
TGLLPTEST_F(TgllpHwInfo, givenA0SteppingWhenWaTableIsInitializedThenWaUseOffsetToSkipSetFFIDGPIsSet) {
    HardwareInfo hardwareInfo;
    auto &platform = hardwareInfo.platform;
    platform.usRevId = REVISION_A0;

    TGLLP::setupFeatureAndWorkaroundTable(&hardwareInfo);

    const auto &workaroundTable = hardwareInfo.workaroundTable;
    EXPECT_TRUE(workaroundTable.waUseOffsetToSkipSetFFIDGP);
}
// With the revision id set to REVISION_A1, the TGLLP table setup must leave
// waUseOffsetToSkipSetFFIDGP disabled.
TGLLPTEST_F(TgllpHwInfo, givenA1SteppingWhenWaTableIsInitializedThenWaUseOffsetToSkipSetFFIDGPIsNotSet) {
    HardwareInfo hardwareInfo;
    auto &platform = hardwareInfo.platform;
    platform.usRevId = REVISION_A1;

    TGLLP::setupFeatureAndWorkaroundTable(&hardwareInfo);

    const auto &workaroundTable = hardwareInfo.workaroundTable;
    EXPECT_FALSE(workaroundTable.waUseOffsetToSkipSetFFIDGP);
}

View File

@ -45,7 +45,8 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);

View File

@ -33,6 +33,7 @@ class MockDeviceQueueHw : public DeviceQueueHw<GfxFamily> {
using BaseClass::addPipeControlCmdWa;
using BaseClass::addProfilingEndCmds;
using BaseClass::buildSlbDummyCommands;
using BaseClass::getBlockKernelStartPointer;
using BaseClass::getCSPrefetchSize;
using BaseClass::getExecutionModelCleanupSectionSize;
using BaseClass::getMediaStateClearCmdsSize;