Add patchToken OffsetToSkipSetFFIDGP in device execution
Change-Id: I0634836b787fa371f0b64779732941396a6ba804
Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
Related-To: NEO-3892
This commit is contained in:
parent
5ecb9905c9
commit
ae0cefc834
|
@ -118,5 +118,6 @@ struct WorkaroundTableBase {
|
|||
bool waUntypedBufferCompression = false;
|
||||
bool waAuxTable16KGranular = false;
|
||||
bool waDisableFusedThreadScheduling = false;
|
||||
bool waUseOffsetToSkipSetFFIDGP = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "runtime/gtpin/gtpin_notify.h"
|
||||
#include "runtime/helpers/array_count.h"
|
||||
#include "runtime/helpers/dispatch_info_builder.h"
|
||||
#include "runtime/helpers/engine_node_helper.h"
|
||||
#include "runtime/helpers/enqueue_properties.h"
|
||||
#include "runtime/helpers/hardware_commands_helper.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
|
@ -530,6 +531,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
|
|||
bool &blocking) {
|
||||
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
|
||||
bool isCcsUsed = isCcs(gpgpuEngine->osContext->getEngineType());
|
||||
|
||||
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
|
||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||
|
@ -538,7 +540,8 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
|
|||
(uint32_t)multiDispatchInfo.size(),
|
||||
getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(),
|
||||
taskCount,
|
||||
hwTimeStamps);
|
||||
hwTimeStamps,
|
||||
isCcsUsed);
|
||||
|
||||
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
|
||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
||||
|
@ -560,7 +563,8 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
|
|||
preemptionMode,
|
||||
scheduler,
|
||||
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
isCcsUsed);
|
||||
|
||||
scheduler.makeResident(getGpgpuCommandStreamReceiver());
|
||||
|
||||
|
|
|
@ -142,7 +142,8 @@ class GpgpuWalkerHelper {
|
|||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh);
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed);
|
||||
|
||||
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#pragma once
|
||||
#include "core/helpers/simd_helper.h"
|
||||
#include "runtime/command_queue/gpgpu_walker_base.inl"
|
||||
#include "runtime/helpers/engine_node_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -60,7 +59,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh) {
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed) {
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
|
@ -125,7 +125,6 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|||
// Program the walker. Invokes execution so all state should already be programmed
|
||||
auto pGpGpuWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
|
||||
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
auto isCcsUsed = isCcs(devQueueHw.getDevice().getDefaultEngine().osContext->getEngineType());
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/hardware_interface_base.inl"
|
||||
#include "runtime/helpers/engine_node_helper.h"
|
||||
#include "runtime/os_interface/os_context.h"
|
||||
|
||||
namespace NEO {
|
||||
|
|
|
@ -147,12 +147,12 @@ void DeviceQueue::initDeviceQueue() {
|
|||
}
|
||||
|
||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel,
|
||||
uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) {
|
||||
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
|
||||
uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp, bool isCcsUsed) {
|
||||
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount, isCcsUsed);
|
||||
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount);
|
||||
}
|
||||
|
||||
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||
void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -164,7 +164,7 @@ void DeviceQueue::resetDeviceQueue() {
|
|||
return;
|
||||
}
|
||||
|
||||
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -68,9 +68,9 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
|||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp);
|
||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp, bool isCcsUsed);
|
||||
|
||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed);
|
||||
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount);
|
||||
|
||||
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
||||
|
@ -80,7 +80,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
|||
}
|
||||
|
||||
virtual void resetDeviceQueue();
|
||||
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh);
|
||||
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
|
||||
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
||||
|
||||
void acquireEMCriticalSection() {
|
||||
|
|
|
@ -54,11 +54,11 @@ class DeviceQueueHw : public DeviceQueue {
|
|||
|
||||
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override;
|
||||
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override;
|
||||
void resetDeviceQueue() override;
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override;
|
||||
|
||||
uint32_t getSchedulerReturnInstance() {
|
||||
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
||||
|
@ -86,6 +86,7 @@ class DeviceQueueHw : public DeviceQueue {
|
|||
static size_t getMediaStateClearCmdsSize();
|
||||
|
||||
static size_t getExecutionModelCleanupSectionSize();
|
||||
static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed);
|
||||
|
||||
LinearStream slbCS;
|
||||
IGIL_CommandQueue *igilQueue = nullptr;
|
||||
|
|
|
@ -179,13 +179,14 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(commandStream,
|
||||
*this,
|
||||
preemptionMode,
|
||||
scheduler,
|
||||
ssh,
|
||||
dsh);
|
||||
dsh,
|
||||
isCcsUsed);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -235,4 +236,17 @@ size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
|
|||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t DeviceQueueHw<GfxFamily>::getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed) {
|
||||
auto blockAllocation = blockInfo->getGraphicsAllocation();
|
||||
DEBUG_BREAK_IF(!blockAllocation);
|
||||
|
||||
auto blockKernelStartPointer = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
|
||||
|
||||
if (blockAllocation && isCcsUsed && device.getHardwareInfo().workaroundTable.waUseOffsetToSkipSetFFIDGP) {
|
||||
blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
return blockKernelStartPointer;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -142,7 +142,7 @@ size_t DeviceQueueHw<GfxFamily>::getMediaStateClearCmdsSize() {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
||||
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) {
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
void *pDSH = dynamicStateHeap.getCpuBase();
|
||||
|
||||
|
@ -174,10 +174,7 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
|
|||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
auto blockAllocation = pBlockInfo->getGraphicsAllocation();
|
||||
DEBUG_BREAK_IF(!blockAllocation);
|
||||
|
||||
auto gpuAddress = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
|
||||
auto blockKernelStartPointer = getBlockKernelStartPointer(getDevice(), pBlockInfo, isCcsUsed);
|
||||
|
||||
auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count;
|
||||
maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount);
|
||||
|
@ -196,8 +193,8 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
|
|||
const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast<const INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset));
|
||||
|
||||
pIDDestination[blockIndex + i] = *pBlockID;
|
||||
pIDDestination[blockIndex + i].setKernelStartPointerHigh(gpuAddress >> 32);
|
||||
pIDDestination[blockIndex + i].setKernelStartPointer((uint32_t)gpuAddress);
|
||||
pIDDestination[blockIndex + i].setKernelStartPointerHigh(blockKernelStartPointer >> 32);
|
||||
pIDDestination[blockIndex + i].setKernelStartPointer(static_cast<uint32_t>(blockKernelStartPointer));
|
||||
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
|
||||
HardwareCommandsHelper<GfxFamily>::programBarrierEnable(&pIDDestination[blockIndex + i],
|
||||
pBlockInfo->patchInfo.executionEnvironment->HasBarriers,
|
||||
|
|
|
@ -100,6 +100,9 @@ void TGLLP::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) {
|
|||
workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true;
|
||||
workaroundTable->waEnablePreemptionGranularityControlByUMD = true;
|
||||
workaroundTable->waUntypedBufferCompression = true;
|
||||
if (hwInfo->platform.usRevId == REVISION_A0) {
|
||||
workaroundTable->waUseOffsetToSkipSetFFIDGP = true;
|
||||
}
|
||||
};
|
||||
|
||||
const HardwareInfo TGLLP_1x6x16::hwInfo = {
|
||||
|
|
|
@ -103,9 +103,7 @@ void HardwareCommandsHelper<GfxFamily>::setKernelStartOffset(
|
|||
}
|
||||
kernelStartOffset += kernel.getStartOffset();
|
||||
|
||||
if ((kernel.getDevice().getHardwareInfo().platform.eProductFamily == IGFX_TIGERLAKE_LP) &&
|
||||
(kernel.getDevice().getHardwareInfo().platform.usRevId == REVISION_A0) &&
|
||||
isCssUsed) {
|
||||
if (isCssUsed && kernel.getDevice().getHardwareInfo().workaroundTable.waUseOffsetToSkipSetFFIDGP) {
|
||||
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "runtime/device_queue/device_queue.h"
|
||||
#include "runtime/gtpin/gtpin_notify.h"
|
||||
#include "runtime/helpers/csr_deps.h"
|
||||
#include "runtime/helpers/engine_node_helper.h"
|
||||
#include "runtime/helpers/enqueue_properties.h"
|
||||
#include "runtime/helpers/task_information.inl"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
|
@ -126,6 +127,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||
auto devQueue = commandQueue.getContext().getDefaultDeviceQueue();
|
||||
|
||||
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
|
||||
bool isCcsUsed = isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
|
||||
if (executionModelKernel) {
|
||||
while (!devQueue->isEMCriticalSectionFree())
|
||||
|
@ -158,7 +160,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||
if (executionModelKernel) {
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
|
||||
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp);
|
||||
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
|
||||
|
||||
BuiltIns &builtIns = *this->kernel->getDevice().getExecutionEnvironment()->getBuiltIns();
|
||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
|
||||
|
@ -178,7 +180,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||
scheduler,
|
||||
preemptionMode,
|
||||
ssh,
|
||||
dsh);
|
||||
dsh,
|
||||
isCcsUsed);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
|
|
|
@ -541,7 +541,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectState) {
|
|||
auto usedBeforeSSH = ssh->getUsed();
|
||||
auto usedBeforeDSH = dsh->getUsed();
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
|
||||
auto usedAfterSSH = ssh->getUsed();
|
||||
auto usedAfterDSH = dsh->getUsed();
|
||||
|
||||
|
@ -571,7 +571,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectStateSetsCorre
|
|||
|
||||
uint32_t parentCount = 4;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
||||
|
@ -601,7 +601,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, setupIndirectStateSetsCorre
|
|||
|
||||
uint32_t parentCount = 1;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
|
@ -639,7 +639,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall
|
|||
const_cast<const Kernel &>(*pKernel));
|
||||
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount);
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
|
||||
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
|
||||
auto iddStartIndex = parentCount;
|
||||
|
@ -792,3 +792,30 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, getProfilingEndCmdsSi
|
|||
|
||||
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getProfilingEndCmdsSize());
|
||||
}
|
||||
|
||||
// Verifies that getBlockKernelStartPointer adds OffsetToSkipSetFFIDGP to the
// block kernel's GPU address only when the isCcsUsed argument is true AND the
// waUseOffsetToSkipSetFFIDGP workaround is enabled; every other combination
// must yield the plain GPU address.
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOffsetIsAddedToBlockKernelStartPointer) {
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pContext));

    // Give the first block kernel a real allocation and a recognizable skip offset.
    KernelInfo *blockInfo = const_cast<KernelInfo *>(mockParentKernel->mockProgram->getBlockKernelInfo(0));
    blockInfo->createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager());
    ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation());
    const_cast<SPatchThreadPayload *>(blockInfo->patchInfo.threadPayload)->OffsetToSkipSetFFIDGP = 0x1234;

    auto &workaroundTable = const_cast<HardwareInfo &>(device->getHardwareInfo()).workaroundTable;

    // Workaround enabled: the offset is applied only when isCcsUsed is true.
    workaroundTable.waUseOffsetToSkipSetFFIDGP = true;

    uint64_t expectedWithSkip = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
    uint64_t startPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, true);
    EXPECT_EQ(expectedWithSkip, startPointer);

    uint64_t expectedPlain = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
    startPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, false);
    EXPECT_EQ(expectedPlain, startPointer);

    // Workaround disabled: the offset is never applied.
    workaroundTable.waUseOffsetToSkipSetFFIDGP = false;

    expectedPlain = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
    startPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, true);
    EXPECT_EQ(expectedPlain, startPointer);

    startPointer = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(*device, blockInfo, false);
    EXPECT_EQ(expectedPlain, startPointer);
}
|
||||
|
|
|
@ -27,7 +27,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, setupExecutionModelDispatchDo
|
|||
size_t size = 20;
|
||||
IndirectHeap ssh(buffer, size);
|
||||
IndirectHeap dsh(buffer, size);
|
||||
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0);
|
||||
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0, false);
|
||||
|
||||
EXPECT_EQ(0u, ssh.getUsed());
|
||||
|
||||
|
@ -325,6 +325,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, dispatchScheduler) {
|
|||
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
|
||||
LinearStream cmdStream;
|
||||
|
||||
devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr);
|
||||
devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false);
|
||||
delete kernel;
|
||||
}
|
||||
|
|
|
@ -67,7 +67,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) {
|
|||
pDevice->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
|
||||
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
|
||||
|
@ -188,7 +189,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoe
|
|||
pDevice->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
|
||||
|
@ -224,7 +226,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, dispatchSchedulerWi
|
|||
device->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
|
|
|
@ -52,18 +52,18 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
|
|||
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
|
||||
}
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override {
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override {
|
||||
indirectStateSetup = true;
|
||||
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
|
||||
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount, isCcsUsed);
|
||||
}
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override {
|
||||
cleanupSectionAdded = true;
|
||||
timestampAddedInCleanupSection = hwTimeStamp ? hwTimeStamp->tagForCpuAccess : nullptr;
|
||||
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount);
|
||||
}
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override {
|
||||
schedulerDispatched = true;
|
||||
return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh);
|
||||
return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh, isCcsUsed);
|
||||
}
|
||||
|
||||
uint32_t criticalSectioncheckCounter = 0;
|
||||
|
|
|
@ -14,16 +14,15 @@ using namespace NEO;
|
|||
|
||||
using HardwareCommandsGen12LpTests = ::testing::Test;
|
||||
|
||||
TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenTgllpA0WhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
|
||||
TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
|
||||
const uint64_t defaultKernelStartOffset = 0;
|
||||
const uint64_t additionalOffsetDueToFfid = 0x1234;
|
||||
SPatchThreadPayload threadPayload{};
|
||||
threadPayload.OffsetToSkipSetFFIDGP = additionalOffsetDueToFfid;
|
||||
auto hwInfo = *platformDevices[0];
|
||||
|
||||
__REVID revIds[] = {REVISION_A0, REVISION_A1};
|
||||
for (auto revId : revIds) {
|
||||
hwInfo.platform.usRevId = revId;
|
||||
for (auto workaround : ::testing::Bool()) {
|
||||
hwInfo.workaroundTable.waUseOffsetToSkipSetFFIDGP = workaround;
|
||||
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
MockKernelWithInternals mockKernelWithInternals{*device};
|
||||
mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
|
@ -33,7 +32,7 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenTgllpA0WhenSettingKernelStartOffs
|
|||
HardwareCommandsHelper<FamilyType>::setKernelStartOffset(kernelStartOffset, false, mockKernelWithInternals.kernelInfo, false,
|
||||
false, *mockKernelWithInternals.mockKernel, isCcsUsed);
|
||||
|
||||
if ((revId == REVISION_A0) && isCcsUsed) {
|
||||
if (workaround && isCcsUsed) {
|
||||
EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
|
||||
} else {
|
||||
EXPECT_EQ(defaultKernelStartOffset, kernelStartOffset);
|
||||
|
|
|
@ -98,3 +98,19 @@ TGLLPTEST_F(TgllpHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisa
|
|||
EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption);
|
||||
EXPECT_FALSE(hwInfo.capabilityTable.supportsVme);
|
||||
}
|
||||
|
||||
// With revision id REVISION_A0, TGLLP::setupFeatureAndWorkaroundTable must
// turn on the waUseOffsetToSkipSetFFIDGP workaround.
TGLLPTEST_F(TgllpHwInfo, givenA0SteppingWhenWaTableIsInitializedThenWaUseOffsetToSkipSetFFIDGPIsSet) {
    HardwareInfo a0HwInfo;
    a0HwInfo.platform.usRevId = REVISION_A0;

    TGLLP::setupFeatureAndWorkaroundTable(&a0HwInfo);

    const auto &waTable = a0HwInfo.workaroundTable;
    EXPECT_TRUE(waTable.waUseOffsetToSkipSetFFIDGP);
}
|
||||
|
||||
// With revision id REVISION_A1, TGLLP::setupFeatureAndWorkaroundTable must
// leave the waUseOffsetToSkipSetFFIDGP workaround off.
TGLLPTEST_F(TgllpHwInfo, givenA1SteppingWhenWaTableIsInitializedThenWaUseOffsetToSkipSetFFIDGPIsNotSet) {
    HardwareInfo a1HwInfo;
    a1HwInfo.platform.usRevId = REVISION_A1;

    TGLLP::setupFeatureAndWorkaroundTable(&a1HwInfo);

    const auto &waTable = a1HwInfo.workaroundTable;
    EXPECT_FALSE(waTable.waUseOffsetToSkipSetFFIDGP);
}
|
||||
|
|
|
@ -45,7 +45,8 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
|
|||
pDevice->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
|
|
|
@ -33,6 +33,7 @@ class MockDeviceQueueHw : public DeviceQueueHw<GfxFamily> {
|
|||
using BaseClass::addPipeControlCmdWa;
|
||||
using BaseClass::addProfilingEndCmds;
|
||||
using BaseClass::buildSlbDummyCommands;
|
||||
using BaseClass::getBlockKernelStartPointer;
|
||||
using BaseClass::getCSPrefetchSize;
|
||||
using BaseClass::getExecutionModelCleanupSectionSize;
|
||||
using BaseClass::getMediaStateClearCmdsSize;
|
||||
|
|
Loading…
Reference in New Issue