Add support for OffsetToSkipSetFFIDGP igc parameter
Related-To: NEO-3829
Change-Id: I18b237bac5301f57bbb26636bec94683c3d250a7
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
parent
a59559e516
commit
b6e62528b6
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/gpgpu_walker_base.inl"
|
||||
#include "runtime/helpers/engine_node_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -123,7 +124,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|||
// Program the walker. Invokes execution so all state should already be programmed
|
||||
auto pGpGpuWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
|
||||
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
|
||||
auto isCcsUsed = isCcs(devQueueHw.getDevice().getDefaultEngine().osContext->getEngineType());
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
|
@ -138,7 +139,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|||
preemptionMode,
|
||||
pGpGpuWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
|
||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/hardware_interface_base.inl"
|
||||
#include "runtime/os_interface/os_context.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -112,6 +113,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, walkerCmd, timestampPacketNode, TimestampPacketStorage::WriteOperationType::AfterWalker, commandQueue.getDevice().getHardwareInfo());
|
||||
}
|
||||
|
||||
auto isCcsUsed = isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -125,7 +128,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||
preemptionMode,
|
||||
walkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
|
|
|
@ -31,6 +31,8 @@ set(RUNTIME_SRCS_HELPERS_BASE
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enable_product.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/engine_control.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/engine_node_helper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/engine_node_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/error_mappers.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "engine_node.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Reports whether the given engine type is the CCS engine.
// Returns true only when engineType compares equal to aub_stream::ENGINE_CCS;
// every other engine type yields false.
bool isCcs(aub_stream::EngineType engineType) {
|
||||
return engineType == aub_stream::ENGINE_CCS;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "engine_node.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Returns true when engineType is aub_stream::ENGINE_CCS, false otherwise.
bool isCcs(aub_stream::EngineType engineType);
|
||||
|
||||
} // namespace NEO
|
|
@ -112,7 +112,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
|||
PreemptionMode preemptionMode,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
||||
bool localIdsGenerationByRuntime);
|
||||
bool localIdsGenerationByRuntime,
|
||||
bool isCcsUsed);
|
||||
|
||||
static void programPerThreadData(
|
||||
size_t &sizePerThreadData,
|
||||
|
@ -140,7 +141,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
|||
const KernelInfo &kernelInfo,
|
||||
const bool &localIdsGenerationByRuntime,
|
||||
const bool &kernelUsesLocalIds,
|
||||
Kernel &kernel);
|
||||
Kernel &kernel,
|
||||
bool isCssUsed);
|
||||
|
||||
static size_t getSizeRequiredCS(const Kernel *kernel);
|
||||
static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
|
||||
|
|
|
@ -239,7 +239,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|||
PreemptionMode preemptionMode,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
||||
bool localIdsGenerationByRuntime) {
|
||||
bool localIdsGenerationByRuntime,
|
||||
bool isCcsUsed) {
|
||||
|
||||
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
||||
|
||||
|
@ -252,7 +253,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|||
const auto &kernelInfo = kernel.getKernelInfo();
|
||||
auto kernelAllocation = kernelInfo.getGraphicsAllocation();
|
||||
DEBUG_BREAK_IF(!kernelAllocation);
|
||||
setKernelStartOffset(kernelStartOffset, kernelAllocation, kernelInfo, localIdsGenerationByRuntime, kernelUsesLocalIds, kernel);
|
||||
setKernelStartOffset(kernelStartOffset, kernelAllocation, kernelInfo, localIdsGenerationByRuntime,
|
||||
kernelUsesLocalIds, kernel, isCcsUsed);
|
||||
|
||||
const auto &patchInfo = kernelInfo.patchInfo;
|
||||
|
||||
|
|
|
@ -95,12 +95,21 @@ void HardwareCommandsHelper<GfxFamily>::setKernelStartOffset(
|
|||
const KernelInfo &kernelInfo,
|
||||
const bool &localIdsGenerationByRuntime,
|
||||
const bool &kernelUsesLocalIds,
|
||||
Kernel &kernel) {
|
||||
Kernel &kernel,
|
||||
bool isCssUsed) {
|
||||
|
||||
if (kernelAllocation) {
|
||||
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
}
|
||||
kernelStartOffset += kernel.getStartOffset();
|
||||
|
||||
#ifdef WIN32
|
||||
if ((kernel.getDevice().getHardwareInfo().platform.eProductFamily == IGFX_TIGERLAKE_LP) &&
|
||||
(kernel.getDevice().getHardwareInfo().platform.usRevId == REVISION_A0) &&
|
||||
isCssUsed) {
|
||||
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "runtime/api/api.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/helpers/engine_node_helper.h"
|
||||
#include "runtime/helpers/hardware_commands_helper.h"
|
||||
#include "unit_tests/fixtures/execution_model_kernel_fixture.h"
|
||||
#include "unit_tests/fixtures/hello_world_fixture.h"
|
||||
|
@ -326,6 +327,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
|
|||
IDToffset,
|
||||
sizeof(INTERFACE_DESCRIPTOR_DATA));
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -339,7 +341,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
// It's okay these are EXPECT_GE as they're only going to be used for
|
||||
// estimation purposes to avoid OOM.
|
||||
|
@ -376,6 +379,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
|
|||
const size_t localWorkSize = 256;
|
||||
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -389,7 +393,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
|
@ -419,6 +424,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
|
|||
const size_t localWorkSize = 256;
|
||||
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -432,7 +438,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
|
||||
|
@ -456,6 +463,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
|
|||
const size_t localWorkSize = 256;
|
||||
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -469,7 +477,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
|
@ -528,6 +537,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
|||
modifiedKernelInfo.workgroupDimensionsOrder[2] = 0;
|
||||
MockKernel mockKernel{kernel->getProgram(), modifiedKernelInfo, kernel->getDevice(), false};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -541,7 +551,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
|
||||
numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize());
|
||||
|
@ -607,6 +618,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer)
|
|||
// force statefull path for buffers
|
||||
const_cast<KernelInfo &>(kernelInfo).requiresSshForBuffers = true;
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -620,7 +632,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer)
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0]));
|
||||
EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1]));
|
||||
|
@ -767,6 +780,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF
|
|||
|
||||
// push surfaces states and binding table to given ssh heap
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -780,7 +794,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
bti = reinterpret_cast<typename FamilyType::BINDING_TABLE_STATE *>(reinterpret_cast<unsigned char *>(ssh.getCpuBase()) + localSshOffset + btiOffset);
|
||||
for (uint32_t i = 0; i < numSurfaces; ++i) {
|
||||
|
@ -1005,6 +1020,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
|
|||
mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData));
|
||||
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal));
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
|
@ -1018,7 +1034,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
|
|||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true);
|
||||
true,
|
||||
isCcsUsed);
|
||||
|
||||
bool isMemorySame = memcmp(borderColorPointer, mockDsh, borderColorSize) == 0;
|
||||
EXPECT_TRUE(isMemorySame);
|
||||
|
|
Loading…
Reference in New Issue