Simplify HardwareCommandsHelper

Related-To: NEO-4175

Change-Id: I39b08353514ea0bf384b6b592f24952d0ed631e6
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2020-01-23 15:52:49 +01:00
committed by sys_ocldev
parent fd8c5ba67f
commit 60c7587c2b
21 changed files with 185 additions and 109 deletions

View File

@@ -72,6 +72,7 @@ class HwHelper {
uint32_t threadsPerEu) = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
@@ -186,7 +187,7 @@ class HwHelperHw : public HwHelper {
static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo);
static bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo);
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);

View File

@@ -263,7 +263,7 @@ uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarr
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) {
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
return false;
}

View File

@@ -126,12 +126,15 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
auto pGpGpuWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,
*dsh,
*ioh,
*ssh,
scheduler,
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
@@ -139,8 +142,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
preemptionMode,
pGpGpuWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);

View File

@@ -115,6 +115,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
}
auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,
@@ -122,6 +123,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
ioh,
ssh,
kernel,
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
@@ -129,8 +131,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
preemptionMode,
walkerCmd,
nullptr,
true,
isCcsUsed);
true);
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
numWorkGroups, localWorkSizes, simd, dim,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -237,7 +237,10 @@ uint64_t DeviceQueueHw<GfxFamily>::getBlockKernelStartPointer(const Device &devi
auto blockKernelStartPointer = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
if (blockAllocation && isCcsUsed && HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(device.getHardwareInfo())) {
auto &hardwareInfo = device.getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
return blockKernelStartPointer;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -182,7 +182,11 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
auto btOffset = HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, *pBlockInfo);
auto btOffset = HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount,
pBlockInfo->heapInfo.pSsh,
pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize,
bindingTableCount,
pBlockInfo->patchInfo.bindingTableState->Offset);
parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast<uint32_t>(btOffset));

View File

@@ -16,7 +16,7 @@ namespace NEO {
typedef TGLLPFamily Family;
template <>
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) {
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
return (hwInfo.platform.usRevId < REVISION_B);
}

View File

@@ -86,26 +86,13 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
const void *srcKernelSsh, size_t srcKernelSshSize,
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
// Convenience overload that forwards the surface state heap data held by a
// KernelInfo to the six-argument pushBindingTableAndSurfaceStates.
// When patchInfo.bindingTableState is null, a count and an offset of zero are
// forwarded instead of dereferencing it.
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo) {
    const auto bindingTableState = srcKernelInfo.patchInfo.bindingTableState;
    const bool hasBindingTable = (bindingTableState != nullptr);

    // The same count is passed twice on purpose: once as the second argument
    // and once as the number of binding table states.
    const auto bindingTableCount = hasBindingTable ? bindingTableState->Count : 0;
    const auto bindingTableOffset = hasBindingTable ? bindingTableState->Offset : 0;

    const auto &heapInfo = srcKernelInfo.heapInfo;
    return pushBindingTableAndSurfaceStates(dstHeap, bindingTableCount,
                                            heapInfo.pSsh,
                                            heapInfo.pKernelHeader->SurfaceStateHeapSize,
                                            bindingTableCount,
                                            bindingTableOffset);
}
// Convenience overload that forwards the surface state heap data owned by a
// Kernel object to the six-argument pushBindingTableAndSurfaceStates.
// The leading count comes from the kernel info's binding table state (zero when
// that pointer is null); the remaining values are read through Kernel getters.
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
    const auto bindingTableState = srcKernel.getKernelInfo().patchInfo.bindingTableState;
    const auto bindingTableCount = (bindingTableState != nullptr) ? bindingTableState->Count : 0;

    return pushBindingTableAndSurfaceStates(dstHeap, bindingTableCount,
                                            srcKernel.getSurfaceStateHeap(),
                                            srcKernel.getSurfaceStateHeapSize(),
                                            srcKernel.getNumberOfBindingTableStates(),
                                            srcKernel.getBindingTableOffset());
}
static size_t sendIndirectState(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ioh,
IndirectHeap &ssh,
Kernel &kernel,
uint64_t kernelStartOffset,
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,
@@ -113,8 +100,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
PreemptionMode preemptionMode,
WALKER_TYPE<GfxFamily> *walkerCmd,
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
bool localIdsGenerationByRuntime,
bool isCcsUsed);
bool localIdsGenerationByRuntime);
static void programPerThreadData(
size_t &sizePerThreadData,
@@ -136,15 +122,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
inline static bool resetBindingTablePrefetch(Kernel &kernel);
static void setKernelStartOffset(
uint64_t &kernelStartOffset,
bool kernelAllocation,
const KernelInfo &kernelInfo,
const bool &localIdsGenerationByRuntime,
const bool &kernelUsesLocalIds,
Kernel &kernel,
bool isCssUsed);
static size_t getSizeRequiredCS(const Kernel *kernel);
static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
static bool isPipeControlWArequired(const HardwareInfo &hwInfo);

View File

@@ -277,6 +277,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
IndirectHeap &ioh,
IndirectHeap &ssh,
Kernel &kernel,
uint64_t kernelStartOffset,
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,
@@ -284,26 +285,20 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
PreemptionMode preemptionMode,
WALKER_TYPE<GfxFamily> *walkerCmd,
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
bool localIdsGenerationByRuntime,
bool isCcsUsed) {
bool localIdsGenerationByRuntime) {
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32);
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
auto inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
// Copy the kernel over to the ISH
uint64_t kernelStartOffset = 0llu;
const auto &kernelInfo = kernel.getKernelInfo();
auto kernelAllocation = kernelInfo.getGraphicsAllocation();
DEBUG_BREAK_IF(!kernelAllocation);
setKernelStartOffset(kernelStartOffset, kernelAllocation, kernelInfo, localIdsGenerationByRuntime,
kernelUsesLocalIds, kernel, isCcsUsed);
const auto &patchInfo = kernelInfo.patchInfo;
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, kernel);
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
// Copy our sampler state if it exists
size_t samplerStateOffset = 0;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2019 Intel Corporation
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -89,26 +89,6 @@ void HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
}
}
// Computes the kernel start offset in place.
//   kernelStartOffset - in/out; overwritten with the allocation's GPU address to
//                       patch when kernelAllocation is true, otherwise used as-is
//                       as the base value.
//   kernelAllocation  - whether kernelInfo has a graphics allocation to read.
//   kernel            - supplies the extra start offset and the device whose
//                       hardware info decides on the SetFFIDGP workaround.
//   isCssUsed         - enables the workaround check.
// localIdsGenerationByRuntime and kernelUsesLocalIds are not read by this
// implementation.
template <typename GfxFamily>
void HardwareCommandsHelper<GfxFamily>::setKernelStartOffset(
    uint64_t &kernelStartOffset,
    bool kernelAllocation,
    const KernelInfo &kernelInfo,
    const bool &localIdsGenerationByRuntime,
    const bool &kernelUsesLocalIds,
    Kernel &kernel,
    bool isCssUsed) {

    if (kernelAllocation) {
        kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
    }

    kernelStartOffset += kernel.getStartOffset();

    // The hardware query is only evaluated when isCssUsed is true.
    const bool skipSetFfidGp = isCssUsed &&
                               HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(kernel.getDevice().getHardwareInfo());
    if (skipSetFfidGp) {
        kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
    }
}
template <typename GfxFamily>
void HardwareCommandsHelper<GfxFamily>::programPerThreadData(
size_t &sizePerThreadData,

View File

@@ -776,11 +776,7 @@ void Kernel::setStartOffset(uint32_t offset) {
this->startOffset = offset;
}
const void *Kernel::getSurfaceStateHeap() const {
return kernelInfo.usesSsh ? pSshLocal.get() : nullptr;
}
void *Kernel::getSurfaceStateHeap() {
void *Kernel::getSurfaceStateHeap() const {
return kernelInfo.usesSsh ? pSshLocal.get() : nullptr;
}
@@ -2405,4 +2401,31 @@ bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() {
return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf();
}
uint64_t Kernel::getKernelStartOffset(
const bool localIdsGenerationByRuntime,
const bool kernelUsesLocalIds,
const bool isCssUsed) const {
uint64_t kernelStartOffset = 0;
if (kernelInfo.getGraphicsAllocation()) {
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
DEBUG_BREAK_IF(kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad != 128);
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad;
}
}
kernelStartOffset += getStartOffset();
auto &hardwareInfo = getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
return kernelStartOffset;
}
} // namespace NEO

View File

@@ -147,8 +147,7 @@ class Kernel : public BaseObject<_cl_kernel> {
size_t *paramValueSizeRet) const;
const void *getKernelHeap() const;
const void *getSurfaceStateHeap() const;
void *getSurfaceStateHeap();
void *getSurfaceStateHeap() const;
const void *getDynamicStateHeap() const;
size_t getKernelHeapSize() const;
@@ -404,6 +403,11 @@ class Kernel : public BaseObject<_cl_kernel> {
size_t *localWorkSize);
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const;
uint64_t getKernelStartOffset(
const bool localIdsGenerationByRuntime,
const bool kernelUsesLocalIds,
const bool isCssUsed) const;
protected:
struct ObjectCounts {
uint32_t imageCount;

View File

@@ -62,7 +62,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && HwHelperHw<FamilyType>::isOffsetToSkipSetFFIDGPWARequired(pKernel->getDevice().getHardwareInfo())) {
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
@@ -104,7 +108,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && HwHelperHw<FamilyType>::isOffsetToSkipSetFFIDGPWARequired(pKernel->getDevice().getHardwareInfo())) {
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2019 Intel Corporation
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -15,7 +15,6 @@ if(TESTS_GEN12LP)
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/gen12lp_tests_wrapper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen12lp.inl

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,7 +12,6 @@
#include "unit_tests/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl"
#include "unit_tests/gen12lp/compute_mode_tests_gen12lp.inl"
#include "unit_tests/gen12lp/enqueue_media_kernel_gen12lp.inl"
#include "unit_tests/gen12lp/hardware_commands_helper_tests_gen12lp.inl"
#include "unit_tests/gen12lp/hw_helper_tests_gen12lp.inl"
#include "unit_tests/gen12lp/image_tests_gen12lp.inl"
#include "unit_tests/gen12lp/kernel_tests_gen12lp.inl"

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019 Intel Corporation
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -7,6 +7,7 @@
if(TESTS_TGLLP)
set(IGDRCL_SRCS_tests_gen12lp_tgllp
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_tgllp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_hw_helper_tgllp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_tgllp.cpp
)

View File

@@ -12,9 +12,9 @@
using namespace NEO;
using HardwareCommandsGen12LpTests = ::testing::Test;
using KernelTgllpTests = ::testing::Test;
TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
const uint64_t defaultKernelStartOffset = 0;
const uint64_t additionalOffsetDueToFfid = 0x1234;
SPatchThreadPayload threadPayload{};
@@ -30,9 +30,7 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroun
mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload;
for (auto isCcsUsed : ::testing::Bool()) {
uint64_t kernelStartOffset = defaultKernelStartOffset;
HardwareCommandsHelper<FamilyType>::setKernelStartOffset(kernelStartOffset, false, mockKernelWithInternals.kernelInfo, false,
false, *mockKernelWithInternals.mockKernel, isCcsUsed);
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed);
if (stepping < REVISION_B && isCcsUsed) {
EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
@@ -41,4 +39,4 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroun
}
}
}
}
}

View File

@@ -325,6 +325,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
sizeof(INTERFACE_DESCRIPTOR_DATA));
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*kernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
@@ -332,6 +333,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
ioh,
ssh,
*kernel,
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
kernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
IDToffset,
@@ -339,8 +341,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
// It's okay these are EXPECT_GE as they're only going to be used for
// estimation purposes to avoid OOM.
@@ -378,12 +379,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@@ -391,8 +395,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
@@ -423,12 +426,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@@ -436,8 +442,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
@@ -462,12 +467,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@@ -475,8 +483,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
@@ -536,12 +543,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
MockKernel mockKernel{kernel->getProgram(), modifiedKernelInfo, kernel->getDevice(), false};
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(mockKernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
mockKernel,
mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
modifiedKernelInfo.getMaxSimdSize(),
localWorkSizes,
IDToffset,
@@ -549,8 +559,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize());
@@ -618,12 +627,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer)
const_cast<KernelInfo &>(kernelInfo).requiresSshForBuffers = true;
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*kernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*kernel,
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
kernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@@ -631,8 +643,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer)
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0]));
EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1]));
@@ -780,12 +791,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF
// push surfaces states and binding table to given ssh heap
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*pKernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*pKernel,
pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
pKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@@ -793,8 +807,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
bti = reinterpret_cast<typename FamilyType::BINDING_TABLE_STATE *>(reinterpret_cast<unsigned char *>(ssh.getCpuBase()) + localSshOffset + btiOffset);
for (uint32_t i = 0; i < numSurfaces; ++i) {
@@ -859,7 +872,7 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForKernelWithBuffersNotRequi
EXPECT_EQ(0u, numSurfaceStates);
// set binding table states
auto dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
EXPECT_EQ(0u, dstBindingTablePointer);
auto usedAfter = ssh.getUsed();
@@ -904,10 +917,10 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) {
auto numSurfaceStates = pKernel->getNumberOfBindingTableStates();
EXPECT_EQ(0u, numSurfaceStates);
auto dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernelInfo);
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
EXPECT_EQ(0u, dstBindingTablePointer);
dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
EXPECT_EQ(0u, dstBindingTablePointer);
SPatchBindingTableState bindingTableState;
@@ -918,7 +931,7 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) {
bindingTableState.SurfaceStateOffset = 0;
pKernelInfo->patchInfo.bindingTableState = &bindingTableState;
dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
EXPECT_EQ(0u, dstBindingTablePointer);
pKernelInfo->patchInfo.bindingTableState = nullptr;
@@ -1060,12 +1073,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal));
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
8,
localWorkSizes,
interfaceDescriptorTableOffset,
@@ -1073,8 +1089,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
isCcsUsed);
true);
bool isMemorySame = memcmp(borderColorPointer, mockDsh, borderColorSize) == 0;
EXPECT_TRUE(isMemorySame);

View File

@@ -1,11 +1,12 @@
/*
* Copyright (C) 2018-2019 Intel Corporation
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/built_ins/built_ins.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include "runtime/kernel/kernel.h"
#include "test.h"
#include "unit_tests/fixtures/built_in_fixture.h"
@@ -39,4 +40,11 @@ struct HardwareCommandsTest : DeviceFixture,
std::unique_ptr<MockKernelWithInternals> mockKernelWithInternal;
Kernel::SimpleKernelArgInfo kernelArgInfo = {};
std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
// Test-side helper: pushes the binding table and surface states of a Kernel by
// calling HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates
// with values read from the kernel. The leading count is zero when the kernel
// info has no binding table state.
template <typename GfxFamily>
size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
    const auto bindingTableState = srcKernel.getKernelInfo().patchInfo.bindingTableState;
    const auto bindingTableCount = (bindingTableState != nullptr) ? bindingTableState->Count : 0;

    return HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, bindingTableCount,
                                                                               srcKernel.getSurfaceStateHeap(),
                                                                               srcKernel.getSurfaceStateHeapSize(),
                                                                               srcKernel.getNumberOfBindingTableStates(),
                                                                               srcKernel.getBindingTableOffset());
}
};

View File

@@ -788,7 +788,8 @@ HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsOffsetToSkipSetFFIDGPWARequir
if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
GTEST_SKIP();
}
EXPECT_FALSE(HwHelperHw<FamilyType>::isOffsetToSkipSetFFIDGPWARequired(hardwareInfo));
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
EXPECT_FALSE(hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo));
}
HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsForceDefaultRCSEngineWARequiredCalledThenFalseIsReturned) {

View File

@@ -766,6 +766,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
program.setConstantSurface(nullptr);
delete pKernel;
@@ -1014,6 +1015,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
program.setGlobalSurface(nullptr);
delete pKernel;
@@ -1188,6 +1190,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
program.setConstantSurface(nullptr);
delete pKernel;
@@ -2936,6 +2939,60 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre
EXPECT_EQ(KernelExecutionType::Default, kernel.executionType);
}
// Local ids not generated by the runtime + kernel uses local ids:
// the per-thread-data skip offset must be added on top of the start offset.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    SPatchThreadPayload payload = {};
    payload.OffsetToSkipPerThreadDataLoad = 128u;
    auto &kernelInfo = kernelWithInternals.kernelInfo;
    kernelInfo.patchInfo.threadPayload = &payload;

    kernelInfo.createKernelAllocation(clDevice->getRootDeviceIndex(), clDevice->getMemoryManager());
    const auto baseAddress = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();

    kernelWithInternals.mockKernel->setStartOffset(128);
    const auto startOffset = kernelWithInternals.mockKernel->getKernelStartOffset(false, true, false);

    // 256 = 128 (skip per-thread data load) + 128 (kernel start offset).
    EXPECT_EQ(baseAddress + 256u, startOffset);

    clDevice->getMemoryManager()->freeGraphicsMemory(kernelInfo.getGraphicsAllocation());
}
// Local ids generated by the runtime: even though the kernel uses local ids,
// only the kernel start offset is added to the allocation address.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    SPatchThreadPayload payload = {};
    payload.OffsetToSkipPerThreadDataLoad = 128u;
    auto &kernelInfo = kernelWithInternals.kernelInfo;
    kernelInfo.patchInfo.threadPayload = &payload;

    kernelInfo.createKernelAllocation(clDevice->getRootDeviceIndex(), clDevice->getMemoryManager());
    const auto baseAddress = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();

    kernelWithInternals.mockKernel->setStartOffset(128);
    const auto startOffset = kernelWithInternals.mockKernel->getKernelStartOffset(true, true, false);

    // Only the 128-byte kernel start offset is expected.
    EXPECT_EQ(baseAddress + 128u, startOffset);

    clDevice->getMemoryManager()->freeGraphicsMemory(kernelInfo.getGraphicsAllocation());
}
// Local ids not generated by the runtime, but the kernel does not use local ids:
// the per-thread-data skip offset must not be added.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    SPatchThreadPayload payload = {};
    payload.OffsetToSkipPerThreadDataLoad = 128u;
    auto &kernelInfo = kernelWithInternals.kernelInfo;
    kernelInfo.patchInfo.threadPayload = &payload;

    kernelInfo.createKernelAllocation(clDevice->getRootDeviceIndex(), clDevice->getMemoryManager());
    const auto baseAddress = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();

    kernelWithInternals.mockKernel->setStartOffset(128);
    const auto startOffset = kernelWithInternals.mockKernel->getKernelStartOffset(false, false, false);

    // Only the 128-byte kernel start offset is expected.
    EXPECT_EQ(baseAddress + 128u, startOffset);

    clDevice->getMemoryManager()->freeGraphicsMemory(kernelInfo.getGraphicsAllocation());
}
namespace NEO {
template <typename GfxFamily>