mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
Simplify HardwareCommandsHelper
Related-To: NEO-4175
Change-Id: I39b08353514ea0bf384b6b592f24952d0ed631e6
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
fd8c5ba67f
commit
60c7587c2b
@@ -72,6 +72,7 @@ class HwHelper {
|
||||
uint32_t threadsPerEu) = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
|
||||
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
|
||||
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
|
||||
|
||||
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
||||
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
|
||||
@@ -186,7 +187,7 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo);
|
||||
|
||||
static bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo);
|
||||
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
|
||||
|
||||
|
||||
@@ -263,7 +263,7 @@ uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarr
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) {
|
||||
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -126,12 +126,15 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
auto pGpGpuWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
|
||||
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
*dsh,
|
||||
*ioh,
|
||||
*ssh,
|
||||
scheduler,
|
||||
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
@@ -139,8 +142,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
preemptionMode,
|
||||
pGpGpuWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
|
||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);
|
||||
|
||||
@@ -115,6 +115,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
}
|
||||
|
||||
auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
@@ -122,6 +123,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
ioh,
|
||||
ssh,
|
||||
kernel,
|
||||
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
@@ -129,8 +131,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
preemptionMode,
|
||||
walkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -237,7 +237,10 @@ uint64_t DeviceQueueHw<GfxFamily>::getBlockKernelStartPointer(const Device &devi
|
||||
|
||||
auto blockKernelStartPointer = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
|
||||
|
||||
if (blockAllocation && isCcsUsed && HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(device.getHardwareInfo())) {
|
||||
auto &hardwareInfo = device.getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
return blockKernelStartPointer;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -182,7 +182,11 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
|
||||
|
||||
totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
auto btOffset = HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, *pBlockInfo);
|
||||
auto btOffset = HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount,
|
||||
pBlockInfo->heapInfo.pSsh,
|
||||
pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize,
|
||||
bindingTableCount,
|
||||
pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
|
||||
parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast<uint32_t>(btOffset));
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ namespace NEO {
|
||||
typedef TGLLPFamily Family;
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) {
|
||||
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
|
||||
return (hwInfo.platform.usRevId < REVISION_B);
|
||||
}
|
||||
|
||||
|
||||
@@ -86,26 +86,13 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
|
||||
|
||||
// Convenience overload: unpacks the binding-table and surface-state-heap
// fields of a KernelInfo and forwards them to the six-argument overload.
// When the kernel info carries no binding table state patch token, the
// binding table count and offset are forwarded as 0.
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo) {
    const auto *tableState = srcKernelInfo.patchInfo.bindingTableState;
    const auto tableCount = (tableState != nullptr) ? tableState->Count : 0;
    const auto tableOffset = (tableState != nullptr) ? tableState->Offset : 0;

    return pushBindingTableAndSurfaceStates(dstHeap,
                                            tableCount,
                                            srcKernelInfo.heapInfo.pSsh,
                                            srcKernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize,
                                            tableCount,
                                            tableOffset);
}
|
||||
|
||||
// Convenience overload: reads the surface state heap, its size, the number
// of binding table states and the binding table offset from a Kernel and
// forwards them to the six-argument overload. The leading binding table
// count comes from the kernel info patch token, or 0 when it is absent.
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
    const auto *tableState = srcKernel.getKernelInfo().patchInfo.bindingTableState;
    const auto tableCount = (tableState != nullptr) ? tableState->Count : 0;

    return pushBindingTableAndSurfaceStates(dstHeap,
                                            tableCount,
                                            srcKernel.getSurfaceStateHeap(),
                                            srcKernel.getSurfaceStateHeapSize(),
                                            srcKernel.getNumberOfBindingTableStates(),
                                            srcKernel.getBindingTableOffset());
}
|
||||
|
||||
static size_t sendIndirectState(
|
||||
LinearStream &commandStream,
|
||||
IndirectHeap &dsh,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
Kernel &kernel,
|
||||
uint64_t kernelStartOffset,
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
@@ -113,8 +100,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
PreemptionMode preemptionMode,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
||||
bool localIdsGenerationByRuntime,
|
||||
bool isCcsUsed);
|
||||
bool localIdsGenerationByRuntime);
|
||||
|
||||
static void programPerThreadData(
|
||||
size_t &sizePerThreadData,
|
||||
@@ -136,15 +122,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
|
||||
inline static bool resetBindingTablePrefetch(Kernel &kernel);
|
||||
|
||||
static void setKernelStartOffset(
|
||||
uint64_t &kernelStartOffset,
|
||||
bool kernelAllocation,
|
||||
const KernelInfo &kernelInfo,
|
||||
const bool &localIdsGenerationByRuntime,
|
||||
const bool &kernelUsesLocalIds,
|
||||
Kernel &kernel,
|
||||
bool isCssUsed);
|
||||
|
||||
static size_t getSizeRequiredCS(const Kernel *kernel);
|
||||
static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
|
||||
static bool isPipeControlWArequired(const HardwareInfo &hwInfo);
|
||||
|
||||
@@ -277,6 +277,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
Kernel &kernel,
|
||||
uint64_t kernelStartOffset,
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
@@ -284,26 +285,20 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
PreemptionMode preemptionMode,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
||||
bool localIdsGenerationByRuntime,
|
||||
bool isCcsUsed) {
|
||||
bool localIdsGenerationByRuntime) {
|
||||
|
||||
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
||||
|
||||
DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32);
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
||||
auto inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
||||
|
||||
// Copy the kernel over to the ISH
|
||||
uint64_t kernelStartOffset = 0llu;
|
||||
const auto &kernelInfo = kernel.getKernelInfo();
|
||||
auto kernelAllocation = kernelInfo.getGraphicsAllocation();
|
||||
DEBUG_BREAK_IF(!kernelAllocation);
|
||||
setKernelStartOffset(kernelStartOffset, kernelAllocation, kernelInfo, localIdsGenerationByRuntime,
|
||||
kernelUsesLocalIds, kernel, isCcsUsed);
|
||||
|
||||
const auto &patchInfo = kernelInfo.patchInfo;
|
||||
|
||||
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, kernel);
|
||||
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
|
||||
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
|
||||
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
|
||||
|
||||
// Copy our sampler state if it exists
|
||||
size_t samplerStateOffset = 0;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -89,26 +89,6 @@ void HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void HardwareCommandsHelper<GfxFamily>::setKernelStartOffset(
|
||||
uint64_t &kernelStartOffset,
|
||||
bool kernelAllocation,
|
||||
const KernelInfo &kernelInfo,
|
||||
const bool &localIdsGenerationByRuntime,
|
||||
const bool &kernelUsesLocalIds,
|
||||
Kernel &kernel,
|
||||
bool isCssUsed) {
|
||||
|
||||
if (kernelAllocation) {
|
||||
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
}
|
||||
kernelStartOffset += kernel.getStartOffset();
|
||||
|
||||
if (isCssUsed && HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(kernel.getDevice().getHardwareInfo())) {
|
||||
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void HardwareCommandsHelper<GfxFamily>::programPerThreadData(
|
||||
size_t &sizePerThreadData,
|
||||
|
||||
@@ -776,11 +776,7 @@ void Kernel::setStartOffset(uint32_t offset) {
|
||||
this->startOffset = offset;
|
||||
}
|
||||
|
||||
const void *Kernel::getSurfaceStateHeap() const {
|
||||
return kernelInfo.usesSsh ? pSshLocal.get() : nullptr;
|
||||
}
|
||||
|
||||
void *Kernel::getSurfaceStateHeap() {
|
||||
void *Kernel::getSurfaceStateHeap() const {
|
||||
return kernelInfo.usesSsh ? pSshLocal.get() : nullptr;
|
||||
}
|
||||
|
||||
@@ -2405,4 +2401,31 @@ bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() {
|
||||
return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf();
|
||||
}
|
||||
|
||||
uint64_t Kernel::getKernelStartOffset(
|
||||
const bool localIdsGenerationByRuntime,
|
||||
const bool kernelUsesLocalIds,
|
||||
const bool isCssUsed) const {
|
||||
|
||||
uint64_t kernelStartOffset = 0;
|
||||
|
||||
if (kernelInfo.getGraphicsAllocation()) {
|
||||
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
|
||||
DEBUG_BREAK_IF(kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad != 128);
|
||||
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad;
|
||||
}
|
||||
}
|
||||
|
||||
kernelStartOffset += getStartOffset();
|
||||
|
||||
auto &hardwareInfo = getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
|
||||
return kernelStartOffset;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -147,8 +147,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
size_t *paramValueSizeRet) const;
|
||||
|
||||
const void *getKernelHeap() const;
|
||||
const void *getSurfaceStateHeap() const;
|
||||
void *getSurfaceStateHeap();
|
||||
void *getSurfaceStateHeap() const;
|
||||
const void *getDynamicStateHeap() const;
|
||||
|
||||
size_t getKernelHeapSize() const;
|
||||
@@ -404,6 +403,11 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
size_t *localWorkSize);
|
||||
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const;
|
||||
|
||||
uint64_t getKernelStartOffset(
|
||||
const bool localIdsGenerationByRuntime,
|
||||
const bool kernelUsesLocalIds,
|
||||
const bool isCssUsed) const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
|
||||
@@ -62,7 +62,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
|
||||
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
|
||||
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && HwHelperHw<FamilyType>::isOffsetToSkipSetFFIDGPWARequired(pKernel->getDevice().getHardwareInfo())) {
|
||||
|
||||
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
|
||||
@@ -104,7 +108,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
|
||||
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
|
||||
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && HwHelperHw<FamilyType>::isOffsetToSkipSetFFIDGPWARequired(pKernel->getDevice().getHardwareInfo())) {
|
||||
|
||||
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2019 Intel Corporation
|
||||
# Copyright (C) 2018-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -15,7 +15,6 @@ if(TESTS_GEN12LP)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests_gen12lp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen12lp.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gen12lp_tests_wrapper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests_gen12lp.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen12lp.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen12lp.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen12lp.inl
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,7 +12,6 @@
|
||||
#include "unit_tests/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl"
|
||||
#include "unit_tests/gen12lp/compute_mode_tests_gen12lp.inl"
|
||||
#include "unit_tests/gen12lp/enqueue_media_kernel_gen12lp.inl"
|
||||
#include "unit_tests/gen12lp/hardware_commands_helper_tests_gen12lp.inl"
|
||||
#include "unit_tests/gen12lp/hw_helper_tests_gen12lp.inl"
|
||||
#include "unit_tests/gen12lp/image_tests_gen12lp.inl"
|
||||
#include "unit_tests/gen12lp/kernel_tests_gen12lp.inl"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2019 Intel Corporation
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -7,6 +7,7 @@
|
||||
if(TESTS_TGLLP)
|
||||
set(IGDRCL_SRCS_tests_gen12lp_tgllp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_tgllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_hw_helper_tgllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_tgllp.cpp
|
||||
)
|
||||
|
||||
@@ -12,9 +12,9 @@
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using HardwareCommandsGen12LpTests = ::testing::Test;
|
||||
using KernelTgllpTests = ::testing::Test;
|
||||
|
||||
TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
|
||||
TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) {
|
||||
const uint64_t defaultKernelStartOffset = 0;
|
||||
const uint64_t additionalOffsetDueToFfid = 0x1234;
|
||||
SPatchThreadPayload threadPayload{};
|
||||
@@ -30,9 +30,7 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroun
|
||||
mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
|
||||
for (auto isCcsUsed : ::testing::Bool()) {
|
||||
uint64_t kernelStartOffset = defaultKernelStartOffset;
|
||||
HardwareCommandsHelper<FamilyType>::setKernelStartOffset(kernelStartOffset, false, mockKernelWithInternals.kernelInfo, false,
|
||||
false, *mockKernelWithInternals.mockKernel, isCcsUsed);
|
||||
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed);
|
||||
|
||||
if (stepping < REVISION_B && isCcsUsed) {
|
||||
EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
|
||||
@@ -41,4 +39,4 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroun
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -325,6 +325,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
|
||||
sizeof(INTERFACE_DESCRIPTOR_DATA));
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*kernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
@@ -332,6 +333,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
|
||||
ioh,
|
||||
ssh,
|
||||
*kernel,
|
||||
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
kernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
IDToffset,
|
||||
@@ -339,8 +341,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
// It's okay these are EXPECT_GE as they're only going to be used for
|
||||
// estimation purposes to avoid OOM.
|
||||
@@ -378,12 +379,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
|
||||
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@@ -391,8 +395,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
@@ -423,12 +426,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
|
||||
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@@ -436,8 +442,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
|
||||
@@ -462,12 +467,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
|
||||
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@@ -475,8 +483,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
@@ -536,12 +543,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
||||
MockKernel mockKernel{kernel->getProgram(), modifiedKernelInfo, kernel->getDevice(), false};
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(mockKernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
mockKernel,
|
||||
mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
modifiedKernelInfo.getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
IDToffset,
|
||||
@@ -549,8 +559,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
|
||||
numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize());
|
||||
@@ -618,12 +627,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer)
|
||||
const_cast<KernelInfo &>(kernelInfo).requiresSshForBuffers = true;
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*kernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*kernel,
|
||||
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
kernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@@ -631,8 +643,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer)
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0]));
|
||||
EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1]));
|
||||
@@ -780,12 +791,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF
|
||||
// push surfaces states and binding table to given ssh heap
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*pKernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*pKernel,
|
||||
pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
pKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@@ -793,8 +807,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
bti = reinterpret_cast<typename FamilyType::BINDING_TABLE_STATE *>(reinterpret_cast<unsigned char *>(ssh.getCpuBase()) + localSshOffset + btiOffset);
|
||||
for (uint32_t i = 0; i < numSurfaces; ++i) {
|
||||
@@ -859,7 +872,7 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForKernelWithBuffersNotRequi
|
||||
EXPECT_EQ(0u, numSurfaceStates);
|
||||
|
||||
// set binding table states
|
||||
auto dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
|
||||
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
|
||||
EXPECT_EQ(0u, dstBindingTablePointer);
|
||||
|
||||
auto usedAfter = ssh.getUsed();
|
||||
@@ -904,10 +917,10 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) {
|
||||
auto numSurfaceStates = pKernel->getNumberOfBindingTableStates();
|
||||
EXPECT_EQ(0u, numSurfaceStates);
|
||||
|
||||
auto dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernelInfo);
|
||||
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
|
||||
EXPECT_EQ(0u, dstBindingTablePointer);
|
||||
|
||||
dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
|
||||
dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
|
||||
EXPECT_EQ(0u, dstBindingTablePointer);
|
||||
|
||||
SPatchBindingTableState bindingTableState;
|
||||
@@ -918,7 +931,7 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) {
|
||||
bindingTableState.SurfaceStateOffset = 0;
|
||||
pKernelInfo->patchInfo.bindingTableState = &bindingTableState;
|
||||
|
||||
dstBindingTablePointer = HardwareCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
|
||||
dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
|
||||
EXPECT_EQ(0u, dstBindingTablePointer);
|
||||
|
||||
pKernelInfo->patchInfo.bindingTableState = nullptr;
|
||||
@@ -1060,12 +1073,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
|
||||
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal));
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
|
||||
|
||||
HardwareCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
8,
|
||||
localWorkSizes,
|
||||
interfaceDescriptorTableOffset,
|
||||
@@ -1073,8 +1089,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
|
||||
pDevice->getPreemptionMode(),
|
||||
pWalkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
isCcsUsed);
|
||||
true);
|
||||
|
||||
bool isMemorySame = memcmp(borderColorPointer, mockDsh, borderColorSize) == 0;
|
||||
EXPECT_TRUE(isMemorySame);
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/helpers/hardware_commands_helper.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "test.h"
|
||||
#include "unit_tests/fixtures/built_in_fixture.h"
|
||||
@@ -39,4 +40,11 @@ struct HardwareCommandsTest : DeviceFixture,
|
||||
std::unique_ptr<MockKernelWithInternals> mockKernelWithInternal;
|
||||
Kernel::SimpleKernelArgInfo kernelArgInfo = {};
|
||||
std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
|
||||
return HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0,
|
||||
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
|
||||
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -788,7 +788,8 @@ HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsOffsetToSkipSetFFIDGPWARequir
|
||||
if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
EXPECT_FALSE(HwHelperHw<FamilyType>::isOffsetToSkipSetFFIDGPWARequired(hardwareInfo));
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
EXPECT_FALSE(hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo));
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsForceDefaultRCSEngineWARequiredCalledThenFalseIsReturned) {
|
||||
|
||||
@@ -766,6 +766,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
|
||||
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
|
||||
|
||||
program.setConstantSurface(nullptr);
|
||||
delete pKernel;
|
||||
@@ -1014,6 +1015,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
|
||||
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
|
||||
|
||||
program.setGlobalSurface(nullptr);
|
||||
delete pKernel;
|
||||
@@ -1188,6 +1190,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
|
||||
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
|
||||
|
||||
program.setConstantSurface(nullptr);
|
||||
delete pKernel;
|
||||
@@ -2936,6 +2939,60 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre
|
||||
EXPECT_EQ(KernelExecutionType::Default, kernel.executionType);
|
||||
}
|
||||
|
||||
// Local ids are not generated by the runtime and the kernel uses them:
// the start offset is expected to include OffsetToSkipPerThreadDataLoad.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    SPatchThreadPayload payload = {};
    payload.OffsetToSkipPerThreadDataLoad = 128u;

    auto &kernelInfo = kernelWithInternals.kernelInfo;
    kernelInfo.patchInfo.threadPayload = &payload;
    kernelInfo.createKernelAllocation(clDevice->getRootDeviceIndex(), clDevice->getMemoryManager());

    auto kernelAllocation = kernelInfo.getGraphicsAllocation();
    auto baseAddress = kernelAllocation->getGpuAddressToPatch();

    const bool localIdsGenerationByRuntime = false;
    const bool kernelUsesLocalIds = true;
    const bool isCcsUsed = false;

    kernelWithInternals.mockKernel->setStartOffset(128);
    auto startOffset = kernelWithInternals.mockKernel->getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed);

    // 256 = start offset (128) + OffsetToSkipPerThreadDataLoad (128).
    EXPECT_EQ(baseAddress + 256u, startOffset);

    clDevice->getMemoryManager()->freeGraphicsMemory(kernelAllocation);
}
|
||||
|
||||
// Local ids are generated by the runtime and the kernel uses them:
// the start offset is expected to exclude OffsetToSkipPerThreadDataLoad.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    SPatchThreadPayload payload = {};
    payload.OffsetToSkipPerThreadDataLoad = 128u;

    auto &kernelInfo = kernelWithInternals.kernelInfo;
    kernelInfo.patchInfo.threadPayload = &payload;
    kernelInfo.createKernelAllocation(clDevice->getRootDeviceIndex(), clDevice->getMemoryManager());

    auto kernelAllocation = kernelInfo.getGraphicsAllocation();
    auto baseAddress = kernelAllocation->getGpuAddressToPatch();

    const bool localIdsGenerationByRuntime = true;
    const bool kernelUsesLocalIds = true;
    const bool isCcsUsed = false;

    kernelWithInternals.mockKernel->setStartOffset(128);
    auto startOffset = kernelWithInternals.mockKernel->getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed);

    // Only the 128-byte start offset is applied on top of the allocation address.
    EXPECT_EQ(baseAddress + 128u, startOffset);

    clDevice->getMemoryManager()->freeGraphicsMemory(kernelAllocation);
}
|
||||
|
||||
// Local ids are not generated by the runtime and the kernel does not use them:
// the start offset is expected to exclude OffsetToSkipPerThreadDataLoad.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    SPatchThreadPayload payload = {};
    payload.OffsetToSkipPerThreadDataLoad = 128u;

    auto &kernelInfo = kernelWithInternals.kernelInfo;
    kernelInfo.patchInfo.threadPayload = &payload;
    kernelInfo.createKernelAllocation(clDevice->getRootDeviceIndex(), clDevice->getMemoryManager());

    auto kernelAllocation = kernelInfo.getGraphicsAllocation();
    auto baseAddress = kernelAllocation->getGpuAddressToPatch();

    const bool localIdsGenerationByRuntime = false;
    const bool kernelUsesLocalIds = false;
    const bool isCcsUsed = false;

    kernelWithInternals.mockKernel->setStartOffset(128);
    auto startOffset = kernelWithInternals.mockKernel->getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed);

    // Only the 128-byte start offset is applied on top of the allocation address.
    EXPECT_EQ(baseAddress + 128u, startOffset);

    clDevice->getMemoryManager()->freeGraphicsMemory(kernelAllocation);
}
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
Reference in New Issue
Block a user