From 60c7587c2b9a3f7b3b7ad13173bc7e5bfd211956 Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Thu, 23 Jan 2020 15:52:49 +0100 Subject: [PATCH] Simplify HardwareCommandsHelper Related-To: NEO-4175 Change-Id: I39b08353514ea0bf384b6b592f24952d0ed631e6 Signed-off-by: Mateusz Hoppe --- core/helpers/hw_helper.h | 3 +- core/helpers/hw_helper_base.inl | 2 +- .../command_queue/gpgpu_walker_bdw_plus.inl | 6 +- .../hardware_interface_bdw_plus.inl | 5 +- runtime/device_queue/device_queue_hw_base.inl | 7 ++- .../device_queue/device_queue_hw_bdw_plus.inl | 8 ++- runtime/gen12lp/hw_helper_gen12lp.cpp | 2 +- runtime/helpers/hardware_commands_helper.h | 27 +-------- runtime/helpers/hardware_commands_helper.inl | 15 ++--- .../helpers/hardware_commands_helper_base.inl | 22 +------ runtime/kernel/kernel.cpp | 33 +++++++++-- runtime/kernel/kernel.h | 8 ++- .../enqueue_execution_model_kernel_tests.cpp | 12 +++- unit_tests/gen12lp/CMakeLists.txt | 3 +- unit_tests/gen12lp/gen12lp_tests_wrapper.cpp | 3 +- unit_tests/gen12lp/tgllp/CMakeLists.txt | 3 +- .../kernel_tests_tgllp.cpp} | 10 ++-- .../hardware_commands_helper_tests.cpp | 55 +++++++++++------- .../helpers/hardware_commands_helper_tests.h | 10 +++- unit_tests/helpers/hw_helper_tests.cpp | 3 +- unit_tests/kernel/kernel_tests.cpp | 57 +++++++++++++++++++ 21 files changed, 185 insertions(+), 109 deletions(-) rename unit_tests/gen12lp/{hardware_commands_helper_tests_gen12lp.inl => tgllp/kernel_tests_tgllp.cpp} (70%) diff --git a/core/helpers/hw_helper.h b/core/helpers/hw_helper.h index 074b689cd4..86cdd42f3b 100644 --- a/core/helpers/hw_helper.h +++ b/core/helpers/hw_helper.h @@ -72,6 +72,7 @@ class HwHelper { uint32_t threadsPerEu) = 0; virtual uint32_t alignSlmSize(uint32_t slmSize) = 0; virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0; + virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0; static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo); static uint32_t getEnginesCount(const HardwareInfo &hwInfo); @@ -186,7 +187,7 @@ class HwHelperHw : public HwHelper { static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo); - static bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo); + bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override; static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo); diff --git a/core/helpers/hw_helper_base.inl b/core/helpers/hw_helper_base.inl index eeb70dc1f2..a2ca78d4a1 100644 --- a/core/helpers/hw_helper_base.inl +++ b/core/helpers/hw_helper_base.inl @@ -263,7 +263,7 @@ uint32_t HwHelperHw::getBarriersCountFromHasBarriers(uint32_t hasBarr } template -bool HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) { +bool HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const { return false; } diff --git a/runtime/command_queue/gpgpu_walker_bdw_plus.inl b/runtime/command_queue/gpgpu_walker_bdw_plus.inl index b0c5603e5c..c1b30843fd 100644 --- a/runtime/command_queue/gpgpu_walker_bdw_plus.inl +++ b/runtime/command_queue/gpgpu_walker_bdw_plus.inl @@ -126,12 +126,15 @@ void GpgpuWalkerHelper::dispatchScheduler( auto pGpGpuWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker; bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(scheduler); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(scheduler); + HardwareCommandsHelper::sendIndirectState( commandStream, *dsh, *ioh, *ssh, scheduler, + scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -139,8 +142,7 @@ void GpgpuWalkerHelper::dispatchScheduler( preemptionMode, pGpGpuWalkerCmd, nullptr, - true, - isCcsUsed); + true); // Implement enabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true); diff --git a/runtime/command_queue/hardware_interface_bdw_plus.inl b/runtime/command_queue/hardware_interface_bdw_plus.inl index 96a77eac1a..0ccb9e7deb 100644 --- a/runtime/command_queue/hardware_interface_bdw_plus.inl +++ b/runtime/command_queue/hardware_interface_bdw_plus.inl @@ -115,6 +115,7 @@ inline void HardwareInterface::programWalker( } auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -122,6 +123,7 @@ inline void HardwareInterface::programWalker( ioh, ssh, kernel, + kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -129,8 +131,7 @@ inline void HardwareInterface::programWalker( preemptionMode, walkerCmd, nullptr, - true, - isCcsUsed); + true); GpgpuWalkerHelper::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, diff --git a/runtime/device_queue/device_queue_hw_base.inl b/runtime/device_queue/device_queue_hw_base.inl index 77e66a3e80..9c2e831ba5 100644 --- a/runtime/device_queue/device_queue_hw_base.inl +++ b/runtime/device_queue/device_queue_hw_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -237,7 +237,10 @@ uint64_t DeviceQueueHw::getBlockKernelStartPointer(const Device &devi auto blockKernelStartPointer = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu; - if (blockAllocation && isCcsUsed && HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(device.getHardwareInfo())) { + auto &hardwareInfo = device.getHardwareInfo(); + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } return blockKernelStartPointer; diff --git a/runtime/device_queue/device_queue_hw_bdw_plus.inl b/runtime/device_queue/device_queue_hw_bdw_plus.inl index 122b440782..fafc1ae8d7 100644 --- a/runtime/device_queue/device_queue_hw_bdw_plus.inl +++ b/runtime/device_queue/device_queue_hw_bdw_plus.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -182,7 +182,11 @@ void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - auto btOffset = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(surfaceStateHeap, *pBlockInfo); + auto btOffset = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount, + pBlockInfo->heapInfo.pSsh, + pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, + bindingTableCount, + pBlockInfo->patchInfo.bindingTableState->Offset); parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast(btOffset)); diff --git a/runtime/gen12lp/hw_helper_gen12lp.cpp b/runtime/gen12lp/hw_helper_gen12lp.cpp index 22a9f0778a..e9bae0a305 100644 --- a/runtime/gen12lp/hw_helper_gen12lp.cpp +++ b/runtime/gen12lp/hw_helper_gen12lp.cpp @@ -16,7 +16,7 @@ namespace NEO { typedef TGLLPFamily Family; template <> -bool HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) { +bool HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const { return (hwInfo.platform.usRevId < REVISION_B); } diff --git a/runtime/helpers/hardware_commands_helper.h b/runtime/helpers/hardware_commands_helper.h index 8bd9e6f47b..d2f2b898fb 100644 --- a/runtime/helpers/hardware_commands_helper.h +++ b/runtime/helpers/hardware_commands_helper.h @@ -86,26 +86,13 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { const void *srcKernelSsh, size_t srcKernelSshSize, size_t numberOfBindingTableStates, size_t offsetOfBindingTable); - static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo) { - return pushBindingTableAndSurfaceStates(dstHeap, (srcKernelInfo.patchInfo.bindingTableState != nullptr) ? srcKernelInfo.patchInfo.bindingTableState->Count : 0, - srcKernelInfo.heapInfo.pSsh, - srcKernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize, - (srcKernelInfo.patchInfo.bindingTableState != nullptr) ? srcKernelInfo.patchInfo.bindingTableState->Count : 0, - (srcKernelInfo.patchInfo.bindingTableState != nullptr) ? srcKernelInfo.patchInfo.bindingTableState->Offset : 0); - } - - static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { - return pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0, - srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), - srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); - } - static size_t sendIndirectState( LinearStream &commandStream, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, Kernel &kernel, + uint64_t kernelStartOffset, uint32_t simd, const size_t localWorkSize[3], const uint64_t offsetInterfaceDescriptorTable, @@ -113,8 +100,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { PreemptionMode preemptionMode, WALKER_TYPE *walkerCmd, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, - bool localIdsGenerationByRuntime, - bool isCcsUsed); + bool localIdsGenerationByRuntime); static void programPerThreadData( size_t &sizePerThreadData, @@ -136,15 +122,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { inline static bool resetBindingTablePrefetch(Kernel &kernel); - static void setKernelStartOffset( - uint64_t &kernelStartOffset, - bool kernelAllocation, - const KernelInfo &kernelInfo, - const bool &localIdsGenerationByRuntime, - const bool &kernelUsesLocalIds, - Kernel &kernel, - bool isCssUsed); - static size_t getSizeRequiredCS(const Kernel *kernel); static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); static bool isPipeControlWArequired(const HardwareInfo &hwInfo); diff --git a/runtime/helpers/hardware_commands_helper.inl b/runtime/helpers/hardware_commands_helper.inl index 1f12841cb0..1eeb588520 100644 --- a/runtime/helpers/hardware_commands_helper.inl +++ b/runtime/helpers/hardware_commands_helper.inl @@ -277,6 +277,7 @@ size_t HardwareCommandsHelper::sendIndirectState( IndirectHeap &ioh, IndirectHeap &ssh, Kernel &kernel, + uint64_t kernelStartOffset, uint32_t simd, const size_t localWorkSize[3], const uint64_t offsetInterfaceDescriptorTable, @@ -284,26 +285,20 @@ size_t HardwareCommandsHelper::sendIndirectState( PreemptionMode preemptionMode, WALKER_TYPE *walkerCmd, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, - bool localIdsGenerationByRuntime, - bool isCcsUsed) { + bool localIdsGenerationByRuntime) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); auto inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); // Copy the kernel over to the ISH - uint64_t kernelStartOffset = 0llu; const auto &kernelInfo = kernel.getKernelInfo(); - auto kernelAllocation = kernelInfo.getGraphicsAllocation(); - DEBUG_BREAK_IF(!kernelAllocation); - setKernelStartOffset(kernelStartOffset, kernelAllocation, kernelInfo, localIdsGenerationByRuntime, - kernelUsesLocalIds, kernel, isCcsUsed); - const auto &patchInfo = kernelInfo.patchInfo; - auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, kernel); + auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0, + kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), + kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); // Copy our sampler state if it exists size_t samplerStateOffset = 0; diff --git a/runtime/helpers/hardware_commands_helper_base.inl b/runtime/helpers/hardware_commands_helper_base.inl index 778715aab9..cab9e9477c 100644 --- a/runtime/helpers/hardware_commands_helper_base.inl +++ b/runtime/helpers/hardware_commands_helper_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -89,26 +89,6 @@ void HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( } } -template -void HardwareCommandsHelper::setKernelStartOffset( - uint64_t &kernelStartOffset, - bool kernelAllocation, - const KernelInfo &kernelInfo, - const bool &localIdsGenerationByRuntime, - const bool &kernelUsesLocalIds, - Kernel &kernel, - bool isCssUsed) { - - if (kernelAllocation) { - kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); - } - kernelStartOffset += kernel.getStartOffset(); - - if (isCssUsed && HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(kernel.getDevice().getHardwareInfo())) { - kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP; - } -} - template void HardwareCommandsHelper::programPerThreadData( size_t &sizePerThreadData, diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 42032294e8..d3a16e3a63 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -776,11 +776,7 @@ void Kernel::setStartOffset(uint32_t offset) { this->startOffset = offset; } -const void *Kernel::getSurfaceStateHeap() const { - return kernelInfo.usesSsh ? pSshLocal.get() : nullptr; -} - -void *Kernel::getSurfaceStateHeap() { +void *Kernel::getSurfaceStateHeap() const { return kernelInfo.usesSsh ? pSshLocal.get() : nullptr; } @@ -2405,4 +2401,31 @@ bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() { return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf(); } +uint64_t Kernel::getKernelStartOffset( + const bool localIdsGenerationByRuntime, + const bool kernelUsesLocalIds, + const bool isCssUsed) const { + + uint64_t kernelStartOffset = 0; + + if (kernelInfo.getGraphicsAllocation()) { + kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); + if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) { + DEBUG_BREAK_IF(kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad != 128); + kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad; + } + } + + kernelStartOffset += getStartOffset(); + + auto &hardwareInfo = getDevice().getHardwareInfo(); + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { + kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + } + + return kernelStartOffset; +} + } // namespace NEO diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index f0ac60c269..b28e065ff1 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -147,8 +147,7 @@ class Kernel : public BaseObject<_cl_kernel> { size_t *paramValueSizeRet) const; const void *getKernelHeap() const; - const void *getSurfaceStateHeap() const; - void *getSurfaceStateHeap(); + void *getSurfaceStateHeap() const; const void *getDynamicStateHeap() const; size_t getKernelHeapSize() const; @@ -404,6 +403,11 @@ class Kernel : public BaseObject<_cl_kernel> { size_t *localWorkSize); uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const; + uint64_t getKernelStartOffset( + const bool localIdsGenerationByRuntime, + const bool kernelUsesLocalIds, + const bool isCssUsed) const; + protected: struct ObjectCounts { uint32_t imageCount; diff --git a/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp b/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp index cfe74e6de4..9d309abacf 100644 --- a/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -62,7 +62,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); - if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(pKernel->getDevice().getHardwareInfo())) { + + auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } @@ -104,7 +108,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer(); uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); - if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(pKernel->getDevice().getHardwareInfo())) { + + auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } diff --git a/unit_tests/gen12lp/CMakeLists.txt b/unit_tests/gen12lp/CMakeLists.txt index de6b65da4d..23de2f53ae 100644 --- a/unit_tests/gen12lp/CMakeLists.txt +++ b/unit_tests/gen12lp/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2019 Intel Corporation +# Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -15,7 +15,6 @@ if(TESTS_GEN12LP) ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp_tests_wrapper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen12lp.inl diff --git a/unit_tests/gen12lp/gen12lp_tests_wrapper.cpp b/unit_tests/gen12lp/gen12lp_tests_wrapper.cpp index b1fb6f0bb5..886aaa8af2 100644 --- a/unit_tests/gen12lp/gen12lp_tests_wrapper.cpp +++ b/unit_tests/gen12lp/gen12lp_tests_wrapper.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,7 +12,6 @@ #include "unit_tests/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl" #include "unit_tests/gen12lp/compute_mode_tests_gen12lp.inl" #include "unit_tests/gen12lp/enqueue_media_kernel_gen12lp.inl" -#include "unit_tests/gen12lp/hardware_commands_helper_tests_gen12lp.inl" #include "unit_tests/gen12lp/hw_helper_tests_gen12lp.inl" #include "unit_tests/gen12lp/image_tests_gen12lp.inl" #include "unit_tests/gen12lp/kernel_tests_gen12lp.inl" diff --git a/unit_tests/gen12lp/tgllp/CMakeLists.txt b/unit_tests/gen12lp/tgllp/CMakeLists.txt index e20cf7eb0c..9f314842ef 100644 --- a/unit_tests/gen12lp/tgllp/CMakeLists.txt +++ b/unit_tests/gen12lp/tgllp/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -7,6 +7,7 @@ if(TESTS_TGLLP) set(IGDRCL_SRCS_tests_gen12lp_tgllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_helper_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_tgllp.cpp ) diff --git a/unit_tests/gen12lp/hardware_commands_helper_tests_gen12lp.inl b/unit_tests/gen12lp/tgllp/kernel_tests_tgllp.cpp similarity index 70% rename from unit_tests/gen12lp/hardware_commands_helper_tests_gen12lp.inl rename to unit_tests/gen12lp/tgllp/kernel_tests_tgllp.cpp index 5ce76d2356..a27b8a893a 100644 --- a/unit_tests/gen12lp/hardware_commands_helper_tests_gen12lp.inl +++ b/unit_tests/gen12lp/tgllp/kernel_tests_tgllp.cpp @@ -12,9 +12,9 @@ using namespace NEO; -using HardwareCommandsGen12LpTests = ::testing::Test; +using KernelTgllpTests = ::testing::Test; -TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) { +TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) { const uint64_t defaultKernelStartOffset = 0; const uint64_t additionalOffsetDueToFfid = 0x1234; SPatchThreadPayload threadPayload{}; @@ -30,9 +30,7 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroun mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload; for (auto isCcsUsed : ::testing::Bool()) { - uint64_t kernelStartOffset = defaultKernelStartOffset; - HardwareCommandsHelper::setKernelStartOffset(kernelStartOffset, false, mockKernelWithInternals.kernelInfo, false, - false, *mockKernelWithInternals.mockKernel, isCcsUsed); + uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed); if (stepping < REVISION_B && isCcsUsed) { EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset); @@ -41,4 +39,4 @@ TGLLPTEST_F(HardwareCommandsGen12LpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroun } } } -} +} \ No newline at end of file diff --git a/unit_tests/helpers/hardware_commands_helper_tests.cpp b/unit_tests/helpers/hardware_commands_helper_tests.cpp index 59f759f5dc..55fb6eb360 100644 --- a/unit_tests/helpers/hardware_commands_helper_tests.cpp +++ b/unit_tests/helpers/hardware_commands_helper_tests.cpp @@ -325,6 +325,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage sizeof(INTERFACE_DESCRIPTOR_DATA)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -332,6 +333,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage ioh, ssh, *kernel, + kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, IDToffset, @@ -339,8 +341,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); // It's okay these are EXPECT_GE as they're only going to be used for // estimation purposes to avoid OOM. @@ -378,12 +379,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -391,8 +395,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); if (HardwareCommandsHelper::doBindingTablePrefetch()) { @@ -423,12 +426,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -436,8 +442,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount()); @@ -462,12 +467,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -475,8 +483,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); if (HardwareCommandsHelper::doBindingTablePrefetch()) { @@ -536,12 +543,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe MockKernel mockKernel{kernel->getProgram(), modifiedKernelInfo, kernel->getDevice(), false}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, mockKernel, + mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), modifiedKernelInfo.getMaxSimdSize(), localWorkSizes, IDToffset, @@ -549,8 +559,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ; numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize()); @@ -618,12 +627,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer) const_cast(kernelInfo).requiresSshForBuffers = true; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *kernel, + kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -631,8 +643,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer) pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0])); EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1])); @@ -780,12 +791,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF // push surfaces states and binding table to given ssh heap uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *pKernel, + pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), pKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -793,8 +807,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersF pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); bti = reinterpret_cast(reinterpret_cast(ssh.getCpuBase()) + localSshOffset + btiOffset); for (uint32_t i = 0; i < numSurfaces; ++i) { @@ -859,7 +872,7 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForKernelWithBuffersNotRequi EXPECT_EQ(0u, numSurfaceStates); // set binding table states - auto dstBindingTablePointer = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(ssh, *pKernel); + auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); auto usedAfter = ssh.getUsed(); @@ -904,10 +917,10 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) { auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); - auto dstBindingTablePointer = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(ssh, *pKernelInfo); + auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); - dstBindingTablePointer = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(ssh, *pKernel); + dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); SPatchBindingTableState bindingTableState; @@ -918,7 +931,7 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) { bindingTableState.SurfaceStateOffset = 0; pKernelInfo->patchInfo.bindingTableState = &bindingTableState; - dstBindingTablePointer = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(ssh, *pKernel); + dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); pKernelInfo->patchInfo.bindingTableState = nullptr; @@ -1060,12 +1073,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), 8, localWorkSizes, interfaceDescriptorTableOffset, @@ -1073,8 +1089,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd pDevice->getPreemptionMode(), pWalkerCmd, nullptr, - true, - isCcsUsed); + true); bool isMemorySame = memcmp(borderColorPointer, mockDsh, borderColorSize) == 0; EXPECT_TRUE(isMemorySame); diff --git a/unit_tests/helpers/hardware_commands_helper_tests.h b/unit_tests/helpers/hardware_commands_helper_tests.h index 30d5d5fa3a..fd258a34bc 100644 --- a/unit_tests/helpers/hardware_commands_helper_tests.h +++ b/unit_tests/helpers/hardware_commands_helper_tests.h @@ -1,11 +1,12 @@ /* - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "runtime/built_ins/built_ins.h" +#include "runtime/helpers/hardware_commands_helper.h" #include "runtime/kernel/kernel.h" #include "test.h" #include "unit_tests/fixtures/built_in_fixture.h" @@ -39,4 +40,11 @@ struct HardwareCommandsTest : DeviceFixture, std::unique_ptr mockKernelWithInternal; Kernel::SimpleKernelArgInfo kernelArgInfo = {}; std::vector kernelArguments; + + template + size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { + return HardwareCommandsHelper::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0, + srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), + srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); + } }; diff --git a/unit_tests/helpers/hw_helper_tests.cpp b/unit_tests/helpers/hw_helper_tests.cpp index e88a0aacb9..006d7e13a3 100644 --- a/unit_tests/helpers/hw_helper_tests.cpp +++ b/unit_tests/helpers/hw_helper_tests.cpp @@ -788,7 +788,8 @@ HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsOffsetToSkipSetFFIDGPWARequir if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); } - EXPECT_FALSE(HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)); + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + EXPECT_FALSE(hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)); } HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsForceDefaultRCSEngineWARequiredCalledThenFalseIsReturned) { diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index f20588fa95..5a99b484f5 100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -766,6 +766,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); delete pKernel; @@ -1014,6 +1015,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setGlobalSurface(nullptr); delete pKernel; @@ -1188,6 +1190,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); delete pKernel; @@ -2936,6 +2939,60 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre EXPECT_EQ(KernelExecutionType::Default, kernel.executionType); } +TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + + MockKernelWithInternals mockKernel(*device); + SPatchThreadPayload threadPayload = {}; + + threadPayload.OffsetToSkipPerThreadDataLoad = 128u; + mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; + + mockKernel.kernelInfo.createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); + auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); + + mockKernel.mockKernel->setStartOffset(128); + auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false); + EXPECT_EQ(allocationOffset + 256u, offset); + device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); +} + +TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + + MockKernelWithInternals mockKernel(*device); + SPatchThreadPayload threadPayload = {}; + + threadPayload.OffsetToSkipPerThreadDataLoad = 128u; + mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; + + mockKernel.kernelInfo.createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); + auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); + + mockKernel.mockKernel->setStartOffset(128); + auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false); + EXPECT_EQ(allocationOffset + 128u, offset); + device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); +} + +TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + + MockKernelWithInternals mockKernel(*device); + SPatchThreadPayload threadPayload = {}; + + threadPayload.OffsetToSkipPerThreadDataLoad = 128u; + mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; + + mockKernel.kernelInfo.createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); + auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); + + mockKernel.mockKernel->setStartOffset(128); + auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false); + EXPECT_EQ(allocationOffset + 128u, offset); + device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); +} + namespace NEO { template