diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index db0bd2162c..a869d928fb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -81,7 +81,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z auto mocs = device->getMOCS(true, false); NEO::EncodeSurfaceState::encodeBuffer(surfaceState, debugSurface->getGpuAddress(), debugSurface->getUnderlyingBufferSize(), mocs, - false, false, neoDevice->getNumAvailableDevices(), + false, false, /* isReadOnly */ false, neoDevice->getNumAvailableDevices(), debugSurface, neoDevice->getGmmHelper()); } diff --git a/level_zero/core/source/kernel/kernel_hw.h b/level_zero/core/source/kernel/kernel_hw.h index eb022fd127..cd44e649a3 100644 --- a/level_zero/core/source/kernel/kernel_hw.h +++ b/level_zero/core/source/kernel/kernel_hw.h @@ -53,7 +53,7 @@ struct KernelHw : public KernelImp { auto mocs = this->module->getDevice()->getMOCS(true, false); NEO::Device *neoDevice = module->getDevice()->getNEODevice(); NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs, - false, false, neoDevice->getNumAvailableDevices(), + false, false, /* isReadOnly */ false, neoDevice->getNumAvailableDevices(), alloc, neoDevice->getGmmHelper()); } diff --git a/opencl/source/mem_obj/buffer_base.inl b/opencl/source/mem_obj/buffer_base.inl index 20bcd9e222..d3c1349d32 100644 --- a/opencl/source/mem_obj/buffer_base.inl +++ b/opencl/source/mem_obj/buffer_base.inl @@ -36,10 +36,11 @@ template void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); + const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument; /* read-only when the buffer flags contain CL_MEM_READ_ONLY or the caller marks this argument as read-only */
EncodeSurfaceState::encodeBuffer(memory, getBufferAddress(rootDeviceIndex), getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex), - getMocsValue(disableL3, isReadOnlyArgument, rootDeviceIndex), - true, forceNonAuxMode, device.getNumAvailableDevices(), + getMocsValue(disableL3, isReadOnly, rootDeviceIndex), /* MOCS lookup now receives the combined read-only state instead of the argument flag alone */ + true, forceNonAuxMode, isReadOnly, device.getNumAvailableDevices(), /* the same read-only state is passed on to encodeBuffer */ graphicsAllocation, device.getGmmHelper()); appendSurfaceStateExt(memory); } diff --git a/opencl/test/unit_test/gen12lp/CMakeLists.txt b/opencl/test/unit_test/gen12lp/CMakeLists.txt index 0f43e88ef1..88f6b6dd14 100644 --- a/opencl/test/unit_test/gen12lp/CMakeLists.txt +++ b/opencl/test/unit_test/gen12lp/CMakeLists.txt @@ -8,6 +8,7 @@ if(TESTS_GEN12LP) set(IGDRCL_SRCS_tests_gen12lp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_tests_gen12lp.inl + ${CMAKE_CURRENT_SOURCE_DIR}/buffer_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl diff --git a/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl new file mode 100644 index 0000000000..e14f55e209 --- /dev/null +++ b/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2019-2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/device/device.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" + +#include "opencl/source/cl_device/cl_device.h" +#include "opencl/source/mem_obj/buffer.h" +#include "opencl/test/unit_test/mocks/mock_context.h" +#include "test.h" + +using namespace NEO; + +struct BufferTestsTgllp : ::testing::Test { /* fixture: SetUp creates the context and caches its device 0 for the tests below */ + void SetUp() override { + context = std::make_unique(); + device = 
context->getDevice(0); + } + + std::unique_ptr context{}; + ClDevice *device{}; + + cl_int retVal = CL_SUCCESS; +}; + +GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStateThenUseL3) { /* read-write buffer, isReadOnlyArgument = false: expects the GMM_RESOURCE_USAGE_OCL_BUFFER MOCS */ + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateThenUseL1) { /* the buffer itself is created read-write; only isReadOnlyArgument is true: expects the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS */ + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, /* isReadOnlyArgument */ true, context->getDevice(0)->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateThenUseL1) { /* allocation type is switched to CONSTANT_SURFACE while isReadOnlyArgument stays false: expects the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS */ + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice()); + + const auto expectedMocs = 
device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateThenUseL1) { /* ForceL1Caching = 1 with isReadOnlyArgument = false: expects the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS */ + DebugManagerStateRestore restore{}; + DebugManager.flags.ForceL1Caching.set(1); + + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammingSurfaceStateThenUseL3) { /* ForceL1Caching = 0 with isReadOnlyArgument = true: expects the GMM_RESOURCE_USAGE_OCL_BUFFER MOCS */ + DebugManagerStateRestore restore{}; + DebugManager.flags.ForceL1Caching.set(0); + + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, /* isReadOnlyArgument */ true, device->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} diff --git a/opencl/test/unit_test/gen12lp/dg1/excludes_dg1.cpp b/opencl/test/unit_test/gen12lp/dg1/excludes_dg1.cpp index 7e8dc8a59d..4b6d30caca 100644 --- a/opencl/test/unit_test/gen12lp/dg1/excludes_dg1.cpp +++ b/opencl/test/unit_test/gen12lp/dg1/excludes_dg1.cpp @@ -26,3 +26,7 @@ HWCMDTEST_EXCLUDE_FAMILY(TimestampEventCreate, givenEventTimestampsWhenQueryKern 
HWCMDTEST_EXCLUDE_FAMILY(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_DG1); HWCMDTEST_EXCLUDE_FAMILY(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsNotTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_DG1); + +HWCMDTEST_EXCLUDE_FAMILY(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_DG1) /* NOTE(review): no trailing semicolon here, unlike the neighbouring exclude lines - confirm the macro does not require one */ + +HWCMDTEST_EXCLUDE_FAMILY(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_DG1); /* presumably excluded because read-only buffers now get a different MOCS on this family - TODO confirm */ diff --git a/opencl/test/unit_test/gen12lp/gen12lp_tests_wrapper.cpp b/opencl/test/unit_test/gen12lp/gen12lp_tests_wrapper.cpp index c078157046..4b8d0fe39a 100644 --- a/opencl/test/unit_test/gen12lp/gen12lp_tests_wrapper.cpp +++ b/opencl/test/unit_test/gen12lp/gen12lp_tests_wrapper.cpp @@ -6,6 +6,7 @@ */ #include "opencl/test/unit_test/gen12lp/aub_command_stream_receiver_tests_gen12lp.inl" +#include "opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl" diff --git a/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl index 17b14d4ed8..1890077a86 100644 --- a/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl @@ -378,3 +378,49 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenGen12WhenCallIsPackedSupportedThenReturn auto &helper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(helper.packedFormatsSupported()); } + +GEN12LPTEST_F(HwHelperTestGen12Lp, whenRequestingMocsThenProperMocsIndicesAreBeingReturned) { /* ForceL1Caching is not overridden here; checks getMocsIndex for all four combinations of its two bool arguments */ + auto &helper = HwHelper::get(renderCoreFamily); + auto gmmHelper = this->pDevice->getGmmHelper(); + + const auto 
mocsNoCache = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; + const auto mocsL3 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; + const auto mocsL1 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; /* each expected index is the getMOCS value shifted right by one */ + + EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, false)); + EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, true)); + EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, false)); + EXPECT_EQ(mocsL1, helper.getMocsIndex(*gmmHelper, true, true)); +} + +GEN12LPTEST_F(HwHelperTestGen12Lp, givenL1ForceEnabledWhenRequestingMocsThenProperMocsIndicesAreBeingReturned) { /* ForceL1Caching = 1: when the second argument is true the CONST index is expected regardless of the third */ + DebugManagerStateRestore restore{}; + DebugManager.flags.ForceL1Caching.set(1); + + auto &helper = HwHelper::get(renderCoreFamily); + auto gmmHelper = this->pDevice->getGmmHelper(); + + const auto mocsNoCache = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; + const auto mocsL1 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; + + EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, false)); + EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, true)); + EXPECT_EQ(mocsL1, helper.getMocsIndex(*gmmHelper, true, false)); + EXPECT_EQ(mocsL1, helper.getMocsIndex(*gmmHelper, true, true)); +} + +GEN12LPTEST_F(HwHelperTestGen12Lp, givenL1ForceDisabledWhenRequestingMocsThenProperMocsIndicesAreBeingReturned) { /* ForceL1Caching = 0: when the second argument is true the OCL_BUFFER index is expected regardless of the third */ + DebugManagerStateRestore restore{}; + DebugManager.flags.ForceL1Caching.set(0); + + auto &helper = HwHelper::get(renderCoreFamily); + auto gmmHelper = this->pDevice->getGmmHelper(); + + const auto mocsNoCache = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; + const auto mocsL3 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; + + EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, false)); + EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, true)); + EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, 
false)); + EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, true)); +} diff --git a/opencl/test/unit_test/gen12lp/rkl/excludes_rkl.cpp b/opencl/test/unit_test/gen12lp/rkl/excludes_rkl.cpp index a6a74e14b1..d51ef5de1a 100644 --- a/opencl/test/unit_test/gen12lp/rkl/excludes_rkl.cpp +++ b/opencl/test/unit_test/gen12lp/rkl/excludes_rkl.cpp @@ -26,3 +26,7 @@ HWCMDTEST_EXCLUDE_FAMILY(TimestampEventCreate, givenEventTimestampsWhenQueryKern HWCMDTEST_EXCLUDE_FAMILY(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_ROCKETLAKE); HWCMDTEST_EXCLUDE_FAMILY(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsNotTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_ROCKETLAKE); + +HWCMDTEST_EXCLUDE_FAMILY(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_ROCKETLAKE) /* NOTE(review): no trailing semicolon here, unlike the neighbouring exclude lines - confirm the macro does not require one */ + +HWCMDTEST_EXCLUDE_FAMILY(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_ROCKETLAKE); \ No newline at end of file diff --git a/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp new file mode 100644 index 0000000000..e14f55e209 --- /dev/null +++ b/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2019-2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/device/device.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" + +#include "opencl/source/cl_device/cl_device.h" +#include "opencl/source/mem_obj/buffer.h" +#include "opencl/test/unit_test/mocks/mock_context.h" +#include "test.h" + +using namespace NEO; /* NOTE(review): this new file has the same blob id (e14f55e209) as gen12lp/buffer_tests_gen12lp.inl - confirm the duplicate is intended */ + +struct BufferTestsTgllp : ::testing::Test { /* fixture: SetUp creates the context and caches its device 0 for the tests below */ + void SetUp() override { + context = std::make_unique(); + device = context->getDevice(0); + } + + std::unique_ptr context{}; + ClDevice *device{}; + 
+ cl_int retVal = CL_SUCCESS; +}; + +GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStateThenUseL3) { /* read-write buffer, isReadOnlyArgument = false: expects the GMM_RESOURCE_USAGE_OCL_BUFFER MOCS */ + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateThenUseL1) { /* the buffer itself is created read-write; only isReadOnlyArgument is true: expects the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS */ + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, /* isReadOnlyArgument */ true, context->getDevice(0)->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateThenUseL1) { /* allocation type is switched to CONSTANT_SURFACE while isReadOnlyArgument stays false: expects the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS */ + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + const auto actualMocs = 
surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateThenUseL1) { /* ForceL1Caching = 1 with isReadOnlyArgument = false: expects the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS */ + DebugManagerStateRestore restore{}; + DebugManager.flags.ForceL1Caching.set(1); + + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} + +GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammingSurfaceStateThenUseL3) { /* ForceL1Caching = 0 with isReadOnlyArgument = true: expects the GMM_RESOURCE_USAGE_OCL_BUFFER MOCS */ + DebugManagerStateRestore restore{}; + DebugManager.flags.ForceL1Caching.set(0); + + auto buffer = std::unique_ptr(Buffer::create( + context.get(), + CL_MEM_READ_WRITE, + MemoryConstants::pageSize, + nullptr, + retVal)); + ASSERT_EQ(CL_SUCCESS, retVal); + + typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; + buffer->setArgStateful(&surfaceState, false, false, false, /* isReadOnlyArgument */ true, device->getDevice()); + + const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + const auto actualMocs = surfaceState.getMemoryObjectControlState(); + EXPECT_EQ(expectedMocs, actualMocs); +} diff --git a/opencl/test/unit_test/gen12lp/tgllp/excludes_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/excludes_tgllp.cpp index 372d82c659..d6a77747d8 100644 --- a/opencl/test/unit_test/gen12lp/tgllp/excludes_tgllp.cpp +++ b/opencl/test/unit_test/gen12lp/tgllp/excludes_tgllp.cpp @@ -26,3 +26,7 @@ HWCMDTEST_EXCLUDE_FAMILY(TimestampEventCreate, givenEventTimestampsWhenQueryKern HWCMDTEST_EXCLUDE_FAMILY(ProfilingCommandsTest, 
givenKernelWhenProfilingCommandStartIsTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_TIGERLAKE_LP); HWCMDTEST_EXCLUDE_FAMILY(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsNotTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_TIGERLAKE_LP); + +HWCMDTEST_EXCLUDE_FAMILY(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_TIGERLAKE_LP) /* NOTE(review): no trailing semicolon here, unlike the neighbouring exclude lines - confirm the macro does not require one */ + +HWCMDTEST_EXCLUDE_FAMILY(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_TIGERLAKE_LP); diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index db907ae2ce..77f6fcb3c0 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -27,6 +27,7 @@ AUBDumpAllocsOnEnqueueReadOnly = 0 AUBDumpAllocsOnEnqueueSVMMemcpyOnly = 0 AUBDumpForceAllToLocalMemory = 0 ForceDeviceId = unk +ForceL1Caching = -1 SchedulerSimulationReturnInstance = 0 SchedulerGWS = 0 EnableExperimentalCommandBuffer = 0 diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index a881327796..8f1191dc87 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -194,10 +194,10 @@ struct EncodeSurfaceState { using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE; static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, - bool cpuCoherent, bool forceNonAuxMode, uint32_t numAvailableDevices, + bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, /* new flag, inserted before numAvailableDevices, so every positional caller must be updated */ uint32_t numAvailableDevices, GraphicsAllocation *allocation, GmmHelper *gmmHelper); static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - uint32_t numAvailableDevices); + bool isReadOnly, /* same flag, received from encodeBuffer */ uint32_t numAvailableDevices); static constexpr uintptr_t 
getSurfaceBaseAddressAlignmentMask() { return ~(getSurfaceBaseAddressAlignment() - 1); diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index a423ee2a21..b5ddadba99 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -267,7 +267,7 @@ void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_ template void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, - bool cpuCoherent, bool forceNonAuxMode, uint32_t numAvailableDevices, + bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices, GraphicsAllocation *allocation, GmmHelper *gmmHelper) { auto surfaceState = reinterpret_cast(dst); UNRECOVERABLE_IF(!isAligned(size)); @@ -306,7 +306,7 @@ void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_ surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } - EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, numAvailableDevices); + EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices); /* isReadOnly is passed through unchanged to the extra-params hook */ } template diff --git a/shared/source/command_container/command_encoder_base.inl b/shared/source/command_container/command_encoder_base.inl index 7a876eacc3..822405e79d 100644 --- a/shared/source/command_container/command_encoder_base.inl +++ b/shared/source/command_container/command_encoder_base.inl @@ -391,7 +391,7 @@ inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &devic template void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - uint32_t numAvailableDevices) { + bool isReadOnly, uint32_t numAvailableDevices) { /* this base version has an empty body; the Gen12LP encoder source provides its own specialization */ } } // namespace NEO diff --git a/shared/source/debug_settings/debug_variables_base.inl 
b/shared/source/debug_settings/debug_variables_base.inl index 105bbe0683..2cc70a9c6f 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -55,6 +55,7 @@ DECLARE_DEBUG_VARIABLE(bool, ZebinAppendElws, false, "Append crossthread data wi DECLARE_DEBUG_VARIABLE(bool, ZebinIgnoreIcbeVersion, false, "Ignore IGC\'s ICBE version") DECLARE_DEBUG_VARIABLE(bool, UseExternalAllocatorForSshAndDsh, false, "Use 32 bit external Allocator for ssh and dsh in Level Zero") DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing") +DECLARE_DEBUG_VARIABLE(int32_t, ForceL1Caching, -1, "-1: default, 0: disable, 1: enable, When set to true driver will program L1 cache policy for surface state and stateless accessess") DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationEnabled, -1, "-1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information") DECLARE_DEBUG_VARIABLE(int32_t, SchedulerGWS, 0, "Forces gws of scheduler kernel, only multiple of 24 allowed or 0 - default selected") diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index ecf6eee9a8..37ee80d70e 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -47,6 +47,24 @@ void EncodeWA::encodeAdditionalPipelineSelect(Device &device, LinearStre } } +template <> +void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, + bool isReadOnly, uint32_t numAvailableDevices) { /* Gen12LP specialization: may replace an already programmed OCL_BUFFER MOCS with the OCL_BUFFER_CONST one; numAvailableDevices is not used */ + const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); /* surfaces programmed with any other MOCS are left untouched */ + if (isL3Allowed) { + const bool isConstantSurface = allocation && allocation->getAllocationType() == 
GraphicsAllocation::AllocationType::CONSTANT_SURFACE; + bool useL1 = isReadOnly || isConstantSurface; /* default decision: read-only data or a constant surface */ + + if (DebugManager.flags.ForceL1Caching.get() != -1) { /* any value other than -1 overrides the default decision; nonzero enables, zero disables */ + useL1 = !!DebugManager.flags.ForceL1Caching.get(); + } + + if (useL1) { + surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST)); + } + } +} + template struct EncodeDispatchKernel; template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/gen12lp/hw_helper_gen12lp.cpp b/shared/source/gen12lp/hw_helper_gen12lp.cpp index d498cfb135..6bb73e6e9b 100644 --- a/shared/source/gen12lp/hw_helper_gen12lp.cpp +++ b/shared/source/gen12lp/hw_helper_gen12lp.cpp @@ -240,6 +240,23 @@ bool HwHelperHw::useOnlyGlobalTimestamps() const { return true; } +template <> +uint32_t HwHelperHw::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const { /* returns getMOCS(usage) >> 1, where usage is CACHELINE_MISALIGNED when l3enabled is false, otherwise OCL_BUFFER_CONST or OCL_BUFFER depending on l1enabled */ + if (l3enabled) { + if (DebugManager.flags.ForceL1Caching.get() != -1) { /* the debug flag, when not -1, replaces the l1enabled argument */ + l1enabled = !!DebugManager.flags.ForceL1Caching.get(); + } + + if (l1enabled) { + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; + } else { + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; + } + } + + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; +} + template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index 9394f3a7e7..790ef1c887 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -47,7 +47,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr length.Length = static_cast(allocSize - 1); GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1); 
EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, - false, false, 1u, + false, false, /* isReadOnly */ false, 1u, &allocation, pDevice->getGmmHelper()); EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth()); @@ -74,7 +74,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo length.Length = static_cast(allocSize - 1); EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, - true, false, 1u, + true, false, /* isReadOnly */ false, 1u, nullptr, pDevice->getGmmHelper()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType()); @@ -100,7 +100,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency length.Length = static_cast(allocSize - 1); EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, - false, false, 1u, + false, false, /* isReadOnly */ false, 1u, nullptr, pDevice->getGmmHelper()); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());