From 394e626db999a6838e2f39e63832a7e0b05d2b0f Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 15 Sep 2020 20:27:18 +0200 Subject: [PATCH] Refactor programming of surface states Related-To: NEO-5069 Change-Id: Id7442fcdcc8c7df57f00e8dc383c11869bf1a677 Signed-off-by: Zbigniew Zdanowicz --- .../core/source/cmdlist/cmdlist_hw_base.inl | 10 ++-- level_zero/core/source/kernel/kernel_hw.h | 8 +-- opencl/source/gen11/buffer_gen11.cpp | 2 +- opencl/source/gen12lp/buffer_gen12lp.cpp | 2 +- opencl/source/gen8/buffer_gen8.cpp | 2 +- opencl/source/gen9/buffer_gen9.cpp | 2 +- opencl/source/mem_obj/CMakeLists.txt | 1 - opencl/source/mem_obj/buffer.h | 1 - opencl/source/mem_obj/buffer_base.inl | 8 ++- opencl/source/mem_obj/buffer_bdw_plus.inl | 16 ------ .../command_container/command_encoder.h | 6 ++- .../command_container/command_encoder.inl | 50 +++++++++---------- .../command_encoder_base.inl | 5 ++ .../unit_test/encoders/test_encode_states.cpp | 9 ++-- 14 files changed, 54 insertions(+), 68 deletions(-) delete mode 100644 opencl/source/mem_obj/buffer_bdw_plus.inl diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index e73dad3d6b..a9d4f27e6f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -75,14 +75,14 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z if (device->getNEODevice()->getDebugger()) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); - auto surfaceState = device->getNEODevice()->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + NEO::Device *neoDevice = device->getNEODevice(); + auto surfaceState = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto debugSurface = device->getDebugSurface(); auto mocs = device->getMOCS(true, false); NEO::EncodeSurfaceState::encodeBuffer(surfaceState, debugSurface->getGpuAddress(), - debugSurface->getUnderlyingBufferSize(), - mocs, true); - NEO::EncodeSurfaceState::encodeExtraBufferParams(debugSurface, - device->getNEODevice()->getGmmHelper(), surfaceState, false, false); + debugSurface->getUnderlyingBufferSize(), mocs, + true, false, neoDevice->getNumAvailableDevices(), + debugSurface, neoDevice->getGmmHelper()); } appendSignalEventPostWalker(hEvent); diff --git a/level_zero/core/source/kernel/kernel_hw.h b/level_zero/core/source/kernel/kernel_hw.h index ffd347799d..dc4be5d553 100644 --- a/level_zero/core/source/kernel/kernel_hw.h +++ b/level_zero/core/source/kernel/kernel_hw.h @@ -51,10 +51,10 @@ struct KernelHw : public KernelImp { bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment); auto mocs = this->module->getDevice()->getMOCS(true, false); - bool requiresCoherency; - NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, - bufferAddressForSsh, bufferSizeForSsh, mocs, - requiresCoherency = false); + NEO::Device *neoDevice = module->getDevice()->getNEODevice(); + NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs, + true, false, neoDevice->getNumAvailableDevices(), + alloc, neoDevice->getGmmHelper()); } std::unique_ptr clone() const override { diff --git a/opencl/source/gen11/buffer_gen11.cpp b/opencl/source/gen11/buffer_gen11.cpp index 74b0493878..9417b09972 100644 --- a/opencl/source/gen11/buffer_gen11.cpp +++ b/opencl/source/gen11/buffer_gen11.cpp @@ -7,7 +7,7 @@ #include "shared/source/gen11/hw_cmds.h" -#include "opencl/source/mem_obj/buffer_bdw_plus.inl" +#include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { diff --git a/opencl/source/gen12lp/buffer_gen12lp.cpp b/opencl/source/gen12lp/buffer_gen12lp.cpp index 578a33311d..d6d8cd8537 100644 --- a/opencl/source/gen12lp/buffer_gen12lp.cpp +++ b/opencl/source/gen12lp/buffer_gen12lp.cpp @@ -7,7 +7,7 @@ #include "shared/source/gen12lp/hw_cmds.h" -#include "opencl/source/mem_obj/buffer_bdw_plus.inl" +#include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { diff --git a/opencl/source/gen8/buffer_gen8.cpp b/opencl/source/gen8/buffer_gen8.cpp index 8f1f4e1510..99a16222f6 100644 --- a/opencl/source/gen8/buffer_gen8.cpp +++ b/opencl/source/gen8/buffer_gen8.cpp @@ -7,7 +7,7 @@ #include "shared/source/gen8/hw_cmds.h" -#include "opencl/source/mem_obj/buffer_bdw_plus.inl" +#include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { diff --git a/opencl/source/gen9/buffer_gen9.cpp b/opencl/source/gen9/buffer_gen9.cpp index 106f394761..c3f945bb98 100644 --- a/opencl/source/gen9/buffer_gen9.cpp +++ b/opencl/source/gen9/buffer_gen9.cpp @@ -7,7 +7,7 @@ #include "shared/source/gen9/hw_cmds.h" -#include "opencl/source/mem_obj/buffer_bdw_plus.inl" +#include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { diff --git a/opencl/source/mem_obj/CMakeLists.txt b/opencl/source/mem_obj/CMakeLists.txt index a45707c66b..7b1cce932c 100644 --- a/opencl/source/mem_obj/CMakeLists.txt +++ b/opencl/source/mem_obj/CMakeLists.txt @@ -9,7 +9,6 @@ set(RUNTIME_SRCS_MEM_OBJ ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/buffer_base.inl - ${CMAKE_CURRENT_SOURCE_DIR}/buffer_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/buffer_factory_init.inl ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.h diff --git a/opencl/source/mem_obj/buffer.h b/opencl/source/mem_obj/buffer.h index b8c4a9b034..d757533a5b 100644 --- a/opencl/source/mem_obj/buffer.h +++ b/opencl/source/mem_obj/buffer.h @@ -206,7 +206,6 @@ class BufferHw : public Buffer { zeroCopy, isHostPtrSVM, isObjectRedescribed) {} void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) override; - void appendBufferState(void *memory, const Device &device, bool isReadOnlyArgument); void appendSurfaceStateExt(void *memory); static Buffer *create(Context *context, diff --git a/opencl/source/mem_obj/buffer_base.inl b/opencl/source/mem_obj/buffer_base.inl index 87ac674d0a..20bcd9e222 100644 --- a/opencl/source/mem_obj/buffer_base.inl +++ b/opencl/source/mem_obj/buffer_base.inl @@ -38,11 +38,9 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, boo auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); EncodeSurfaceState::encodeBuffer(memory, getBufferAddress(rootDeviceIndex), getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex), - getMocsValue(disableL3, isReadOnlyArgument, rootDeviceIndex), true); - EncodeSurfaceState::encodeExtraBufferParams(graphicsAllocation, - device.getGmmHelper(), memory, forceNonAuxMode, isReadOnlyArgument); - - appendBufferState(memory, device, isReadOnlyArgument); + getMocsValue(disableL3, isReadOnlyArgument, rootDeviceIndex), + true, forceNonAuxMode, device.getNumAvailableDevices(), + graphicsAllocation, device.getGmmHelper()); appendSurfaceStateExt(memory); } } // namespace NEO diff --git a/opencl/source/mem_obj/buffer_bdw_plus.inl b/opencl/source/mem_obj/buffer_bdw_plus.inl deleted file mode 100644 index db2af900ac..0000000000 --- a/opencl/source/mem_obj/buffer_bdw_plus.inl +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (C) 2017-2020 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/mem_obj/buffer_base.inl" - -namespace NEO { - -template -void BufferHw::appendBufferState(void *memory, const Device &device, bool isReadOnly) { -} - -} // namespace NEO diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 0f59d735ba..9e87915133 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -194,8 +194,10 @@ struct EncodeSurfaceState { using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE; static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, - bool cpuCoherent); - static void encodeExtraBufferParams(GraphicsAllocation *allocation, GmmHelper *gmmHelper, void *memory, bool forceNonAuxMode, bool isReadOnlyArgument); + bool cpuCoherent, bool forceNonAuxMode, uint32_t numAvailableDevices, + GraphicsAllocation *allocation, GmmHelper *gmmHelper); + static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, + uint32_t numAvailableDevices); static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() { return ~(getSurfaceBaseAddressAlignment() - 1); diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 80b20e3b7b..15853e5acd 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -267,52 +267,48 @@ void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_ template void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, - bool cpuCoherent) { - auto ss = reinterpret_cast(dst); + bool cpuCoherent, bool forceNonAuxMode, uint32_t numAvailableDevices, + GraphicsAllocation *allocation, GmmHelper *gmmHelper) { + auto surfaceState = reinterpret_cast(dst); UNRECOVERABLE_IF(!isAligned(size)); SURFACE_STATE_BUFFER_LENGTH Length = {0}; Length.Length = static_cast(size - 1); - ss->setWidth(Length.SurfaceState.Width + 1); - ss->setHeight(Length.SurfaceState.Height + 1); - ss->setDepth(Length.SurfaceState.Depth + 1); + surfaceState->setWidth(Length.SurfaceState.Width + 1); + surfaceState->setHeight(Length.SurfaceState.Height + 1); + surfaceState->setDepth(Length.SurfaceState.Depth + 1); - ss->setSurfaceType((address != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER - : R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL); - ss->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW); - ss->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); - ss->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4); + surfaceState->setSurfaceType((address != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER + : R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL); + surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW); + surfaceState->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); + surfaceState->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4); - ss->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR); - ss->setVerticalLineStride(0); - ss->setVerticalLineStrideOffset(0); - ss->setMemoryObjectControlState(mocs); - ss->setSurfaceBaseAddress(address); + surfaceState->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR); + surfaceState->setVerticalLineStride(0); + surfaceState->setVerticalLineStrideOffset(0); + surfaceState->setMemoryObjectControlState(mocs); + surfaceState->setSurfaceBaseAddress(address); - ss->setCoherencyType(cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT - : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); - ss->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); -} + surfaceState->setCoherencyType(cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT + : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); + surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); -template -void EncodeSurfaceState::encodeExtraBufferParams(GraphicsAllocation *allocation, GmmHelper *gmmHelper, void *memory, bool forceNonAuxMode, bool isReadOnlyArgument) { - using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE; - using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; - - auto surfaceState = reinterpret_cast(memory); Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr; - if (gmm && gmm->isRenderCompressed && !forceNonAuxMode) { // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios - surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); + surfaceState->setCoherencyType(R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } if (DebugManager.flags.DisableCachingForStatefulBufferAccess.get()) { surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } + + EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, numAvailableDevices); } + template void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) { diff --git a/shared/source/command_container/command_encoder_base.inl b/shared/source/command_container/command_encoder_base.inl index 491e30270f..cf5a6457b3 100644 --- a/shared/source/command_container/command_encoder_base.inl +++ b/shared/source/command_container/command_encoder_base.inl @@ -388,4 +388,9 @@ inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &devic return 0; } +template +void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, + uint32_t numAvailableDevices) { +} + } // namespace NEO diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index 12b0e1c3dd..122eeb8722 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -47,7 +47,8 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr length.Length = static_cast(allocSize - 1); GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1); EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, - RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT); + true, false, 1u, + &allocation, pDevice->getGmmHelper()); EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight()); @@ -73,7 +74,8 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo length.Length = static_cast(allocSize - 1); EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, - RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT); + true, false, 1u, + nullptr, pDevice->getGmmHelper()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType()); @@ -97,7 +99,8 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency length.Length = static_cast(allocSize - 1); EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, - RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); + false, false, 1u, + nullptr, pDevice->getGmmHelper()); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());