From 754d6e40e0cb409ddffa2117d1eea51140a5560c Mon Sep 17 00:00:00 2001 From: "Milczarek, Slawomir" Date: Wed, 1 Sep 2021 00:34:29 +0000 Subject: [PATCH] Fixed blitter path for printf with stateless compression in sba Related-To: NEO-5107 Signed-off-by: Milczarek, Slawomir --- opencl/source/program/printf_handler.cpp | 27 ++++++++++++++----- opencl/source/program/printf_handler.h | 2 +- .../program/printf_handler_tests.cpp | 24 ++++++++++++++--- .../xe_hp_core/hw_helper_tests_xe_hp_core.cpp | 18 +++++++++++++ .../extra_allocation_data_xehp_and_later.inl | 10 +++++++ 5 files changed, 70 insertions(+), 11 deletions(-) diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index 6a61197c23..decfb445e3 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -22,9 +22,10 @@ namespace NEO { -const uint32_t PrintfHandler::printfSurfaceInitialDataSize; - -PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) {} +PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) { + printfSurfaceInitialDataSizePtr = std::make_unique(); + *printfSurfaceInitialDataSizePtr = sizeof(uint32_t); +} PrintfHandler::~PrintfHandler() { device.getMemoryManager()->freeGraphicsMemory(printfSurface); @@ -55,8 +56,8 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); MemoryTransferHelper::transferMemoryToAllocation(helper.isBlitCopyRequiredForLocalMemory(hwInfo, *printfSurface), - device.getDevice(), printfSurface, 0, &printfSurfaceInitialDataSize, - sizeof(printfSurfaceInitialDataSize)); + device.getDevice(), printfSurface, 0, printfSurfaceInitialDataSizePtr.get(), + sizeof(*printfSurfaceInitialDataSizePtr.get())); const auto &printfSurfaceArg = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData()), printfSurfaceArg.stateless); @@ -78,12 +79,24 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) { void PrintfHandler::printEnqueueOutput() { auto &helper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); if (helper.allowStatelessCompression(device.getHardwareInfo())) { + auto printOutputSize = static_cast(printfSurface->getUnderlyingBufferSize()); + auto printOutputDecompressed = std::make_unique(printOutputSize); auto &bcsEngine = device.getEngine(EngineHelpers::getBcsEngineType(device.getHardwareInfo(), device.getDeviceBitfield(), device.getSelectorCopyEngine(), true), EngineUsage::Regular); + BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back( - BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux, - printfSurface, bcsEngine.commandStreamReceiver->getClearColorAllocation())); + BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, + *bcsEngine.commandStreamReceiver, printfSurface, nullptr, + printOutputDecompressed.get(), + printfSurface->getGpuAddress(), + 0, 0, 0, Vec3(printOutputSize, 0, 0), 0, 0, 0, 0)); bcsEngine.commandStreamReceiver->blitBuffer(blitPropertiesContainer, true, false, device.getDevice()); + + PrintFormatter printFormatter(printOutputDecompressed.get(), printOutputSize, + kernel->is32Bit(), + kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr); + printFormatter.printKernelOutput(); + return; } PrintFormatter printFormatter(reinterpret_cast(printfSurface->getUnderlyingBuffer()), static_cast(printfSurface->getUnderlyingBufferSize()), diff --git a/opencl/source/program/printf_handler.h b/opencl/source/program/printf_handler.h index 8c8692689e..12d6c11adf 100644 --- a/opencl/source/program/printf_handler.h +++ b/opencl/source/program/printf_handler.h @@ -33,7 +33,7 @@ class PrintfHandler { protected: PrintfHandler(ClDevice &device); - static const uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t); + std::unique_ptr printfSurfaceInitialDataSizePtr; ClDevice &device; Kernel *kernel = nullptr; GraphicsAllocation *printfSurface = nullptr; diff --git a/opencl/test/unit_test/program/printf_handler_tests.cpp b/opencl/test/unit_test/program/printf_handler_tests.cpp index 4d027b1543..3ece6ca6d8 100644 --- a/opencl/test/unit_test/program/printf_handler_tests.cpp +++ b/opencl/test/unit_test/program/printf_handler_tests.cpp @@ -26,6 +26,22 @@ using namespace NEO; using PrintfHandlerTests = ::testing::Test; +TEST_F(PrintfHandlerTests, givenPrintfHandlerWhenBeingConstructedThenStorePrintfSurfaceInitialDataSize) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + struct MockPrintfHandler : public PrintfHandler { + using PrintfHandler::PrintfHandler; + using PrintfHandler::printfSurfaceInitialDataSizePtr; + + MockPrintfHandler(ClDevice &device) : PrintfHandler(device) {} + }; + + MockPrintfHandler printfHandler(*device); + + EXPECT_NE(nullptr, printfHandler.printfSurfaceInitialDataSizePtr); + EXPECT_EQ(sizeof(uint32_t), *printfHandler.printfSurfaceInitialDataSizePtr); +} + TEST_F(PrintfHandlerTests, givenNotPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResultIsNullptr) { MockClDevice *device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; MockContext context; @@ -73,8 +89,10 @@ TEST_F(PrintfHandlerTests, givenPreparedPrintfHandlerWithUndefinedSshOffsetWhenG delete device; } -HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedForAuxTranslation) { - REQUIRE_BLITTER_OR_SKIP(defaultHwInfo.get()); +HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToDecompressPrintfOutput) { + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(&hwInfo); DebugManagerStateRestore restore; @@ -105,7 +123,7 @@ HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOut if (enable > 0) { EXPECT_EQ(1u, bcsCsr->blitBufferCalled); - EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, bcsCsr->receivedBlitProperties[0].auxTranslationDirection); + EXPECT_EQ(BlitterConstants::BlitDirection::BufferToHostPtr, bcsCsr->receivedBlitProperties[0].blitDirection); } else { EXPECT_EQ(0u, bcsCsr->blitBufferCalled); } diff --git a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp index e52b582949..17a53e96f5 100644 --- a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp @@ -8,6 +8,7 @@ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/program/kernel_info.h" @@ -116,6 +117,23 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDifferentBufferSizesWhenEnableSta } } +XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenStatelessCompressionEnabledWhenSetExtraAllocationDataThenDontRequireCpuAccessNorMakeResourceLocableForCompressedAllocations) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableStatelessCompression.set(1); + + HardwareInfo hwInfo = *defaultHwInfo; + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + for (auto allocType : {GraphicsAllocation::AllocationType::CONSTANT_SURFACE, GraphicsAllocation::AllocationType::GLOBAL_SURFACE, GraphicsAllocation::AllocationType::PRINTF_SURFACE}) { + AllocationData allocData; + AllocationProperties allocProperties(mockRootDeviceIndex, true, allocType, mockDeviceBitfield); + + hwHelper.setExtraAllocationData(allocData, allocProperties, hwInfo); + EXPECT_FALSE(allocData.flags.requiresCpuAccess); + EXPECT_FALSE(allocData.storageInfo.isLockable); + } +} + XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumAndPlatformFamilyTypeThenProperValueForIsWorkaroundRequiredIsReturned) { uint32_t steppings[] = { REVISION_A0, diff --git a/shared/source/helpers/extra_allocation_data_xehp_and_later.inl b/shared/source/helpers/extra_allocation_data_xehp_and_later.inl index 180297cc3f..567458d2ec 100644 --- a/shared/source/helpers/extra_allocation_data_xehp_and_later.inl +++ b/shared/source/helpers/extra_allocation_data_xehp_and_later.inl @@ -26,5 +26,15 @@ void HwHelperHw::setExtraAllocationData(AllocationData &allocationData, allocationData.storageInfo.isLockable = false; } } + + auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (helper.allowStatelessCompression(hwInfo)) { + if (properties.allocationType == GraphicsAllocation::AllocationType::GLOBAL_SURFACE || + properties.allocationType == GraphicsAllocation::AllocationType::CONSTANT_SURFACE || + properties.allocationType == GraphicsAllocation::AllocationType::PRINTF_SURFACE) { + allocationData.flags.requiresCpuAccess = false; + allocationData.storageInfo.isLockable = false; + } + } } } // namespace NEO