From d62049035aaf30477604483637ad8f17bb470da1 Mon Sep 17 00:00:00 2001 From: Krystian Chmielewski Date: Wed, 17 Mar 2021 15:31:36 +0100 Subject: [PATCH] printf support with ZEBinary Resolves: NEO-5657 Signed-off-by: Krystian Chmielewski --- .../source/printf_handler/printf_handler.cpp | 9 ++-- opencl/source/program/printf_handler.cpp | 3 +- .../test/unit_test/mocks/mock_kernel_info.h | 5 +- .../unit_test/program/printf_helper_tests.cpp | 34 ++++++++++-- .../device_binary_format/elf/zebin_elf.h | 4 +- .../device_binary_format/zebin_decoder.cpp | 9 ++++ shared/source/kernel/kernel_descriptor.h | 1 + .../kernel_descriptor_from_patchtokens.cpp | 1 + shared/source/program/print_formatter.cpp | 54 +++++++++++++------ shared/source/program/print_formatter.h | 5 +- .../zebin_decoder_tests.cpp | 37 ++++++++++++- 11 files changed, 132 insertions(+), 30 deletions(-) diff --git a/level_zero/core/source/printf_handler/printf_handler.cpp b/level_zero/core/source/printf_handler/printf_handler.cpp index 98aed9164e..f259d3c5f9 100644 --- a/level_zero/core/source/printf_handler/printf_handler.cpp +++ b/level_zero/core/source/printf_handler/printf_handler.cpp @@ -29,10 +29,11 @@ NEO::GraphicsAllocation *PrintfHandler::createPrintfBuffer(Device *device) { void PrintfHandler::printOutput(const KernelImmutableData *kernelData, NEO::GraphicsAllocation *printfBuffer, Device *device) { bool using32BitGpuPointers = kernelData->getDescriptor().kernelAttributes.gpuPointerSize == 4u; - NEO::PrintFormatter printfFormatter{static_cast(printfBuffer->getUnderlyingBuffer()), - static_cast(printfBuffer->getUnderlyingBufferSize()), - using32BitGpuPointers, - kernelData->getDescriptor().kernelMetadata.printfStringsMap}; + NEO::PrintFormatter printfFormatter{ + static_cast(printfBuffer->getUnderlyingBuffer()), + static_cast(printfBuffer->getUnderlyingBufferSize()), + using32BitGpuPointers, + kernelData->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf ? &kernelData->getDescriptor().kernelMetadata.printfStringsMap : nullptr}; printfFormatter.printKernelOutput(); *reinterpret_cast(printfBuffer->getUnderlyingBuffer()) = diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index bd4e287cf6..e9160b8a20 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -77,7 +77,8 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) { void PrintfHandler::printEnqueueOutput() { PrintFormatter printFormatter(reinterpret_cast(printfSurface->getUnderlyingBuffer()), static_cast(printfSurface->getUnderlyingBufferSize()), - kernel->is32Bit(), kernel->getKernelInfo().kernelDescriptor.kernelMetadata.printfStringsMap); + kernel->is32Bit(), + kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr); printFormatter.printKernelOutput(); } } // namespace NEO diff --git a/opencl/test/unit_test/mocks/mock_kernel_info.h b/opencl/test/unit_test/mocks/mock_kernel_info.h index 7788aea6e1..2a4a878dd8 100644 --- a/opencl/test/unit_test/mocks/mock_kernel_info.h +++ b/opencl/test/unit_test/mocks/mock_kernel_info.h @@ -52,7 +52,10 @@ class MockKernelInfo : public KernelInfo { void addExtendedDeviceSideEnqueueDescriptor(uint32_t index, uint32_t objectId); - inline void addToPrintfStringsMap(uint32_t index, const std::string &string) { kernelDescriptor.kernelMetadata.printfStringsMap.insert(std::make_pair(index, string)); } + inline void addToPrintfStringsMap(uint32_t index, const std::string &string) { + kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = true; + kernelDescriptor.kernelMetadata.printfStringsMap.insert(std::make_pair(index, string)); + } void setPrintfSurface(uint8_t dataParamSize = sizeof(uintptr_t), CrossThreadDataOffset crossThreadDataOffset = undefined, SurfaceStateHeapOffset sshOffset = undefined); void setBindingTable(SurfaceStateHeapOffset tableOffset, uint8_t numEntries); void setGlobalVariablesSurface(uint8_t pointerSize, CrossThreadDataOffset stateless, SurfaceStateHeapOffset bindful = undefined); diff --git a/opencl/test/unit_test/program/printf_helper_tests.cpp b/opencl/test/unit_test/program/printf_helper_tests.cpp index b9004d9415..adfa6951bf 100644 --- a/opencl/test/unit_test/program/printf_helper_tests.cpp +++ b/opencl/test/unit_test/program/printf_helper_tests.cpp @@ -54,7 +54,7 @@ class PrintFormatterTest : public testing::Test { program = std::make_unique(toClDeviceVector(*device)); kernel = new MockKernel(program.get(), *kernelInfo, *device); - printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, is32bit, kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); + printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, is32bit, &kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); underlyingBuffer[0] = 0; underlyingBuffer[1] = 0; @@ -259,7 +259,7 @@ TEST_P(PrintfUint32Test, GivenFormatContainingUintWhenPrintingThenValueIsInserte TEST_P(PrintfUint32Test, GivenBufferSizeGreaterThanPrintBufferWhenPrintingThenBufferIsTrimmed) { auto input = GetParam(); - printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), 0, is32bit, kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); + printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), 0, is32bit, &kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); @@ -807,7 +807,7 @@ TEST_F(PrintFormatterTest, GivenPointerWhenPrintingThenValueIsInserted) { } TEST_F(PrintFormatterTest, GivenPointerWith32BitKernelWhenPrintingThen32BitPointerIsPrinted) { - printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true, kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); + printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true, &kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); auto stringIndex = injectFormatString("%p"); storeData(stringIndex); kernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 4; @@ -878,6 +878,34 @@ TEST_F(PrintFormatterTest, GivenEmptyBufferWhenPrintingThenFailSafely) { EXPECT_STREQ("", actualOutput); } +TEST_F(PrintFormatterTest, GivenNoStringMapAndBufferWithFormatStringThenItIsPrintedProperly) { + printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true)); + const char *formatString = "test string"; + storeData(formatString); + + char output[maxPrintfOutputLength]; + printFormatter->printKernelOutput([&output](char *str) { strncpy_s(output, maxPrintfOutputLength, str, maxPrintfOutputLength); }); + EXPECT_STREQ(formatString, output); +} + +TEST_F(PrintFormatterTest, GivenNoStringMapAndBufferWithFormatStringAnd2StringsThenDataIsParsedAndPrintedProperly) { + printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true)); + const char *formatString = "%s %s"; + storeData(formatString); + + const char *string1 = "str1"; + storeData(PRINTF_DATA_TYPE::STRING); + storeData(string1); + const char *string2 = "str2"; + storeData(PRINTF_DATA_TYPE::STRING); + storeData(string2); + + const char *expectedOutput = "str1 str2"; + char output[maxPrintfOutputLength]; + printFormatter->printKernelOutput([&output](char *str) { strncpy_s(output, maxPrintfOutputLength, str, maxPrintfOutputLength); }); + EXPECT_STREQ(expectedOutput, output); +} + TEST(printToSTDOUTTest, GivenStringWhenPrintingToStdoutThenOutputOccurs) { testing::internal::CaptureStdout(); printToSTDOUT("test"); diff --git a/shared/source/device_binary_format/elf/zebin_elf.h b/shared/source/device_binary_format/elf/zebin_elf.h index d3ad458f34..8d741987de 100644 --- a/shared/source/device_binary_format/elf/zebin_elf.h +++ b/shared/source/device_binary_format/elf/zebin_elf.h @@ -136,6 +136,7 @@ static constexpr ConstStringRef privateBaseStateless("private_base_stateless"); static constexpr ConstStringRef argByvalue("arg_byvalue"); static constexpr ConstStringRef argBypointer("arg_bypointer"); static constexpr ConstStringRef bufferOffset("buffer_offset"); +static constexpr ConstStringRef printfBuffer("printf_buffer"); } // namespace ArgType namespace MemoryAddressingMode { static constexpr ConstStringRef stateless("stateless"); @@ -298,7 +299,8 @@ enum ArgType : uint8_t { ArgTypePrivateBaseStateless, ArgTypeArgByvalue, ArgTypeArgBypointer, - ArgTypeBufferOffset + ArgTypeBufferOffset, + ArgTypePrintfBuffer }; namespace PerThreadPayloadArgument { diff --git a/shared/source/device_binary_format/zebin_decoder.cpp b/shared/source/device_binary_format/zebin_decoder.cpp index 5490c3878d..348d231297 100644 --- a/shared/source/device_binary_format/zebin_decoder.cpp +++ b/shared/source/device_binary_format/zebin_decoder.cpp @@ -260,6 +260,8 @@ bool readEnumChecked(const Yaml::Token *token, NEO::Elf::ZebinKernelMetadata::Ty out = ArgTypeT::ArgTypeArgBypointer; } else if (tokenValue == PayloadArgument::ArgType::bufferOffset) { out = ArgTypeT::ArgTypeBufferOffset; + } else if (tokenValue == PayloadArgument::ArgType::printfBuffer) { + out = ArgTypeT::ArgTypePrintfBuffer; } else { outErrReason.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unhandled \"" + tokenValue.str() + "\" argument type in context of " + context.str() + "\n"); return false; @@ -762,6 +764,13 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type argAsPointer.bufferOffset = src.offset; break; } + + case NEO::Elf::ZebinKernelMetadata::Types::Kernel::ArgTypePrintfBuffer: { + dst.kernelAttributes.flags.usesPrintf = true; + dst.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = src.offset; + dst.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = src.size; + break; + } } return DecodeError::Success; diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 3d504ac5dc..ae66d06fa7 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -82,6 +82,7 @@ struct KernelDescriptor final { union { struct { + bool usesStringMapForPrintf : 1; bool usesPrintf : 1; bool usesFencesForReadWriteImages : 1; bool usesFlattenedLocalIds; diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp index c4a9567944..6681ca3ac9 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -171,6 +171,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatele void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrintfSurface &token) { dst.kernelAttributes.flags.usesPrintf = true; + dst.kernelAttributes.flags.usesStringMapForPrintf = true; populatePointerKernelArg(dst.payloadMappings.implicitArgs.printfSurfaceAddress, token, dst.kernelAttributes.bufferAddressingMode); } diff --git a/shared/source/program/print_formatter.cpp b/shared/source/program/print_formatter.cpp index 36aeb570e3..e46fc170ff 100644 --- a/shared/source/program/print_formatter.cpp +++ b/shared/source/program/print_formatter.cpp @@ -14,11 +14,12 @@ namespace NEO { PrintFormatter::PrintFormatter(const uint8_t *printfOutputBuffer, uint32_t printfOutputBufferMaxSize, - bool using32BitPointers, const StringMap &stringLiteralMap) + bool using32BitPointers, const StringMap *stringLiteralMap) : printfOutputBuffer(printfOutputBuffer), printfOutputBufferSize(printfOutputBufferMaxSize), - stringLiteralMap(stringLiteralMap), - using32BitPointers(using32BitPointers) { + using32BitPointers(using32BitPointers), + usesStringMap(stringLiteralMap != nullptr), + stringLiteralMap(stringLiteralMap) { output.reset(new char[maxSinglePrintStringLength]); } @@ -31,11 +32,19 @@ void PrintFormatter::printKernelOutput(const std::function &print) read(&printfOutputBufferSizeRead); printfOutputBufferSize = std::min(printfOutputBufferSizeRead, printfOutputBufferSize); - uint32_t stringIndex = 0; - while (currentOffset + 4 <= printfOutputBufferSize) { - read(&stringIndex); - const char *formatString = queryPrintfString(stringIndex); - if (formatString != nullptr) { + if (usesStringMap) { + uint32_t stringIndex = 0; + while (currentOffset + 4 <= printfOutputBufferSize) { + read(&stringIndex); + const char *formatString = queryPrintfString(stringIndex); + if (formatString != nullptr) { + printString(formatString, print); + } + } + } else { + while (currentOffset + sizeof(char *) <= printfOutputBufferSize) { + char *formatString = nullptr; + read(&formatString); printString(formatString, print); } } @@ -140,16 +149,27 @@ size_t PrintFormatter::printToken(char *output, size_t size, const char *formatS } size_t PrintFormatter::printStringToken(char *output, size_t size, const char *formatString) { - int index = 0; int type = 0; - // additional read to discard the token read(&type); - read(&index); + + const char *string = nullptr; + if (usesStringMap) { + int index = 0; + read(&index); + string = queryPrintfString(index); + } else { + char *str = nullptr; + read(&str); + string = str; + } + if (type == static_cast(PRINTF_DATA_TYPE::STRING)) { - return simple_sprintf(output, size, formatString, queryPrintfString(index)); + return simple_sprintf(output, size, formatString, string); } else { return simple_sprintf(output, size, formatString, 0); } + + return 0; } size_t PrintFormatter::printPointerToken(char *output, size_t size, const char *formatString) { @@ -163,6 +183,11 @@ size_t PrintFormatter::printPointerToken(char *output, size_t size, const char * return simple_sprintf(output, size, formatString, value); } +const char *PrintFormatter::queryPrintfString(uint32_t index) const { + auto stringEntry = stringLiteralMap->find(index); + return stringEntry == stringLiteralMap->end() ? nullptr : stringEntry->second.c_str(); +} + char PrintFormatter::escapeChar(char escape) { switch (escape) { case 'n': @@ -197,9 +222,4 @@ bool PrintFormatter::isConversionSpecifier(char c) { } } -const char *PrintFormatter::queryPrintfString(uint32_t index) const { - auto stringEntry = stringLiteralMap.find(index); - return stringEntry == stringLiteralMap.end() ? nullptr : stringEntry->second.c_str(); -} - } // namespace NEO diff --git a/shared/source/program/print_formatter.h b/shared/source/program/print_formatter.h index 28c0965d4a..05d40d2727 100644 --- a/shared/source/program/print_formatter.h +++ b/shared/source/program/print_formatter.h @@ -45,7 +45,7 @@ enum class PRINTF_DATA_TYPE : int { class PrintFormatter { public: PrintFormatter(const uint8_t *printfOutputBuffer, uint32_t printfOutputBufferMaxSize, - bool using32BitPointers, const StringMap &stringLiteralMap); + bool using32BitPointers, const StringMap *stringLiteralMap = nullptr); void printKernelOutput(const std::function &print = [](char *str) { printToSTDOUT(str); }); constexpr static size_t maxSinglePrintStringLength = 16 * MemoryConstants::kiloByte; @@ -118,8 +118,9 @@ class PrintFormatter { const uint8_t *printfOutputBuffer = nullptr; // buffer extracted from the kernel, contains values to be printed uint32_t printfOutputBufferSize = 0; // size of the data contained in the buffer - const StringMap &stringLiteralMap; bool using32BitPointers = false; + const bool usesStringMap; + const StringMap *stringLiteralMap; uint32_t currentOffset = 0; // current position in currently parsed buffer }; diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index c98a0d5d14..edb854c991 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -5,7 +5,6 @@ * */ -#include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/zebin_elf.h" #include "shared/source/device_binary_format/zebin_decoder.h" #include "shared/source/helpers/ptr_math.h" @@ -4250,3 +4249,39 @@ TEST(PopulateArgDescriptorCrossthreadPayload, GivenArgTypeBufferOffsetWhenSizeIs EXPECT_TRUE(warnings.empty()) << warnings; } } + +TEST(PopulateArgDescriptorCrossthreadPayload, GivenArgTypePrintfBufferWhenOffsetAndSizeIsValidThenPopulatesKernelDescriptor) { + NEO::ConstStringRef zeinfo = R"===( + kernels: + - name : some_kernel + execution_env: + simd_size: 32 + payload_arguments: + - arg_type: printf_buffer + offset: 32 + size: 8 +)==="; + NEO::ProgramInfo programInfo; + ZebinTestData::ValidEmptyProgram zebin; + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {}); + std::string errors, warnings; + auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings); + ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings; + + NEO::Yaml::YamlParser parser; + bool parseSuccess = parser.parse(zeinfo, errors, warnings); + ASSERT_TRUE(parseSuccess) << errors << " " << warnings; + + NEO::ZebinSections zebinSections; + auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings); + ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings; + + auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin(); + auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings); + EXPECT_EQ(NEO::DecodeError::Success, err); + EXPECT_TRUE(errors.empty()) << errors; + EXPECT_TRUE(warnings.empty()) << warnings; + const auto printfSurfaceAddress = programInfo.kernelInfos[0]->kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; + ASSERT_EQ(32U, printfSurfaceAddress.stateless); + EXPECT_EQ(8U, printfSurfaceAddress.pointerSize); +}