Correct the setting of the usesStringMap flag in printf

This commit fixes how the usesStringMap flag is set for printf, taking
into account the use of indirect functions in the legacy (non-zebin)
path. It also adds a new field to kernelDescriptor that specifies the
binary type (legacy/zebin).

Related-To: NEO-6604
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:
Kacper Nowak
2022-02-14 16:05:39 +00:00
committed by Compute-Runtime-Automation
parent 143f67f2fe
commit cd9cc53159
10 changed files with 105 additions and 6 deletions

View File

@@ -30,7 +30,7 @@ void PrintfHandler::printOutput(const KernelImmutableData *kernelData,
NEO::GraphicsAllocation *printfBuffer, Device *device) {
bool using32BitGpuPointers = kernelData->getDescriptor().kernelAttributes.gpuPointerSize == 4u;
auto usesStringMap = kernelData->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf;
auto usesStringMap = kernelData->getDescriptor().kernelAttributes.usesStringMap();
NEO::PrintFormatter printfFormatter{
static_cast<uint8_t *>(printfBuffer->getUnderlyingBuffer()),
static_cast<uint32_t>(printfBuffer->getUnderlyingBufferSize()),

View File

@@ -2187,14 +2187,15 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
mockKernel.crossThreadData.release();
}
using KernelPrintfStringMapTests = Test<ModuleImmutableDataFixture>;
using KernelPatchtokensPrintfStringMapTests = Test<ModuleImmutableDataFixture>;
TEST_F(KernelPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageEnabledWhenPrintOutputThenProperStringIsPrinted) {
TEST_F(KernelPatchtokensPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageEnabledWhenPrintOutputThenProperStringIsPrinted) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.usesPrintf = true;
kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = true;
kernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
std::string expectedString("test123");
kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));
@@ -2215,12 +2216,14 @@ TEST_F(KernelPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageEnabledWh
EXPECT_STREQ(expectedString.c_str(), output.c_str());
}
TEST_F(KernelPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageDisabledWhenPrintOutputThenNothingIsPrinted) {
TEST_F(KernelPatchtokensPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageDisabledAndNoImplicitArgsWhenPrintOutputThenNothingIsPrinted) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.usesPrintf = true;
kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = false;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
kernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
std::string expectedString("test123");
kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));
@@ -2241,6 +2244,34 @@ TEST_F(KernelPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageDisabledW
EXPECT_STREQ("", output.c_str());
}
// Patchtokens kernel with usesStringMapForPrintf cleared but requiresImplicitArgs set:
// printing the printf output is still expected to emit the string registered in printfStringsMap.
TEST_F(KernelPatchtokensPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageDisabledAndWithImplicitArgsWhenPrintOutputThenOutputIsPrinted) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
// Configure the descriptor: printf in use, explicit string-map flag off, implicit args on, legacy binary format.
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.usesPrintf = true;
kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = false;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
kernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
// Register the format string under key 0 in the strings map.
std::string expectedString("test123");
kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));
createModuleFromBinary(0u, false, mockKernelImmData.get());
auto kernel = std::make_unique<MockKernel>(module.get());
ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
kernel->initialize(&kernelDesc);
// Hand-fill the printf buffer as 32-bit words.
// NOTE(review): presumably word 0 is the number of bytes used in the buffer and word 1 is the
// strings-map index (0) — confirm against the printf buffer layout.
auto printfAllocation = reinterpret_cast<uint32_t *>(kernel->getPrintfBufferAllocation()->getUnderlyingBuffer());
printfAllocation[0] = 8;
printfAllocation[1] = 0;
// Capture stdout around the print call so the emitted text can be compared.
testing::internal::CaptureStdout();
kernel->printPrintfOutput();
std::string output = testing::internal::GetCapturedStdout();
EXPECT_STREQ(expectedString.c_str(), output.c_str());
}
using KernelImplicitArgTests = Test<ModuleImmutableDataFixture>;
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenInitializeThenPrintfSurfaceIsCreatedAndProperlyPatchedInImplicitArgs) {

View File

@@ -88,7 +88,7 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
void PrintfHandler::printEnqueueOutput() {
auto &hwInfo = device.getHardwareInfo();
auto usesStringMap = kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf;
auto usesStringMap = kernel->getDescriptor().kernelAttributes.usesStringMap();
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto printfOutputBuffer = reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer());
auto printfOutputSize = static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize());

View File

@@ -631,6 +631,54 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn
mockKernel.kernelInfo.addToPrintfStringsMap(0, testString);
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = true;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
*mockKernel.mockKernel->pImplicitArgs = {};
cl_uint workDim = 1;
size_t globalWorkOffset[3] = {0, 0, 0};
FillValues();
cl_event blockedEvent = userEvent.get();
cl_event outEvent{};
auto retVal = pCmdQ->enqueueKernel(
mockKernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
1,
&blockedEvent,
&outEvent);
ASSERT_EQ(CL_SUCCESS, retVal);
auto pOutEvent = castToObject<Event>(outEvent);
auto printfAllocation = reinterpret_cast<uint32_t *>(static_cast<CommandComputeKernel *>(pOutEvent->peekCommand())->peekPrintfHandler()->getSurface()->getUnderlyingBuffer());
printfAllocation[0] = 8;
printfAllocation[1] = 0;
pOutEvent->release();
testing::internal::CaptureStdout();
userEvent->setStatus(CL_COMPLETE);
std::string output = testing::internal::GetCapturedStdout();
EXPECT_STREQ("test", output.c_str());
}
HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWithStringMapDisbaledAndImplicitArgsBlockedByEventWhenEventUnblockedThenOutputPrinted) {
auto userEvent = make_releaseable<UserEvent>(context);
MockKernelWithInternals mockKernel(*pClDevice);
std::string testString = "test";
mockKernel.kernelInfo.addToPrintfStringsMap(0, testString);
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = false;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
*mockKernel.mockKernel->pImplicitArgs = {};

View File

@@ -586,6 +586,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
auto pKernel = mockKernelWithInternals.mockKernel;
auto &kernelInfo = mockKernelWithInternals.kernelInfo;
kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0);
kernelInfo.addToPrintfStringsMap(0, testString);

View File

@@ -1012,6 +1012,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
kernelDescriptor.kernelAttributes.hasNonKernelArgStore = outExperimentalProperties.hasNonKernelArgStore;
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = outExperimentalProperties.hasNonKernelArgAtomic;
}
kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin;
kernelDescriptor.entryPoints.skipPerThreadDataLoad = execEnv.offsetToSkipPerThreadDataLoad;
kernelDescriptor.entryPoints.skipSetFFIDGP = execEnv.offsetToSkipSetFfidGp;

View File

@@ -7,6 +7,7 @@
#pragma once
#include "shared/source/device_binary_format/device_binary_formats.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/kernel/debug_data.h"
@@ -164,6 +165,8 @@ struct KernelDescriptor {
AddressingMode imageAddressingMode = Bindful;
AddressingMode samplerAddressingMode = Bindful;
DeviceBinaryFormat binaryFormat = DeviceBinaryFormat::Unknown;
uint8_t workgroupWalkOrder[3] = {0, 1, 2};
uint8_t workgroupDimensionsOrder[3] = {0, 1, 2};
@@ -206,6 +209,13 @@ struct KernelDescriptor {
uint32_t packed;
} flags;
static_assert(sizeof(KernelAttributes::flags) == sizeof(KernelAttributes::flags.packed), "");
// Tells callers whether the printf strings map should be used for this kernel.
// Only a Patchtokens binary can yield true, and then only when either the
// usesStringMapForPrintf flag or the requiresImplicitArgs flag is set.
// Any other binary format yields false regardless of the flags.
bool usesStringMap() const {
    const bool stringMapRequested = flags.usesStringMapForPrintf || flags.requiresImplicitArgs;
    return (binaryFormat == DeviceBinaryFormat::Patchtokens) && stringMapRequested;
}
} kernelAttributes;
struct {

View File

@@ -523,6 +523,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::Ker
dst.external.igcInfoForGtpin = (src.tokens.gtpinInfo + 1);
}
dst.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
dst.kernelAttributes.gpuPointerSize = gpuPointerSizeInBytes;
if (DebugManager.flags.UpdateCrossThreadDataSize.get()) {

View File

@@ -2740,6 +2740,8 @@ kernels:
EXPECT_STREQ("some_other_kernel", programInfo.kernelInfos[1]->kernelDescriptor.kernelMetadata.kernelName.c_str());
EXPECT_EQ(8, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.simdSize);
EXPECT_EQ(32, programInfo.kernelInfos[1]->kernelDescriptor.kernelAttributes.simdSize);
EXPECT_EQ(DeviceBinaryFormat::Zebin, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.binaryFormat);
EXPECT_EQ(DeviceBinaryFormat::Zebin, programInfo.kernelInfos[1]->kernelDescriptor.kernelAttributes.binaryFormat);
}
TEST(PopulateKernelDescriptor, GivenMinimalExecutionEnvThenPopulateKernelDescriptorWithDefaults) {
@@ -2785,6 +2787,7 @@ TEST(PopulateKernelDescriptor, GivenMinimalExecutionEnvThenPopulateKernelDescrip
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.usesSpecialPipelineSelectMode, Defaults::hasDpas);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.usesStatelessWrites, (false == Defaults::hasNoStatelessWrite));
EXPECT_EQ(kernelDescriptor.kernelAttributes.barrierCount, static_cast<uint8_t>(Defaults::barrierCount));
EXPECT_EQ(kernelDescriptor.kernelAttributes.binaryFormat, DeviceBinaryFormat::Zebin);
EXPECT_EQ(kernelDescriptor.kernelAttributes.bufferAddressingMode, (Defaults::has4GBBuffers) ? KernelDescriptor::Stateless : KernelDescriptor::BindfulAndStateless);
EXPECT_EQ(kernelDescriptor.kernelAttributes.inlineDataPayloadSize, static_cast<uint16_t>(Defaults::inlineDataPayloadSize));
EXPECT_EQ(kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], static_cast<uint16_t>(Defaults::requiredWorkGroupSize[0]));

View File

@@ -14,15 +14,19 @@
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/device_binary_format/patchtokens_tests.h"
TEST(KernelDescriptorFromPatchtokens, GivenEmptyInputKernelFromPatchtokensThenOnlySetsUpPointerSize) {
// Populating a descriptor from a kernel-token set that only has a header attached must still
// record the requested GPU pointer size and mark the descriptor's binary format as Patchtokens.
TEST(KernelDescriptorFromPatchtokens, GivenEmptyInputKernelFromPatchtokensThenOnlySetsUpPointerSizeAndBinaryType) {
NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens;
iOpenCL::SKernelBinaryHeaderCommon kernelHeader;
kernelTokens.header = &kernelHeader;
NEO::KernelDescriptor kernelDescriptor;
// First pass: the third argument (4) is expected to land in gpuPointerSize.
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4);
EXPECT_EQ(kernelDescriptor.kernelAttributes.gpuPointerSize, 4);
EXPECT_EQ(kernelDescriptor.kernelAttributes.binaryFormat, DeviceBinaryFormat::Patchtokens);
// Reset the format so the second pass proves populateKernelDescriptor sets it again itself.
kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Unknown;
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 8);
EXPECT_EQ(kernelDescriptor.kernelAttributes.gpuPointerSize, 8);
EXPECT_EQ(kernelDescriptor.kernelAttributes.binaryFormat, DeviceBinaryFormat::Patchtokens);
}
TEST(KernelDescriptorFromPatchtokens, GivenKernelFromPatchtokensWhenKernelNameIsSpecifiedThenItIsCopiedIntoKernelDescriptor) {