Files
compute-runtime/opencl/source/program/printf_handler.cpp
Kacper Nowak 1390af6efe Make usesStringMap flag independent of implicit args requirements
This commit removes the part of the condition that required the requiresImplicitArgs
flag to be set in the kernel descriptor in order to set the usesStringMap flag.

Related-To: NEO-6604
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
2022-02-09 09:51:01 +01:00

119 lines
5.8 KiB
C++

/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "printf_handler.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/program/print_formatter.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
namespace NEO {
// Binds the handler to the given device and prepares the initial value that
// prepareDispatch() later hands to MemoryTransferHelper::transferMemoryToAllocation.
PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) {
    // The stored value is the byte size of a uint32_t.
    printfSurfaceInitialDataSizePtr = std::make_unique<uint32_t>(static_cast<uint32_t>(sizeof(uint32_t)));
}
// Hands the printf surface back to the device's memory manager.
PrintfHandler::~PrintfHandler() {
    auto *memoryManager = device.getMemoryManager();
    memoryManager->freeGraphicsMemory(printfSurface);
}
// Factory: returns a newly allocated handler when the dispatch uses a stateless
// printf surface, or when its main kernel exists and has implicit args.
// Returns nullptr otherwise. The returned raw pointer is allocated with `new`.
PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &device) {
    bool handlerNeeded = multiDispatchInfo.usesStatelessPrintfSurface();

    // The main kernel is only consulted when the stateless check did not already decide.
    if (!handlerNeeded) {
        auto mainKernel = multiDispatchInfo.peekMainKernel();
        if (mainKernel != nullptr && mainKernel->getImplicitArgs()) {
            handlerNeeded = true;
        }
    }

    return handlerNeeded ? new PrintfHandler(device) : nullptr;
}
// Allocates the printf surface for the dispatch's main kernel, transfers the
// initial data size value into it, and patches the surface address into the kernel.
// Returns without doing anything when the device reports a printf buffer size of zero.
void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) {
    auto surfaceSize = device.getSharedDeviceInfo().printfBufferSize;
    if (surfaceSize == 0) {
        return;
    }

    auto rootDeviceIndex = device.getRootDeviceIndex();
    kernel = multiDispatchInfo.peekMainKernel();
    printfSurface = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {rootDeviceIndex, surfaceSize, AllocationType::PRINTF_SURFACE, device.getDeviceBitfield()});

    // Transfer the initial value into the surface; the first argument tells the
    // transfer helper whether a blit copy is required for this allocation.
    auto &hardwareInfo = device.getHardwareInfo();
    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    const auto blitRequired = hwHelper.isBlitCopyRequiredForLocalMemory(hardwareInfo, *printfSurface);
    MemoryTransferHelper::transferMemoryToAllocation(blitRequired, device.getDevice(), printfSurface, 0,
                                                     printfSurfaceInitialDataSizePtr.get(),
                                                     sizeof(*printfSurfaceInitialDataSizePtr));

    const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    if (kernelDescriptor.kernelAttributes.flags.usesPrintf) {
        const auto &surfaceArg = kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress;

        // Patch the surface GPU address into cross-thread data at the stateless offset.
        auto statelessPatchLocation = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getCrossThreadData()), surfaceArg.stateless);
        patchWithRequiredSize(statelessPatchLocation, surfaceArg.pointerSize,
                              static_cast<uintptr_t>(printfSurface->getGpuAddressToPatch()));

        // When a bindful offset is present, also program a surface state in the kernel's heap.
        if (isValidOffset(surfaceArg.bindful)) {
            auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()), surfaceArg.bindful);
            void *surfaceCpuAddress = printfSurface->getUnderlyingBuffer();
            size_t surfaceByteSize = printfSurface->getUnderlyingBufferSize();
            Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, surfaceByteSize, surfaceCpuAddress, 0, printfSurface, 0, 0,
                                    kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
                                    kernel->areMultipleSubDevicesInContext());
        }
    }

    // Kernels with implicit args receive the surface GPU address through that structure as well.
    if (auto implicitArgs = kernel->getImplicitArgs()) {
        implicitArgs->printfBufferPtr = printfSurface->getGpuAddress();
    }
}
// Makes the printf surface resident on the given command stream receiver.
// Does nothing when no surface has been allocated.
void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
    // prepareDispatch() returns before allocating the surface when the device reports
    // a zero printf buffer size; dereferencing the pointer in that state would be invalid.
    // NOTE(review): assumes printfSurface is initialised to nullptr in the header - confirm.
    if (printfSurface == nullptr) {
        return;
    }
    commandStreamReceiver.makeResident(*printfSurface);
}
// Reads the contents of the printf surface and prints them through PrintFormatter.
// When stateless compression is allowed or a blit copy is required for the surface,
// the data is first read into a temporary host buffer via a BCS engine.
// Does nothing when prepareDispatch() did not set up a kernel and a surface.
void PrintfHandler::printEnqueueOutput() {
    // prepareDispatch() returns early (zero printf buffer size) without assigning
    // kernel or printfSurface; both are dereferenced below, so bail out here.
    // NOTE(review): assumes both members are initialised to nullptr in the header - confirm.
    if (kernel == nullptr || printfSurface == nullptr) {
        return;
    }

    auto &hwInfo = device.getHardwareInfo();
    auto usesStringMap = kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf;
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);

    // By default the formatter reads straight from the surface's underlying buffer.
    auto printfOutputBuffer = reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer());
    auto printfOutputSize = static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize());
    std::unique_ptr<uint8_t[]> printfOutputDecompressed;

    auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    if (hwInfoConfig.allowStatelessCompression(hwInfo) || helper.isBlitCopyRequiredForLocalMemory(hwInfo, *printfSurface)) {
        // Redirect the formatter to a host-side copy filled by a buffer-to-host blit.
        printfOutputDecompressed = std::make_unique<uint8_t[]>(printfOutputSize);
        printfOutputBuffer = printfOutputDecompressed.get();

        auto &bcsEngine = device.getEngine(EngineHelpers::getBcsEngineType(hwInfo, device.getDeviceBitfield(), device.getSelectorCopyEngine(), true), EngineUsage::Regular);

        BlitPropertiesContainer blitPropertiesContainer;
        blitPropertiesContainer.push_back(
            BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
                                                            *bcsEngine.commandStreamReceiver, printfSurface, nullptr,
                                                            printfOutputDecompressed.get(),
                                                            printfSurface->getGpuAddress(),
                                                            0, 0, 0, Vec3<size_t>(printfOutputSize, 0, 0), 0, 0, 0, 0));
        bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device.getDevice());
    }

    // The string map is only passed when the kernel descriptor says printf uses it.
    PrintFormatter printFormatter(printfOutputBuffer, printfOutputSize, kernel->is32Bit(),
                                  usesStringMap ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
    printFormatter.printKernelOutput();
}
} // namespace NEO