mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Handle printf output when implicit args are required
Related-To: NEO-5081
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8b36473a9a
commit
34ad95ae12
@@ -139,6 +139,7 @@ class CommandComputeKernel : public Command {
|
||||
|
||||
LinearStream *getCommandStream() override { return kernelOperation->commandStream.get(); }
|
||||
Kernel *peekKernel() const { return kernel; }
|
||||
PrintfHandler *peekPrintfHandler() const { return printfHandler.get(); }
|
||||
|
||||
protected:
|
||||
std::vector<Surface *> surfaces;
|
||||
|
||||
@@ -61,16 +61,19 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
|
||||
device.getDevice(), printfSurface, 0, printfSurfaceInitialDataSizePtr.get(),
|
||||
sizeof(*printfSurfaceInitialDataSizePtr.get()));
|
||||
|
||||
const auto &printfSurfaceArg = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress;
|
||||
auto printfPatchAddress = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getCrossThreadData()), printfSurfaceArg.stateless);
|
||||
patchWithRequiredSize(printfPatchAddress, printfSurfaceArg.pointerSize, (uintptr_t)printfSurface->getGpuAddressToPatch());
|
||||
if (isValidOffset(printfSurfaceArg.bindful)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()), printfSurfaceArg.bindful);
|
||||
void *addressToPatch = printfSurface->getUnderlyingBuffer();
|
||||
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0,
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
kernel->areMultipleSubDevicesInContext());
|
||||
if (kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesPrintf) {
|
||||
|
||||
const auto &printfSurfaceArg = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress;
|
||||
auto printfPatchAddress = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getCrossThreadData()), printfSurfaceArg.stateless);
|
||||
patchWithRequiredSize(printfPatchAddress, printfSurfaceArg.pointerSize, (uintptr_t)printfSurface->getGpuAddressToPatch());
|
||||
if (isValidOffset(printfSurfaceArg.bindful)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()), printfSurfaceArg.bindful);
|
||||
void *addressToPatch = printfSurface->getUnderlyingBuffer();
|
||||
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0,
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
kernel->areMultipleSubDevicesInContext());
|
||||
}
|
||||
}
|
||||
auto pImplicitArgs = kernel->getImplicitArgs();
|
||||
if (pImplicitArgs) {
|
||||
@@ -83,31 +86,28 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
}
|
||||
|
||||
void PrintfHandler::printEnqueueOutput() {
|
||||
auto usesStringMap = kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf || nullptr != kernel->getImplicitArgs();
|
||||
const auto &hwInfoConfig = *HwInfoConfig::get(device.getHardwareInfo().platform.eProductFamily);
|
||||
auto printfOutputBuffer = reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer());
|
||||
auto printfOutputSize = static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize());
|
||||
std::unique_ptr<uint8_t[]> printfOutputDecompressed;
|
||||
if (hwInfoConfig.allowStatelessCompression(device.getHardwareInfo())) {
|
||||
auto printOutputSize = static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize());
|
||||
auto printOutputDecompressed = std::make_unique<uint8_t[]>(printOutputSize);
|
||||
printfOutputDecompressed = std::make_unique<uint8_t[]>(printfOutputSize);
|
||||
printfOutputBuffer = printfOutputDecompressed.get();
|
||||
auto &bcsEngine = device.getEngine(EngineHelpers::getBcsEngineType(device.getHardwareInfo(), device.getDeviceBitfield(), device.getSelectorCopyEngine(), true), EngineUsage::Regular);
|
||||
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(
|
||||
BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
|
||||
*bcsEngine.commandStreamReceiver, printfSurface, nullptr,
|
||||
printOutputDecompressed.get(),
|
||||
printfOutputDecompressed.get(),
|
||||
printfSurface->getGpuAddress(),
|
||||
0, 0, 0, Vec3<size_t>(printOutputSize, 0, 0), 0, 0, 0, 0));
|
||||
0, 0, 0, Vec3<size_t>(printfOutputSize, 0, 0), 0, 0, 0, 0));
|
||||
bcsEngine.commandStreamReceiver->blitBuffer(blitPropertiesContainer, true, false, device.getDevice());
|
||||
|
||||
PrintFormatter printFormatter(printOutputDecompressed.get(), printOutputSize,
|
||||
kernel->is32Bit(),
|
||||
kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
|
||||
printFormatter.printKernelOutput();
|
||||
return;
|
||||
}
|
||||
|
||||
PrintFormatter printFormatter(reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer()), static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize()),
|
||||
kernel->is32Bit(),
|
||||
kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
|
||||
PrintFormatter printFormatter(printfOutputBuffer, printfOutputSize, kernel->is32Bit(),
|
||||
usesStringMap ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
|
||||
printFormatter.printKernelOutput();
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user