Fixed blitter path for printf with stateless compression in sba

Related-To: NEO-5107

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2021-09-01 00:34:29 +00:00
committed by Compute-Runtime-Automation
parent c656866b74
commit 754d6e40e0
5 changed files with 70 additions and 11 deletions

View File

@@ -22,9 +22,10 @@
namespace NEO {
const uint32_t PrintfHandler::printfSurfaceInitialDataSize;
PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) {}
// Binds the handler to deviceArg and creates the heap-held uint32_t that
// stores the initial data size value, sizeof(uint32_t).
PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) {
    // Allocate and initialize in a single step; the cast keeps the size_t -> uint32_t conversion explicit.
    printfSurfaceInitialDataSizePtr = std::make_unique<uint32_t>(static_cast<uint32_t>(sizeof(uint32_t)));
}
PrintfHandler::~PrintfHandler() {
device.getMemoryManager()->freeGraphicsMemory(printfSurface);
@@ -55,8 +56,8 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
MemoryTransferHelper::transferMemoryToAllocation(helper.isBlitCopyRequiredForLocalMemory(hwInfo, *printfSurface),
device.getDevice(), printfSurface, 0, &printfSurfaceInitialDataSize,
sizeof(printfSurfaceInitialDataSize));
device.getDevice(), printfSurface, 0, printfSurfaceInitialDataSizePtr.get(),
sizeof(*printfSurfaceInitialDataSizePtr.get()));
const auto &printfSurfaceArg = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress;
auto printfPatchAddress = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getCrossThreadData()), printfSurfaceArg.stateless);
@@ -78,12 +79,24 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
void PrintfHandler::printEnqueueOutput() {
auto &helper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
if (helper.allowStatelessCompression(device.getHardwareInfo())) {
auto printOutputSize = static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize());
auto printOutputDecompressed = std::make_unique<uint8_t[]>(printOutputSize);
auto &bcsEngine = device.getEngine(EngineHelpers::getBcsEngineType(device.getHardwareInfo(), device.getDeviceBitfield(), device.getSelectorCopyEngine(), true), EngineUsage::Regular);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(
BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
printfSurface, bcsEngine.commandStreamReceiver->getClearColorAllocation()));
BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
*bcsEngine.commandStreamReceiver, printfSurface, nullptr,
printOutputDecompressed.get(),
printfSurface->getGpuAddress(),
0, 0, 0, Vec3<size_t>(printOutputSize, 0, 0), 0, 0, 0, 0));
bcsEngine.commandStreamReceiver->blitBuffer(blitPropertiesContainer, true, false, device.getDevice());
PrintFormatter printFormatter(printOutputDecompressed.get(), printOutputSize,
kernel->is32Bit(),
kernel->getDescriptor().kernelAttributes.flags.usesStringMapForPrintf ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
printFormatter.printKernelOutput();
return;
}
PrintFormatter printFormatter(reinterpret_cast<const uint8_t *>(printfSurface->getUnderlyingBuffer()), static_cast<uint32_t>(printfSurface->getUnderlyingBufferSize()),

View File

@@ -33,7 +33,7 @@ class PrintfHandler {
protected:
PrintfHandler(ClDevice &device);
static const uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t);
std::unique_ptr<uint32_t> printfSurfaceInitialDataSizePtr;
ClDevice &device;
Kernel *kernel = nullptr;
GraphicsAllocation *printfSurface = nullptr;

View File

@@ -26,6 +26,22 @@ using namespace NEO;
using PrintfHandlerTests = ::testing::Test;
// Verifies that constructing a PrintfHandler allocates printfSurfaceInitialDataSizePtr
// and that the pointed-to value equals sizeof(uint32_t).
TEST_F(PrintfHandlerTests, givenPrintfHandlerWhenBeingConstructedThenStorePrintfSurfaceInitialDataSize) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    // Local subclass that makes the protected constructor and the member under test accessible.
    struct MockPrintfHandler : public PrintfHandler {
        using PrintfHandler::PrintfHandler;
        using PrintfHandler::printfSurfaceInitialDataSizePtr;

        MockPrintfHandler(ClDevice &device) : PrintfHandler(device) {}
    };

    MockPrintfHandler printfHandler(*device);

    // Fatal assertion: the following check dereferences the pointer, so the test
    // must stop here instead of crashing if the pointer was never allocated.
    ASSERT_NE(nullptr, printfHandler.printfSurfaceInitialDataSizePtr);
    EXPECT_EQ(sizeof(uint32_t), *printfHandler.printfSurfaceInitialDataSizePtr);
}
TEST_F(PrintfHandlerTests, givenNotPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResultIsNullptr) {
MockClDevice *device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)};
MockContext context;
@@ -73,8 +89,10 @@ TEST_F(PrintfHandlerTests, givenPreparedPrintfHandlerWithUndefinedSshOffsetWhenG
delete device;
}
HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedForAuxTranslation) {
REQUIRE_BLITTER_OR_SKIP(defaultHwInfo.get());
HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToDecompressPrintfOutput) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = true;
REQUIRE_BLITTER_OR_SKIP(&hwInfo);
DebugManagerStateRestore restore;
@@ -105,7 +123,7 @@ HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOut
if (enable > 0) {
EXPECT_EQ(1u, bcsCsr->blitBufferCalled);
EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, bcsCsr->receivedBlitProperties[0].auxTranslationDirection);
EXPECT_EQ(BlitterConstants::BlitDirection::BufferToHostPtr, bcsCsr->receivedBlitProperties[0].blitDirection);
} else {
EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
}

View File

@@ -8,6 +8,7 @@
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/program/kernel_info.h"
@@ -116,6 +117,23 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDifferentBufferSizesWhenEnableSta
}
}
// With the EnableStatelessCompression debug flag set to 1, calls setExtraAllocationData
// for constant, global and printf surface allocation types and expects both
// requiresCpuAccess and storageInfo.isLockable to be false afterwards.
XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenStatelessCompressionEnabledWhenSetExtraAllocationDataThenDontRequireCpuAccessNorMakeResourceLocableForCompressedAllocations) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableStatelessCompression.set(1);

    HardwareInfo hwInfo = *defaultHwInfo;
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    // Allocation types exercised by this test.
    const GraphicsAllocation::AllocationType surfaceTypes[] = {
        GraphicsAllocation::AllocationType::CONSTANT_SURFACE,
        GraphicsAllocation::AllocationType::GLOBAL_SURFACE,
        GraphicsAllocation::AllocationType::PRINTF_SURFACE};

    for (const auto surfaceType : surfaceTypes) {
        // Fresh AllocationData per iteration so results of one type cannot leak into the next.
        AllocationData allocationData;
        AllocationProperties properties(mockRootDeviceIndex, true, surfaceType, mockDeviceBitfield);

        hwHelper.setExtraAllocationData(allocationData, properties, hwInfo);

        EXPECT_FALSE(allocationData.flags.requiresCpuAccess);
        EXPECT_FALSE(allocationData.storageInfo.isLockable);
    }
}
XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumAndPlatformFamilyTypeThenProperValueForIsWorkaroundRequiredIsReturned) {
uint32_t steppings[] = {
REVISION_A0,