Add method to dispatch blit operation from hostPtr to Buffer

Related-To: NEO-3020

Change-Id: If76f2c659c3ee343693a6d3ced86a47d7ed0bf61
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-04-03 15:59:31 +02:00
committed by sys_ocldev
parent fac6ddaebc
commit ccd93e1ea8
15 changed files with 243 additions and 5 deletions

View File

@ -180,6 +180,8 @@ class CommandStreamReceiver {
this->latestSentTaskCount = latestSentTaskCount;
}
// Dispatches a blit operation that copies sourceSize of data from sourceHostPtr
// into destinationMemObj. Pure virtual: every concrete command stream receiver
// has to provide its own implementation.
virtual void blitFromHostPtr(MemObj &destinationMemObj, void *sourceHostPtr, uint64_t sourceSize) = 0;
protected:
void cleanupResources();

View File

@ -70,6 +70,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW;
}
// Hardware implementation of the hostPtr -> memory object blit dispatch;
// the definition lives in command_stream_receiver_hw.inl.
void blitFromHostPtr(MemObj &destinationMemObj, void *sourceHostPtr, uint64_t sourceSize) override;
protected:
using CommandStreamReceiver::osContext;

View File

@ -14,6 +14,7 @@
#include "runtime/device/device.h"
#include "runtime/event/event.h"
#include "runtime/gtpin/gtpin_notify.h"
#include "runtime/helpers/blit_commands_helper.h"
#include "runtime/helpers/cache_policy.h"
#include "runtime/helpers/flat_batch_buffer_helper_hw.h"
#include "runtime/helpers/flush_stamp.h"
@ -775,4 +776,34 @@ bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(cons
return DebugManager.flags.ForceCsrReprogramming.get();
}
// Programs a copy of sourceSize from host memory (sourceHostPtr) into a buffer
// object using blit commands.
//
// Preconditions (violations are unrecoverable):
//  - the OS context of this receiver targets the BCS engine,
//  - destinationMemObj is a CL_MEM_OBJECT_BUFFER,
//  - an allocation for the host pointer can be created.
//
// The programmed sequence is: XY_COPY_BLT command(s), an MI_FLUSH_DW that writes
// (taskCount + 1) to the tag allocation, MI_BATCH_BUFFER_END and padding up to a
// cache line. taskCount is incremented afterwards. No submission call is made
// in this function; it only fills the command stream.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::blitFromHostPtr(MemObj &destinationMemObj, void *sourceHostPtr, uint64_t sourceSize) {
    using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
    using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;

    // Check every precondition up front, before any command stream space is
    // reserved or any allocation is created on behalf of an invalid request.
    UNRECOVERABLE_IF(osContext->getEngineType() != aub_stream::EngineType::ENGINE_BCS);
    UNRECOVERABLE_IF(destinationMemObj.peekClMemObjType() != CL_MEM_OBJECT_BUFFER);

    // The estimate already accounts for MI_FLUSH_DW, MI_BATCH_BUFFER_END and the
    // cache line alignment programmed below.
    auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(sourceSize));

    HostPtrSurface hostPtrSurface(sourceHostPtr, static_cast<size_t>(sourceSize), true);
    bool success = createAllocationForHostSurface(hostPtrSurface, false);
    UNRECOVERABLE_IF(!success);

    // The downcast is safe: the memory object type was verified above.
    BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(static_cast<Buffer &>(destinationMemObj), commandStream, *hostPtrSurface.getAllocation(), sourceSize);

    // Post-sync write of the new task count to the tag allocation.
    auto miFlushDwCmd = reinterpret_cast<MI_FLUSH_DW *>(commandStream.getSpace(sizeof(MI_FLUSH_DW)));
    *miFlushDwCmd = GfxFamily::cmdInitMiFlushDw;
    miFlushDwCmd->setPostSyncOperation(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
    miFlushDwCmd->setDestinationAddress(tagAllocation->getGpuAddress());
    miFlushDwCmd->setImmediateData(taskCount + 1);

    auto batchBufferEnd = reinterpret_cast<MI_BATCH_BUFFER_END *>(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END)));
    *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd;

    alignToCacheLine(commandStream);

    taskCount++;
}
} // namespace NEO

View File

@ -7,6 +7,7 @@
#include "runtime/command_stream/command_stream_receiver_hw.inl"
#include "runtime/command_stream/device_command_stream.h"
#include "runtime/helpers/blit_commands_helper.inl"
#include "hw_cmds.h"
#include "hw_info.h"
@ -38,8 +39,8 @@ void populateFactoryTable<CommandStreamReceiverHw<Family>>() {
commandStreamReceiverFactory[gfxCore] = DeviceCommandStreamReceiver<Family>::create;
}
// Explicitly instantiate CommandStreamReceiverHw and BlitCommandsHelper for this device family
template class CommandStreamReceiverHw<Family>;
template struct BlitCommandsHelper<Family>;
const Family::GPGPU_WALKER Family::cmdInitGpgpuWalker = Family::GPGPU_WALKER::sInit();
const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit();

View File

@ -8,6 +8,7 @@
#include "runtime/command_stream/command_stream_receiver_hw.inl"
#include "runtime/command_stream/device_command_stream.h"
#include "runtime/gen11/reg_configs.h"
#include "runtime/helpers/blit_commands_helper.inl"
#include "runtime/os_interface/debug_settings_manager.h"
namespace NEO {
@ -141,8 +142,8 @@ void populateFactoryTable<CommandStreamReceiverHw<Family>>() {
commandStreamReceiverFactory[gfxCore] = DeviceCommandStreamReceiver<Family>::create;
}
// Explicitly instantiate CommandStreamReceiverHw and BlitCommandsHelper for this device family
template class CommandStreamReceiverHw<Family>;
template struct BlitCommandsHelper<Family>;
const Family::GPGPU_WALKER Family::cmdInitGpgpuWalker = Family::GPGPU_WALKER::sInit();
const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit();

View File

@ -7,6 +7,7 @@
#include "runtime/command_stream/command_stream_receiver_hw.inl"
#include "runtime/command_stream/device_command_stream.h"
#include "runtime/helpers/blit_commands_helper.inl"
#include "hw_cmds.h"
#include "hw_info.h"
@ -35,8 +36,8 @@ void CommandStreamReceiverHw<Family>::addClearSLMWorkAround(Family::PIPE_CONTROL
pCmd->setProtectedMemoryDisable(1);
}
// Explicitly instantiate CommandStreamReceiverHw and BlitCommandsHelper for this device family
template class CommandStreamReceiverHw<Family>;
template struct BlitCommandsHelper<Family>;
const Family::GPGPU_WALKER Family::cmdInitGpgpuWalker = Family::GPGPU_WALKER::sInit();
const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit();

View File

@ -7,6 +7,7 @@
#include "runtime/command_stream/command_stream_receiver_hw.inl"
#include "runtime/command_stream/device_command_stream.h"
#include "runtime/helpers/blit_commands_helper.inl"
#include "hw_cmds.h"
#include "hw_info.h"
@ -30,8 +31,8 @@ void populateFactoryTable<CommandStreamReceiverHw<Family>>() {
commandStreamReceiverFactory[gfxCore] = DeviceCommandStreamReceiver<Family>::create;
}
// Explicitly instantiate CommandStreamReceiverHw and BlitCommandsHelper for this device family
template class CommandStreamReceiverHw<Family>;
template struct BlitCommandsHelper<Family>;
const Family::GPGPU_WALKER Family::cmdInitGpgpuWalker = Family::GPGPU_WALKER::sInit();
const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit();

View File

@ -14,6 +14,8 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/base_object.h
${CMAKE_CURRENT_SOURCE_DIR}/base_object_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/basic_math.h
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.inl
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.h

View File

@ -0,0 +1,21 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
class Buffer;
class GraphicsAllocation;
class LinearStream;
// Stateless, per-GfxFamily helper for programming blit commands.
// Only declarations live here; the definitions are in blit_commands_helper.inl.
template <typename GfxFamily>
struct BlitCommandsHelper {
// Returns the command stream space needed to copy copySize: all XY_COPY_BLT
// commands plus one MI_FLUSH_DW and one MI_BATCH_BUFFER_END, aligned up to a cache line.
static size_t estimateBlitCommandsSize(uint64_t copySize);
// Programs into linearStream the XY_COPY_BLT commands that copy copySize from
// hostPtrAllocation into the graphics allocation of buffer.
static void dispatchBlitCommandsForBuffer(Buffer &buffer, LinearStream &linearStream, GraphicsAllocation &hostPtrAllocation, uint64_t copySize);
};
} // namespace NEO

View File

@ -0,0 +1,78 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/helpers/blit_commands_helper.h"
namespace NEO {
// Computes how much command stream space a copy of copySize requires.
//
// The copy is split the same way dispatchBlitCommandsForBuffer splits it:
//  - more than maxBlitWidth left -> one blit of maxBlitWidth x (1 .. maxBlitHeight),
//  - otherwise                   -> one single-row blit of (1 .. maxBlitWidth) x 1.
// The result covers one XY_COPY_BLT per chunk plus a trailing MI_FLUSH_DW and
// MI_BATCH_BUFFER_END, aligned up to a cache line.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize) {
    size_t bltCount = 0;

    for (uint64_t remaining = copySize; remaining != 0; ++bltCount) {
        // Start from the single-row case and widen to 2D only when needed.
        uint64_t columns = remaining;
        uint64_t rows = 1;

        if (remaining > BlitterConstants::maxBlitWidth) {
            columns = BlitterConstants::maxBlitWidth;
            rows = std::min((remaining / columns), BlitterConstants::maxBlitHeight);
        }

        remaining -= (columns * rows);
    }

    const size_t bltCommandsSize = sizeof(typename GfxFamily::XY_COPY_BLT) * bltCount;
    const size_t trailingCommandsSize = sizeof(typename GfxFamily::MI_FLUSH_DW) +
                                        sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);

    size_t totalSize = bltCommandsSize + trailingCommandsSize;
    return alignUp(totalSize, MemoryConstants::cacheLineSize);
}
// Programs the XY_COPY_BLT commands that copy copySize from hostPtrAllocation
// into the graphics allocation of buffer.
//
// The copy is cut into chunks; each chunk becomes one command:
//  - more than maxBlitWidth left -> 2D blit of maxBlitWidth x (1 .. maxBlitHeight),
//  - otherwise                   -> 1D blit of (1 .. maxBlitWidth) x 1.
// Every command starts at coordinate (0, 0); progress through the copy is
// expressed by advancing the source and destination base addresses.
//
// NOTE(review): no source/destination pitch is programmed here - confirm that
// the values coming from cmdInitXyCopyBlt are what multi-row blits need.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(Buffer &buffer, LinearStream &linearStream,
                                                                  GraphicsAllocation &hostPtrAllocation, uint64_t copySize) {
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;

    uint64_t copiedSoFar = 0;

    for (uint64_t remaining = copySize; remaining != 0;) {
        // Start from the single-row case and widen to 2D only when needed.
        uint64_t columns = remaining;
        uint64_t rows = 1;

        if (remaining > BlitterConstants::maxBlitWidth) {
            columns = BlitterConstants::maxBlitWidth;
            rows = std::min((remaining / columns), BlitterConstants::maxBlitHeight);
        }

        auto cmd = linearStream.getSpaceForCmd<XY_COPY_BLT>();
        *cmd = GfxFamily::cmdInitXyCopyBlt;

        // Top-left corner of both rectangles is always the origin.
        cmd->setDestinationX1CoordinateLeft(0);
        cmd->setDestinationY1CoordinateTop(0);
        cmd->setSourceX1CoordinateLeft(0);
        cmd->setSourceY1CoordinateTop(0);

        // Bottom-right corner carries the chunk dimensions.
        cmd->setDestinationX2CoordinateRight(static_cast<uint32_t>(columns));
        cmd->setDestinationY2CoordinateBottom(static_cast<uint32_t>(rows));

        cmd->setDestinationBaseAddress(buffer.getGraphicsAllocation()->getGpuAddress() + copiedSoFar);
        cmd->setSourceBaseAddress(hostPtrAllocation.getGpuAddress() + copiedSoFar);

        const auto chunkSize = columns * rows;
        remaining -= chunkSize;
        copiedSoFar += chunkSize;
    }
}
} // namespace NEO

View File

@ -45,4 +45,11 @@ static const uintptr_t page4kEntryMask = std::numeric_limits<uintptr_t>::max() &
static const uintptr_t page64kEntryMask = std::numeric_limits<uintptr_t>::max() & ~MemoryConstants::page64kMask;
static const int GfxAddressBits = is64bit ? 48 : 32;
static const uint64_t maxSvmAddress = is64bit ? maxNBitValue<47> : maxNBitValue<32>;
} // namespace MemoryConstants
namespace BlitterConstants {
// Upper bounds used when a copy is split into individual blit commands.
// Largest number of columns a single blit is given.
constexpr uint64_t maxBlitWidth = 0x7FFF;
// Largest number of rows a single blit is given.
constexpr uint64_t maxBlitHeight = 0x7FFF;
// Amount covered by one full-size (maxBlitWidth x maxBlitHeight) blit.
constexpr uint64_t max2dBlitSize = maxBlitWidth * maxBlitHeight;
} // namespace BlitterConstants

View File

@ -104,7 +104,11 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con
auto gpuAddress = allocator32Bit->allocate(allocationSize);
if (allocationData.size < 0xfffff000) {
ptrAlloc = alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment);
if (fakeBigAllocations) {
ptrAlloc = reinterpret_cast<void *>(dummyAddress);
} else {
ptrAlloc = alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment);
}
}
MemoryAllocation *memoryAllocation = nullptr;

View File

@ -14,6 +14,7 @@
#include "runtime/command_stream/scratch_space_controller.h"
#include "runtime/event/user_event.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/blit_commands_helper.h"
#include "runtime/helpers/cache_policy.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/ptr_math.h"
@ -21,6 +22,7 @@
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include "runtime/os_interface/os_context.h"
#include "runtime/utilities/linux/debug_env_reader.h"
#include "test.h"
#include "unit_tests/fixtures/built_in_fixture.h"
@ -38,6 +40,7 @@
#include "unit_tests/mocks/mock_event.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"
#include "unit_tests/utilities/base_object_utils.h"
#include "reg_configs_common.h"
@ -250,3 +253,84 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddre
scratchController->getScratchSpaceAllocation()->setCpuPtrAndGpuAddress(scratchController->getScratchSpaceAllocation()->getUnderlyingBuffer(), expectedScratchAddress);
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateGshAddressForScratchSpace((expectedScratchAddress - MemoryConstants::pageSize), scratchController->calculateNewGSH()));
}
// Checks the size estimate for two copy sizes: an exact multiple of the largest
// 2D blit (three blits) and the same size plus one, which needs an extra 1D blit.
HWTEST_F(CommandStreamReceiverHwTest, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    const uint64_t bltSizeWithLeftover = (3 * BlitterConstants::max2dBlitSize) + 1;
    const uint64_t bltSizeWithoutLeftover = (3 * BlitterConstants::max2dBlitSize);
    const uint32_t bltCountWithLeftover = 4;
    const uint32_t bltCountWithoutLeftover = 3;

    // Commands that follow the blits regardless of how many blits there are.
    const size_t trailingCommandsSize = sizeof(MI_FLUSH_DW) + sizeof(MI_BATCH_BUFFER_END);

    auto expectedSizeFor = [&](uint32_t bltCount) {
        return alignUp(trailingCommandsSize + (sizeof(XY_COPY_BLT) * bltCount), MemoryConstants::cacheLineSize);
    };

    const auto estimateWithLeftover = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(bltSizeWithLeftover);
    const auto estimateWithoutLeftover = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(bltSizeWithoutLeftover);

    EXPECT_EQ(expectedSizeFor(bltCountWithLeftover), estimateWithLeftover);
    EXPECT_EQ(expectedSizeFor(bltCountWithoutLeftover), estimateWithoutLeftover);
}
// Dispatches a blit of two full-size 2D chunks plus a 17-element leftover and
// verifies the programmed stream: three XY_COPY_BLT commands, an MI_FLUSH_DW
// writing the new task count to the tag address, MI_BATCH_BUFFER_END and
// MI_NOOP padding.
HWTEST_F(CommandStreamReceiverHwTest, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommands) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    MockContext context(pDevice);

    // NOTE(review): presumably enabled so the very large buffer below does not
    // have to be backed by real memory - confirm against the memory manager.
    static_cast<OsAgnosticMemoryManager *>(csr.getMemoryManager())->turnOnFakingBigAllocations();

    // blitFromHostPtr requires a BCS engine, so replace the registered OS context.
    auto engine = csr.getMemoryManager()->getRegisteredEngineForCsr(&csr);
    auto contextId = engine->osContext->getContextId();

    delete engine->osContext;
    engine->osContext = OsContext::create(nullptr, contextId, 0, aub_stream::EngineType::ENGINE_BCS, PreemptionMode::Disabled, false);
    engine->osContext->incRefInternal();
    csr.setupContext(*engine->osContext);

    uint32_t bltLeftover = 17;
    uint64_t bltSize = (2 * BlitterConstants::max2dBlitSize) + bltLeftover;
    uint32_t numberOfBlts = 3;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(&context, CL_MEM_READ_WRITE, static_cast<size_t>(bltSize), nullptr, retVal));
    // The buffer is dereferenced below, so a failed creation must stop the test.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    uint32_t newTaskCount = 19;
    csr.taskCount = newTaskCount - 1;

    csr.blitFromHostPtr(*buffer, hostPtr, bltSize);
    EXPECT_EQ(newTaskCount, csr.taskCount);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr.commandStream);

    auto &cmdList = hwParser.cmdList;
    auto cmdIterator = cmdList.begin();

    for (uint32_t i = 0; i < numberOfBlts; i++) {
        // Fatal assertions: the iterator and the command are dereferenced right after.
        ASSERT_NE(cmdList.end(), cmdIterator);
        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*(cmdIterator++));
        ASSERT_NE(nullptr, bltCmd);

        EXPECT_EQ(0u, bltCmd->getDestinationX1CoordinateLeft());
        EXPECT_EQ(0u, bltCmd->getDestinationY1CoordinateTop());
        EXPECT_EQ(0u, bltCmd->getSourceX1CoordinateLeft());
        EXPECT_EQ(0u, bltCmd->getSourceY1CoordinateTop());

        if (i == (numberOfBlts - 1)) {
            // Last command is the 1D leftover blit.
            EXPECT_EQ(bltLeftover, bltCmd->getDestinationX2CoordinateRight());
            EXPECT_EQ(1u, bltCmd->getDestinationY2CoordinateBottom());
        } else {
            // Full-size 2D blit: width bounds X2, height bounds Y2.
            EXPECT_EQ(static_cast<uint32_t>(BlitterConstants::maxBlitWidth), bltCmd->getDestinationX2CoordinateRight());
            EXPECT_EQ(static_cast<uint32_t>(BlitterConstants::maxBlitHeight), bltCmd->getDestinationY2CoordinateBottom());
        }
    }

    ASSERT_NE(cmdList.end(), cmdIterator);
    auto miFlushCmd = genCmdCast<MI_FLUSH_DW *>(*(cmdIterator++));
    ASSERT_NE(nullptr, miFlushCmd);
    EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation());
    EXPECT_EQ(csr.getTagAllocation()->getGpuAddress(), miFlushCmd->getDestinationAddress());
    EXPECT_EQ(newTaskCount, miFlushCmd->getImmediateData());

    ASSERT_NE(cmdList.end(), cmdIterator);
    EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_BATCH_BUFFER_END *>(*(cmdIterator++)));

    // padding
    while (cmdIterator != cmdList.end()) {
        EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_NOOP *>(*(cmdIterator++)));
    }
}

View File

@ -544,6 +544,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}
// Intentionally empty: this mock does not program any blit commands.
void blitFromHostPtr(MemObj &destinationMemObj, void *sourceHostPtr, uint64_t sourceSize) override {
}
CompletionStamp flushTask(
LinearStream &commandStream,

View File

@ -245,6 +245,8 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}
// Intentionally empty: this mock does not program any blit commands.
void blitFromHostPtr(MemObj &destinationMemObj, void *sourceHostPtr, uint64_t sourceSize) override {
}
void setOSInterface(OSInterface *osInterface);
CommandStreamReceiverType getType() override {