Move Scratch Space functionality to dedicated class

Change-Id: Ic7655c4b971513961aba6823478a139ffc943466
This commit is contained in:
Zdanowicz, Zbigniew
2018-11-22 15:16:20 +01:00
parent a9d3575919
commit 7dbd0ea4f3
41 changed files with 934 additions and 91 deletions

View File

@@ -32,6 +32,10 @@ set(RUNTIME_SRCS_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.inl
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.cpp
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.h
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.h
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.h
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp
@@ -46,3 +50,4 @@ set(RUNTIME_SRCS_COMMAND_STREAM
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_STREAM})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_STREAM ${RUNTIME_SRCS_COMMAND_STREAM})
add_subdirectories()

View File

@@ -24,7 +24,8 @@ CommandStreamReceiver *AUBCommandStreamReceiver::create(const HardwareInfo &hwIn
// Generate the full filename
const auto &gtSystemInfo = *hwInfo.pSysInfo;
std::stringstream strfilename;
strfilename << hwPrefix << "_" << gtSystemInfo.SliceCount << "x" << gtSystemInfo.SubSliceCount << "x" << gtSystemInfo.MaxEuPerSubSlice << "_" << baseName << ".aub";
uint32_t subSlicesPerSlice = gtSystemInfo.SubSliceCount / gtSystemInfo.SliceCount;
strfilename << hwPrefix << "_" << gtSystemInfo.SliceCount << "x" << subSlicesPerSlice << "x" << gtSystemInfo.MaxEuPerSubSlice << "_" << baseName << ".aub";
// clean-up any fileName issues because of the file system incompatibilities
auto fileName = strfilename.str();

View File

@@ -9,6 +9,7 @@
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_stream/experimental_command_buffer.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/command_stream/scratch_space_controller.h"
#include "runtime/device/device.h"
#include "runtime/event/event.h"
#include "runtime/gtpin/gtpin_notify.h"
@@ -157,11 +158,6 @@ void CommandStreamReceiver::cleanupResources() {
waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, TEMPORARY_ALLOCATION);
waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, REUSABLE_ALLOCATION);
if (scratchAllocation) {
getMemoryManager()->freeGraphicsMemory(scratchAllocation);
scratchAllocation = nullptr;
}
if (debugSurface) {
getMemoryManager()->freeGraphicsMemory(debugSurface);
debugSurface = nullptr;
@@ -217,6 +213,10 @@ void CommandStreamReceiver::setRequiredScratchSize(uint32_t newRequiredScratchSi
}
}
// Returns the scratch allocation currently held by the scratch space controller
// (may be nullptr when the controller holds no allocation).
// NOTE(review): scratchSpaceController is dereferenced without a null check; in the
// visible code it is created in the CommandStreamReceiverHw constructor - confirm
// that every receiver type reaching this call has set it up.
GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() {
return scratchSpaceController->getScratchSpaceAllocation();
}
void CommandStreamReceiver::initProgrammingFlags() {
isPreambleSent = false;
GSBAFor32BitProgrammed = false;
@@ -310,6 +310,7 @@ void CommandStreamReceiver::allocateHeapMemory(IndirectHeap::Type heapType,
indirectHeap = new IndirectHeap(heapMemory, requireInternalHeap);
indirectHeap->overrideMaxSize(finalHeapSize);
}
scratchSpaceController->reserveHeap(heapType, indirectHeap);
}
void CommandStreamReceiver::releaseIndirectHeap(IndirectHeap::Type heapType) {

View File

@@ -35,6 +35,7 @@ class LinearStream;
class MemoryManager;
class OsContext;
class OSInterface;
class ScratchSpaceController;
class TimestampPacket;
struct HwPerfCounter;
struct HwTimeStamps;
@@ -116,7 +117,7 @@ class CommandStreamReceiver {
virtual void overrideMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
void setRequiredScratchSize(uint32_t newRequiredScratchSize);
GraphicsAllocation *getScratchAllocation() const { return scratchAllocation; }
GraphicsAllocation *getScratchAllocation();
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
GraphicsAllocation *allocateDebugSurface(size_t size);
@@ -180,6 +181,7 @@ class CommandStreamReceiver {
std::unique_ptr<ExperimentalCommandBuffer> experimentalCmdBuffer;
std::unique_ptr<InternalAllocationStorage> internalAllocationStorage;
std::unique_ptr<KmdNotifyHelper> kmdNotifyHelper;
std::unique_ptr<ScratchSpaceController> scratchSpaceController;
std::unique_ptr<TagAllocator<HwTimeStamps>> profilingTimeStampAllocator;
std::unique_ptr<TagAllocator<HwPerfCounter>> perfCounterAllocator;
std::unique_ptr<TagAllocator<TimestampPacket>> timestampPacketAllocator;
@@ -194,7 +196,6 @@ class CommandStreamReceiver {
volatile uint32_t *tagAddress = nullptr;
GraphicsAllocation *tagAllocation = nullptr;
GraphicsAllocation *scratchAllocation = nullptr;
GraphicsAllocation *preemptionCsrAllocation = nullptr;
GraphicsAllocation *debugSurface = nullptr;
OSInterface *osInterface = nullptr;

View File

@@ -81,7 +81,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void handleEventsTimestampPacketTags(LinearStream &csr, DispatchFlags &dispatchFlags, Device &currentDevice);
virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){};
void createScratchSpaceAllocation(size_t requiredScratchSizeInBytes);
void addPipeControlWA(LinearStream &commandStream, bool flushDC);
void addDcFlushToPipeControl(typename GfxFamily::PIPE_CONTROL *pCmd, bool flushDC);
@@ -90,6 +89,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getSshHeapSize();
uint64_t getScratchPatchAddress();
void createScratchSpaceController(const HardwareInfo &hwInfoIn);
static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate);

View File

@@ -8,6 +8,7 @@
#include "runtime/command_stream/command_stream_receiver_hw.h"
#include "runtime/command_stream/experimental_command_buffer.h"
#include "runtime/command_stream/linear_stream.h"
#include "runtime/command_stream/scratch_space_controller_base.h"
#include "runtime/device/device.h"
#include "runtime/event/event.h"
#include "runtime/gtpin/gtpin_notify.h"
@@ -51,6 +52,7 @@ CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(const HardwareInfo &
if (DebugManager.flags.EnableTimestampPacket.get() != -1) {
timestampPacketWriteEnabled = !!DebugManager.flags.EnableTimestampPacket.get();
}
createScratchSpaceController(hwInfoIn);
}
template <typename GfxFamily>
@@ -237,22 +239,21 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.specialPipelineSelectMode;
size_t requiredScratchSizeInBytes = requiredScratchSize * device.getDeviceInfo().computeUnitsUsedForScratch;
auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations();
bool stateBaseAddressDirty = false;
if (requiredScratchSize && (!scratchAllocation || scratchAllocation->getUnderlyingBufferSize() < requiredScratchSizeInBytes)) {
if (scratchAllocation) {
scratchAllocation->updateTaskCount(this->taskCount, this->deviceIndex);
internalAllocationStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
}
createScratchSpaceAllocation(requiredScratchSizeInBytes);
overrideMediaVFEStateDirty(true);
if (is64bit && !force32BitAllocations) {
stateBaseAddressDirty = true;
bool checkVfeStateDirty = false;
if (requiredScratchSize) {
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
requiredScratchSize,
this->taskCount,
this->deviceIndex,
stateBaseAddressDirty,
checkVfeStateDirty);
if (checkVfeStateDirty) {
overrideMediaVFEStateDirty(true);
}
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
}
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
@@ -308,8 +309,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
uint64_t newGSHbase = 0;
GSBAFor32BitProgrammed = false;
if (is64bit && scratchAllocation && !force32BitAllocations) {
newGSHbase = (uint64_t)scratchAllocation->getUnderlyingBuffer() - PreambleHelper<GfxFamily>::getScratchSpaceOffsetFor64bit();
if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) {
newGSHbase = scratchSpaceController->calculateNewGSH();
} else if (is64bit && force32BitAllocations && dispatchFlags.GSBA32BitRequired) {
newGSHbase = getMemoryManager()->allocator32Bit->getBase();
GSBAFor32BitProgrammed = true;
@@ -381,9 +382,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->makeResident(*tagAllocation);
if (requiredScratchSize)
makeResident(*scratchAllocation);
if (preemptionCsrAllocation)
makeResident(*preemptionCsrAllocation);
@@ -608,22 +606,6 @@ void CommandStreamReceiverHw<GfxFamily>::addPipeControl(LinearStream &commandStr
}
}
template <typename GfxFamily>
uint64_t CommandStreamReceiverHw<GfxFamily>::getScratchPatchAddress() {
//for 32 bit scratch space pointer is being programmed in Media VFE State and is relative to 0 as General State Base Address
//for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit"
// and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state
uint64_t scratchAddress = 0;
if (requiredScratchSize) {
scratchAddress = scratchAllocation->getGpuAddressToPatch();
if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) {
//this is to avoid scratch allocation offset "0"
scratchAddress = PreambleHelper<GfxFamily>::getScratchSpaceOffsetFor64bit();
}
}
return scratchAddress;
}
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
size_t size = getRequiredCmdStreamSize(dispatchFlags, device);
@@ -821,7 +803,12 @@ void CommandStreamReceiverHw<GfxFamily>::handleEventsTimestampPacketTags(LinearS
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceAllocation(size_t requiredScratchSizeInBytes) {
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryInPreferredPool(AllocationFlags(true), 0, nullptr, requiredScratchSizeInBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE);
// Installs a ScratchSpaceControllerBase as this receiver's scratch space controller,
// handing it the hardware info, the execution environment and this receiver's
// internal allocation storage (which must already exist, as it is dereferenced here).
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController(const HardwareInfo &hwInfoIn) {
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(hwInfoIn, executionEnvironment, *internalAllocationStorage.get());
}
// Forwards to the scratch space controller, which decides what scratch address
// value has to be patched for the current allocation and addressing mode.
template <typename GfxFamily>
uint64_t CommandStreamReceiverHw<GfxFamily>::getScratchPatchAddress() {
return scratchSpaceController->getScratchPatchAddress();
}
} // namespace OCLRT

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/command_stream/scratch_space_controller.h"
#include "runtime/execution_environment/execution_environment.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/memory_manager/internal_allocation_storage.h"
#include "runtime/memory_manager/memory_manager.h"
namespace OCLRT {
// Binds the controller to the hardware description, the execution environment and
// the allocation storage it was given, then asks the HwHelper of the platform's
// render core family how many compute units are used for scratch.
ScratchSpaceController::ScratchSpaceController(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage)
    : hwInfo(info), executionEnvironment(environment), csrAllocationStorage(allocationStorage) {
    const auto renderCoreFamily = info.pPlatform->eRenderCoreFamily;
    computeUnitsUsedForScratch = HwHelper::get(renderCoreFamily).getComputeUnitsUsedForScratch(&hwInfo);
}
// Hands the scratch allocation still held at destruction time back to the memory
// manager; nothing happens when no allocation is held.
ScratchSpaceController::~ScratchSpaceController() {
    if (scratchAllocation == nullptr) {
        return;
    }
    getMemoryManager()->freeGraphicsMemory(scratchAllocation);
}
// Returns the memory manager owned by the execution environment.
// A missing memory manager is treated as an unrecoverable error.
MemoryManager *ScratchSpaceController::getMemoryManager() const {
    auto *memoryManager = executionEnvironment.memoryManager.get();
    UNRECOVERABLE_IF(memoryManager == nullptr);
    return memoryManager;
}
} // namespace OCLRT

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "runtime/indirect_heap/indirect_heap.h"
#include <cstddef>
#include <cstdint>
namespace OCLRT {
class Device;
class ExecutionEnvironment;
class GraphicsAllocation;
class InternalAllocationStorage;
class MemoryManager;
struct HardwareInfo;
// Abstract interface for managing the scratch space allocation on behalf of a
// command stream receiver. The controller holds the current scratch allocation and
// releases it through the memory manager in its destructor; derived classes decide
// how the allocation is created and how its address is programmed.
class ScratchSpaceController {
  public:
    // info, environment and allocationStorage are kept by reference and therefore
    // have to outlive the controller.
    ScratchSpaceController(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage);
    virtual ~ScratchSpaceController();

    // The destructor frees scratchAllocation, so a copied controller would free the
    // same allocation a second time. Copying is therefore disabled.
    ScratchSpaceController(const ScratchSpaceController &) = delete;
    ScratchSpaceController &operator=(const ScratchSpaceController &) = delete;

    // Returns the scratch allocation currently held, or nullptr when there is none.
    GraphicsAllocation *getScratchSpaceAllocation() {
        return scratchAllocation;
    }

    // Makes sure the scratch space satisfies the given per-thread requirement.
    //  sshBaseAddress               - base address of the surface state heap in use
    //  requiredPerThreadScratchSize - requested scratch size per thread
    //  currentTaskCount / deviceIdx - forwarded to implementations for task count tracking
    //  stateBaseAddressDirty        - out: implementation reports that state base address
    //                                 needs reprogramming
    //  vfeStateDirty                - out: implementation reports that media VFE state
    //                                 needs reprogramming
    virtual void setRequiredScratchSpace(void *sshBaseAddress,
                                         uint32_t requiredPerThreadScratchSize,
                                         uint32_t currentTaskCount,
                                         uint32_t deviceIdx,
                                         bool &stateBaseAddressDirty,
                                         bool &vfeStateDirty) = 0;
    // Returns the base address value derived from the current scratch allocation.
    virtual uint64_t calculateNewGSH() = 0;
    // Returns the scratch address value that has to be patched.
    virtual uint64_t getScratchPatchAddress() = 0;
    // Hook invoked with the heap type and heap pointer after heap memory was set up.
    virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0;

  protected:
    // Memory manager of the execution environment; its absence is unrecoverable.
    MemoryManager *getMemoryManager() const;

    const HardwareInfo &hwInfo;
    ExecutionEnvironment &executionEnvironment;
    // Current scratch allocation; released in the destructor when still set.
    GraphicsAllocation *scratchAllocation = nullptr;
    InternalAllocationStorage &csrAllocationStorage;
    // Size in bytes requested for the current scratch allocation.
    size_t scratchSizeBytes = 0;
    // Whether 32-bit allocations were forced when the scratch space was last (re)created.
    bool force32BitAllocation = false;
    // Number of compute units used for scratch, obtained from HwHelper in the constructor.
    uint32_t computeUnitsUsedForScratch = 0;
};
} // namespace OCLRT

View File

@@ -0,0 +1,74 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/command_stream/scratch_space_controller_base.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/helpers/preamble.h"
#include "runtime/memory_manager/memory_constants.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/memory_manager/internal_allocation_storage.h"
#include "runtime/memory_manager/memory_manager.h"
namespace OCLRT {
// Forwards all arguments to ScratchSpaceController; no additional state is set up here.
ScratchSpaceControllerBase::ScratchSpaceControllerBase(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage)
: ScratchSpaceController(info, environment, allocationStorage) {
}
// Ensures a scratch allocation large enough for the requested per-thread size.
//
// The total requirement is requiredPerThreadScratchSize multiplied by
// computeUnitsUsedForScratch. When that is non-zero and either no allocation exists
// or the current one is smaller, the old allocation (if any) is stamped with the
// current task count and handed to the allocation storage as a temporary allocation,
// and a new allocation of the required size is created.
//
//  sshBaseAddress        - not used by this implementation
//  stateBaseAddressDirty - set to true when a new allocation was created on a 64-bit
//                          build without forced 32-bit allocations; never cleared here
//  vfeStateDirty         - set to true whenever a new allocation was created; never
//                          cleared here
void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
                                                         uint32_t requiredPerThreadScratchSize,
                                                         uint32_t currentTaskCount,
                                                         uint32_t deviceIdx,
                                                         bool &stateBaseAddressDirty,
                                                         bool &vfeStateDirty) {
    // Widen before multiplying: both factors are 32-bit, so the product would otherwise
    // be computed in 32-bit arithmetic and wrap before being stored in size_t.
    const size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSize) * computeUnitsUsedForScratch;
    if (requiredScratchSizeInBytes && (!scratchAllocation || scratchSizeBytes < requiredScratchSizeInBytes)) {
        if (scratchAllocation) {
            // Ownership of the outgrown allocation moves to the allocation storage.
            scratchAllocation->updateTaskCount(currentTaskCount, deviceIdx);
            csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
        }
        scratchSizeBytes = requiredScratchSizeInBytes;
        createScratchSpaceAllocation();
        vfeStateDirty = true;
        force32BitAllocation = getMemoryManager()->peekForce32BitAllocations();
        if (is64bit && !force32BitAllocation) {
            stateBaseAddressDirty = true;
        }
    }
}
void ScratchSpaceControllerBase::createScratchSpaceAllocation() {
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryInPreferredPool(AllocationFlags(true), 0, nullptr, scratchSizeBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE);
UNRECOVERABLE_IF(scratchAllocation == nullptr);
}
// Returns the address of the scratch allocation's underlying buffer lowered by the
// 64-bit scratch space offset reported by the platform's HwHelper.
// scratchAllocation is dereferenced unconditionally, so callers must make sure an
// allocation exists before calling.
uint64_t ScratchSpaceControllerBase::calculateNewGSH() {
    const auto offsetFor64bit = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily).getScratchSpaceOffsetFor64bit();
    const auto underlyingBufferAddress = reinterpret_cast<uint64_t>(scratchAllocation->getUnderlyingBuffer());
    return underlyingBufferAddress - offsetFor64bit;
}
// Returns the scratch address to patch:
//  - 0 when there is no scratch allocation,
//  - for 32 bit, the allocation's own patch address: the scratch space pointer is
//    programmed in Media VFE State and is relative to 0 as General State Base Address,
//  - for 64 bit, the fixed scratchSpaceOffsetFor64bit: the scratch space pointer is
//    programmed as "General State Base Address - scratchSpaceOffsetFor64bit" and
//    "0 + scratchSpaceOffsetFor64bit" is programmed in Media VFE state.
uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() {
    if (!scratchAllocation) {
        return 0;
    }

    const uint64_t allocationPatchAddress = scratchAllocation->getGpuAddressToPatch();
    const bool patchRelativeToZero = !is64bit || getMemoryManager()->peekForce32BitAllocations();
    if (patchRelativeToZero) {
        return allocationPatchAddress;
    }

    // this is to avoid scratch allocation offset "0"
    return HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily).getScratchSpaceOffsetFor64bit();
}
// Intentionally empty: this implementation does nothing with the heap it is given.
void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) {
}
} // namespace OCLRT

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "runtime/command_stream/scratch_space_controller.h"
namespace OCLRT {
// Default ScratchSpaceController implementation: keeps a single scratch allocation
// that is replaced by a larger one whenever the required size grows.
class ScratchSpaceControllerBase : public ScratchSpaceController {
public:
// Forwards all arguments to the ScratchSpaceController constructor.
ScratchSpaceControllerBase(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage);
// (Re)creates the scratch allocation when the required total size is non-zero and
// exceeds what is currently held; reports the needed state reprogramming through
// stateBaseAddressDirty and vfeStateDirty. sshBaseAddress is not used here.
void setRequiredScratchSpace(void *sshBaseAddress,
uint32_t requiredPerThreadScratchSize,
uint32_t currentTaskCount,
uint32_t deviceIdx,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
// Underlying buffer address of the scratch allocation minus the 64-bit scratch space offset.
uint64_t calculateNewGSH() override;
// 0 without an allocation; otherwise the allocation's patch address, or the 64-bit
// scratch space offset on 64-bit builds without forced 32-bit allocations.
uint64_t getScratchPatchAddress() override;
// No-op in this implementation.
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override;
protected:
// Allocates scratchSizeBytes of scratch surface memory and stores it in scratchAllocation.
void createScratchSpaceAllocation();
};
} // namespace OCLRT