Revert "refactor: create new members for storing spill and private memory in ...

This reverts commit 87eb5f554a.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2024-01-23 08:46:41 +01:00
committed by Compute-Runtime-Automation
parent b34e8646ac
commit f9f9035b95
63 changed files with 480 additions and 452 deletions

View File

@@ -566,17 +566,17 @@ FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const {
return flushStamp->peekStamp();
}
void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSlot0Size, uint32_t newRequiredScratchSlot1Size) {
if (newRequiredScratchSlot0Size > requiredScratchSlot0Size) {
requiredScratchSlot0Size = newRequiredScratchSlot0Size;
void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) {
if (newRequiredScratchSize > requiredScratchSize) {
requiredScratchSize = newRequiredScratchSize;
}
if (newRequiredScratchSlot1Size > requiredScratchSlot1Size) {
requiredScratchSlot1Size = newRequiredScratchSlot1Size;
if (newRequiredPrivateScratchSize > requiredPrivateScratchSize) {
requiredPrivateScratchSize = newRequiredPrivateScratchSize;
}
}
GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() {
return scratchSpaceController->getScratchSpaceSlot0Allocation();
return scratchSpaceController->getScratchSpaceAllocation();
}
void CommandStreamReceiver::overwriteFlatBatchBufferHelper(FlatBatchBufferHelper *newHelper) {

View File

@@ -179,7 +179,7 @@ class CommandStreamReceiver {
bool getBtdCommandDirty() const { return btdCommandDirty; }
bool isRayTracingStateProgramingNeeded(Device &device) const;
void setRequiredScratchSizes(uint32_t newRequiredScratchSlot0Size, uint32_t newRequiredPrivateScratchSlot1Size);
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
GraphicsAllocation *getScratchAllocation();
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
GraphicsAllocation *allocateDebugSurface(size_t size);
@@ -534,8 +534,8 @@ class CommandStreamReceiver {
uint32_t latestSentStatelessMocsConfig = 0;
uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount;
uint32_t requiredScratchSlot0Size = 0;
uint32_t requiredScratchSlot1Size = 0;
uint32_t requiredScratchSize = 0;
uint32_t requiredPrivateScratchSize = 0;
uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::notSet;
KernelExecutionType lastKernelExecutionType = KernelExecutionType::defaultType;
MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::notApplicable;

View File

@@ -269,13 +269,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
flushData.stateComputeModeFullConfigurationNeeded = getStateComputeModeDirty();
flushData.stateBaseAddressFullConfigurationNeeded = getGSBAStateDirty();
if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSlot0Size > 0 || this->requiredScratchSlot1Size > 0)) {
if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSize > 0 || this->requiredPrivateScratchSize > 0)) {
bool checkFeStateDirty = false;
bool checkSbaStateDirty = false;
scratchSpaceController->setRequiredScratchSpace(dispatchFlags.sshCpuBase,
0u,
this->requiredScratchSlot0Size,
this->requiredScratchSlot1Size,
this->requiredScratchSize,
this->requiredPrivateScratchSize,
this->taskCount,
*this->osContext,
checkSbaStateDirty,
@@ -283,11 +283,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
flushData.frontEndFullConfigurationNeeded |= checkFeStateDirty;
flushData.stateBaseAddressFullConfigurationNeeded |= checkSbaStateDirty;
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
if (scratchSpaceController->getScratchSpaceAllocation()) {
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
}
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
if (scratchSpaceController->getPrivateScratchSpaceAllocation()) {
makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation());
}
}
@@ -446,11 +446,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
bool stateBaseAddressDirty = false;
bool checkVfeStateDirty = false;
if (ssh && (requiredScratchSlot0Size || requiredScratchSlot1Size)) {
if (ssh && (requiredScratchSize || requiredPrivateScratchSize)) {
scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(),
0u,
requiredScratchSlot0Size,
requiredScratchSlot1Size,
requiredScratchSize,
requiredPrivateScratchSize,
this->taskCount,
*this->osContext,
stateBaseAddressDirty,
@@ -458,11 +458,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (checkVfeStateDirty) {
setMediaVFEStateDirty(true);
}
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
if (scratchSpaceController->getScratchSpaceAllocation()) {
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
}
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
if (scratchSpaceController->getPrivateScratchSpaceAllocation()) {
makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation());
}
}
@@ -1036,7 +1036,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo);
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
PreambleHelper<GfxFamily>::programVfeState(
pVfeState, peekRootDeviceEnvironment(), requiredScratchSlot0Size, getScratchPatchAddress(),
pVfeState, peekRootDeviceEnvironment(), requiredScratchSize, getScratchPatchAddress(),
maxFrontEndThreads, streamProperties);
auto commandOffset = PreambleHelper<GfxFamily>::getScratchSpaceAddressOffsetForVfeState(&csr, pVfeState);
@@ -1757,10 +1757,10 @@ inline void CommandStreamReceiverHw<GfxFamily>::reprogramStateBaseAddress(const
uint64_t newGshBase = 0;
gsbaFor32BitProgrammed = false;
if (is64bit && scratchSpaceController->getScratchSpaceSlot0Allocation() && !force32BitAllocations) {
if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) {
newGshBase = scratchSpaceController->calculateNewGSH();
} else if (is64bit && force32BitAllocations && dispatchFlags.gsba32BitRequired) {
bool useLocalMemory = scratchSpaceController->getScratchSpaceSlot0Allocation() ? scratchSpaceController->getScratchSpaceSlot0Allocation()->isAllocatedInLocalMemoryPool() : false;
bool useLocalMemory = scratchSpaceController->getScratchSpaceAllocation() ? scratchSpaceController->getScratchSpaceAllocation()->isAllocatedInLocalMemoryPool() : false;
newGshBase = getMemoryManager()->getExternalHeapBaseAddress(rootDeviceIndex, useLocalMemory);
gsbaFor32BitProgrammed = true;
}
@@ -1950,7 +1950,7 @@ void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushFrontEndCommand(I
auto feStateCmdSpace = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csrStream, peekHwInfo(), engineGroupType);
PreambleHelper<GfxFamily>::programVfeState(feStateCmdSpace,
peekRootDeviceEnvironment(),
requiredScratchSlot0Size,
requiredScratchSize,
getScratchPatchAddress(),
device.getDeviceInfo().maxFrontEndThreads,
this->streamProperties);

View File

@@ -24,11 +24,11 @@ ScratchSpaceController::ScratchSpaceController(uint32_t rootDeviceIndex, Executi
}
ScratchSpaceController::~ScratchSpaceController() {
if (scratchSlot0Allocation) {
getMemoryManager()->freeGraphicsMemory(scratchSlot0Allocation);
if (scratchAllocation) {
getMemoryManager()->freeGraphicsMemory(scratchAllocation);
}
if (scratchSlot1Allocation) {
getMemoryManager()->freeGraphicsMemory(scratchSlot1Allocation);
if (privateScratchAllocation) {
getMemoryManager()->freeGraphicsMemory(privateScratchAllocation);
}
}

View File

@@ -34,16 +34,16 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage);
virtual ~ScratchSpaceController();
MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceSlot0Allocation() {
return scratchSlot0Allocation;
MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceAllocation() {
return scratchAllocation;
}
GraphicsAllocation *getScratchSpaceSlot1Allocation() {
return scratchSlot1Allocation;
GraphicsAllocation *getPrivateScratchSpaceAllocation() {
return privateScratchAllocation;
}
virtual void setRequiredScratchSpace(void *sshBaseAddress,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -51,25 +51,25 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
virtual uint64_t calculateNewGSH() = 0;
virtual uint64_t getScratchPatchAddress() = 0;
inline uint32_t getPerThreadScratchSpaceSizeSlot0() {
return static_cast<uint32_t>(scratchSlot0SizeInBytes / computeUnitsUsedForScratch);
inline uint32_t getPerThreadScratchSpaceSize() {
return static_cast<uint32_t>(scratchSizeBytes / computeUnitsUsedForScratch);
}
inline uint32_t getPerThreadScratchSizeSlot1() {
return static_cast<uint32_t>(scratchSlot1SizeInBytes / computeUnitsUsedForScratch);
inline uint32_t getPerThreadPrivateScratchSize() {
return static_cast<uint32_t>(privateScratchSizeBytes / computeUnitsUsedForScratch);
}
virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0;
virtual void programHeaps(HeapContainer &heapContainer,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) = 0;
virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -81,11 +81,11 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
const uint32_t rootDeviceIndex;
ExecutionEnvironment &executionEnvironment;
GraphicsAllocation *scratchSlot0Allocation = nullptr;
GraphicsAllocation *scratchSlot1Allocation = nullptr;
GraphicsAllocation *scratchAllocation = nullptr;
GraphicsAllocation *privateScratchAllocation = nullptr;
InternalAllocationStorage &csrAllocationStorage;
size_t scratchSlot0SizeInBytes = 0;
size_t scratchSlot1SizeInBytes = 0;
size_t scratchSizeBytes = 0;
size_t privateScratchSizeBytes = 0;
bool force32BitAllocation = false;
uint32_t computeUnitsUsedForScratch = 0;
};

View File

@@ -26,19 +26,19 @@ ScratchSpaceControllerBase::ScratchSpaceControllerBase(uint32_t rootDeviceIndex,
void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeSlot0 * computeUnitsUsedForScratch;
if (requiredScratchSizeInBytes && (scratchSlot0SizeInBytes < requiredScratchSizeInBytes)) {
if (scratchSlot0Allocation) {
scratchSlot0Allocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchSlot0Allocation), TEMPORARY_ALLOCATION);
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch;
if (requiredScratchSizeInBytes && (scratchSizeBytes < requiredScratchSizeInBytes)) {
if (scratchAllocation) {
scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
}
scratchSlot0SizeInBytes = requiredScratchSizeInBytes;
scratchSizeBytes = requiredScratchSizeInBytes;
createScratchSpaceAllocation();
vfeStateDirty = true;
force32BitAllocation = getMemoryManager()->peekForce32BitAllocations();
@@ -49,14 +49,14 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
}
void ScratchSpaceControllerBase::createScratchSpaceAllocation() {
scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSlot0SizeInBytes, AllocationType::scratchSurface, this->csrAllocationStorage.getDeviceBitfield()});
UNRECOVERABLE_IF(scratchSlot0Allocation == nullptr);
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSizeBytes, AllocationType::scratchSurface, this->csrAllocationStorage.getDeviceBitfield()});
UNRECOVERABLE_IF(scratchAllocation == nullptr);
}
uint64_t ScratchSpaceControllerBase::calculateNewGSH() {
uint64_t gsh = 0;
if (scratchSlot0Allocation) {
gsh = scratchSlot0Allocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit;
if (scratchAllocation) {
gsh = scratchAllocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit;
}
return gsh;
}
@@ -65,8 +65,8 @@ uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() {
// for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit"
// and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state
uint64_t scratchAddress = 0;
if (scratchSlot0Allocation) {
scratchAddress = scratchSlot0Allocation->getGpuAddressToPatch();
if (scratchAllocation) {
scratchAddress = scratchAllocation->getGpuAddressToPatch();
if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) {
// this is to avoid scratch allocation offset "0"
scratchAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit;
@@ -85,8 +85,8 @@ void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, Indire
void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
uint32_t offset,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -94,8 +94,8 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
}
void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,

View File

@@ -16,8 +16,8 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
void setRequiredScratchSpace(void *sshBaseAddress,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -29,15 +29,15 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override;
void programHeaps(HeapContainer &heapContainer,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,

View File

@@ -30,9 +30,9 @@ ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t
auto &gfxCoreHelper = environment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>();
singleSurfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
if (debugManager.flags.EnablePrivateScratchSlot1.get() != -1) {
twoSlotScratchSpaceSupported = !!debugManager.flags.EnablePrivateScratchSlot1.get();
privateScratchSpaceSupported = !!debugManager.flags.EnablePrivateScratchSlot1.get();
}
if (twoSlotScratchSpaceSupported) {
if (privateScratchSpaceSupported) {
ScratchSpaceControllerXeHPAndLater::stateSlotsCount *= 2;
}
}
@@ -40,7 +40,7 @@ ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t
void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId) {
if (surfaceStateHeap != newSsh) {
surfaceStateHeap = static_cast<char *>(newSsh);
if (scratchSlot0Allocation == nullptr) {
if (scratchAllocation == nullptr) {
cfeDirty = false;
} else {
if (changeId) {
@@ -54,15 +54,15 @@ void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDir
void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAddress,
uint32_t offset,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
setNewSshPtr(sshBaseAddress, vfeStateDirty, offset == 0 ? true : false);
bool scratchSurfaceDirty = false;
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
if (scratchSurfaceDirty) {
vfeStateDirty = true;
updateSlots = true;
@@ -75,7 +75,7 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceState() {
slotId++;
}
UNRECOVERABLE_IF(slotId >= stateSlotsCount);
UNRECOVERABLE_IF(scratchSlot0Allocation == nullptr && scratchSlot1Allocation == nullptr);
UNRECOVERABLE_IF(scratchAllocation == nullptr && privateScratchAllocation == nullptr);
void *surfaceStateForScratchAllocation = ptrOffset(static_cast<void *>(surfaceStateHeap), getOffsetToSurfaceState(slotId + sshOffset));
programSurfaceStateAtPtr(surfaceStateForScratchAllocation);
@@ -84,23 +84,23 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceState() {
void ScratchSpaceControllerXeHPAndLater::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) {
auto &gfxCoreHelper = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>();
uint64_t scratchAllocationAddress = 0u;
if (scratchSlot0Allocation) {
scratchAllocationAddress = scratchSlot0Allocation->getGpuAddress();
if (scratchAllocation) {
scratchAllocationAddress = scratchAllocation->getGpuAddress();
}
gfxCoreHelper.setRenderSurfaceStateForScratchResource(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex],
surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0,
perThreadScratchSize, nullptr, false, scratchType, false, true);
if (twoSlotScratchSpaceSupported) {
void *surfaceStateForSlot1Allocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize);
uint64_t scratchSlot1AllocationAddress = 0u;
if (privateScratchSpaceSupported) {
void *surfaceStateForPrivateScratchAllocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize);
uint64_t privateScratchAllocationAddress = 0u;
if (scratchSlot1Allocation) {
scratchSlot1AllocationAddress = scratchSlot1Allocation->getGpuAddress();
if (privateScratchAllocation) {
privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress();
}
gfxCoreHelper.setRenderSurfaceStateForScratchResource(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex],
surfaceStateForSlot1Allocation, computeUnitsUsedForScratch,
scratchSlot1AllocationAddress, 0, perThreadScratchSpaceSlot1Size, nullptr, false,
surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch,
privateScratchAllocationAddress, 0, perThreadPrivateScratchSize, nullptr, false,
scratchType, false, true);
}
}
@@ -110,7 +110,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::calculateNewGSH() {
}
uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() {
uint64_t scratchAddress = 0u;
if (scratchSlot0Allocation || scratchSlot1Allocation) {
if (scratchAllocation || privateScratchAllocation) {
scratchAddress = static_cast<uint64_t>(getOffsetToSurfaceState(slotId + sshOffset));
}
return scratchAddress;
@@ -118,7 +118,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() {
size_t ScratchSpaceControllerXeHPAndLater::getOffsetToSurfaceState(uint32_t requiredSlotCount) const {
auto offset = requiredSlotCount * singleSurfaceStateSize;
if (twoSlotScratchSpaceSupported) {
if (privateScratchSpaceSupported) {
offset *= 2;
}
return offset;
@@ -131,17 +131,17 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType
}
void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
NEO::CommandStreamReceiver *csr) {
bool scratchSurfaceDirty = false;
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
if (scratchSurfaceDirty) {
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (twoSlotScratchSpaceSupported ? 2 : 1), scratchSlot0Allocation, BindlessHeapsHelper::specialSsh);
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 2 : 1), scratchAllocation, BindlessHeapsHelper::specialSsh);
programSurfaceStateAtPtr(bindlessSS.ssPtr);
vfeStateDirty = true;
}
@@ -150,62 +150,62 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B
}
}
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,
bool &vfeStateDirty) {
uint32_t requiredPerThreadScratchSizeSlot0AlignedUp = requiredPerThreadScratchSizeSlot0;
if (!Math::isPow2(requiredPerThreadScratchSizeSlot0AlignedUp)) {
requiredPerThreadScratchSizeSlot0AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot0);
uint32_t requiredPerThreadScratchSizeAlignedUp = requiredPerThreadScratchSize;
if (!Math::isPow2(requiredPerThreadScratchSizeAlignedUp)) {
requiredPerThreadScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSize);
}
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot0AlignedUp) * computeUnitsUsedForScratch;
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
scratchSurfaceDirty = false;
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
if (scratchSlot0SizeInBytes < requiredScratchSizeInBytes) {
if (scratchSlot0Allocation) {
scratchSlot0Allocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchSlot0Allocation), TEMPORARY_ALLOCATION);
if (scratchSizeBytes < requiredScratchSizeInBytes) {
if (scratchAllocation) {
scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
}
scratchSurfaceDirty = true;
scratchSlot0SizeInBytes = requiredScratchSizeInBytes;
perThreadScratchSize = requiredPerThreadScratchSizeSlot0AlignedUp;
AllocationProperties properties{this->rootDeviceIndex, true, scratchSlot0SizeInBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
scratchSizeBytes = requiredScratchSizeInBytes;
perThreadScratchSize = requiredPerThreadScratchSizeAlignedUp;
AllocationProperties properties{this->rootDeviceIndex, true, scratchSizeBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
}
if (twoSlotScratchSpaceSupported) {
uint32_t requiredPerThreadScratchSizeSlot1AlignedUp = requiredPerThreadScratchSizeSlot1;
if (!Math::isPow2(requiredPerThreadScratchSizeSlot1AlignedUp)) {
requiredPerThreadScratchSizeSlot1AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot1);
if (privateScratchSpaceSupported) {
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = requiredPerThreadPrivateScratchSize;
if (!Math::isPow2(requiredPerThreadPrivateScratchSizeAlignedUp)) {
requiredPerThreadPrivateScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadPrivateScratchSize);
}
size_t requiredScratchSlot1SizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot1AlignedUp) * computeUnitsUsedForScratch;
if (scratchSlot1SizeInBytes < requiredScratchSlot1SizeInBytes) {
if (scratchSlot1Allocation) {
scratchSlot1Allocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchSlot1Allocation), TEMPORARY_ALLOCATION);
size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
if (privateScratchAllocation) {
privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(privateScratchAllocation), TEMPORARY_ALLOCATION);
}
scratchSlot1SizeInBytes = requiredScratchSlot1SizeInBytes;
perThreadScratchSpaceSlot1Size = requiredPerThreadScratchSizeSlot1AlignedUp;
privateScratchSizeBytes = requiredPrivateScratchSizeInBytes;
perThreadPrivateScratchSize = requiredPerThreadPrivateScratchSizeAlignedUp;
scratchSurfaceDirty = true;
AllocationProperties properties{this->rootDeviceIndex, true, scratchSlot1SizeInBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
scratchSlot1Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
AllocationProperties properties{this->rootDeviceIndex, true, privateScratchSizeBytes, AllocationType::privateSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
}
}
}
void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContainer,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
sshOffset = scratchSlot;
updateSlots = false;
setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);
setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);
for (uint32_t i = 1; i < heapContainer.size(); ++i) {
surfaceStateHeap = static_cast<char *>(heapContainer[i]->getUnderlyingBuffer());

View File

@@ -22,8 +22,8 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
void setRequiredScratchSpace(void *sshBaseAddress,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -36,15 +36,15 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
void programHeaps(HeapContainer &heapContainer,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -54,8 +54,8 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
protected:
MOCKABLE_VIRTUAL void programSurfaceState();
MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation);
MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -66,13 +66,13 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
bool updateSlots = true;
uint32_t stateSlotsCount = 16;
static const uint32_t scratchType = 6;
bool twoSlotScratchSpaceSupported = true;
bool privateScratchSpaceSupported = true;
char *surfaceStateHeap = nullptr;
size_t singleSurfaceStateSize = 0;
uint32_t slotId = 0;
uint32_t perThreadScratchSize = 0;
uint32_t perThreadScratchSpaceSlot1Size = 0;
uint32_t perThreadPrivateScratchSize = 0;
uint32_t sshOffset = 0;
SurfaceStateInHeapInfo bindlessSS = {};
};