mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
Revert "refactor: create new members for storing spill and private memory in ..."
This reverts commit 87eb5f554a.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b34e8646ac
commit
f9f9035b95
@@ -566,17 +566,17 @@ FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const {
|
||||
return flushStamp->peekStamp();
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSlot0Size, uint32_t newRequiredScratchSlot1Size) {
|
||||
if (newRequiredScratchSlot0Size > requiredScratchSlot0Size) {
|
||||
requiredScratchSlot0Size = newRequiredScratchSlot0Size;
|
||||
void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) {
|
||||
if (newRequiredScratchSize > requiredScratchSize) {
|
||||
requiredScratchSize = newRequiredScratchSize;
|
||||
}
|
||||
if (newRequiredScratchSlot1Size > requiredScratchSlot1Size) {
|
||||
requiredScratchSlot1Size = newRequiredScratchSlot1Size;
|
||||
if (newRequiredPrivateScratchSize > requiredPrivateScratchSize) {
|
||||
requiredPrivateScratchSize = newRequiredPrivateScratchSize;
|
||||
}
|
||||
}
|
||||
|
||||
GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() {
|
||||
return scratchSpaceController->getScratchSpaceSlot0Allocation();
|
||||
return scratchSpaceController->getScratchSpaceAllocation();
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::overwriteFlatBatchBufferHelper(FlatBatchBufferHelper *newHelper) {
|
||||
|
||||
@@ -179,7 +179,7 @@ class CommandStreamReceiver {
|
||||
bool getBtdCommandDirty() const { return btdCommandDirty; }
|
||||
bool isRayTracingStateProgramingNeeded(Device &device) const;
|
||||
|
||||
void setRequiredScratchSizes(uint32_t newRequiredScratchSlot0Size, uint32_t newRequiredPrivateScratchSlot1Size);
|
||||
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
|
||||
GraphicsAllocation *getScratchAllocation();
|
||||
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
|
||||
GraphicsAllocation *allocateDebugSurface(size_t size);
|
||||
@@ -534,8 +534,8 @@ class CommandStreamReceiver {
|
||||
uint32_t latestSentStatelessMocsConfig = 0;
|
||||
uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount;
|
||||
|
||||
uint32_t requiredScratchSlot0Size = 0;
|
||||
uint32_t requiredScratchSlot1Size = 0;
|
||||
uint32_t requiredScratchSize = 0;
|
||||
uint32_t requiredPrivateScratchSize = 0;
|
||||
uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::notSet;
|
||||
KernelExecutionType lastKernelExecutionType = KernelExecutionType::defaultType;
|
||||
MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::notApplicable;
|
||||
|
||||
@@ -269,13 +269,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
||||
flushData.stateComputeModeFullConfigurationNeeded = getStateComputeModeDirty();
|
||||
flushData.stateBaseAddressFullConfigurationNeeded = getGSBAStateDirty();
|
||||
|
||||
if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSlot0Size > 0 || this->requiredScratchSlot1Size > 0)) {
|
||||
if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSize > 0 || this->requiredPrivateScratchSize > 0)) {
|
||||
bool checkFeStateDirty = false;
|
||||
bool checkSbaStateDirty = false;
|
||||
scratchSpaceController->setRequiredScratchSpace(dispatchFlags.sshCpuBase,
|
||||
0u,
|
||||
this->requiredScratchSlot0Size,
|
||||
this->requiredScratchSlot1Size,
|
||||
this->requiredScratchSize,
|
||||
this->requiredPrivateScratchSize,
|
||||
this->taskCount,
|
||||
*this->osContext,
|
||||
checkSbaStateDirty,
|
||||
@@ -283,11 +283,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
||||
flushData.frontEndFullConfigurationNeeded |= checkFeStateDirty;
|
||||
flushData.stateBaseAddressFullConfigurationNeeded |= checkSbaStateDirty;
|
||||
|
||||
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
|
||||
makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
|
||||
if (scratchSpaceController->getScratchSpaceAllocation()) {
|
||||
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
|
||||
}
|
||||
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
|
||||
makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
|
||||
if (scratchSpaceController->getPrivateScratchSpaceAllocation()) {
|
||||
makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -446,11 +446,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
bool stateBaseAddressDirty = false;
|
||||
|
||||
bool checkVfeStateDirty = false;
|
||||
if (ssh && (requiredScratchSlot0Size || requiredScratchSlot1Size)) {
|
||||
if (ssh && (requiredScratchSize || requiredPrivateScratchSize)) {
|
||||
scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(),
|
||||
0u,
|
||||
requiredScratchSlot0Size,
|
||||
requiredScratchSlot1Size,
|
||||
requiredScratchSize,
|
||||
requiredPrivateScratchSize,
|
||||
this->taskCount,
|
||||
*this->osContext,
|
||||
stateBaseAddressDirty,
|
||||
@@ -458,11 +458,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (checkVfeStateDirty) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
|
||||
makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
|
||||
if (scratchSpaceController->getScratchSpaceAllocation()) {
|
||||
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
|
||||
}
|
||||
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
|
||||
makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
|
||||
if (scratchSpaceController->getPrivateScratchSpaceAllocation()) {
|
||||
makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1036,7 +1036,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
|
||||
auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo);
|
||||
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
|
||||
PreambleHelper<GfxFamily>::programVfeState(
|
||||
pVfeState, peekRootDeviceEnvironment(), requiredScratchSlot0Size, getScratchPatchAddress(),
|
||||
pVfeState, peekRootDeviceEnvironment(), requiredScratchSize, getScratchPatchAddress(),
|
||||
maxFrontEndThreads, streamProperties);
|
||||
auto commandOffset = PreambleHelper<GfxFamily>::getScratchSpaceAddressOffsetForVfeState(&csr, pVfeState);
|
||||
|
||||
@@ -1757,10 +1757,10 @@ inline void CommandStreamReceiverHw<GfxFamily>::reprogramStateBaseAddress(const
|
||||
|
||||
uint64_t newGshBase = 0;
|
||||
gsbaFor32BitProgrammed = false;
|
||||
if (is64bit && scratchSpaceController->getScratchSpaceSlot0Allocation() && !force32BitAllocations) {
|
||||
if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) {
|
||||
newGshBase = scratchSpaceController->calculateNewGSH();
|
||||
} else if (is64bit && force32BitAllocations && dispatchFlags.gsba32BitRequired) {
|
||||
bool useLocalMemory = scratchSpaceController->getScratchSpaceSlot0Allocation() ? scratchSpaceController->getScratchSpaceSlot0Allocation()->isAllocatedInLocalMemoryPool() : false;
|
||||
bool useLocalMemory = scratchSpaceController->getScratchSpaceAllocation() ? scratchSpaceController->getScratchSpaceAllocation()->isAllocatedInLocalMemoryPool() : false;
|
||||
newGshBase = getMemoryManager()->getExternalHeapBaseAddress(rootDeviceIndex, useLocalMemory);
|
||||
gsbaFor32BitProgrammed = true;
|
||||
}
|
||||
@@ -1950,7 +1950,7 @@ void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushFrontEndCommand(I
|
||||
auto feStateCmdSpace = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csrStream, peekHwInfo(), engineGroupType);
|
||||
PreambleHelper<GfxFamily>::programVfeState(feStateCmdSpace,
|
||||
peekRootDeviceEnvironment(),
|
||||
requiredScratchSlot0Size,
|
||||
requiredScratchSize,
|
||||
getScratchPatchAddress(),
|
||||
device.getDeviceInfo().maxFrontEndThreads,
|
||||
this->streamProperties);
|
||||
|
||||
@@ -24,11 +24,11 @@ ScratchSpaceController::ScratchSpaceController(uint32_t rootDeviceIndex, Executi
|
||||
}
|
||||
|
||||
ScratchSpaceController::~ScratchSpaceController() {
|
||||
if (scratchSlot0Allocation) {
|
||||
getMemoryManager()->freeGraphicsMemory(scratchSlot0Allocation);
|
||||
if (scratchAllocation) {
|
||||
getMemoryManager()->freeGraphicsMemory(scratchAllocation);
|
||||
}
|
||||
if (scratchSlot1Allocation) {
|
||||
getMemoryManager()->freeGraphicsMemory(scratchSlot1Allocation);
|
||||
if (privateScratchAllocation) {
|
||||
getMemoryManager()->freeGraphicsMemory(privateScratchAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -34,16 +34,16 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
|
||||
ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage);
|
||||
virtual ~ScratchSpaceController();
|
||||
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceSlot0Allocation() {
|
||||
return scratchSlot0Allocation;
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceAllocation() {
|
||||
return scratchAllocation;
|
||||
}
|
||||
GraphicsAllocation *getScratchSpaceSlot1Allocation() {
|
||||
return scratchSlot1Allocation;
|
||||
GraphicsAllocation *getPrivateScratchSpaceAllocation() {
|
||||
return privateScratchAllocation;
|
||||
}
|
||||
virtual void setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -51,25 +51,25 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
|
||||
|
||||
virtual uint64_t calculateNewGSH() = 0;
|
||||
virtual uint64_t getScratchPatchAddress() = 0;
|
||||
inline uint32_t getPerThreadScratchSpaceSizeSlot0() {
|
||||
return static_cast<uint32_t>(scratchSlot0SizeInBytes / computeUnitsUsedForScratch);
|
||||
inline uint32_t getPerThreadScratchSpaceSize() {
|
||||
return static_cast<uint32_t>(scratchSizeBytes / computeUnitsUsedForScratch);
|
||||
}
|
||||
inline uint32_t getPerThreadScratchSizeSlot1() {
|
||||
return static_cast<uint32_t>(scratchSlot1SizeInBytes / computeUnitsUsedForScratch);
|
||||
inline uint32_t getPerThreadPrivateScratchSize() {
|
||||
return static_cast<uint32_t>(privateScratchSizeBytes / computeUnitsUsedForScratch);
|
||||
}
|
||||
|
||||
virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0;
|
||||
virtual void programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) = 0;
|
||||
virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -81,11 +81,11 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
|
||||
|
||||
const uint32_t rootDeviceIndex;
|
||||
ExecutionEnvironment &executionEnvironment;
|
||||
GraphicsAllocation *scratchSlot0Allocation = nullptr;
|
||||
GraphicsAllocation *scratchSlot1Allocation = nullptr;
|
||||
GraphicsAllocation *scratchAllocation = nullptr;
|
||||
GraphicsAllocation *privateScratchAllocation = nullptr;
|
||||
InternalAllocationStorage &csrAllocationStorage;
|
||||
size_t scratchSlot0SizeInBytes = 0;
|
||||
size_t scratchSlot1SizeInBytes = 0;
|
||||
size_t scratchSizeBytes = 0;
|
||||
size_t privateScratchSizeBytes = 0;
|
||||
bool force32BitAllocation = false;
|
||||
uint32_t computeUnitsUsedForScratch = 0;
|
||||
};
|
||||
|
||||
@@ -26,19 +26,19 @@ ScratchSpaceControllerBase::ScratchSpaceControllerBase(uint32_t rootDeviceIndex,
|
||||
|
||||
void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) {
|
||||
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeSlot0 * computeUnitsUsedForScratch;
|
||||
if (requiredScratchSizeInBytes && (scratchSlot0SizeInBytes < requiredScratchSizeInBytes)) {
|
||||
if (scratchSlot0Allocation) {
|
||||
scratchSlot0Allocation->updateTaskCount(currentTaskCount, osContext.getContextId());
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchSlot0Allocation), TEMPORARY_ALLOCATION);
|
||||
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch;
|
||||
if (requiredScratchSizeInBytes && (scratchSizeBytes < requiredScratchSizeInBytes)) {
|
||||
if (scratchAllocation) {
|
||||
scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
|
||||
}
|
||||
scratchSlot0SizeInBytes = requiredScratchSizeInBytes;
|
||||
scratchSizeBytes = requiredScratchSizeInBytes;
|
||||
createScratchSpaceAllocation();
|
||||
vfeStateDirty = true;
|
||||
force32BitAllocation = getMemoryManager()->peekForce32BitAllocations();
|
||||
@@ -49,14 +49,14 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerBase::createScratchSpaceAllocation() {
|
||||
scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSlot0SizeInBytes, AllocationType::scratchSurface, this->csrAllocationStorage.getDeviceBitfield()});
|
||||
UNRECOVERABLE_IF(scratchSlot0Allocation == nullptr);
|
||||
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSizeBytes, AllocationType::scratchSurface, this->csrAllocationStorage.getDeviceBitfield()});
|
||||
UNRECOVERABLE_IF(scratchAllocation == nullptr);
|
||||
}
|
||||
|
||||
uint64_t ScratchSpaceControllerBase::calculateNewGSH() {
|
||||
uint64_t gsh = 0;
|
||||
if (scratchSlot0Allocation) {
|
||||
gsh = scratchSlot0Allocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit;
|
||||
if (scratchAllocation) {
|
||||
gsh = scratchAllocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit;
|
||||
}
|
||||
return gsh;
|
||||
}
|
||||
@@ -65,8 +65,8 @@ uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() {
|
||||
// for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit"
|
||||
// and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state
|
||||
uint64_t scratchAddress = 0;
|
||||
if (scratchSlot0Allocation) {
|
||||
scratchAddress = scratchSlot0Allocation->getGpuAddressToPatch();
|
||||
if (scratchAllocation) {
|
||||
scratchAddress = scratchAllocation->getGpuAddressToPatch();
|
||||
if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) {
|
||||
// this is to avoid scratch allocation offset "0"
|
||||
scratchAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit;
|
||||
@@ -85,8 +85,8 @@ void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, Indire
|
||||
|
||||
void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t offset,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -94,8 +94,8 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
|
||||
@@ -16,8 +16,8 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
|
||||
|
||||
void setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -29,15 +29,15 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
|
||||
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override;
|
||||
void programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) override;
|
||||
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
|
||||
@@ -30,9 +30,9 @@ ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t
|
||||
auto &gfxCoreHelper = environment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>();
|
||||
singleSurfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
if (debugManager.flags.EnablePrivateScratchSlot1.get() != -1) {
|
||||
twoSlotScratchSpaceSupported = !!debugManager.flags.EnablePrivateScratchSlot1.get();
|
||||
privateScratchSpaceSupported = !!debugManager.flags.EnablePrivateScratchSlot1.get();
|
||||
}
|
||||
if (twoSlotScratchSpaceSupported) {
|
||||
if (privateScratchSpaceSupported) {
|
||||
ScratchSpaceControllerXeHPAndLater::stateSlotsCount *= 2;
|
||||
}
|
||||
}
|
||||
@@ -40,7 +40,7 @@ ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t
|
||||
void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId) {
|
||||
if (surfaceStateHeap != newSsh) {
|
||||
surfaceStateHeap = static_cast<char *>(newSsh);
|
||||
if (scratchSlot0Allocation == nullptr) {
|
||||
if (scratchAllocation == nullptr) {
|
||||
cfeDirty = false;
|
||||
} else {
|
||||
if (changeId) {
|
||||
@@ -54,15 +54,15 @@ void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDir
|
||||
|
||||
void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t offset,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) {
|
||||
setNewSshPtr(sshBaseAddress, vfeStateDirty, offset == 0 ? true : false);
|
||||
bool scratchSurfaceDirty = false;
|
||||
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
|
||||
prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
|
||||
if (scratchSurfaceDirty) {
|
||||
vfeStateDirty = true;
|
||||
updateSlots = true;
|
||||
@@ -75,7 +75,7 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceState() {
|
||||
slotId++;
|
||||
}
|
||||
UNRECOVERABLE_IF(slotId >= stateSlotsCount);
|
||||
UNRECOVERABLE_IF(scratchSlot0Allocation == nullptr && scratchSlot1Allocation == nullptr);
|
||||
UNRECOVERABLE_IF(scratchAllocation == nullptr && privateScratchAllocation == nullptr);
|
||||
|
||||
void *surfaceStateForScratchAllocation = ptrOffset(static_cast<void *>(surfaceStateHeap), getOffsetToSurfaceState(slotId + sshOffset));
|
||||
programSurfaceStateAtPtr(surfaceStateForScratchAllocation);
|
||||
@@ -84,23 +84,23 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceState() {
|
||||
void ScratchSpaceControllerXeHPAndLater::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) {
|
||||
auto &gfxCoreHelper = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>();
|
||||
uint64_t scratchAllocationAddress = 0u;
|
||||
if (scratchSlot0Allocation) {
|
||||
scratchAllocationAddress = scratchSlot0Allocation->getGpuAddress();
|
||||
if (scratchAllocation) {
|
||||
scratchAllocationAddress = scratchAllocation->getGpuAddress();
|
||||
}
|
||||
gfxCoreHelper.setRenderSurfaceStateForScratchResource(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex],
|
||||
surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0,
|
||||
perThreadScratchSize, nullptr, false, scratchType, false, true);
|
||||
|
||||
if (twoSlotScratchSpaceSupported) {
|
||||
void *surfaceStateForSlot1Allocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize);
|
||||
uint64_t scratchSlot1AllocationAddress = 0u;
|
||||
if (privateScratchSpaceSupported) {
|
||||
void *surfaceStateForPrivateScratchAllocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize);
|
||||
uint64_t privateScratchAllocationAddress = 0u;
|
||||
|
||||
if (scratchSlot1Allocation) {
|
||||
scratchSlot1AllocationAddress = scratchSlot1Allocation->getGpuAddress();
|
||||
if (privateScratchAllocation) {
|
||||
privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress();
|
||||
}
|
||||
gfxCoreHelper.setRenderSurfaceStateForScratchResource(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex],
|
||||
surfaceStateForSlot1Allocation, computeUnitsUsedForScratch,
|
||||
scratchSlot1AllocationAddress, 0, perThreadScratchSpaceSlot1Size, nullptr, false,
|
||||
surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch,
|
||||
privateScratchAllocationAddress, 0, perThreadPrivateScratchSize, nullptr, false,
|
||||
scratchType, false, true);
|
||||
}
|
||||
}
|
||||
@@ -110,7 +110,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::calculateNewGSH() {
|
||||
}
|
||||
uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() {
|
||||
uint64_t scratchAddress = 0u;
|
||||
if (scratchSlot0Allocation || scratchSlot1Allocation) {
|
||||
if (scratchAllocation || privateScratchAllocation) {
|
||||
scratchAddress = static_cast<uint64_t>(getOffsetToSurfaceState(slotId + sshOffset));
|
||||
}
|
||||
return scratchAddress;
|
||||
@@ -118,7 +118,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() {
|
||||
|
||||
size_t ScratchSpaceControllerXeHPAndLater::getOffsetToSurfaceState(uint32_t requiredSlotCount) const {
|
||||
auto offset = requiredSlotCount * singleSurfaceStateSize;
|
||||
if (twoSlotScratchSpaceSupported) {
|
||||
if (privateScratchSpaceSupported) {
|
||||
offset *= 2;
|
||||
}
|
||||
return offset;
|
||||
@@ -131,17 +131,17 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty,
|
||||
NEO::CommandStreamReceiver *csr) {
|
||||
bool scratchSurfaceDirty = false;
|
||||
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
|
||||
prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
|
||||
if (scratchSurfaceDirty) {
|
||||
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (twoSlotScratchSpaceSupported ? 2 : 1), scratchSlot0Allocation, BindlessHeapsHelper::specialSsh);
|
||||
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 2 : 1), scratchAllocation, BindlessHeapsHelper::specialSsh);
|
||||
programSurfaceStateAtPtr(bindlessSS.ssPtr);
|
||||
vfeStateDirty = true;
|
||||
}
|
||||
@@ -150,62 +150,62 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B
|
||||
}
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &scratchSurfaceDirty,
|
||||
bool &vfeStateDirty) {
|
||||
uint32_t requiredPerThreadScratchSizeSlot0AlignedUp = requiredPerThreadScratchSizeSlot0;
|
||||
if (!Math::isPow2(requiredPerThreadScratchSizeSlot0AlignedUp)) {
|
||||
requiredPerThreadScratchSizeSlot0AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot0);
|
||||
uint32_t requiredPerThreadScratchSizeAlignedUp = requiredPerThreadScratchSize;
|
||||
if (!Math::isPow2(requiredPerThreadScratchSizeAlignedUp)) {
|
||||
requiredPerThreadScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSize);
|
||||
}
|
||||
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot0AlignedUp) * computeUnitsUsedForScratch;
|
||||
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
||||
scratchSurfaceDirty = false;
|
||||
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
|
||||
if (scratchSlot0SizeInBytes < requiredScratchSizeInBytes) {
|
||||
if (scratchSlot0Allocation) {
|
||||
scratchSlot0Allocation->updateTaskCount(currentTaskCount, osContext.getContextId());
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchSlot0Allocation), TEMPORARY_ALLOCATION);
|
||||
if (scratchSizeBytes < requiredScratchSizeInBytes) {
|
||||
if (scratchAllocation) {
|
||||
scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
|
||||
}
|
||||
scratchSurfaceDirty = true;
|
||||
scratchSlot0SizeInBytes = requiredScratchSizeInBytes;
|
||||
perThreadScratchSize = requiredPerThreadScratchSizeSlot0AlignedUp;
|
||||
AllocationProperties properties{this->rootDeviceIndex, true, scratchSlot0SizeInBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
|
||||
scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
scratchSizeBytes = requiredScratchSizeInBytes;
|
||||
perThreadScratchSize = requiredPerThreadScratchSizeAlignedUp;
|
||||
AllocationProperties properties{this->rootDeviceIndex, true, scratchSizeBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
|
||||
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
}
|
||||
if (twoSlotScratchSpaceSupported) {
|
||||
uint32_t requiredPerThreadScratchSizeSlot1AlignedUp = requiredPerThreadScratchSizeSlot1;
|
||||
if (!Math::isPow2(requiredPerThreadScratchSizeSlot1AlignedUp)) {
|
||||
requiredPerThreadScratchSizeSlot1AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot1);
|
||||
if (privateScratchSpaceSupported) {
|
||||
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = requiredPerThreadPrivateScratchSize;
|
||||
if (!Math::isPow2(requiredPerThreadPrivateScratchSizeAlignedUp)) {
|
||||
requiredPerThreadPrivateScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadPrivateScratchSize);
|
||||
}
|
||||
size_t requiredScratchSlot1SizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot1AlignedUp) * computeUnitsUsedForScratch;
|
||||
if (scratchSlot1SizeInBytes < requiredScratchSlot1SizeInBytes) {
|
||||
if (scratchSlot1Allocation) {
|
||||
scratchSlot1Allocation->updateTaskCount(currentTaskCount, osContext.getContextId());
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchSlot1Allocation), TEMPORARY_ALLOCATION);
|
||||
size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
||||
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
|
||||
if (privateScratchAllocation) {
|
||||
privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(privateScratchAllocation), TEMPORARY_ALLOCATION);
|
||||
}
|
||||
scratchSlot1SizeInBytes = requiredScratchSlot1SizeInBytes;
|
||||
perThreadScratchSpaceSlot1Size = requiredPerThreadScratchSizeSlot1AlignedUp;
|
||||
privateScratchSizeBytes = requiredPrivateScratchSizeInBytes;
|
||||
perThreadPrivateScratchSize = requiredPerThreadPrivateScratchSizeAlignedUp;
|
||||
scratchSurfaceDirty = true;
|
||||
AllocationProperties properties{this->rootDeviceIndex, true, scratchSlot1SizeInBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
|
||||
scratchSlot1Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
AllocationProperties properties{this->rootDeviceIndex, true, privateScratchSizeBytes, AllocationType::privateSurface, multiTileCapable, false, osContext.getDeviceBitfield()};
|
||||
privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) {
|
||||
sshOffset = scratchSlot;
|
||||
updateSlots = false;
|
||||
setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);
|
||||
setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);
|
||||
|
||||
for (uint32_t i = 1; i < heapContainer.size(); ++i) {
|
||||
surfaceStateHeap = static_cast<char *>(heapContainer[i]->getUnderlyingBuffer());
|
||||
|
||||
@@ -22,8 +22,8 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
|
||||
|
||||
void setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -36,15 +36,15 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
|
||||
|
||||
void programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) override;
|
||||
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -54,8 +54,8 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void programSurfaceState();
|
||||
MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation);
|
||||
MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -66,13 +66,13 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
|
||||
bool updateSlots = true;
|
||||
uint32_t stateSlotsCount = 16;
|
||||
static const uint32_t scratchType = 6;
|
||||
bool twoSlotScratchSpaceSupported = true;
|
||||
bool privateScratchSpaceSupported = true;
|
||||
|
||||
char *surfaceStateHeap = nullptr;
|
||||
size_t singleSurfaceStateSize = 0;
|
||||
uint32_t slotId = 0;
|
||||
uint32_t perThreadScratchSize = 0;
|
||||
uint32_t perThreadScratchSpaceSlot1Size = 0;
|
||||
uint32_t perThreadPrivateScratchSize = 0;
|
||||
uint32_t sshOffset = 0;
|
||||
SurfaceStateInHeapInfo bindlessSS = {};
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user