Program private 2 scratch in L0

Related-To: NEO-5427
Signed-off-by: Kamil Diedrich <kamil.diedrich@intel.com>
This commit is contained in:
Kamil Diedrich
2021-12-17 03:18:45 +00:00
committed by Compute-Runtime-Automation
parent ab4640635d
commit 6940fbf387
10 changed files with 399 additions and 13 deletions

View File

@@ -48,7 +48,8 @@ struct CommandQueueHw : public CommandQueueImp {
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize);
uint32_t perThreadScratchSpaceSize,
uint32_t perThreadPrivateScratchSize);
bool getPreemptionCmdProgramming() override;
void patchCommands(CommandList &commandList, uint64_t scratchAddress);

View File

@@ -157,6 +157,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
size_t totalCmdBuffers = 0;
uint32_t perThreadScratchSpaceSize = 0;
uint32_t perThreadPrivateScratchSize = 0;
NEO::PageFaultManager *pageFaultManager = nullptr;
if (performMigration) {
pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
@@ -188,11 +189,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
statePreemption = commandListPreemption;
}
if (perThreadScratchSpaceSize < commandList->getCommandListPerThreadScratchSize()) {
perThreadScratchSpaceSize = commandList->getCommandListPerThreadScratchSize();
}
perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
if (commandList->getCommandListPerThreadScratchSize() != 0) {
perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
}
@@ -237,7 +238,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
handleScratchSpace(heapContainer,
scratchSpaceController,
gsbaStateDirty, frontEndStateDirty,
perThreadScratchSpaceSize);
perThreadScratchSpaceSize, perThreadPrivateScratchSize);
auto &streamProperties = csr->getStreamProperties();
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);

View File

@@ -107,7 +107,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize) {
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
if (perThreadScratchSpaceSize > 0) {
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),

View File

@@ -111,19 +111,27 @@ template <GFXCORE_FAMILY gfxCoreFamily>
// Programs scratch-space state through the scratch controller and makes the
// resulting scratch allocations resident on this queue's CSR.
//
// sshHeaps                    - heap container forwarded to programHeaps(); skipped when empty.
// scratchController           - controller that programs the heaps and owns the scratch allocations.
// gsbaState, frontEndState    - passed by reference into the controller's program calls
//                               (presumably updated as dirty flags there - TODO confirm).
// perThreadScratchSpaceSize   - per-thread scratch size forwarded to the controller.
// perThreadPrivateScratchSize - per-thread private scratch size forwarded to the controller.
//
// NOTE(review): this listing looks like a diff hunk whose +/- markers were stripped, so
// superseded and replacement lines appear side by side. The notes below mark the lines
// that appear to be the superseded ones - confirm against the repository before use.
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHeaps,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
// NOTE(review): the next two lines look like the superseded signature tail and guard
// (scratch size only); the two lines after them add the private scratch size.
uint32_t perThreadScratchSpaceSize) {
if (perThreadScratchSpaceSize > 0) {
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
// Nothing is programmed or made resident unless at least one of the two sizes is non-zero.
if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) {
if (sshHeaps.size() > 0) {
// Offset index is derived from the engine type of the CSR's OS context:
// maxPtssIndex per engine type, plus one.
uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u;
// NOTE(review): the first programHeaps line passes 0u where the second passes
// perThreadPrivateScratchSize - the first appears to be the superseded call.
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
}
// Bindless path: taken only when the API-specific configuration reports bindless mode.
if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {
// NOTE(review): same pattern as above - the 0u variant appears to be the superseded call.
scratchController->programBindlessSurfaceStateForScratch(device->getNEODevice()->getBindlessHeapsHelper(), perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
scratchController->programBindlessSurfaceStateForScratch(device->getNEODevice()->getBindlessHeapsHelper(), perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState, csr);
}
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
// NOTE(review): the unguarded makeResident below appears to be the superseded line; the
// guarded form after it skips a null scratch allocation instead of dereferencing it.
csr->makeResident(*scratchAllocation);
if (scratchAllocation != nullptr) {
csr->makeResident(*scratchAllocation);
}
// The private scratch allocation is made resident only when the controller returns one.
auto privateScratchAllocation = scratchController->getPrivateScratchSpaceAllocation();
if (privateScratchAllocation != nullptr) {
csr->makeResident(*privateScratchAllocation);
}
}
}