mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
refactor: change queue interfaces to provide different scratch controller
Related-To: NEO-10381 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
96abe38c6d
commit
94cf31033c
@@ -52,6 +52,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
|
||||
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSlot0Size,
|
||||
uint32_t perThreadScratchSpaceSlot1Size);
|
||||
@@ -68,6 +69,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
uint32_t numCommandLists,
|
||||
NEO::PreemptionMode contextPreemptionMode,
|
||||
Device *device,
|
||||
NEO::ScratchSpaceController *scratchSpaceController,
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation,
|
||||
bool debugEnabled,
|
||||
bool programActivePartitionConfig,
|
||||
bool performMigration,
|
||||
@@ -83,6 +86,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
CommandList *firstCommandList = nullptr;
|
||||
CommandList *lastCommandList = nullptr;
|
||||
void *currentPatchForChainedBbStart = nullptr;
|
||||
NEO::ScratchSpaceController *scratchSpaceController = nullptr;
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation = nullptr;
|
||||
|
||||
NEO::PreemptionMode preemptionMode{};
|
||||
NEO::PreemptionMode statePreemption{};
|
||||
@@ -109,14 +114,15 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
bool hasIndirectAccess{};
|
||||
bool rtDispatchRequired = false;
|
||||
bool globalInit = false;
|
||||
bool lockScratchController = false;
|
||||
};
|
||||
|
||||
ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
|
||||
uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *commandListHandles,
|
||||
ze_fence_handle_t hFence,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents);
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
|
||||
uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *commandListHandles,
|
||||
ze_fence_handle_t hFence,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents);
|
||||
inline ze_result_t executeCommandListsCopyOnly(CommandListExecutionContext &ctx,
|
||||
uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
@@ -144,7 +150,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
|
||||
inline void getGlobalFenceAndMakeItResident();
|
||||
inline void getWorkPartitionAndMakeItResident();
|
||||
inline void getGlobalStatelessHeapAndMakeItResident();
|
||||
inline void getGlobalStatelessHeapAndMakeItResident(CommandListExecutionContext &ctx);
|
||||
inline void getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);
|
||||
inline void makeSbaTrackingBufferResidentIfL0DebuggerEnabled(bool isDebugEnabled);
|
||||
inline void programCommandQueueDebugCmdsForSourceLevelOrL0DebuggerIfEnabled(bool isDebugEnabled, NEO::LinearStream &commandStream);
|
||||
|
||||
@@ -69,24 +69,35 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
auto ret = ZE_RESULT_SUCCESS;
|
||||
|
||||
auto lockCSR = this->csr->obtainUniqueOwnership();
|
||||
auto neoDevice = device->getNEODevice();
|
||||
|
||||
if (NEO::ApiSpecificConfig::isSharedAllocPrefetchEnabled()) {
|
||||
auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
|
||||
svmAllocMgr->prefetchSVMAllocs(*device->getNEODevice(), *csr);
|
||||
svmAllocMgr->prefetchSVMAllocs(*neoDevice, *csr);
|
||||
}
|
||||
|
||||
registerCsrClient();
|
||||
|
||||
auto neoDevice = device->getNEODevice();
|
||||
auto scratchController = this->csr->getScratchSpaceController();
|
||||
auto globalStatelessHeapAllocation = this->csr->getGlobalStatelessHeapAllocation();
|
||||
bool lockScratchController = false;
|
||||
if (this->heaplessModeEnabled) {
|
||||
scratchController = neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController();
|
||||
globalStatelessHeapAllocation = neoDevice->getDefaultEngine().commandStreamReceiver->getGlobalStatelessHeapAllocation();
|
||||
lockScratchController = scratchController != this->csr->getScratchSpaceController();
|
||||
}
|
||||
auto ctx = CommandListExecutionContext{phCommandLists,
|
||||
numCommandLists,
|
||||
this->isCopyOnlyCommandQueue ? NEO::PreemptionMode::Disabled : csr->getPreemptionMode(),
|
||||
device,
|
||||
scratchController,
|
||||
globalStatelessHeapAllocation,
|
||||
NEO::Debugger::isDebugEnabled(internalUsage),
|
||||
csr->isProgramActivePartitionConfigRequired(),
|
||||
performMigration,
|
||||
csr->getSipSentFlag()};
|
||||
ctx.globalInit |= ctx.isDebugEnabled && !this->commandQueueDebugCmdsProgrammed && device->getL0Debugger();
|
||||
ctx.lockScratchController = lockScratchController;
|
||||
|
||||
this->startingCmdBuffer = &this->commandStream;
|
||||
this->device->activateMetricGroups();
|
||||
@@ -160,7 +171,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
|
||||
this->getGlobalFenceAndMakeItResident();
|
||||
this->getWorkPartitionAndMakeItResident();
|
||||
this->getGlobalStatelessHeapAndMakeItResident();
|
||||
this->getGlobalStatelessHeapAndMakeItResident(ctx);
|
||||
this->makePreemptionAllocationResidentForModeMidThread(ctx.isDevicePreemptionModeMidThread);
|
||||
this->makeSipIsaResidentIfSipKernelUsed(ctx);
|
||||
this->makeDebugSurfaceResidentIfNEODebuggerActive(ctx.isNEODebuggerActive(this->device));
|
||||
@@ -473,10 +484,14 @@ CommandQueueHw<gfxCoreFamily>::CommandListExecutionContext::CommandListExecution
|
||||
uint32_t numCommandLists,
|
||||
NEO::PreemptionMode contextPreemptionMode,
|
||||
Device *device,
|
||||
NEO::ScratchSpaceController *scratchSpaceController,
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation,
|
||||
bool debugEnabled,
|
||||
bool programActivePartitionConfig,
|
||||
bool performMigration,
|
||||
bool sipSent) : preemptionMode{contextPreemptionMode},
|
||||
bool sipSent) : scratchSpaceController(scratchSpaceController),
|
||||
globalStatelessAllocation(globalStatelessAllocation),
|
||||
preemptionMode{contextPreemptionMode},
|
||||
statePreemption{contextPreemptionMode},
|
||||
isPreemptionModeInitial{contextPreemptionMode == NEO::PreemptionMode::Initial},
|
||||
isDebugEnabled{debugEnabled},
|
||||
@@ -689,13 +704,17 @@ void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecut
|
||||
|
||||
// Runs handleScratchSpace for this submission and folds the resulting dirty
// state into ctx: gsbaStateDirty is OR-ed with the CSR's GSBA dirty flag,
// scratchGsba is refreshed from calculateNewGSH(), and globalInit is raised
// whenever gsbaStateDirty ends up set.
// NOTE(review): this listing appears to be a diff rendered without +/- markers;
// the lines using the local `scratchController` look like the pre-change
// counterparts of the neighbouring `ctx.scratchSpaceController` lines - confirm
// against the repository before treating both as live code.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx) {
|
||||
auto scratchController = this->csr->getScratchSpaceController();
|
||||
// Starts out owning nothing; it only takes the default engine CSR's ownership
// lock when the context requested it, and holds it until this function returns.
std::unique_lock<NEO::CommandStreamReceiver::MutexType> defaultCsrLock;
|
||||
if (ctx.lockScratchController) {
|
||||
defaultCsrLock = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership();
|
||||
}
|
||||
handleScratchSpace(this->heapContainer,
|
||||
scratchController,
|
||||
ctx.scratchSpaceController,
|
||||
ctx.globalStatelessAllocation,
|
||||
ctx.gsbaStateDirty, ctx.frontEndStateDirty,
|
||||
ctx.perThreadScratchSpaceSlot0Size, ctx.perThreadScratchSpaceSlot1Size);
|
||||
// The dirty flag is still read from this queue's own CSR, not from the
// controller carried in ctx.
ctx.gsbaStateDirty |= this->csr->getGSBAStateDirty();
|
||||
ctx.scratchGsba = scratchController->calculateNewGSH();
|
||||
ctx.scratchGsba = ctx.scratchSpaceController->calculateNewGSH();
|
||||

||||
ctx.globalInit |= ctx.gsbaStateDirty;
|
||||
}
|
||||
@@ -814,10 +833,9 @@ void CommandQueueHw<gfxCoreFamily>::getWorkPartitionAndMakeItResident() {
|
||||
}
|
||||
|
||||
// Makes the global stateless heap allocation resident on this queue's CSR when
// the allocation pointer is non-null; a null pointer is skipped without error.
// NOTE(review): two signatures are shown back to back, which looks like the
// before/after pair of a diff rendered without +/- markers - presumably only
// the overload taking CommandListExecutionContext is current; confirm.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::getGlobalStatelessHeapAndMakeItResident() {
|
||||
const auto globalStatelessAllocation = this->csr->getGlobalStatelessHeapAllocation();
|
||||
if (globalStatelessAllocation) {
|
||||
this->csr->makeResident(*globalStatelessAllocation);
|
||||
// Context-taking variant: reads the allocation stored in ctx instead of
// querying this->csr for it, while residency is still requested on this->csr.
void CommandQueueHw<gfxCoreFamily>::getGlobalStatelessHeapAndMakeItResident(CommandListExecutionContext &ctx) {
|
||||
if (ctx.globalStatelessAllocation) {
|
||||
this->csr->makeResident(*ctx.globalStatelessAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -120,6 +120,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
|
||||
|
||||
|
||||
@@ -135,12 +135,12 @@ constexpr uint32_t maxPtssIndex = 15u;
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHeaps,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
|
||||
if (perThreadScratchSpaceSlot0Size > 0 || perThreadScratchSpaceSlot1Size > 0) {
|
||||
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
|
||||
auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation();
|
||||
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
|
||||
scratchController->setRequiredScratchSpace(globalStatelessAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
}
|
||||
if (sshHeaps.size() > 0) {
|
||||
|
||||
@@ -78,6 +78,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
using L0::CommandQueue::dispatchCmdListBatchBufferAsPrimary;
|
||||
using L0::CommandQueue::doubleSbaWa;
|
||||
using L0::CommandQueue::frontEndStateTracking;
|
||||
using L0::CommandQueue::heaplessModeEnabled;
|
||||
using L0::CommandQueue::internalQueueForImmediateCommandList;
|
||||
using L0::CommandQueue::internalUsage;
|
||||
using L0::CommandQueue::partitionCount;
|
||||
@@ -114,11 +115,26 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
return BaseClass::submitBatchBuffer(offset, residencyContainer, endingCmdPtr, isCooperative);
|
||||
}
|
||||
|
||||
// Test hook: copies the scratch-related fields of the execution context
// (global stateless allocation, scratch controller, lock flag) into the
// recorded* members of the mock, then forwards every argument unchanged to
// BaseClass::executeCommandListsRegular and returns its result.
ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
|
||||
uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *commandListHandles,
|
||||
ze_fence_handle_t hFence,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override {
|
||||
// Snapshot is taken before delegating, so it reflects what the caller put
// into ctx rather than anything the base implementation may change.
recordedGlobalStatelessAllocation = ctx.globalStatelessAllocation;
|
||||
recordedScratchController = ctx.scratchSpaceController;
|
||||
recordedLockScratchController = ctx.lockScratchController;
|
||||
return BaseClass::executeCommandListsRegular(ctx, numCommandLists, commandListHandles, hFence, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *recordedGlobalStatelessAllocation = nullptr;
|
||||
NEO::ScratchSpaceController *recordedScratchController = nullptr;
|
||||
uint32_t synchronizedCalled = 0;
|
||||
NEO::ResidencyContainer residencyContainerSnapshot;
|
||||
ze_result_t synchronizeReturnValue{ZE_RESULT_SUCCESS};
|
||||
std::optional<NEO::WaitStatus> reserveLinearStreamSizeReturnValue{};
|
||||
std::optional<NEO::SubmissionStatus> submitBatchBufferReturnValue{};
|
||||
bool recordedLockScratchController = false;
|
||||
};
|
||||
|
||||
struct Deleter {
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
@@ -2784,5 +2786,37 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
EXPECT_EQ(nullptr, ssh);
|
||||
}
|
||||
|
||||
// Checks that a queue with heaplessModeEnabled set, built on a CSR other than
// the default engine's, passes the default engine CSR's scratch controller and
// global stateless heap allocation into executeCommandListsRegular, and that
// the lock-scratch-controller flag is set for that call. The values are read
// back from the recorded* members of MockCommandQueueHw.
HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
givenCommandQueueUsingGlobalStatelessWhenQueueInHeaplessModeThenUsingScratchControllerAndHeapAllocationFromDefaultEngine,
|
||||
IsAtLeastXeHpCore) {
|
||||
auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
defaultCsr->createGlobalStatelessHeap();
|
||||

||||
// Second CSR owned by the test; it is set up on the default engine's OS
// context and given its own global stateless heap.
auto otherCsr = std::unique_ptr<UltCommandStreamReceiver<FamilyType>>(static_cast<UltCommandStreamReceiver<FamilyType> *>(createCommandStream(*device->getNEODevice()->getExecutionEnvironment(), 0, 1)));
|
||||

||||
otherCsr->setupContext(*neoDevice->getDefaultEngine().osContext);
|
||||
otherCsr->initializeResources();
|
||||
otherCsr->initializeTagAllocation();
|
||||
otherCsr->createGlobalFenceAllocation();
|
||||
otherCsr->createPreemptionAllocation();
|
||||
otherCsr->createGlobalStatelessHeap();
|
||||

||||
ze_command_queue_desc_t desc = {};
|
||||
// The queue borrows otherCsr (raw pointer); otherCsr stays owned by the
// unique_ptr above.
auto otherCommandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, otherCsr.get(), &desc);
|
||||
otherCommandQueue->initialize(false, false, false);
|
||||
// Heapless mode is forced on the mock after initialize().
otherCommandQueue->heaplessModeEnabled = true;
|
||||

||||
commandList->close();
|
||||
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||

||||
auto result = otherCommandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||

||||
// Expected values come from the default engine's CSR, not from otherCsr.
EXPECT_EQ(defaultCsr->getScratchSpaceController(), otherCommandQueue->recordedScratchController);
|
||||
EXPECT_EQ(defaultCsr->getGlobalStatelessHeapAllocation(), otherCommandQueue->recordedGlobalStatelessAllocation);
|
||||
EXPECT_TRUE(otherCommandQueue->recordedLockScratchController);
|
||||
// Queue was allocated with new above; presumably destroy() releases it - confirm.
otherCommandQueue->destroy();
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1110,6 +1110,7 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
NEO::HeapContainer mockHeapContainer;
|
||||
void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
NEO::GraphicsAllocation *globalStatelessAllocation,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSlot0Size,
|
||||
uint32_t perThreadScratchSpaceSlot1Size) override {
|
||||
|
||||
@@ -879,7 +879,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
|
||||
|
||||
auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
|
||||
scratch->scratchAllocation = &graphicsAllocation;
|
||||
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
|
||||
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), nullptr, gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
|
||||
|
||||
EXPECT_TRUE(scratch->programHeapsCalled);
|
||||
EXPECT_GT(csr.makeResidentCalledTimes, 0u);
|
||||
@@ -933,7 +933,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
|
||||
|
||||
auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
|
||||
scratch->scratchSlot0Allocation = &graphicsAllocation;
|
||||
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
|
||||
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), nullptr, gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
|
||||
|
||||
EXPECT_FALSE(scratch->programHeapsCalled);
|
||||
scratch->scratchSlot0Allocation = nullptr;
|
||||
|
||||
@@ -818,6 +818,8 @@ HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWithIndirectAccessWhenExecuti
|
||||
1,
|
||||
csr->getPreemptionMode(),
|
||||
device,
|
||||
csr->getScratchSpaceController(),
|
||||
csr->getGlobalStatelessHeapAllocation(),
|
||||
false,
|
||||
csr->isProgramActivePartitionConfigRequired(),
|
||||
false,
|
||||
@@ -844,6 +846,8 @@ HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWitNohIndirectAccessWhenExecu
|
||||
1,
|
||||
csr->getPreemptionMode(),
|
||||
device,
|
||||
csr->getScratchSpaceController(),
|
||||
csr->getGlobalStatelessHeapAllocation(),
|
||||
false,
|
||||
csr->isProgramActivePartitionConfigRequired(),
|
||||
false,
|
||||
@@ -871,6 +875,8 @@ HWTEST2_F(CommandQueueIndirectAllocations, givenCommandQueueWhenHandleIndirectAl
|
||||
1,
|
||||
csr->getPreemptionMode(),
|
||||
device,
|
||||
csr->getScratchSpaceController(),
|
||||
csr->getGlobalStatelessHeapAllocation(),
|
||||
false,
|
||||
csr->isProgramActivePartitionConfigRequired(),
|
||||
false,
|
||||
|
||||
Reference in New Issue
Block a user