mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
feature: add heapless and global stateless scratch address patching
Related-To: NEO-10381
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
78885ae1fe
commit
73d558058c
@@ -73,6 +73,7 @@ struct EncodeDispatchKernelArgs {
|
||||
bool dcFlushEnable = false;
|
||||
bool isHeaplessModeEnabled = false;
|
||||
bool interruptEvent = false;
|
||||
bool immediateScratchAddressPatching = false;
|
||||
|
||||
bool requiresSystemMemoryFence() const {
|
||||
return (isHostScopeSignalEvent && isKernelUsingSystemAllocation);
|
||||
|
||||
@@ -332,15 +332,22 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
auto address = heap->getHeapGpuBase() + offsetThreadData;
|
||||
std::memcpy(inlineDataPointer + indirectDataPointerAddress.offset, &address, indirectDataPointerAddress.pointerSize);
|
||||
|
||||
auto requiredScratchSlot0Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||
auto requiredScratchSlot1Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
|
||||
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
||||
auto ssh = container.getIndirectHeap(HeapType::surfaceState);
|
||||
if (args.immediateScratchAddressPatching) {
|
||||
auto requiredScratchSlot0Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||
auto requiredScratchSlot1Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
|
||||
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
||||
NEO::IndirectHeap *ssh = nullptr;
|
||||
if (csr->getGlobalStatelessHeapAllocation() != nullptr) {
|
||||
ssh = csr->getGlobalStatelessHeap();
|
||||
} else {
|
||||
ssh = args.surfaceStateHeap ? args.surfaceStateHeap : container.getIndirectHeap(HeapType::surfaceState);
|
||||
}
|
||||
|
||||
uint64_t scratchAddress = 0u;
|
||||
EncodeDispatchKernel<Family>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, *csr);
|
||||
auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;
|
||||
std::memcpy(inlineDataPointer + scratchPointerAddress.offset, &scratchAddress, scratchPointerAddress.pointerSize);
|
||||
uint64_t scratchAddress = 0u;
|
||||
EncodeDispatchKernel<Family>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, *csr);
|
||||
auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;
|
||||
std::memcpy(inlineDataPointer + scratchPointerAddress.offset, &scratchAddress, scratchPointerAddress.pointerSize);
|
||||
}
|
||||
} else {
|
||||
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
||||
walkerCmd.setIndirectDataLength(sizeThreadData);
|
||||
|
||||
@@ -87,7 +87,7 @@ if(SUPPORT_DG2_AND_LATER)
|
||||
)
|
||||
endif()
|
||||
|
||||
if(NOT SUPPORT_HEAPLESS)
|
||||
if(NOT SUPPORTED_HEAPLESS)
|
||||
list(APPEND NEO_CORE_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_heap_addressing.inl
|
||||
)
|
||||
|
||||
@@ -303,7 +303,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
||||
flushData.stateComputeModeFullConfigurationNeeded = getStateComputeModeDirty();
|
||||
flushData.stateBaseAddressFullConfigurationNeeded = getGSBAStateDirty();
|
||||
|
||||
if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSlot0Size > 0 || this->requiredScratchSlot1Size > 0)) {
|
||||
if (!this->heaplessModeEnabled && dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSlot0Size > 0 || this->requiredScratchSlot1Size > 0)) {
|
||||
bool checkFeStateDirty = false;
|
||||
bool checkSbaStateDirty = false;
|
||||
scratchSpaceController->setRequiredScratchSpace(dispatchFlags.sshCpuBase,
|
||||
|
||||
@@ -100,6 +100,7 @@ struct UnitTestHelper {
|
||||
|
||||
static bool findStateCacheFlushPipeControl(LinearStream &csrStream);
|
||||
static void verifyDummyBlitWa(const RootDeviceEnvironment *rootDeviceEnvironment, GenCmdList::iterator &cmdIterator);
|
||||
static GenCmdList::iterator findWalkerCmd(GenCmdList::iterator begin, GenCmdList::iterator end, bool heapless);
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -154,4 +154,9 @@ template <typename GfxFamily>
|
||||
void UnitTestHelper<GfxFamily>::verifyDummyBlitWa(const RootDeviceEnvironment *rootDeviceEnvironment, GenCmdList::iterator &cmdIterator) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
GenCmdList::iterator UnitTestHelper<GfxFamily>::findWalkerCmd(GenCmdList::iterator begin, GenCmdList::iterator end, bool heapless) {
|
||||
return find<typename GfxFamily::GPGPU_WALKER *>(begin, end);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -138,4 +138,9 @@ void UnitTestHelper<GfxFamily>::verifyDummyBlitWa(const RootDeviceEnvironment *r
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
GenCmdList::iterator UnitTestHelper<GfxFamily>::findWalkerCmd(GenCmdList::iterator begin, GenCmdList::iterator end, bool heapless) {
|
||||
return find<typename GfxFamily::COMPUTE_WALKER *>(begin, end);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -88,7 +88,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::sshState;
|
||||
using BaseClass::staticWorkPartitioningEnabled;
|
||||
using BaseClass::streamProperties;
|
||||
|
||||
using BaseClass::wasSubmittedToSingleSubdevice;
|
||||
using BaseClass::CommandStreamReceiver::activePartitions;
|
||||
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
|
||||
@@ -115,6 +114,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::globalFenceAllocation;
|
||||
using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod;
|
||||
using BaseClass::CommandStreamReceiver::gsbaFor32BitProgrammed;
|
||||
using BaseClass::CommandStreamReceiver::heaplessModeEnabled;
|
||||
using BaseClass::CommandStreamReceiver::immWritePostSyncWriteOffset;
|
||||
using BaseClass::CommandStreamReceiver::initDirectSubmission;
|
||||
using BaseClass::CommandStreamReceiver::internalAllocationStorage;
|
||||
|
||||
@@ -5095,3 +5095,30 @@ HWTEST_F(CommandStreamReceiverHwHeaplessTest, whenHeaplessCommandStreamReceiverF
|
||||
EXPECT_ANY_THROW(csr->flushImmediateTaskStateless(commandStream, 0, csr->recordedImmediateDispatchFlags, *pDevice));
|
||||
EXPECT_ANY_THROW(csr->handleImmediateFlushStatelessAllocationsResidency(0, commandStream));
|
||||
}
|
||||
|
||||
// With heaplessModeEnabled set on the CSR, an immediate flush issued after a non-zero
// slot0 scratch size has been requested is expected to emit no CFE_STATE and to leave
// the slot0 scratch allocation null.
HWTEST2_F(CommandStreamReceiverHwTest,
          givenImmediateFlushTaskInHeaplessModeWhenNextDispatchRequiresScratchSpaceThenNoScratchIsAllocated,
          IsAtLeastXeHpCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.storeMakeResidentAllocations = true;
    ultCsr.heaplessModeEnabled = true;

    // First flush happens before any scratch requirement is set.
    ultCsr.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

    ultCsr.setRequiredScratchSizes(0x100, 0);

    // Record how much of the CSR stream is used before the second flush; the value is
    // handed to the parser below (presumably as the start offset - confirm in HardwareParse).
    const size_t csrUsedBeforeSecondFlush = ultCsr.commandStream.getUsed();
    ultCsr.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

    HardwareParse csrParser;
    csrParser.parseCommands<FamilyType>(ultCsr.commandStream, csrUsedBeforeSecondFlush);
    auto cfeStateCmd = csrParser.getCommand<CFE_STATE>();
    ASSERT_EQ(nullptr, cfeStateCmd);

    auto scratchController = ultCsr.getScratchSpaceController();
    EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot0Allocation());
}
|
||||
|
||||
@@ -68,6 +68,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
|
||||
false, // dcFlushEnable
|
||||
false, // isHeaplessModeEnabled
|
||||
false, // interruptEvent
|
||||
false, // immediateScratchAddressPatching
|
||||
};
|
||||
|
||||
return args;
|
||||
|
||||
Reference in New Issue
Block a user