mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Add heap sharing to immediate command lists
This change is intended to be used in immediate command lists that are using flush task functionality. With this change, all immediate command lists using the same CSR will consume shared allocations for the DSH and SSH heaps. This will decrease the number of SBA commands dispatched when multiple command lists coexist and dispatch kernels. With this change, a new SBA command should be dispatched only when the current heap allocation is exhausted. The functionality is currently disabled and available under a debug key. The functionality will be enabled by default for all immediate command lists with flush task functionality enabled. Related-To: NEO-7142 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
71bef6094d
commit
3d92186362
@@ -297,6 +297,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool commandListSLMEnabled = false;
|
||||
bool requiresQueueUncachedMocs = false;
|
||||
bool isBcsSplitNeeded = false;
|
||||
bool immediateCmdListHeapSharing = false;
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
||||
|
||||
@@ -135,6 +135,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled);
|
||||
}
|
||||
|
||||
if (this->immediateCmdListHeapSharing) {
|
||||
commandContainer.setImmediateCmdListCsr(this->csr);
|
||||
commandContainer.setNumIddPerBlock(1);
|
||||
}
|
||||
|
||||
commandContainer.setReservedSshSize(getReserveSshSize());
|
||||
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
|
||||
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly());
|
||||
|
||||
@@ -43,8 +43,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
appendEventForProfiling(event, true, false);
|
||||
const auto kernelImmutableData = kernel->getImmutableData();
|
||||
if (this->immediateCmdListHeapSharing) {
|
||||
auto kernelInfo = kernelImmutableData->getKernelInfo();
|
||||
commandContainer.ensureHeapSizePrepared(
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(*kernelInfo));
|
||||
}
|
||||
appendEventForProfiling(event, true, false);
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
|
||||
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
|
||||
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
|
||||
@@ -147,7 +153,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
|
||||
if (neoDevice->getDebugger()) {
|
||||
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
|
||||
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
|
||||
@@ -21,10 +21,10 @@ constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
|
||||
using BaseClass::executeCommandListImmediate;
|
||||
|
||||
using BaseClass::BaseClass;
|
||||
using BaseClass::executeCommandListImmediate;
|
||||
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
@@ -143,6 +145,32 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
||||
this->csr->makeResident(*this->device->getDebugSurface());
|
||||
}
|
||||
|
||||
NEO::Device *neoDevice = this->device->getNEODevice();
|
||||
if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) {
|
||||
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
|
||||
auto sshStateCopy = csrHw->getSshState();
|
||||
bool sshDirty = sshStateCopy.updateAndCheck(ssh);
|
||||
|
||||
if (sshDirty) {
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &surfaceState;
|
||||
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
|
||||
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
|
||||
args.mocs = this->device->getMOCS(false, false);
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.allocation = this->device->getDebugSurface();
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.useGlobalAtomics = false;
|
||||
args.areMultipleSubDevicesInContext = false;
|
||||
args.isDebuggerActive = true;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
}
|
||||
|
||||
auto completionStamp = this->csr->flushTask(
|
||||
*commandStream,
|
||||
commandStreamStart,
|
||||
|
||||
@@ -139,6 +139,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
if (this->immediateCmdListHeapSharing) {
|
||||
auto kernelInfo = kernelImmutableData->getKernelInfo();
|
||||
commandContainer.ensureHeapSizePrepared(
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(*kernelInfo));
|
||||
}
|
||||
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
|
||||
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
|
||||
|
||||
@@ -265,7 +271,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
}
|
||||
|
||||
if (neoDevice->getDebugger()) {
|
||||
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
|
||||
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
|
||||
@@ -127,6 +127,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
UNRECOVERABLE_IF(nullptr == csr);
|
||||
|
||||
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
|
||||
commandList->csr = csr;
|
||||
commandList->internalUsage = internalUsage;
|
||||
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
|
||||
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
|
||||
@@ -135,6 +136,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) {
|
||||
commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get();
|
||||
}
|
||||
commandList->immediateCmdListHeapSharing = L0HwHelper::enableImmediateCmdListHeapSharing(commandList->isFlushTaskSubmissionEnabled);
|
||||
}
|
||||
returnValue = commandList->initialize(device, engineGroupType, desc->flags);
|
||||
if (returnValue != ZE_RESULT_SUCCESS) {
|
||||
@@ -151,7 +153,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
}
|
||||
|
||||
commandList->cmdQImmediate = commandQueue;
|
||||
commandList->csr = csr;
|
||||
commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB);
|
||||
commandList->commandListPreemptionMode = device->getDevicePreemptionMode();
|
||||
|
||||
|
||||
@@ -1092,7 +1092,9 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool
|
||||
device->getSourceLevelDebugger()
|
||||
->notifyNewDevice(osInterface ? osInterface->getDriverModel()->getDeviceHandle() : 0);
|
||||
}
|
||||
device->createSysmanHandle(isSubDevice);
|
||||
if (device->getNEODevice()->getAllEngines()[0].commandStreamReceiver->getType() == NEO::CommandStreamReceiverType::CSR_HW) {
|
||||
device->createSysmanHandle(isSubDevice);
|
||||
}
|
||||
device->resourcesReleased = false;
|
||||
|
||||
device->populateSubDeviceCopyEngineGroups();
|
||||
|
||||
@@ -39,4 +39,12 @@ bool L0HwHelper::enableStateComputeModeTracking() {
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
bool L0HwHelper::enableImmediateCmdListHeapSharing(bool cmdlistSupport) {
|
||||
bool enabled = false;
|
||||
if (NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get() != -1) {
|
||||
return !!NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get();
|
||||
}
|
||||
return enabled;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -33,6 +33,7 @@ class L0HwHelper {
|
||||
static bool enableFrontEndStateTracking();
|
||||
static bool enablePipelineSelectStateTracking();
|
||||
static bool enableStateComputeModeTracking();
|
||||
static bool enableImmediateCmdListHeapSharing(bool cmdlistSupport);
|
||||
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
|
||||
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
|
||||
|
||||
|
||||
@@ -385,6 +385,11 @@ int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
bool useSyncQueue = isSyncQueueEnabled(argc, argv);
|
||||
bool commandListShared = isCommandListShared(argc, argv);
|
||||
bool commandListCoexist = isParamEnabled(argc, argv, "-o", "--coexists");
|
||||
if (commandListCoexist) {
|
||||
std::cerr << "Command List coexists between tests" << std::endl;
|
||||
commandListShared = false;
|
||||
}
|
||||
bool aubMode = isAubMode(argc, argv);
|
||||
|
||||
ze_context_handle_t context = nullptr;
|
||||
@@ -410,18 +415,43 @@ int main(int argc, char *argv[]) {
|
||||
SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdList));
|
||||
}
|
||||
|
||||
ze_command_list_handle_t cmdListStandardMemoryCopy = nullptr;
|
||||
ze_command_list_handle_t cmdListMemoryCopyRegion = nullptr;
|
||||
ze_command_list_handle_t cmdListLaunchGpuKernel = nullptr;
|
||||
if (commandListCoexist) {
|
||||
ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
|
||||
cmdQueueDesc.pNext = nullptr;
|
||||
cmdQueueDesc.flags = 0;
|
||||
cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
cmdQueueDesc.ordinal = getCommandQueueOrdinal(device0);
|
||||
cmdQueueDesc.index = 0;
|
||||
selectQueueMode(cmdQueueDesc, useSyncQueue);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdListStandardMemoryCopy));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdListMemoryCopyRegion));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdListLaunchGpuKernel));
|
||||
|
||||
cmdList = cmdListStandardMemoryCopy;
|
||||
}
|
||||
|
||||
std::string currentTest;
|
||||
currentTest = "Standard Memory Copy";
|
||||
testAppendMemoryCopy(context, device0, useSyncQueue, outputValidationSuccessful, cmdList);
|
||||
printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest);
|
||||
|
||||
if (outputValidationSuccessful || aubMode) {
|
||||
if (commandListCoexist) {
|
||||
cmdList = cmdListMemoryCopyRegion;
|
||||
}
|
||||
currentTest = "Memory Copy Region";
|
||||
testAppendMemoryCopyRegion(context, device0, useSyncQueue, outputValidationSuccessful, cmdList);
|
||||
printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest);
|
||||
}
|
||||
|
||||
if (outputValidationSuccessful || aubMode) {
|
||||
if (commandListCoexist) {
|
||||
cmdList = cmdListLaunchGpuKernel;
|
||||
}
|
||||
currentTest = "Launch GPU Kernel";
|
||||
testAppendGpuKernel(context, device0, useSyncQueue, outputValidationSuccessful, cmdList);
|
||||
printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest);
|
||||
@@ -430,6 +460,11 @@ int main(int argc, char *argv[]) {
|
||||
if (commandListShared) {
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
}
|
||||
if (commandListCoexist) {
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListStandardMemoryCopy));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListMemoryCopyRegion));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListLaunchGpuKernel));
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) {
|
||||
false,
|
||||
returnValue));
|
||||
|
||||
NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo());
|
||||
engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo());
|
||||
|
||||
commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)));
|
||||
commandListImmediate.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, returnValue)));
|
||||
@@ -131,5 +131,11 @@ void CmdListStateComputeModeStateFixture::setUp() {
|
||||
ModuleMutableCommandListFixture::setUp();
|
||||
}
|
||||
|
||||
void ImmediateCmdListSharedHeapsFixture::setUp() {
|
||||
DebugManager.flags.EnableFlushTaskSubmission.set(1);
|
||||
DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1);
|
||||
ModuleMutableCommandListFixture::setUp();
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -78,6 +78,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
|
||||
std::unique_ptr<L0::ult::CommandList> commandListImmediate;
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
L0::ult::CommandQueue *commandQueue;
|
||||
NEO::EngineGroupType engineGroupType;
|
||||
};
|
||||
|
||||
struct MultiReturnCommandListFixture : public ModuleMutableCommandListFixture {
|
||||
@@ -117,5 +118,11 @@ struct CmdListLargeGrfFixture : public CmdListStateComputeModeStateFixture {
|
||||
void testBody();
|
||||
};
|
||||
|
||||
struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixture {
|
||||
void setUp();
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -126,11 +126,15 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
public:
|
||||
using KernelImp::crossThreadData;
|
||||
using KernelImp::crossThreadDataSize;
|
||||
using KernelImp::dynamicStateHeapData;
|
||||
using KernelImp::dynamicStateHeapDataSize;
|
||||
using KernelImp::kernelArgHandlers;
|
||||
using KernelImp::kernelHasIndirectAccess;
|
||||
using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using KernelImp::privateMemoryGraphicsAllocation;
|
||||
using KernelImp::requiredWorkgroupOrder;
|
||||
using KernelImp::surfaceStateHeapData;
|
||||
using KernelImp::surfaceStateHeapDataSize;
|
||||
|
||||
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
|
||||
}
|
||||
|
||||
@@ -59,6 +59,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::getAllocationFromHostPtrMap;
|
||||
using BaseClass::getHostPtrAlloc;
|
||||
using BaseClass::hostPtrMap;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
using BaseClass::initialize;
|
||||
using BaseClass::partitionCount;
|
||||
@@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::csr;
|
||||
using BaseClass::finalStreamState;
|
||||
using BaseClass::frontEndStateTracking;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
@@ -134,9 +136,11 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
|
||||
using CommandListCoreFamilyImmediate<gfxCoreFamily>::requiredStreamState;
|
||||
using CommandListCoreFamilyImmediate<gfxCoreFamily>::containsAnyKernel;
|
||||
using CommandListCoreFamilyImmediate<gfxCoreFamily>::indirectAllocationsAllowed;
|
||||
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
|
||||
using BaseClass::containsAnyKernel;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
using BaseClass::requiredStreamState;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -148,6 +152,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::frontEndStateTracking;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::initialize;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::nonImmediateLogicalStateHelper;
|
||||
|
||||
@@ -45,6 +45,8 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
|
||||
using ::L0::KernelImp::createPrintfBuffer;
|
||||
using ::L0::KernelImp::crossThreadData;
|
||||
using ::L0::KernelImp::crossThreadDataSize;
|
||||
using ::L0::KernelImp::dynamicStateHeapData;
|
||||
using ::L0::KernelImp::dynamicStateHeapDataSize;
|
||||
using ::L0::KernelImp::groupSize;
|
||||
using ::L0::KernelImp::kernelImmData;
|
||||
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
@@ -76,6 +78,8 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
|
||||
using ::L0::KernelImp::createPrintfBuffer;
|
||||
using ::L0::KernelImp::crossThreadData;
|
||||
using ::L0::KernelImp::crossThreadDataSize;
|
||||
using ::L0::KernelImp::dynamicStateHeapData;
|
||||
using ::L0::KernelImp::dynamicStateHeapDataSize;
|
||||
using ::L0::KernelImp::groupSize;
|
||||
using ::L0::KernelImp::kernelImmData;
|
||||
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
|
||||
@@ -5,6 +5,10 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
@@ -716,5 +720,165 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL
|
||||
commandList->cmdQImmediate = oldCommandQueue;
|
||||
}
|
||||
|
||||
using ImmediateCmdListSharedHeapsTest = Test<ImmediateCmdListSharedHeapsFixture>;
|
||||
HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedHeapsWhenDispatchingKernelThenExpectSingleSbaCommandAndHeapsReused, IsAtLeastSkl) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
|
||||
using SAMPLER_BORDER_COLOR_STATE = typename FamilyType::SAMPLER_BORDER_COLOR_STATE;
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
|
||||
uint32_t expectedSbaCount = 1;
|
||||
auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) {
|
||||
expectedSbaCount++;
|
||||
}
|
||||
|
||||
bool dshPresent = hwInfo.capabilityTable.supportsImages || NEO::UnitTestHelper<FamilyType>::getAdditionalDshSize() > 0;
|
||||
|
||||
if (dshPresent) {
|
||||
mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers = 2;
|
||||
mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset = sizeof(SAMPLER_BORDER_COLOR_STATE);
|
||||
mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0;
|
||||
|
||||
kernel->dynamicStateHeapDataSize = static_cast<uint32_t>(sizeof(SAMPLER_STATE) * 2 + mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset);
|
||||
kernel->dynamicStateHeapData.reset(new uint8_t[kernel->dynamicStateHeapDataSize]);
|
||||
|
||||
mockKernelImmData->mockKernelDescriptor->payloadMappings.samplerTable = mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable;
|
||||
}
|
||||
|
||||
mockKernelImmData->kernelInfo->heapInfo.SurfaceStateHeapSize = static_cast<uint32_t>(sizeof(RENDER_SURFACE_STATE) + sizeof(uint32_t));
|
||||
mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable.numEntries = 1;
|
||||
mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable.tableOffset = 0x40;
|
||||
mockKernelImmData->mockKernelDescriptor->kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindfulAndStateless;
|
||||
|
||||
kernel->surfaceStateHeapDataSize = mockKernelImmData->kernelInfo->heapInfo.SurfaceStateHeapSize;
|
||||
kernel->surfaceStateHeapData.reset(new uint8_t[kernel->surfaceStateHeapDataSize]);
|
||||
|
||||
EXPECT_TRUE(commandListImmediate->isFlushTaskSubmissionEnabled);
|
||||
EXPECT_TRUE(commandListImmediate->immediateCmdListHeapSharing);
|
||||
|
||||
auto &cmdContainer = commandListImmediate->commandContainer;
|
||||
EXPECT_EQ(1u, cmdContainer.getNumIddPerBlock());
|
||||
EXPECT_TRUE(cmdContainer.immediateCmdListSharedHeap(HeapType::DYNAMIC_STATE));
|
||||
EXPECT_TRUE(cmdContainer.immediateCmdListSharedHeap(HeapType::SURFACE_STATE));
|
||||
|
||||
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto &csrStream = ultCsr.commandStream;
|
||||
|
||||
const ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
auto result = ZE_RESULT_SUCCESS;
|
||||
|
||||
auto csrDshHeap = &ultCsr.getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k);
|
||||
auto csrSshHeap = &ultCsr.getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k);
|
||||
|
||||
size_t dshUsed = csrDshHeap->getUsed();
|
||||
size_t sshUsed = csrSshHeap->getUsed();
|
||||
|
||||
size_t csrUsedBefore = csrStream.getUsed();
|
||||
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
size_t csrUsedAfter = csrStream.getUsed();
|
||||
|
||||
NEO::IndirectHeap *containerDshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
NEO::IndirectHeap *containerSshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE);
|
||||
|
||||
if (dshPresent) {
|
||||
EXPECT_EQ(csrDshHeap, containerDshHeap);
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, containerDshHeap);
|
||||
}
|
||||
EXPECT_EQ(csrSshHeap, containerSshHeap);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
|
||||
(csrUsedAfter - csrUsedBefore)));
|
||||
auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(expectedSbaCount, sbaCmds.size());
|
||||
|
||||
auto &sbaCmd = *genCmdCast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
|
||||
if (dshPresent) {
|
||||
EXPECT_TRUE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(csrDshHeap->getHeapGpuBase(), sbaCmd.getDynamicStateBaseAddress());
|
||||
} else {
|
||||
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(0u, sbaCmd.getDynamicStateBaseAddress());
|
||||
}
|
||||
EXPECT_TRUE(sbaCmd.getSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(csrSshHeap->getHeapGpuBase(), sbaCmd.getSurfaceStateBaseAddress());
|
||||
|
||||
dshUsed = csrDshHeap->getUsed() - dshUsed;
|
||||
sshUsed = csrSshHeap->getUsed() - sshUsed;
|
||||
if (dshPresent) {
|
||||
EXPECT_LT(0u, dshUsed);
|
||||
} else {
|
||||
EXPECT_EQ(0u, dshUsed);
|
||||
}
|
||||
EXPECT_LT(0u, sshUsed);
|
||||
|
||||
size_t dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(*kernel->getImmutableData()->getKernelInfo());
|
||||
size_t sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*kernel->getImmutableData()->getKernelInfo());
|
||||
|
||||
EXPECT_GE(dshEstimated, dshUsed);
|
||||
EXPECT_GE(sshEstimated, sshUsed);
|
||||
|
||||
ze_command_queue_desc_t queueDesc{};
|
||||
queueDesc.ordinal = 0u;
|
||||
queueDesc.index = 0u;
|
||||
queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
|
||||
std::unique_ptr<L0::ult::CommandList> commandListImmediateCoexisting;
|
||||
commandListImmediateCoexisting.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, result)));
|
||||
|
||||
auto &cmdContainerCoexisting = commandListImmediateCoexisting->commandContainer;
|
||||
EXPECT_EQ(1u, cmdContainerCoexisting.getNumIddPerBlock());
|
||||
EXPECT_TRUE(cmdContainerCoexisting.immediateCmdListSharedHeap(HeapType::DYNAMIC_STATE));
|
||||
EXPECT_TRUE(cmdContainerCoexisting.immediateCmdListSharedHeap(HeapType::SURFACE_STATE));
|
||||
|
||||
dshUsed = csrDshHeap->getUsed();
|
||||
sshUsed = csrSshHeap->getUsed();
|
||||
|
||||
csrUsedBefore = csrStream.getUsed();
|
||||
result = commandListImmediateCoexisting->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
csrUsedAfter = csrStream.getUsed();
|
||||
|
||||
auto containerDshHeapCoexisting = cmdContainerCoexisting.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
auto containerSshHeapCoexisting = cmdContainerCoexisting.getIndirectHeap(HeapType::SURFACE_STATE);
|
||||
|
||||
if (dshPresent) {
|
||||
EXPECT_EQ(csrDshHeap, containerDshHeapCoexisting);
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, containerDshHeapCoexisting);
|
||||
}
|
||||
EXPECT_EQ(csrSshHeap, containerSshHeapCoexisting);
|
||||
|
||||
cmdList.clear();
|
||||
sbaCmds.clear();
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
|
||||
(csrUsedAfter - csrUsedBefore)));
|
||||
sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, sbaCmds.size());
|
||||
|
||||
dshUsed = csrDshHeap->getUsed() - dshUsed;
|
||||
sshUsed = csrSshHeap->getUsed() - sshUsed;
|
||||
|
||||
if (dshPresent) {
|
||||
EXPECT_LT(0u, dshUsed);
|
||||
} else {
|
||||
EXPECT_EQ(0u, dshUsed);
|
||||
}
|
||||
EXPECT_LT(0u, sshUsed);
|
||||
|
||||
EXPECT_GE(dshEstimated, dshUsed);
|
||||
EXPECT_GE(sshEstimated, sshUsed);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h"
|
||||
@@ -569,5 +570,64 @@ HWTEST2_F(L0DebuggerTest, givenXeHpOrXeHpgCoreAndDebugIsActiveThenDisableL3Cache
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(SBAModesForDebugger, L0DebuggerParameterizedTests, ::testing::Values(0, 1));
|
||||
|
||||
struct MockKernelImmutableData : public KernelImmutableData {
|
||||
using KernelImmutableData::isaGraphicsAllocation;
|
||||
using KernelImmutableData::kernelDescriptor;
|
||||
using KernelImmutableData::kernelInfo;
|
||||
|
||||
MockKernelImmutableData(L0::Device *device) : KernelImmutableData(device) {}
|
||||
};
|
||||
|
||||
HWTEST2_F(L0DebuggerTest, givenFlushTaskSubmissionAndSharedHeapsEnabledWhenAppendingKernelUsingNewHeapThenDebugSurfaceIsProgrammedOnce, IsAtLeastGen12lp) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
|
||||
NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1);
|
||||
|
||||
ze_command_queue_desc_t queueDesc = {};
|
||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
||||
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled);
|
||||
EXPECT_TRUE(commandList->immediateCmdListHeapSharing);
|
||||
|
||||
auto kernelInfo = std::make_unique<NEO::KernelInfo>();
|
||||
auto kernelDescriptor = std::make_unique<NEO::KernelDescriptor>();
|
||||
auto kernelImmData = std::make_unique<MockKernelImmutableData>(device);
|
||||
|
||||
kernelImmData->kernelInfo = kernelInfo.get();
|
||||
kernelImmData->kernelDescriptor = kernelDescriptor.get();
|
||||
kernelImmData->isaGraphicsAllocation.reset(new MockGraphicsAllocation());
|
||||
|
||||
Mock<::L0::Kernel> kernel;
|
||||
kernel.kernelImmData = kernelImmData.get();
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
auto csrHeap = &commandList->csr->getIndirectHeap(NEO::HeapType::SURFACE_STATE, 0);
|
||||
ASSERT_NE(nullptr, csrHeap);
|
||||
|
||||
auto debugSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(csrHeap->getCpuBase());
|
||||
ASSERT_NE(debugSurfaceState, nullptr);
|
||||
auto debugSurface = static_cast<::L0::DeviceImp *>(device)->getDebugSurface();
|
||||
ASSERT_NE(debugSurface, nullptr);
|
||||
ASSERT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress());
|
||||
|
||||
memset(debugSurfaceState, 0, sizeof(*debugSurfaceState));
|
||||
|
||||
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
ASSERT_EQ(0u, debugSurfaceState->getSurfaceBaseAddress());
|
||||
|
||||
kernelImmData->isaGraphicsAllocation.reset(nullptr);
|
||||
commandList->destroy();
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -96,6 +96,9 @@ CommandContainer::ErrorCode CommandContainer::initialize(Device *device, Allocat
|
||||
if (!hardwareInfo.capabilityTable.supportsImages && IndirectHeap::Type::DYNAMIC_STATE == i) {
|
||||
continue;
|
||||
}
|
||||
if (immediateCmdListSharedHeap(static_cast<HeapType>(i))) {
|
||||
continue;
|
||||
}
|
||||
allocationIndirectHeaps[i] = heapHelper->getHeapAllocation(i,
|
||||
heapSize,
|
||||
alignedSize,
|
||||
@@ -185,32 +188,40 @@ void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
|
||||
size_t size) {
|
||||
auto indirectHeap = getIndirectHeap(heapType);
|
||||
|
||||
if (indirectHeap->getAvailableSpace() < size) {
|
||||
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
|
||||
newSize *= 2;
|
||||
newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size);
|
||||
newSize = alignUp(newSize, MemoryConstants::pageSize);
|
||||
auto oldAlloc = getIndirectHeapAllocation(heapType);
|
||||
auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex());
|
||||
UNRECOVERABLE_IF(!oldAlloc);
|
||||
UNRECOVERABLE_IF(!newAlloc);
|
||||
auto oldBase = indirectHeap->getHeapGpuBase();
|
||||
indirectHeap->replaceGraphicsAllocation(newAlloc);
|
||||
indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(),
|
||||
newAlloc->getUnderlyingBufferSize());
|
||||
auto newBase = indirectHeap->getHeapGpuBase();
|
||||
getResidencyContainer().push_back(newAlloc);
|
||||
getDeallocationContainer().push_back(oldAlloc);
|
||||
setIndirectHeapAllocation(heapType, newAlloc);
|
||||
if (oldBase != newBase) {
|
||||
setHeapDirty(heapType);
|
||||
if (immediateCmdListSharedHeap(heapType)) {
|
||||
UNRECOVERABLE_IF(indirectHeap == nullptr);
|
||||
UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < size);
|
||||
getResidencyContainer().push_back(indirectHeap->getGraphicsAllocation());
|
||||
} else {
|
||||
if (indirectHeap->getAvailableSpace() < size) {
|
||||
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
|
||||
newSize *= 2;
|
||||
newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size);
|
||||
newSize = alignUp(newSize, MemoryConstants::pageSize);
|
||||
auto oldAlloc = getIndirectHeapAllocation(heapType);
|
||||
auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex());
|
||||
UNRECOVERABLE_IF(!oldAlloc);
|
||||
UNRECOVERABLE_IF(!newAlloc);
|
||||
auto oldBase = indirectHeap->getHeapGpuBase();
|
||||
indirectHeap->replaceGraphicsAllocation(newAlloc);
|
||||
indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(),
|
||||
newAlloc->getUnderlyingBufferSize());
|
||||
auto newBase = indirectHeap->getHeapGpuBase();
|
||||
getResidencyContainer().push_back(newAlloc);
|
||||
getDeallocationContainer().push_back(oldAlloc);
|
||||
setIndirectHeapAllocation(heapType, newAlloc);
|
||||
if (oldBase != newBase) {
|
||||
setHeapDirty(heapType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return indirectHeap->getSpace(size);
|
||||
}
|
||||
|
||||
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) {
|
||||
auto indirectHeap = getIndirectHeap(heapType);
|
||||
UNRECOVERABLE_IF(indirectHeap == nullptr);
|
||||
auto sizeRequested = sizeRequired;
|
||||
|
||||
auto heapBuffer = indirectHeap->getSpace(0);
|
||||
@@ -218,27 +229,32 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType hea
|
||||
sizeRequested += alignment;
|
||||
}
|
||||
|
||||
if (indirectHeap->getAvailableSpace() < sizeRequested) {
|
||||
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
|
||||
newSize = alignUp(newSize, MemoryConstants::pageSize);
|
||||
auto oldAlloc = getIndirectHeapAllocation(heapType);
|
||||
auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex());
|
||||
UNRECOVERABLE_IF(!oldAlloc);
|
||||
UNRECOVERABLE_IF(!newAlloc);
|
||||
auto oldBase = indirectHeap->getHeapGpuBase();
|
||||
indirectHeap->replaceGraphicsAllocation(newAlloc);
|
||||
indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(),
|
||||
newAlloc->getUnderlyingBufferSize());
|
||||
auto newBase = indirectHeap->getHeapGpuBase();
|
||||
getResidencyContainer().push_back(newAlloc);
|
||||
getDeallocationContainer().push_back(oldAlloc);
|
||||
setIndirectHeapAllocation(heapType, newAlloc);
|
||||
if (oldBase != newBase) {
|
||||
setHeapDirty(heapType);
|
||||
}
|
||||
if (heapType == HeapType::SURFACE_STATE) {
|
||||
indirectHeap->getSpace(reservedSshSize);
|
||||
sshAllocations.push_back(oldAlloc);
|
||||
if (immediateCmdListSharedHeap(heapType)) {
|
||||
UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < sizeRequested);
|
||||
getResidencyContainer().push_back(indirectHeap->getGraphicsAllocation());
|
||||
} else {
|
||||
if (indirectHeap->getAvailableSpace() < sizeRequested) {
|
||||
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
|
||||
newSize = alignUp(newSize, MemoryConstants::pageSize);
|
||||
auto oldAlloc = getIndirectHeapAllocation(heapType);
|
||||
auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex());
|
||||
UNRECOVERABLE_IF(!oldAlloc);
|
||||
UNRECOVERABLE_IF(!newAlloc);
|
||||
auto oldBase = indirectHeap->getHeapGpuBase();
|
||||
indirectHeap->replaceGraphicsAllocation(newAlloc);
|
||||
indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(),
|
||||
newAlloc->getUnderlyingBufferSize());
|
||||
auto newBase = indirectHeap->getHeapGpuBase();
|
||||
getResidencyContainer().push_back(newAlloc);
|
||||
getDeallocationContainer().push_back(oldAlloc);
|
||||
setIndirectHeapAllocation(heapType, newAlloc);
|
||||
if (oldBase != newBase) {
|
||||
setHeapDirty(heapType);
|
||||
}
|
||||
if (heapType == HeapType::SURFACE_STATE) {
|
||||
indirectHeap->getSpace(reservedSshSize);
|
||||
sshAllocations.push_back(oldAlloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -329,7 +345,19 @@ void CommandContainer::prepareBindfulSsh() {
|
||||
}
|
||||
|
||||
IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) {
|
||||
return indirectHeaps[heapType].get();
|
||||
if (immediateCmdListSharedHeap(heapType)) {
|
||||
return heapType == HeapType::SURFACE_STATE ? sharedSshCsrHeap : sharedDshCsrHeap;
|
||||
} else {
|
||||
return indirectHeaps[heapType].get();
|
||||
}
|
||||
}
|
||||
|
||||
void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize) {
|
||||
sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize);
|
||||
|
||||
if (dshRequiredSize > 0) {
|
||||
sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
class CommandStreamReceiver;
|
||||
class Device;
|
||||
class GraphicsAllocation;
|
||||
class LinearStream;
|
||||
@@ -94,13 +95,20 @@ class CommandContainer : public NonCopyableOrMovableClass {
|
||||
void setIddBlock(void *iddBlock) { this->iddBlock = iddBlock; }
|
||||
void *getIddBlock() { return iddBlock; }
|
||||
uint32_t getNumIddPerBlock() const { return numIddsPerBlock; }
|
||||
void setNumIddPerBlock(uint32_t value) { numIddsPerBlock = value; }
|
||||
void setReservedSshSize(size_t reserveSize) {
|
||||
reservedSshSize = reserveSize;
|
||||
}
|
||||
|
||||
bool getFlushTaskUsedForImmediate() const { return isFlushTaskUsedForImmediate; }
|
||||
void setFlushTaskUsedForImmediate(bool flushTaskUsedForImmediate) { isFlushTaskUsedForImmediate = flushTaskUsedForImmediate; }
|
||||
|
||||
void setImmediateCmdListCsr(CommandStreamReceiver *newValue) {
|
||||
this->immediateCmdListCsr = newValue;
|
||||
}
|
||||
bool immediateCmdListSharedHeap(HeapType heapType) {
|
||||
return (this->immediateCmdListCsr != nullptr && (heapType == HeapType::DYNAMIC_STATE || heapType == HeapType::SURFACE_STATE));
|
||||
}
|
||||
void ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize);
|
||||
HeapContainer sshAllocations;
|
||||
uint64_t currentLinearStreamStartOffset = 0u;
|
||||
uint32_t slmSize = std::numeric_limits<uint32_t>::max();
|
||||
@@ -129,6 +137,9 @@ class CommandContainer : public NonCopyableOrMovableClass {
|
||||
Device *device = nullptr;
|
||||
AllocationsList *reusableAllocationList = nullptr;
|
||||
size_t reservedSshSize = 0;
|
||||
CommandStreamReceiver *immediateCmdListCsr = nullptr;
|
||||
IndirectHeap *sharedSshCsrHeap = nullptr;
|
||||
IndirectHeap *sharedDshCsrHeap = nullptr;
|
||||
|
||||
uint32_t dirtyHeaps = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t numIddsPerBlock = 64;
|
||||
|
||||
@@ -30,6 +30,7 @@ class IndirectHeap;
|
||||
class LogicalStateHelper;
|
||||
class Gmm;
|
||||
struct HardwareInfo;
|
||||
struct KernelInfo;
|
||||
struct StateComputeModeProperties;
|
||||
|
||||
struct EncodeDispatchKernelArgs {
|
||||
@@ -112,6 +113,10 @@ struct EncodeDispatchKernel {
|
||||
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo);
|
||||
|
||||
static constexpr bool shouldUpdateGlobalAtomics(bool ¤tVal, bool refVal, bool updateCurrent);
|
||||
|
||||
static size_t getSizeRequiredDsh(const KernelInfo &kernelInfo);
|
||||
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
|
||||
inline static uint32_t additionalSizeRequiredDsh();
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -121,8 +126,8 @@ struct EncodeStates {
|
||||
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
||||
using SAMPLER_BORDER_COLOR_STATE = typename GfxFamily::SAMPLER_BORDER_COLOR_STATE;
|
||||
|
||||
static const uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize;
|
||||
static const size_t alignInterfaceDescriptorData = MemoryConstants::cacheLineSize;
|
||||
static constexpr uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize;
|
||||
static constexpr size_t alignInterfaceDescriptorData = MemoryConstants::cacheLineSize;
|
||||
|
||||
static uint32_t copySamplerState(IndirectHeap *dsh,
|
||||
uint32_t samplerStateOffset,
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "shared/source/kernel/implicit_args.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
|
||||
#include "encode_surface_state.inl"
|
||||
|
||||
@@ -698,6 +699,39 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
|
||||
template <typename Family>
|
||||
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool ¤tVal, bool refVal, bool updateCurrent) { return false; }
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelInfo &kernelInfo) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;
|
||||
constexpr auto samplerStateSize = sizeof(typename Family::SAMPLER_STATE);
|
||||
const auto numSamplers = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
||||
const auto additionalDshSize = additionalSizeRequiredDsh();
|
||||
if (numSamplers == 0U) {
|
||||
return alignUp(additionalDshSize, EncodeStates<Family>::alignInterfaceDescriptorData);
|
||||
}
|
||||
|
||||
size_t size = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset -
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor;
|
||||
size = alignUp(size, EncodeStates<Family>::alignIndirectStatePointer);
|
||||
|
||||
size += numSamplers * samplerStateSize;
|
||||
size = alignUp(size, INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
if (additionalDshSize > 0) {
|
||||
size += additionalDshSize;
|
||||
size = alignUp(size, EncodeStates<Family>::alignInterfaceDescriptorData);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::getSizeRequiredSsh(const KernelInfo &kernelInfo) {
|
||||
using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE;
|
||||
size_t requiredSshSize = kernelInfo.heapInfo.SurfaceStateHeapSize;
|
||||
requiredSshSize = alignUp(requiredSshSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
return requiredSshSize;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
|
||||
@@ -104,13 +104,13 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
|
||||
|
||||
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
UNRECOVERABLE_IF(!heap);
|
||||
|
||||
uint32_t samplerStateOffset = 0;
|
||||
uint32_t samplerCount = 0;
|
||||
|
||||
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
|
||||
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
UNRECOVERABLE_IF(!heap);
|
||||
|
||||
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
||||
samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
|
||||
kernelDescriptor.payloadMappings.samplerTable.numSamplers,
|
||||
@@ -539,4 +539,9 @@ void EncodeDispatchKernel<Family>::setupPostSyncMocs(WALKER_TYPE &walkerCmd, con
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh() {
|
||||
return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -127,13 +127,13 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
|
||||
|
||||
if constexpr (Family::supportsSampler) {
|
||||
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
UNRECOVERABLE_IF(!heap);
|
||||
|
||||
uint32_t samplerStateOffset = 0;
|
||||
uint32_t samplerCount = 0;
|
||||
|
||||
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
|
||||
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
UNRECOVERABLE_IF(!heap);
|
||||
|
||||
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
||||
samplerStateOffset = EncodeStates<Family>::copySamplerState(
|
||||
heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
|
||||
@@ -768,4 +768,9 @@ inline void EncodeStoreMMIO<Family>::appendFlags(MI_STORE_REGISTER_MEM *storeReg
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh() {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -140,6 +140,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
}
|
||||
void initializeDeviceWithFirstSubmission() override;
|
||||
|
||||
HeapDirtyState &getSshState() {
|
||||
return sshState;
|
||||
}
|
||||
|
||||
protected:
|
||||
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programL3(LinearStream &csr, uint32_t &newL3Config);
|
||||
|
||||
@@ -342,7 +342,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (stallingCommandsOnNextFlushRequired) {
|
||||
programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
}
|
||||
const bool hasDsh = hwInfo.capabilityTable.supportsImages;
|
||||
const bool hasDsh = hwInfo.capabilityTable.supportsImages && dsh != nullptr;
|
||||
bool dshDirty = hasDsh ? dshState.updateAndCheck(dsh) : false;
|
||||
bool iohDirty = iohState.updateAndCheck(ioh);
|
||||
bool sshDirty = sshState.updateAndCheck(ssh);
|
||||
|
||||
@@ -407,6 +407,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDrmCompletionFence, -1, "Enables DRM compl
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, UseDrmCompletionFenceForAllAllocations, -1, "Uses DRM completion fence for all allocations, -1:default (disabled), 0:disable, 1:enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableChipsetUniqueUUID, -1, "Enables retrieving chipset unique UUID using telemetry, -1:default (disabled), 0:disable, 1:enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableFlushTaskSubmission, -1, "Driver uses csr flushTask for immediate commandlist submissions, -1:default (enabled), 0:disabled, 1:enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableImmediateCmdListHeapSharing, -1, "Immediate command lists using flush task use current csr heap instead private cmd list heap, -1:default (disabled), 0:disabled, 1:enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableBcsSwControlWa, -1, "Enable BCS WA via BCSSWCONTROL MMIO. -1: default, 0: disabled, 1: if src in system mem, 2: if dst in system mem, 3: if src and dst in system mem, 4: always")
|
||||
|
||||
/* IMPLICIT SCALING */
|
||||
|
||||
@@ -91,6 +91,7 @@ struct UnitTestHelper {
|
||||
static bool getDisableFusionStateFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd);
|
||||
static bool getComputeDispatchAllWalkerFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd);
|
||||
static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd);
|
||||
static size_t getAdditionalDshSize();
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -92,4 +92,9 @@ bool UnitTestHelper<GfxFamily>::getSystolicFlagValueFromPipelineSelectCommand(co
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t UnitTestHelper<GfxFamily>::getAdditionalDshSize() {
|
||||
return sizeof(typename GfxFamily::INTERFACE_DESCRIPTOR_DATA);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -121,4 +121,9 @@ bool UnitTestHelper<GfxFamily>::getSystolicFlagValueFromPipelineSelectCommand(co
|
||||
return pipelineSelectCmd.getSystolicModeEnable();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t UnitTestHelper<GfxFamily>::getAdditionalDshSize() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -172,6 +172,7 @@ EnableUsmConcurrentAccessSupport = 0
|
||||
EnableSharedSystemUsmSupport = -1
|
||||
EnablePassInlineData = -1
|
||||
ForceFineGrainedSVMSupport = -1
|
||||
EnableImmediateCmdListHeapSharing = -1
|
||||
ForcePipeSupport = -1
|
||||
ForceSystemMemoryPlacement = 0
|
||||
ForceNonSystemMemoryPlacement = 0
|
||||
|
||||
@@ -18,26 +18,17 @@ using namespace NEO;
|
||||
|
||||
constexpr uint32_t defaultNumIddsPerBlock = 64;
|
||||
|
||||
class CommandContainerTest : public DeviceFixture,
|
||||
public ::testing::Test {
|
||||
using CommandContainerFixture = DeviceFixture;
|
||||
using CommandContainerTest = Test<CommandContainerFixture>;
|
||||
|
||||
class MyMockCommandContainer : public CommandContainer {
|
||||
public:
|
||||
void SetUp() override {
|
||||
::testing::Test::SetUp();
|
||||
DeviceFixture::setUp();
|
||||
}
|
||||
void TearDown() override {
|
||||
DeviceFixture::tearDown();
|
||||
::testing::Test::TearDown();
|
||||
}
|
||||
using CommandContainer::allocationIndirectHeaps;
|
||||
using CommandContainer::dirtyHeaps;
|
||||
using CommandContainer::getTotalCmdBufferSize;
|
||||
};
|
||||
|
||||
struct CommandContainerHeapStateTests : public ::testing::Test {
|
||||
class MyMockCommandContainer : public CommandContainer {
|
||||
public:
|
||||
using CommandContainer::dirtyHeaps;
|
||||
};
|
||||
|
||||
MyMockCommandContainer myCommandContainer;
|
||||
};
|
||||
|
||||
@@ -795,23 +786,18 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStre
|
||||
TEST_F(CommandContainerTest, GivenCmdContainerAndDebugFlagWhenContainerIsInitializedThenStreamSizeEqualsAlignedTotalCmdBuffSizeDecreasedOfReservedSize) {
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
class MyCommandContainer : public CommandContainer {
|
||||
public:
|
||||
using CommandContainer::getTotalCmdBufferSize;
|
||||
};
|
||||
|
||||
DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.set(0);
|
||||
MyCommandContainer cmdContainer;
|
||||
MyMockCommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, true);
|
||||
size_t alignedSize = alignUp<size_t>(cmdContainer.getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
|
||||
EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyCommandContainer::cmdBufferReservedSize);
|
||||
EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyMockCommandContainer::cmdBufferReservedSize);
|
||||
|
||||
auto newSizeInKB = 512;
|
||||
DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.set(newSizeInKB);
|
||||
MyCommandContainer cmdContainer2;
|
||||
MyMockCommandContainer cmdContainer2;
|
||||
cmdContainer2.initialize(pDevice, nullptr, true);
|
||||
alignedSize = alignUp<size_t>(cmdContainer.getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
|
||||
EXPECT_EQ(cmdContainer2.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyCommandContainer::cmdBufferReservedSize);
|
||||
EXPECT_EQ(cmdContainer2.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyMockCommandContainer::cmdBufferReservedSize);
|
||||
}
|
||||
|
||||
TEST_F(CommandContainerTest, givenCmdContainerWhenAlocatingNextCmdBufferThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) {
|
||||
@@ -841,15 +827,68 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBuf
|
||||
}
|
||||
|
||||
TEST_F(CommandContainerTest, GivenCmdContainerWhenContainerIsInitializedThenSurfaceStateIndirectHeapSizeIsCorrect) {
|
||||
|
||||
class MyCommandContainer : public CommandContainer {
|
||||
public:
|
||||
using CommandContainer::allocationIndirectHeaps;
|
||||
};
|
||||
|
||||
MyCommandContainer cmdContainer;
|
||||
MyMockCommandContainer cmdContainer;
|
||||
cmdContainer.initialize(pDevice, nullptr, true);
|
||||
auto size = cmdContainer.allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getUnderlyingBufferSize();
|
||||
constexpr size_t expectedHeapSize = MemoryConstants::pageSize64k;
|
||||
EXPECT_EQ(expectedHeapSize, size);
|
||||
}
|
||||
|
||||
TEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWithoutEnsuringSpaceThenExpectNullptrReturnedOrUnrecoverable) {
|
||||
CommandContainer cmdContainer;
|
||||
cmdContainer.setImmediateCmdListCsr(pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
cmdContainer.setNumIddPerBlock(1);
|
||||
auto code = cmdContainer.initialize(pDevice, nullptr, true);
|
||||
EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code);
|
||||
|
||||
EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE));
|
||||
EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE));
|
||||
|
||||
EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 0), std::exception);
|
||||
EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 0, 0), std::exception);
|
||||
|
||||
EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0), std::exception);
|
||||
EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0), std::exception);
|
||||
|
||||
cmdContainer.ensureHeapSizePrepared(0, 0);
|
||||
|
||||
EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE));
|
||||
EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE));
|
||||
|
||||
EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 0), std::exception);
|
||||
EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 0, 0), std::exception);
|
||||
|
||||
EXPECT_NO_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0));
|
||||
EXPECT_NO_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0));
|
||||
|
||||
cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte);
|
||||
|
||||
auto dshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
EXPECT_NE(nullptr, dshHeap);
|
||||
auto sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE);
|
||||
EXPECT_NE(nullptr, sshHeap);
|
||||
|
||||
size_t sizeUsedDsh = dshHeap->getUsed();
|
||||
size_t sizeUsedSsh = sshHeap->getUsed();
|
||||
|
||||
void *dshPtr = cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 64);
|
||||
void *sshPtr = cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 64);
|
||||
|
||||
EXPECT_EQ(ptrOffset(dshHeap->getCpuBase(), sizeUsedDsh), dshPtr);
|
||||
EXPECT_EQ(ptrOffset(sshHeap->getCpuBase(), sizeUsedSsh), sshPtr);
|
||||
|
||||
auto alignedHeapDsh = cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 128, 128);
|
||||
auto alignedHeapSsh = cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 128, 128);
|
||||
|
||||
EXPECT_EQ(dshHeap, alignedHeapDsh);
|
||||
EXPECT_EQ(sshHeap, alignedHeapSsh);
|
||||
|
||||
dshHeap->getSpace(dshHeap->getAvailableSpace() - 32);
|
||||
sshHeap->getSpace(sshHeap->getAvailableSpace() - 32);
|
||||
|
||||
EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 64), std::exception);
|
||||
EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 64, 64), std::exception);
|
||||
|
||||
EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 64), std::exception);
|
||||
EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 64, 64), std::exception);
|
||||
}
|
||||
|
||||
@@ -2124,3 +2124,27 @@ using SystolicSupport = IsAnyProducts<IGFX_ALDERLAKE_P, IGFX_XE_HP_SDV, IGFX_DG2
|
||||
HWTEST2_F(CommandStreamReceiverSystolicTests, givenSystolicModeChangedWhenFlushTaskCalledThenSystolicStateIsUpdated, SystolicSupport) {
|
||||
testBody<FamilyType>();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverTest, givenSshDirtyStateWhenUpdatingStateWithNewHeapThenExpectDirtyStateTrue) {
|
||||
MockGraphicsAllocation allocation{};
|
||||
allocation.gpuAddress = 0xABC000;
|
||||
allocation.size = 0x1000;
|
||||
|
||||
IndirectHeap dummyHeap(&allocation, false);
|
||||
|
||||
auto dirtyStateCopy = static_cast<CommandStreamReceiverHw<FamilyType> *>(commandStreamReceiver)->getSshState();
|
||||
|
||||
bool check = dirtyStateCopy.updateAndCheck(&dummyHeap);
|
||||
EXPECT_TRUE(check);
|
||||
|
||||
check = dirtyStateCopy.updateAndCheck(&dummyHeap);
|
||||
EXPECT_FALSE(check);
|
||||
|
||||
auto dirtyState = static_cast<CommandStreamReceiverHw<FamilyType> *>(commandStreamReceiver)->getSshState();
|
||||
|
||||
check = dirtyState.updateAndCheck(&dummyHeap);
|
||||
EXPECT_TRUE(check);
|
||||
|
||||
check = dirtyState.updateAndCheck(&dummyHeap);
|
||||
EXPECT_FALSE(check);
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/helpers/gtest_helpers.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
|
||||
@@ -1324,3 +1325,67 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenBindlessModeDisabledelWithSampler
|
||||
|
||||
EXPECT_EQ(std::find(cmdContainer->getResidencyContainer().begin(), cmdContainer->getResidencyContainer().end(), pDevice->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()), cmdContainer->getResidencyContainer().end());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredDshSpaceThenReturnCorrectValues) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
|
||||
|
||||
size_t additionalSize = UnitTestHelper<FamilyType>::getAdditionalDshSize();
|
||||
size_t expectedSize = alignUp(additionalSize, EncodeStates<FamilyType>::alignInterfaceDescriptorData);
|
||||
|
||||
// no samplers
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 0;
|
||||
size_t size = EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(kernelInfo);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
|
||||
// two samplers, no border color state
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 2;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset = 0;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor = 0;
|
||||
|
||||
// align samplers
|
||||
size_t alignedSamplers = alignUp(2 * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
// additional IDD for requiring platforms
|
||||
if (additionalSize > 0) {
|
||||
expectedSize = alignUp(alignedSamplers + additionalSize, EncodeStates<FamilyType>::alignInterfaceDescriptorData);
|
||||
} else {
|
||||
expectedSize = alignedSamplers;
|
||||
}
|
||||
|
||||
size = EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(kernelInfo);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
|
||||
// three samplers, border color state
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 3;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset = 32;
|
||||
|
||||
// align border color state and samplers
|
||||
alignedSamplers = alignUp(alignUp(32, EncodeStates<FamilyType>::alignIndirectStatePointer) + 3 * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
// additional IDD for requiring platforms
|
||||
if (additionalSize > 0) {
|
||||
expectedSize = alignUp(alignedSamplers + additionalSize, EncodeStates<FamilyType>::alignInterfaceDescriptorData);
|
||||
} else {
|
||||
expectedSize = alignedSamplers;
|
||||
}
|
||||
size = EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(kernelInfo);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredSshSpaceThenReturnCorrectValues) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
// no surface states
|
||||
kernelInfo.heapInfo.SurfaceStateHeapSize = 0;
|
||||
size_t size = EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(kernelInfo);
|
||||
EXPECT_EQ(0u, size);
|
||||
|
||||
// two surface states and BTI indices
|
||||
kernelInfo.heapInfo.SurfaceStateHeapSize = 2 * sizeof(RENDER_SURFACE_STATE) + 2 * sizeof(uint32_t);
|
||||
size_t expectedSize = alignUp(kernelInfo.heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
size = EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(kernelInfo);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
@@ -44,6 +45,7 @@ class CommandEncodeStatesFixture : public DeviceFixture {
|
||||
}
|
||||
|
||||
KernelDescriptor descriptor;
|
||||
KernelInfo kernelInfo;
|
||||
std::unique_ptr<MyMockCommandContainer> cmdContainer;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user