fix: Reuse private allocations during cmdList dispatch
Related-To: NEO-8201
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
commit 5807d512b3 (parent ba4867c3d0)
committed by Compute-Runtime-Automation
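The change keys private-memory allocations by the kernel's per-HW-thread private memory size, so a later dispatch that needs the same size reuses the existing surface instead of allocating a new one. A minimal sketch of that lookup, with hypothetical stand-in names (the driver itself works with NEO::GraphicsAllocation and the device memory manager, as the hunks below show):

    #include <cstdint>
    #include <unordered_map>

    struct Allocation {}; // stand-in for NEO::GraphicsAllocation

    // operator[] default-inserts nullptr the first time a size is seen, so each
    // distinct size allocates once and every later request for it is a reuse.
    Allocation *getOrCreatePrivateAlloc(uint32_t sizePerHwThread,
                                        std::unordered_map<uint32_t, Allocation *> &allocsToReuse) {
        Allocation *&slot = allocsToReuse[sizePerHwThread];
        if (slot == nullptr) {
            slot = new Allocation{}; // stand-in for allocatePrivateMemoryGraphicsAllocation()
        }
        return slot;
    }

Regular command lists own one such map per list (emptied in the destructor and on reset()); immediate command lists share the map owned by their command stream receiver, accessed under obtainUniqueOwnership().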
@@ -21,6 +21,7 @@
 #include <level_zero/zet_api.h>
 
 #include <map>
+#include <unordered_map>
 #include <vector>
 
 struct _ze_command_list_handle_t {};
@@ -355,7 +356,7 @@ struct CommandList : _ze_command_list_handle_t {
     MOCKABLE_VIRTUAL void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
 
     std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
-    std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
+    std::unordered_map<uint32_t, NEO::GraphicsAllocation *> ownedPrivateAllocations;
     std::vector<NEO::GraphicsAllocation *> patternAllocations;
     std::vector<Kernel *> printfKernelContainer;
@@ -309,7 +309,8 @@ struct CommandListCoreFamily : CommandListImp {
         return (this->pipeControlMultiKernelEventSync && splitKernel) ||
                compactL3FlushEvent(dcFlush);
     }
-    void allocateKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread);
+    MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse);
+    virtual void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread);
     CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
     void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
     void dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
@@ -20,6 +20,7 @@
 #include "shared/source/helpers/definitions/command_encoder_args.h"
 #include "shared/source/helpers/gfx_core_helper.h"
 #include "shared/source/helpers/hw_info.h"
+#include "shared/source/helpers/kernel_helpers.h"
 #include "shared/source/helpers/logical_state_helper.h"
 #include "shared/source/helpers/pipe_control_args.h"
 #include "shared/source/helpers/preamble.h"
@@ -54,6 +55,7 @@
 #include "CL/cl.h"
 
 #include <algorithm>
+#include <unordered_map>
 
 namespace L0 {
 
@@ -71,8 +73,8 @@ inline ze_result_t parseErrorCode(NEO::CommandContainer::ErrorCode returnValue)
 template <GFXCORE_FAMILY gfxCoreFamily>
 CommandListCoreFamily<gfxCoreFamily>::~CommandListCoreFamily() {
     clearCommandsToPatch();
-    for (auto alloc : this->ownedPrivateAllocations) {
-        device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc);
+    for (auto &alloc : this->ownedPrivateAllocations) {
+        device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc.second);
     }
     this->ownedPrivateAllocations.clear();
     for (auto &patternAlloc : this->patternAllocations) {
@@ -129,8 +131,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
         this->returnPoints.clear();
     }
 
-    for (auto alloc : this->ownedPrivateAllocations) {
-        device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc);
+    for (auto &alloc : this->ownedPrivateAllocations) {
+        device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc.second);
     }
     this->ownedPrivateAllocations.clear();
     cmdListCurrentStartOffset = 0;
@@ -3172,16 +3174,27 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
 }
 
 template <GFXCORE_FAMILY gfxCoreFamily>
-void CommandListCoreFamily<gfxCoreFamily>::allocateKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) {
+void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) {
     L0::KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
     if (sizePerHwThread != 0U && kernelImp->getParentModule().shouldAllocatePrivateMemoryPerDispatch()) {
-        auto privateMemoryGraphicsAllocation = kernel->allocatePrivateMemoryGraphicsAllocation();
-        kernel->patchCrossthreadDataWithPrivateAllocation(privateMemoryGraphicsAllocation);
-        this->commandContainer.addToResidencyContainer(privateMemoryGraphicsAllocation);
-        this->ownedPrivateAllocations.push_back(privateMemoryGraphicsAllocation);
+        allocateOrReuseKernelPrivateMemory(kernel, sizePerHwThread, ownedPrivateAllocations);
     }
 }
 
+template <GFXCORE_FAMILY gfxCoreFamily>
+void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse) {
+    L0::KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
+    NEO::GraphicsAllocation *privateAlloc = nullptr;
+
+    if (privateAllocsToReuse[sizePerHwThread] != nullptr) {
+        privateAlloc = privateAllocsToReuse[sizePerHwThread];
+    } else {
+        privateAlloc = kernelImp->allocatePrivateMemoryGraphicsAllocation();
+        privateAllocsToReuse[sizePerHwThread] = privateAlloc;
+    }
+    kernelImp->patchAndMoveToResidencyContainerPrivateSurface(privateAlloc);
+}
+
 template <GFXCORE_FAMILY gfxCoreFamily>
 CmdListEventOperation CommandListCoreFamily<gfxCoreFamily>::estimateEventPostSync(Event *event, uint32_t operations) {
     CmdListEventOperation ret;
@@ -191,6 +191,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
     void handleInOrderDependencyCounter();
     bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
     bool useCounterAllocationForInOrderMode() const override { return true; }
+    void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
 
     MOCKABLE_VIRTUAL void checkAssert();
     ComputeFlushMethodType computeFlushMethod = nullptr;
@@ -1321,4 +1321,13 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::setupFlushMethod(const NEO::
     }
 }
 
+template <GFXCORE_FAMILY gfxCoreFamily>
+void CommandListCoreFamilyImmediate<gfxCoreFamily>::allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) {
+    L0::KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
+    if (sizePerHwThread != 0U && kernelImp->getParentModule().shouldAllocatePrivateMemoryPerDispatch()) {
+        auto ownership = this->csr->obtainUniqueOwnership();
+        this->allocateOrReuseKernelPrivateMemory(kernel, sizePerHwThread, this->csr->getOwnedPrivateAllocations());
+    }
+}
+
 } // namespace L0
@@ -91,7 +91,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
 
     kernel->patchGlobalOffset();
 
-    this->allocateKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
+    this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
 
     if (!launchParams.isIndirect) {
         kernel->setGroupCount(threadGroupDimensions->groupCountX,
@@ -152,8 +152,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
     auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
 
     kernel->patchGlobalOffset();
 
-    this->allocateKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
+    this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
 
     if (launchParams.isIndirect && threadGroupDimensions) {
         prepareIndirectParams(threadGroupDimensions);
@@ -909,6 +909,11 @@ void KernelImp::setInlineSamplers() {
     }
 }
 
+void KernelImp::patchAndMoveToResidencyContainerPrivateSurface(NEO::GraphicsAllocation *alloc) {
+    this->patchCrossthreadDataWithPrivateAllocation(alloc);
+    this->residencyContainer.push_back(alloc);
+}
+
 ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
     this->kernelImmData = module->getKernelImmutableData(desc->pKernelName);
     if (this->kernelImmData == nullptr) {
@@ -1017,8 +1022,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
     auto &kernelAttributes = kernelDescriptor.kernelAttributes;
     if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) {
         this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation();
-        this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation);
-        this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
+        this->patchAndMoveToResidencyContainerPrivateSurface(this->privateMemoryGraphicsAllocation);
     }
 
     this->createPrintfBuffer();
@@ -90,6 +90,8 @@ struct KernelImp : Kernel {
 
     void setInlineSamplers();
 
+    void patchAndMoveToResidencyContainerPrivateSurface(NEO::GraphicsAllocation *alloc);
+
     ze_result_t initialize(const ze_kernel_desc_t *desc);
 
     const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; }
@@ -15,6 +15,8 @@
 #include "level_zero/core/test/unit_tests/mocks/mock_device.h"
 #include "level_zero/core/test/unit_tests/white_box.h"
 
+#include <unordered_map>
+
 namespace NEO {
 class GraphicsAllocation;
 }
@@ -30,7 +32,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
     using BaseClass::addFlushRequiredCommand;
-    using BaseClass::allocateKernelPrivateMemoryIfNeeded;
+    using BaseClass::allocateOrReuseKernelPrivateMemoryIfNeeded;
    using BaseClass::appendBlitFill;
     using BaseClass::appendCopyImageBlit;
     using BaseClass::appendDispatchOffsetRegister;
@@ -500,12 +502,14 @@ struct MockCommandList : public CommandList {
 };
 
 template <GFXCORE_FAMILY gfxCoreFamily>
-class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
+class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
   public:
     using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
+    using BaseClass::allocateOrReuseKernelPrivateMemoryIfNeeded;
     using BaseClass::commandContainer;
     using BaseClass::dcFlushSupport;
     using BaseClass::device;
+    using BaseClass::ownedPrivateAllocations;
 
     ADDMETHOD(appendMemoryCopyKernelWithGA, ze_result_t, false, ZE_RESULT_SUCCESS,
               (void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
@@ -526,6 +530,19 @@ class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
               uint64_t srcOffset,
               uint64_t size));
 
+    ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemory,
+                         false,
+                         (L0::Kernel * kernel,
+                          uint32_t sizePerHwThread,
+                          std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse),
+                         (kernel, sizePerHwThread, privateAllocsToReuse));
+
+    ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded,
+                         false,
+                         (L0::Kernel * kernel,
+                          uint32_t sizePerHwThread),
+                         (kernel, sizePerHwThread));
+
     AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
         return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocationData(device, buffer, bufferSize, allowHostCopy);
     }
@@ -610,6 +627,19 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
         checkAssertCalled++;
     }
 
+    ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemory,
+                         false,
+                         (L0::Kernel * kernel,
+                          uint32_t sizePerHwThread,
+                          std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse),
+                         (kernel, sizePerHwThread, privateAllocsToReuse));
+
+    ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded,
+                         false,
+                         (L0::Kernel * kernel,
+                          uint32_t sizePerHwThread),
+                         (kernel, sizePerHwThread));
+
     uint32_t checkAssertCalled = 0;
     bool callBaseExecute = false;
 
@@ -48,6 +48,7 @@ constexpr inline MockModuleTranslationUnit *toMockPtr(L0::ModuleTranslationUnit 
 template <>
 struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
     using BaseClass = ::L0::ModuleImp;
+    using BaseClass::allocatePrivateMemoryPerDispatch;
     using BaseClass::BaseClass;
     using BaseClass::builtFromSPIRv;
     using BaseClass::copyPatchedSegments;
@@ -10,6 +10,7 @@
 #include "shared/test/common/cmd_parse/hw_parse.h"
 #include "shared/test/common/helpers/unit_test_helper.h"
 #include "shared/test/common/libult/ult_command_stream_receiver.h"
+#include "shared/test/common/mocks/mock_command_stream_receiver.h"
 #include "shared/test/common/mocks/mock_ostime.h"
 #include "shared/test/common/mocks/ult_device_factory.h"
 #include "shared/test/common/test_macros/hw_test.h"
@@ -3189,5 +3190,120 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen
     EXPECT_EQ(0u, commandList->peekMappedEventList().size());
 }
 
+template <GFXCORE_FAMILY gfxCoreFamily, typename BaseMock>
+class MockCommandListCoreFamilyIfPrivateNeeded : public BaseMock {
+  public:
+    void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, GraphicsAllocation *> &privateAllocsToReuse) override {
+        passedContainer = &privateAllocsToReuse;
+        BaseMock::allocateOrReuseKernelPrivateMemory(kernel, sizePerHwThread, privateAllocsToReuse);
+    }
+    std::unordered_map<uint32_t, GraphicsAllocation *> *passedContainer;
+};
+
+HWTEST2_F(CommandListCreate, givenPrivatePerDispatchDisabledWhenAllocatingPrivateMemoryThenAllocateIsNotCalled, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamilyIfPrivateNeeded<gfxCoreFamily, MockCommandListCoreFamily<gfxCoreFamily>>>();
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeededCallBase = true;
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    mockKernel.module = &mockModule;
+    mockModule.allocatePrivateMemoryPerDispatch = false;
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeeded(&mockKernel, 0x1000);
+    EXPECT_EQ(commandList->allocateOrReuseKernelPrivateMemoryCalled, 0u);
+}
+
+HWTEST2_F(CommandListCreate, givenPrivatePerDispatchEnabledWhenAllocatingPrivateMemoryThenAllocateIsCalled, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamilyIfPrivateNeeded<gfxCoreFamily, MockCommandListCoreFamily<gfxCoreFamily>>>();
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeededCallBase = true;
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    mockKernel.module = &mockModule;
+    mockModule.allocatePrivateMemoryPerDispatch = true;
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeeded(&mockKernel, 0x1000);
+    EXPECT_EQ(commandList->allocateOrReuseKernelPrivateMemoryCalled, 1u);
+}
+
+HWTEST2_F(CommandListCreate, givenPrivatePerDispatchEnabledWhenAllocatingPrivateMemoryThenCmdListMaprIsPassed, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamilyIfPrivateNeeded<gfxCoreFamily, MockCommandListCoreFamily<gfxCoreFamily>>>();
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeededCallBase = true;
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    mockKernel.module = &mockModule;
+    mockModule.allocatePrivateMemoryPerDispatch = true;
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeeded(&mockKernel, 0x1000);
+    EXPECT_EQ(commandList->passedContainer, &commandList->ownedPrivateAllocations);
+}
+
+HWTEST2_F(CommandListCreate, givenImmediateListAndPrivatePerDispatchDisabledWhenAllocatingPrivateMemoryCalledThenAllocateIsNotCalled, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamilyIfPrivateNeeded<gfxCoreFamily, MockCommandListImmediateHw<gfxCoreFamily>>>();
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeededCallBase = true;
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    mockKernel.module = &mockModule;
+    mockModule.allocatePrivateMemoryPerDispatch = false;
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeeded(&mockKernel, 0x1000);
+    EXPECT_EQ(commandList->allocateOrReuseKernelPrivateMemoryCalled, 0u);
+}
+
+HWTEST2_F(CommandListCreate, givenImmediateListAndPrivatePerDispatchEnabledWhenAllocatingPrivateMemoryThenAllocateIsCalled, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamilyIfPrivateNeeded<gfxCoreFamily, MockCommandListImmediateHw<gfxCoreFamily>>>();
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeededCallBase = true;
+    MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
+    commandList->csr = &mockCommandStreamReceiver;
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    mockKernel.module = &mockModule;
+    mockModule.allocatePrivateMemoryPerDispatch = true;
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeeded(&mockKernel, 0x1000);
+    EXPECT_EQ(commandList->allocateOrReuseKernelPrivateMemoryCalled, 1u);
+}
+
+HWTEST2_F(CommandListCreate, givenImmediateListAndPrivatePerDispatchEnabledWhenAllocatingPrivateMemoryThenCsrMapIsPassed, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamilyIfPrivateNeeded<gfxCoreFamily, MockCommandListImmediateHw<gfxCoreFamily>>>();
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeededCallBase = true;
+    MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
+    commandList->csr = &mockCommandStreamReceiver;
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    mockKernel.module = &mockModule;
+    mockModule.allocatePrivateMemoryPerDispatch = true;
+    commandList->allocateOrReuseKernelPrivateMemoryIfNeeded(&mockKernel, 0x1000);
+    EXPECT_EQ(commandList->passedContainer, &mockCommandStreamReceiver.getOwnedPrivateAllocations());
+}
+
+HWTEST2_F(CommandListCreate, givenCmdListWhenAllocateOrReuseCalledForSizeThatIsStoredInMapThenItsReused, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamily<gfxCoreFamily>>();
+    commandList->allocateOrReuseKernelPrivateMemoryCallBase = true;
+    commandList->device = this->device;
+    uint32_t sizePerHwThread = 0x1000;
+    auto mockMem = std::make_unique<uint8_t[]>(0x1000);
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    const_cast<uint32_t &>(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000;
+    mockKernel.module = &mockModule;
+    MockGraphicsAllocation mockGA(mockMem.get(), 2 * sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
+    std::unordered_map<uint32_t, GraphicsAllocation *> mapForReuse;
+    mapForReuse[sizePerHwThread] = &mockGA;
+    commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread, mapForReuse);
+    EXPECT_EQ(mockKernel.residencyContainer[0], &mockGA);
+}
+
+HWTEST2_F(CommandListCreate, givenNewSizeDifferentThanSizesInMapWhenAllocatingPrivateMemoryThenNewAllocationIsCreated, IsAtLeastSkl) {
+    auto commandList = std::make_unique<MockCommandListCoreFamily<gfxCoreFamily>>();
+    commandList->allocateOrReuseKernelPrivateMemoryCallBase = true;
+    commandList->device = this->device;
+    uint32_t sizePerHwThread = 0x1000;
+    auto mockMem = std::make_unique<uint8_t[]>(0x1000);
+    Mock<Module> mockModule(this->device, nullptr);
+    Mock<KernelImp> mockKernel;
+    const_cast<uint32_t &>(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000;
+    mockKernel.module = &mockModule;
+    MockGraphicsAllocation mockGA(mockMem.get(), sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch / 2);
+    std::unordered_map<uint32_t, GraphicsAllocation *> mapForReuse;
+    mapForReuse[sizePerHwThread] = &mockGA;
+    commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread / 2, mapForReuse);
+    EXPECT_NE(mockKernel.residencyContainer[0], &mockGA);
+    neoDevice->getMemoryManager()->freeGraphicsMemory(mockKernel.residencyContainer[0]);
+}
+
 } // namespace ult
 } // namespace L0
@@ -1386,7 +1386,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichTogethe
     auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
     for (size_t i = 0; i < kernelsNb; i++) {
         auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
-        kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = overAllocMinSize;
+        kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = overAllocMinSize + static_cast<uint32_t>(i * MemoryConstants::cacheLineSize);
         kernelDesc.kernelAttributes.flags.usesPrintf = false;
         kernelDesc.kernelMetadata.kernelName = kernelNames[i];
     }
@@ -1405,8 +1405,8 @@
         EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), i);
         kernels.push_back(this->createKernelWithName(kernelNames[i]));
         // This function is called by appendLaunchKernelWithParams
-        pCommandList->allocateKernelPrivateMemoryIfNeeded(kernels[i].get(),
-                                                          kernels[i]->getKernelDescriptor().kernelAttributes.perHwThreadPrivateMemorySize);
+        pCommandList->allocateOrReuseKernelPrivateMemoryIfNeeded(kernels[i].get(),
+                                                                 kernels[i]->getKernelDescriptor().kernelAttributes.perHwThreadPrivateMemorySize);
         EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), i + 1);
     }
 }
@@ -1442,8 +1442,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExc
         EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 0u);
         kernels.push_back(this->createKernelWithName(kernelNames[i]));
         // This function is called by appendLaunchKernelWithParams
-        pCommandList->allocateKernelPrivateMemoryIfNeeded(kernels[i].get(),
-                                                          kernels[i]->getKernelDescriptor().kernelAttributes.perHwThreadPrivateMemorySize);
+        pCommandList->allocateOrReuseKernelPrivateMemoryIfNeeded(kernels[i].get(),
+                                                                 kernels[i]->getKernelDescriptor().kernelAttributes.perHwThreadPrivateMemorySize);
         EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 0u);
     }
 }
@@ -22,7 +22,7 @@ namespace ult {
 using AppendMemoryCopy = Test<DeviceFixture>;
 
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) {
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1234);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -33,7 +33,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionC
 }
 
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion2DCalledThenSrcDstPointersArePageAligned, IsAtLeastSkl) {
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1234);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -46,7 +46,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCo
 }
 
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion3DCalledThenSrcDstPointersArePageAligned, IsAtLeastSkl) {
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1234);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -59,7 +59,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCo
 }
 
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion2DCalledThenSrcDstNotZeroOffsetsArePassed, IsAtLeastSkl) {
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1233);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -71,7 +71,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemo
 }
 
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstNotZeroOffsetsArePassed, IsAtLeastSkl) {
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1233);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -83,7 +83,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemo
 }
 
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndAlignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstZeroOffsetsArePassed, IsAtLeastSkl) {
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
     void *srcPtr = alignDown(reinterpret_cast<void *>(0x1233), NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
     void *dstPtr = alignDown(reinterpret_cast<void *>(0x2345), NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
@@ -98,7 +98,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndDestinationPtrOffsetWhenMemor
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
 
     constexpr size_t allocSize = 4096;
@@ -132,7 +132,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndSourcePtrOffsetWhenMemoryCopy
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
 
     constexpr size_t allocSize = 4096;
@@ -166,7 +166,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndDestinationPtrOffsetWhenMemor
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
 
     constexpr size_t allocSize = 4096;
@@ -201,7 +201,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndSourcePtrOffsetWhenMemoryCopy
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
 
     constexpr size_t allocSize = 4096;
@@ -236,7 +236,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndDestinationPtrOffsetWhenMemor
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
 
     constexpr size_t allocSize = 4096;
@@ -270,7 +270,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndSourcePtrOffsetWhenMemoryCopy
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
 
     constexpr size_t allocSize = 4096;
@@ -303,7 +303,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListAndSourcePtrOffsetWhenMemoryCopy
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) {
     using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1234);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -610,7 +610,7 @@ HWTEST2_F(AppendMemoryCopy, givenSyncModeImmediateCommandListWhenAppendingMemory
 HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) {
     using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
 
-    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
+    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
     cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1234);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -646,7 +646,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
     using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
     using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.initialize(device, NEO::EngineGroupType::Copy, 0u);
     void *srcPtr = reinterpret_cast<void *>(0x1234);
     void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -701,7 +701,7 @@ HWTEST2_F(AppendMemoryCopy,
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -759,7 +759,7 @@ HWTEST2_F(AppendMemoryCopy,
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
     using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -90,7 +90,7 @@ void testSingleTileAppendMemoryCopyThreeKernels(CopyTestInput &input, TestExpect
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -175,7 +175,7 @@ void testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input, 
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -283,7 +283,7 @@ void testSingleTileAppendMemoryCopySingleKernel(CopyTestInput &input, TestExpect
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -355,7 +355,7 @@ void testSingleTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, 
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -509,7 +509,7 @@ void testMultiTileAppendMemoryCopyThreeKernels(CopyTestInput &input, TestExpecte
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -600,7 +600,7 @@ void testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input, T
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -726,7 +726,7 @@ void testMultiTileAppendMemoryCopySingleKernel(CopyTestInput &input, TestExpecte
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -802,7 +802,7 @@ void testMultiTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, T
     using OPERATION = typename POSTSYNC_DATA::OPERATION;
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
 
-    MockAppendMemoryCopy<gfxCoreFamily> commandList;
+    MockCommandListCoreFamily<gfxCoreFamily> commandList;
     commandList.appendMemoryCopyKernelWithGACallBase = true;
 
     commandList.initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -389,6 +389,10 @@ void CommandStreamReceiver::cleanupResources() {
         getMemoryManager()->freeGraphicsMemory(globalStatelessHeapAllocation);
         globalStatelessHeapAllocation = nullptr;
     }
+    for (auto &alloc : ownedPrivateAllocations) {
+        getMemoryManager()->freeGraphicsMemory(alloc.second);
+    }
+    ownedPrivateAllocations.clear();
 }
 
 WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
@@ -567,6 +571,9 @@ ResidencyContainer &CommandStreamReceiver::getResidencyAllocations() {
 ResidencyContainer &CommandStreamReceiver::getEvictionAllocations() {
     return this->evictionAllocations;
 }
+std::unordered_map<uint32_t, GraphicsAllocation *> &CommandStreamReceiver::getOwnedPrivateAllocations() {
+    return this->ownedPrivateAllocations;
+}
 
 AubSubCaptureStatus CommandStreamReceiver::checkAndActivateAubSubCapture(const std::string &kernelName) { return {false, false}; }
 
@@ -118,6 +118,7 @@ class CommandStreamReceiver {
 
     ResidencyContainer &getResidencyAllocations();
     ResidencyContainer &getEvictionAllocations();
+    std::unordered_map<uint32_t, GraphicsAllocation *> &getOwnedPrivateAllocations();
 
     virtual GmmPageTableMngr *createPageTableManager() { return nullptr; }
     bool needsPageTableManager() const;
@@ -460,6 +461,8 @@ class CommandStreamReceiver {
 
     ResidencyContainer residencyAllocations;
     ResidencyContainer evictionAllocations;
+    std::unordered_map<uint32_t, GraphicsAllocation *> ownedPrivateAllocations;
+
     MutexType ownershipMutex;
     MutexType hostPtrSurfaceCreationMutex;
     ExecutionEnvironment &executionEnvironment;
@@ -35,6 +35,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
     using CommandStreamReceiver::baseWaitFunction;
     using CommandStreamReceiver::checkForNewResources;
     using CommandStreamReceiver::checkImplicitFlushForGpuIdle;
+    using CommandStreamReceiver::cleanupResources;
     using CommandStreamReceiver::CommandStreamReceiver;
     using CommandStreamReceiver::globalFenceAllocation;
     using CommandStreamReceiver::gpuHangCheckPeriod;
@@ -69,3 +69,13 @@
         } \
         return funcName##Result; \
     }
+
+#define ADDMETHOD_VOIDRETURN(funcName, callBase, funcParams, invokeParams) \
+    bool funcName##CallBase = callBase; \
+    uint32_t funcName##Called = 0u; \
+    void funcName funcParams override { \
+        funcName##Called++; \
+        if (funcName##CallBase) { \
+            BaseClass::funcName invokeParams; \
+        } \
+    }
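For reference, instantiating the new macro the way the mocks above do, e.g. ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded, false, (L0::Kernel * kernel, uint32_t sizePerHwThread), (kernel, sizePerHwThread)), expands to roughly the following members of the mock (a sketch of the expansion, not compiler output):

    bool allocateOrReuseKernelPrivateMemoryIfNeededCallBase = false;
    uint32_t allocateOrReuseKernelPrivateMemoryIfNeededCalled = 0u;
    void allocateOrReuseKernelPrivateMemoryIfNeeded(L0::Kernel *kernel, uint32_t sizePerHwThread) override {
        allocateOrReuseKernelPrivateMemoryIfNeededCalled++;
        if (allocateOrReuseKernelPrivateMemoryIfNeededCallBase) {
            BaseClass::allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, sizePerHwThread);
        }
    }

This is what lets the tests flip the ...CallBase flag and then assert on the ...Called counter.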
@@ -4485,3 +4485,13 @@ HWTEST2_F(CommandStreamReceiverHwTest,
     EXPECT_EQ(nullptr, frontEndCmd);
     EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
 }
+
+HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCleanUpResourcesThenOwnedPrivateAllocationsAreFreed) {
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    auto mockGA = std::make_unique<MockGraphicsAllocation>();
+
+    auto mapForReuse = &csr.getOwnedPrivateAllocations();
+    mapForReuse->insert({0x100, mockGA.release()});
+    csr.cleanupResources();
+    EXPECT_EQ(mapForReuse->size(), 0u);
+}