mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
[5/n] Unified Shared Memory
- Add kernel support for host unified memory (um) allocations
- During make resident call choose only appropriate resources for residency
- change resource types to binary bit friendly values
- enhance memory manager to only make resident compatible types

Related-To: NEO-3148
Change-Id: Ic711a4425a0d8db151a335e0357440312dc09b7e
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
cfdade26c2
commit
2e8e625024
@@ -3987,9 +3987,10 @@ cl_int CL_API_CALL clSetKernelExecInfo(cl_kernel kernel,
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch (paramName) {
|
switch (paramName) {
|
||||||
case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: {
|
case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL:
|
||||||
|
case CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: {
|
||||||
auto propertyValue = *reinterpret_cast<const cl_bool *>(paramValue);
|
auto propertyValue = *reinterpret_cast<const cl_bool *>(paramValue);
|
||||||
pKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, propertyValue);
|
pKernel->setUnifiedMemoryProperty(paramName, propertyValue);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case CL_KERNEL_EXEC_INFO_SVM_PTRS:
|
case CL_KERNEL_EXEC_INFO_SVM_PTRS:
|
||||||
|
|||||||
@@ -944,6 +944,11 @@ void Kernel::clearSvmKernelExecInfo() {
|
|||||||
void Kernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) {
|
void Kernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) {
|
||||||
if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL) {
|
if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL) {
|
||||||
this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = infoValue;
|
this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = infoValue;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL) {
|
||||||
|
this->unifiedMemoryControls.indirectHostAllocationsAllowed = infoValue;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1009,8 +1014,9 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
|||||||
|
|
||||||
gtpinNotifyMakeResident(this, &commandStreamReceiver);
|
gtpinNotifyMakeResident(this, &commandStreamReceiver);
|
||||||
|
|
||||||
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) {
|
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||||
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver);
|
unifiedMemoryControls.indirectHostAllocationsAllowed) {
|
||||||
|
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2224,4 +2230,15 @@ void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocati
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
uint32_t Kernel::UnifiedMemoryControls::generateMask() {
|
||||||
|
uint32_t resourceMask = 0u;
|
||||||
|
if (this->indirectHostAllocationsAllowed) {
|
||||||
|
resourceMask |= InternalMemoryType::HOST_UNIFIED_MEMORY;
|
||||||
|
}
|
||||||
|
if (this->indirectDeviceAllocationsAllowed) {
|
||||||
|
resourceMask |= InternalMemoryType::DEVICE_UNIFIED_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return resourceMask;
|
||||||
|
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -62,7 +62,9 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct UnifiedMemoryControls {
|
struct UnifiedMemoryControls {
|
||||||
|
uint32_t generateMask();
|
||||||
bool indirectDeviceAllocationsAllowed = false;
|
bool indirectDeviceAllocationsAllowed = false;
|
||||||
|
bool indirectHostAllocationsAllowed = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
||||||
|
|||||||
@@ -68,10 +68,10 @@ SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionP
|
|||||||
return &iter->second;
|
return &iter->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver) {
|
void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) {
|
||||||
std::unique_lock<SpinLock> lock(mtx);
|
std::unique_lock<SpinLock> lock(mtx);
|
||||||
for (auto &allocation : this->SVMAllocs.allocations) {
|
for (auto &allocation : this->SVMAllocs.allocations) {
|
||||||
if (allocation.second.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) {
|
if (allocation.second.memoryType & requestedTypesMask) {
|
||||||
commandStreamReceiver.makeResident(*allocation.second.gpuAllocation);
|
commandStreamReceiver.makeResident(*allocation.second.gpuAllocation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,11 +18,11 @@ class Device;
|
|||||||
class GraphicsAllocation;
|
class GraphicsAllocation;
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
|
|
||||||
enum class InternalMemoryType : uint32_t {
|
enum InternalMemoryType : uint32_t {
|
||||||
SVM = 0,
|
NOT_SPECIFIED = 0b0,
|
||||||
DEVICE_UNIFIED_MEMORY,
|
SVM = 0b1,
|
||||||
HOST_UNIFIED_MEMORY,
|
DEVICE_UNIFIED_MEMORY = 0b10,
|
||||||
NOT_SPECIFIED
|
HOST_UNIFIED_MEMORY = 0b100,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SvmAllocationData {
|
struct SvmAllocationData {
|
||||||
@@ -89,7 +89,7 @@ class SVMAllocsManager {
|
|||||||
void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
|
void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
|
||||||
void removeSvmMapOperation(const void *regionSvmPtr);
|
void removeSvmMapOperation(const void *regionSvmPtr);
|
||||||
SvmMapOperation *getSvmMapOperation(const void *regionPtr);
|
SvmMapOperation *getSvmMapOperation(const void *regionPtr);
|
||||||
void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver);
|
void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties);
|
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties);
|
||||||
|
|||||||
@@ -61,4 +61,4 @@ void BlockKernelManager::makeInternalAllocationsResident(CommandStreamReceiver &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -1067,7 +1067,7 @@ HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreMadeResidentThenOnlyNonSvmAll
|
|||||||
auto &residentAllocations = commandStreamReceiver.getResidencyAllocations();
|
auto &residentAllocations = commandStreamReceiver.getResidencyAllocations();
|
||||||
EXPECT_EQ(0u, residentAllocations.size());
|
EXPECT_EQ(0u, residentAllocations.size());
|
||||||
|
|
||||||
svmManager->makeInternalAllocationsResident(commandStreamReceiver);
|
svmManager->makeInternalAllocationsResident(commandStreamReceiver, InternalMemoryType::DEVICE_UNIFIED_MEMORY);
|
||||||
|
|
||||||
//only unified memory allocation is made resident
|
//only unified memory allocation is made resident
|
||||||
EXPECT_EQ(1u, residentAllocations.size());
|
EXPECT_EQ(1u, residentAllocations.size());
|
||||||
|
|||||||
@@ -1640,6 +1640,26 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAl
|
|||||||
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) {
|
||||||
|
MockKernelWithInternals mockKernel(*this->pDevice);
|
||||||
|
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
|
||||||
|
auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));
|
||||||
|
auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY));
|
||||||
|
|
||||||
|
mockKernel.mockKernel->makeResident(this->pDevice->getCommandStreamReceiver());
|
||||||
|
EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size());
|
||||||
|
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true);
|
||||||
|
|
||||||
|
mockKernel.mockKernel->makeResident(this->pDevice->getCommandStreamReceiver());
|
||||||
|
EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
|
||||||
|
EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation));
|
||||||
|
|
||||||
|
svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
|
||||||
|
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(KernelResidencyTest, givenKernelWhenSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) {
|
HWTEST_F(KernelResidencyTest, givenKernelWhenSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) {
|
||||||
MockKernelWithInternals mockKernel(*this->pDevice);
|
MockKernelWithInternals mockKernel(*this->pDevice);
|
||||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
@@ -1698,6 +1718,18 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
|
|||||||
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed);
|
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyIsCalledThenKernelControlIsChanged) {
|
||||||
|
MockKernelWithInternals mockKernel(*this->pDevice);
|
||||||
|
cl_bool enableIndirectHostAccess = CL_TRUE;
|
||||||
|
auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, status);
|
||||||
|
EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed);
|
||||||
|
enableIndirectHostAccess = CL_FALSE;
|
||||||
|
status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, status);
|
||||||
|
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
|
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
|
||||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||||
pKernelInfo->kernelArgInfo.resize(3);
|
pKernelInfo->kernelArgInfo.resize(3);
|
||||||
|
|||||||
Reference in New Issue
Block a user