Optimize zeMemGetAllocProperties.

- No functional changes, only performance optimizations.
- add const modifiers
- add early returns
- remove unneeded code
- do fast lookup for aligned pointers
- fix incorrect tests

+21% improvement in
./api_overhead_benchmark_l0 --test=GetMemoryProperties
--api=l0 --AmountOfUsmAllocations=100 --iterations=1000
+32% improvement in
./api_overhead_benchmark_l0 --test=GetMemoryProperties
--api=l0 --AmountOfUsmAllocations=1000 --iterations=1000

Related-To: NEO-7676

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2023-01-27 16:24:26 +00:00
committed by Compute-Runtime-Automation
parent 9080b0c109
commit 2f847aa7bb
4 changed files with 81 additions and 15 deletions

View File

@@ -1394,10 +1394,7 @@ HWTEST_F(UnifiedSharedMemoryHWTest, givenSharedUsmAllocationWhenWriteBufferThenC
char *cpuPtr = static_cast<char *>(gpuAllocation->getUnderlyingBuffer());
auto gpuAddress = gpuAllocation->getGpuAddress();
void *gpuPtr = reinterpret_cast<void *>(gpuAddress);
char *shiftedPtr = cpuPtr + 0x10;
auto gmmHelper = mockContext.getDevice(0)->getGmmHelper();
auto canonizedGpuAddress = gmmHelper->canonize(reinterpret_cast<uint64_t>(shiftedPtr));
gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, canonizedGpuAddress);
cl_mem_flags flags = 0;
auto status = CL_INVALID_PLATFORM;
@@ -1408,7 +1405,7 @@ HWTEST_F(UnifiedSharedMemoryHWTest, givenSharedUsmAllocationWhenWriteBufferThenC
myCmdQ.enqueueWriteBuffer(buffer, false, 0u, 4096u, sharedMemory, nullptr, 0u, nullptr, nullptr);
EXPECT_EQ(gpuPtr, myCmdQ.srcPtr);
canonizedGpuAddress = gmmHelper->canonize(gpuAddress);
auto canonizedGpuAddress = gmmHelper->canonize(gpuAddress);
gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, canonizedGpuAddress);
delete buffer;
clMemFreeINTEL(&mockContext, sharedMemory);
@@ -1424,10 +1421,6 @@ HWTEST_F(UnifiedSharedMemoryHWTest, givenSharedUsmAllocationWhenReadBufferThenCp
char *cpuPtr = static_cast<char *>(gpuAllocation->getUnderlyingBuffer());
auto gpuAddress = gpuAllocation->getGpuAddress();
void *gpuPtr = reinterpret_cast<void *>(gpuAddress);
char *shiftedPtr = cpuPtr + 0x10;
auto gmmHelper = mockContext.getDevice(0)->getGmmHelper();
auto canonizedGpuAddress = gmmHelper->canonize(reinterpret_cast<uint64_t>(shiftedPtr));
gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, canonizedGpuAddress);
cl_mem_flags flags = 0;
auto status = CL_INVALID_PLATFORM;
@@ -1438,7 +1431,8 @@ HWTEST_F(UnifiedSharedMemoryHWTest, givenSharedUsmAllocationWhenReadBufferThenCp
myCmdQ.enqueueReadBuffer(buffer, false, 0u, 4096u, sharedMemory, nullptr, 0u, nullptr, nullptr);
EXPECT_EQ(gpuPtr, myCmdQ.dstPtr);
canonizedGpuAddress = gmmHelper->canonize(gpuAddress);
auto gmmHelper = mockContext.getDevice(0)->getGmmHelper();
auto canonizedGpuAddress = gmmHelper->canonize(gpuAddress);
gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, canonizedGpuAddress);
delete buffer;
clMemFreeINTEL(&mockContext, sharedMemory);