diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index d6d4dbea2c..306971ba6d 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -4898,9 +4898,9 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, const void *argValue) { TRACING_ENTER(ClSetKernelArgSvmPointer, &kernel, &argIndex, &argValue); - MultiDeviceKernel *pMultiDeviceKernel = nullptr; + MultiDeviceKernel *multiDeviceKernel = nullptr; - auto retVal = validateObjects(withCastToInternal(kernel, &pMultiDeviceKernel)); + auto retVal = validateObjects(withCastToInternal(kernel, &multiDeviceKernel)); API_ENTER(&retVal); if (CL_SUCCESS != retVal) { @@ -4908,27 +4908,27 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, return retVal; } - if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) { + if (argIndex >= multiDeviceKernel->getKernelArgsNumber()) { retVal = CL_INVALID_ARG_INDEX; TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal); return retVal; } - const auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager(); + const auto svmManager = multiDeviceKernel->getContext().getSVMAllocsManager(); if (argValue != nullptr) { - if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 && - pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) { + if (multiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 && + multiDeviceKernel->getKernelArguments()[argIndex].value == argValue) { bool reuseFromCache = false; const auto allocationsCounter = svmManager->allocationsCounter.load(); if (allocationsCounter > 0) { - if (allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) { + if (allocationsCounter == multiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) { reuseFromCache = true; } else { const auto svmData = svmManager->getSVMAlloc(argValue); - if (svmData && pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == 
svmData->getAllocId()) { + if (svmData && multiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) { reuseFromCache = true; - pMultiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter); + multiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter); } } if (reuseFromCache) { @@ -4938,7 +4938,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } } else { - if (pMultiDeviceKernel->getKernelArguments()[argIndex].isSetToNullptr) { + if (multiDeviceKernel->getKernelArguments()[argIndex].isSetToNullptr) { TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal); return CL_SUCCESS; } @@ -4946,7 +4946,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue); - for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { + for (const auto &pDevice : multiDeviceKernel->getDevices()) { const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; @@ -4955,8 +4955,8 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } - for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { - auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex()); + for (const auto &pDevice : multiDeviceKernel->getDevices()) { + auto pKernel = multiDeviceKernel->getKernel(pDevice->getRootDeviceIndex()); cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo() .kernelDescriptor.payloadMappings.explicitArgs[argIndex] .getTraits() @@ -4969,12 +4969,12 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } - MultiGraphicsAllocation *pSvmAllocs = nullptr; + MultiGraphicsAllocation *svmAllocs = nullptr; uint32_t allocId = 0u; if (argValue != nullptr) { auto svmData = svmManager->getSVMAlloc(argValue); if (svmData == nullptr) { - for (const auto &pDevice : 
pMultiDeviceKernel->getDevices()) { + for (const auto &pDevice : multiDeviceKernel->getDevices()) { if (!pDevice->areSharedSystemAllocationsAllowed()) { retVal = CL_INVALID_ARG_VALUE; TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal); @@ -4982,12 +4982,12 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } } else { - pSvmAllocs = &svmData->gpuAllocations; + svmAllocs = &svmData->gpuAllocations; allocId = svmData->getAllocId(); } } - retVal = pMultiDeviceKernel->setArgSvmAlloc(argIndex, const_cast(argValue), pSvmAllocs, allocId); + retVal = multiDeviceKernel->setArgSvmAlloc(argIndex, const_cast(argValue), svmAllocs, allocId); TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal); return retVal; } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 825480ae6f..6ee0ade950 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -447,6 +447,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf dispatchWalkerArgs.timestampPacketDependencies = ×tampPacketDependencies; dispatchWalkerArgs.currentTimestampPacketNodes = timestampPacketContainer.get(); dispatchWalkerArgs.commandType = commandType; + dispatchWalkerArgs.event = event; HardwareInterface::dispatchWalker( *this, diff --git a/opencl/source/command_queue/hardware_interface.h b/opencl/source/command_queue/hardware_interface.h index 1a15384fd4..6b5a0844b8 100644 --- a/opencl/source/command_queue/hardware_interface.h +++ b/opencl/source/command_queue/hardware_interface.h @@ -16,6 +16,7 @@ namespace NEO { class CommandQueue; class DispatchInfo; +class Event; class IndirectHeap; class Kernel; class LinearStream; @@ -37,6 +38,7 @@ struct HardwareInterfaceWalkerArgs { const Vec3 *numberOfWorkgroups = nullptr; const Vec3 *startOfWorkgroups = nullptr; KernelOperation *blockedCommandsData = nullptr; + Event *event = nullptr; size_t currentDispatchIndex = 0; size_t 
offsetInterfaceDescriptorTable = 0; PreemptionMode preemptionMode = PreemptionMode::Initial; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 86da4a3ede..c838cc4361 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -362,7 +362,7 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) { break; case SVM_OBJ: setArgSvm(i, pSourceKernel->getKernelArgInfo(i).size, const_cast(pSourceKernel->getKernelArgInfo(i).value), - pSourceKernel->getKernelArgInfo(i).pSvmAlloc, pSourceKernel->getKernelArgInfo(i).svmFlags); + pSourceKernel->getKernelArgInfo(i).svmAllocation, pSourceKernel->getKernelArgInfo(i).svmFlags); break; case SVM_ALLOC_OBJ: setArgSvmAlloc(i, const_cast(pSourceKernel->getKernelArgInfo(i).value), @@ -881,8 +881,10 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } + if (svmPtr != nullptr) { + this->anyKernelArgumentUsingSystemMemory |= true; + } addAllocationToCacheFlushVector(argIndex, svmAlloc); - return CL_SUCCESS; } @@ -894,6 +896,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless); patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, reinterpret_cast(svmPtr)); + auto &kernelArgInfo = kernelArguments[argIndex]; + bool disableL3 = false; bool forceNonAuxMode = false; bool isAuxTranslationKernel = (AuxTranslationDirection::None != auxTranslationDirection); @@ -910,7 +914,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio forceNonAuxMode = true; } - bool argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable; + bool argWasUncacheable = kernelArgInfo.isStatelessUncacheable; bool argIsUncacheable = svmAlloc ? svmAlloc->isUncacheable() : false; statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 
1 : 0); @@ -929,15 +933,21 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio } storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); - kernelArguments[argIndex].allocId = allocId; - kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u; - kernelArguments[argIndex].isSetToNullptr = nullptr == svmPtr; - if (!kernelArguments[argIndex].isPatched) { + kernelArgInfo.allocId = allocId; + kernelArgInfo.allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u; + kernelArgInfo.isSetToNullptr = nullptr == svmPtr; + if (!kernelArgInfo.isPatched) { patchedArgumentsNum++; - kernelArguments[argIndex].isPatched = true; + kernelArgInfo.isPatched = true; + } + if (!kernelArgInfo.isSetToNullptr) { + if (svmAlloc != nullptr) { + this->anyKernelArgumentUsingSystemMemory |= graphicsAllocationTypeUseSystemMemory(svmAlloc->getAllocationType()); + } else { + this->anyKernelArgumentUsingSystemMemory |= true; + } } addAllocationToCacheFlushVector(argIndex, svmAlloc); - return CL_SUCCESS; } @@ -948,7 +958,7 @@ void Kernel::storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argO kernelArguments[argIndex].object = argObject; kernelArguments[argIndex].value = argValue; kernelArguments[argIndex].size = argSize; - kernelArguments[argIndex].pSvmAlloc = argSvmAlloc; + kernelArguments[argIndex].svmAllocation = argSvmAlloc; kernelArguments[argIndex].svmFlags = argSvmFlags; } @@ -1391,8 +1401,12 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, storeKernelArg(argIndex, BUFFER_OBJ, clMemObj, argVal, argSize); auto buffer = castToObject(clMemObj); - if (!buffer) + if (!buffer) { return CL_INVALID_MEM_OBJECT; + } + + auto gfxAllocationType = buffer->getGraphicsAllocation(rootDeviceIndex)->getAllocationType(); + this->anyKernelArgumentUsingSystemMemory |= 
graphicsAllocationTypeUseSystemMemory(gfxAllocationType); if (buffer->peekSharingHandler()) { usingSharedObjArgs = true; @@ -1449,7 +1463,6 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, } addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush); - return CL_SUCCESS; } else { storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize); @@ -2237,4 +2250,11 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) { return CL_SUCCESS; } +bool Kernel::graphicsAllocationTypeUseSystemMemory(AllocationType type) { + return (type == AllocationType::BUFFER_HOST_MEMORY) || + (type == AllocationType::EXTERNAL_HOST_PTR) || + (type == AllocationType::SVM_CPU) || + (type == AllocationType::SVM_ZERO_COPY); +} + } // namespace NEO diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 9269632529..75cf3c506d 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -59,16 +59,16 @@ class Kernel : public ReferenceTrackedObject { }; struct SimpleKernelArgInfo { - kernelArgType type; + cl_mem_flags svmFlags; void *object; const void *value; size_t size; - GraphicsAllocation *pSvmAlloc; - cl_mem_flags svmFlags; - bool isPatched = false; - bool isStatelessUncacheable = false; + GraphicsAllocation *svmAllocation; + kernelArgType type; uint32_t allocId; uint32_t allocIdMemoryManagerCounter; + bool isPatched = false; + bool isStatelessUncacheable = false; bool isSetToNullptr = false; }; @@ -405,89 +405,11 @@ class Kernel : public ReferenceTrackedObject { const std::map &getMemObjectsToMigrate() const { return migratableArgsMap; } ImplicitArgs *getImplicitArgs() const { return pImplicitArgs.get(); } const HardwareInfo &getHardwareInfo() const; + bool isAnyKernelArgumentUsingSystemMemory() const { + return anyKernelArgumentUsingSystemMemory; + } protected: - void - makeArgsResident(CommandStreamReceiver &commandStreamReceiver); - - void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation 
*svmAlloc); - - void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg); - - Kernel(Program *programArg, const KernelInfo &kernelInfo, ClDevice &clDevice); - void provideInitializationHints(); - - void markArgPatchedAndResolveArgs(uint32_t argIndex); - void resolveArgs(); - - void reconfigureKernel(); - bool hasDirectStatelessAccessToSharedBuffer() const; - bool hasDirectStatelessAccessToHostMemory() const; - bool hasIndirectStatelessAccessToHostMemory() const; - - void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); - bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; - - const ClDevice &getDevice() const { - return clDevice; - } - cl_int patchPrivateSurface(); - - bool containsStatelessWrites = true; - const ExecutionEnvironment &executionEnvironment; - Program *program; - ClDevice &clDevice; - const KernelInfo &kernelInfo; - - std::vector kernelArguments; - std::vector kernelArgHandlers; - std::vector kernelSvmGfxAllocations; - std::vector kernelUnifiedMemoryGfxAllocations; - - AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; - - bool usingSharedObjArgs = false; - bool usingImages = false; - bool usingImagesOnly = false; - bool auxTranslationRequired = false; - uint32_t patchedArgumentsNum = 0; - uint32_t startOffset = 0; - uint32_t statelessUncacheableArgsCount = 0; - int32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; - KernelExecutionType executionType = KernelExecutionType::Default; - - std::vector patchInfoDataList; - std::unique_ptr imageTransformer; - std::map migratableArgsMap{}; - - bool specialPipelineSelectMode = false; - bool svmAllocationsRequireCacheFlush = false; - std::vector kernelArgRequiresCacheFlush; - UnifiedMemoryControls unifiedMemoryControls{}; - bool isUnifiedMemorySyncRequired = true; - bool debugEnabled = false; - uint32_t additionalKernelExecInfo = 
AdditionalKernelExecInfo::DisableOverdispatch; - - uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation; - uint32_t maxKernelWorkGroupSize = 0; - uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation; - uint32_t *parentEventOffset = &Kernel::dummyPatchLocation; - uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation; - - size_t numberOfBindingTableStates = 0u; - size_t localBindingTableOffset = 0u; - - std::vector slmSizes; - uint32_t slmTotalSize = 0u; - - std::unique_ptr pSshLocal; - uint32_t sshLocalSize = 0u; - char *crossThreadData = nullptr; - uint32_t crossThreadDataSize = 0u; - - GraphicsAllocation *privateSurface = nullptr; - uint64_t privateSurfaceSize = 0u; - struct KernelConfig { Vec3 gws; Vec3 lws; @@ -523,15 +445,98 @@ class Kernel : public ReferenceTrackedObject { bool singleSubdevicePreferred = false; }; + Kernel(Program *programArg, const KernelInfo &kernelInfo, ClDevice &clDevice); + + void makeArgsResident(CommandStreamReceiver &commandStreamReceiver); + + void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc); + + void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg); + + void provideInitializationHints(); + + void markArgPatchedAndResolveArgs(uint32_t argIndex); + void resolveArgs(); + + void reconfigureKernel(); + bool hasDirectStatelessAccessToSharedBuffer() const; + bool hasDirectStatelessAccessToHostMemory() const; + bool hasIndirectStatelessAccessToHostMemory() const; + + void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); + bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; + + const ClDevice &getDevice() const { + return clDevice; + } + cl_int patchPrivateSurface(); + bool hasTunningFinished(KernelSubmissionData &submissionData); bool hasRunFinished(TimestampPacketContainer *timestampContainer); + bool 
graphicsAllocationTypeUseSystemMemory(AllocationType type); + + UnifiedMemoryControls unifiedMemoryControls{}; + + std::map migratableArgsMap{}; std::unordered_map kernelSubmissionMap; - bool singleSubdevicePreferredInCurrentEnqueue = false; - bool kernelHasIndirectAccess = true; - MultiDeviceKernel *pMultiDeviceKernel = nullptr; + std::vector kernelArguments; + std::vector kernelArgHandlers; + std::vector kernelSvmGfxAllocations; + std::vector kernelUnifiedMemoryGfxAllocations; + std::vector patchInfoDataList; + std::vector kernelArgRequiresCacheFlush; + std::vector slmSizes; + + std::unique_ptr imageTransformer; + std::unique_ptr pSshLocal; std::unique_ptr pImplicitArgs = nullptr; + + uint64_t privateSurfaceSize = 0u; + + size_t numberOfBindingTableStates = 0u; + size_t localBindingTableOffset = 0u; + + const ExecutionEnvironment &executionEnvironment; + Program *program; + ClDevice &clDevice; + const KernelInfo &kernelInfo; + GraphicsAllocation *privateSurface = nullptr; + MultiDeviceKernel *pMultiDeviceKernel = nullptr; + + uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation; + uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation; + uint32_t *parentEventOffset = &Kernel::dummyPatchLocation; + uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation; + char *crossThreadData = nullptr; + + AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; + KernelExecutionType executionType = KernelExecutionType::Default; + + int32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + + uint32_t patchedArgumentsNum = 0; + uint32_t startOffset = 0; + uint32_t statelessUncacheableArgsCount = 0; + uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch; + uint32_t maxKernelWorkGroupSize = 0; + uint32_t slmTotalSize = 0u; + uint32_t sshLocalSize = 0u; + uint32_t crossThreadDataSize = 0u; + + bool containsStatelessWrites = true; + bool usingSharedObjArgs = false; + bool 
usingImages = false; + bool usingImagesOnly = false; + bool auxTranslationRequired = false; + bool specialPipelineSelectMode = false; + bool svmAllocationsRequireCacheFlush = false; + bool isUnifiedMemorySyncRequired = true; + bool debugEnabled = false; + bool singleSubdevicePreferredInCurrentEnqueue = false; + bool kernelHasIndirectAccess = true; + bool anyKernelArgumentUsingSystemMemory = false; }; } // namespace NEO diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp index cde4c20b06..38e22998e2 100644 --- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp +++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -28,11 +28,13 @@ class MediaImageSetArgTest : public ClDeviceFixture, protected: void SetUp() override { ClDeviceFixture::SetUp(); + context = new MockContext(pClDevice); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; program = std::make_unique(toClDeviceVector(*pClDevice)); + program->setContext(context); pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; @@ -51,7 +53,7 @@ class MediaImageSetArgTest : public ClDeviceFixture, pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); - context = new MockContext(pClDevice); + srcImage = Image2dHelper<>::create(context); ASSERT_NE(nullptr, srcImage); } diff --git a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl index 5337ee95e4..b567ee4f5e 100644 --- a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl +++ b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl @@ -706,12 +706,16 @@ 
TEST(clUnifiedSharedMemoryTests, whenDeviceSupportSharedMemoryAllocationsAndSyst auto device = mockContext->getDevice(0u); REQUIRE_SVM_OR_SKIP(device); - MockKernelWithInternals mockKernel(*mockContext->getDevice(0u), mockContext.get(), true); + MockKernelWithInternals mockKernel(*device, mockContext.get(), true); auto systemPointer = reinterpret_cast(0xfeedbac); + auto kernel = mockKernel.mockMultiDeviceKernel->getKernel(device->getRootDeviceIndex()); + EXPECT_FALSE(kernel->isAnyKernelArgumentUsingSystemMemory()); + auto retVal = clSetKernelArgMemPointerINTEL(mockKernel.mockMultiDeviceKernel, 0, systemPointer); EXPECT_EQ(retVal, CL_SUCCESS); + EXPECT_TRUE(kernel->isAnyKernelArgumentUsingSystemMemory()); //check if cross thread is updated auto crossThreadLocation = reinterpret_cast(ptrOffset(mockKernel.mockKernel->getCrossThreadData(), mockKernel.kernelInfo.argAsPtr(0).stateless)); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index 4779f9f982..db7f310a44 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -470,3 +470,151 @@ HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBuf ); EXPECT_EQ(CL_SUCCESS, retVal); } + +HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSvmMemcpyWhenSvmZeroCopyThenBuiltinKernelUsesSystemMemory) { + if (!pDevice->isFullRangeSvm()) { + return; + } + auto builtIns = new MockBuiltins(); + pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); + // retrieve original builder + auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + EBuiltInOps::CopyBufferToBuffer, + pCmdQ->getClDevice()); + ASSERT_NE(nullptr, &origBuilder); + + // substitute original builder with mock builder + auto oldBuilder = 
pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + EBuiltInOps::CopyBufferToBuffer, + std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); + EXPECT_EQ(&origBuilder, oldBuilder.get()); + + srcSvmAlloc->setAllocationType(NEO::AllocationType::SVM_ZERO_COPY); + dstSvmAlloc->setAllocationType(NEO::AllocationType::SVM_ZERO_COPY); + + // call enqueue on mock builder + auto retVal = pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + dstSvmPtr, // void *dst_ptr + srcSvmPtr, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_event *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + EBuiltInOps::CopyBufferToBuffer, + std::move(oldBuilder)); + EXPECT_NE(nullptr, newBuilder); + + // check if original builder is restored correctly + auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + EBuiltInOps::CopyBufferToBuffer, + pCmdQ->getClDevice()); + EXPECT_EQ(&origBuilder, &restoredBuilder); + + // use mock builder to validate builder's input / output + auto mockBuilder = static_cast(newBuilder.get()); + + // validate builder's input - builtin ops + auto params = mockBuilder->getBuiltinOpParams(); + EXPECT_EQ(srcSvmPtr, params->srcPtr); + EXPECT_EQ(dstSvmPtr, params->dstPtr); + EXPECT_EQ(nullptr, params->srcMemObj); + EXPECT_EQ(nullptr, params->dstMemObj); + EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); + EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); + EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); + EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); + EXPECT_EQ(Vec3(256, 0, 0), params->size); + + // validate builder's output - multi dispatch info + auto mdi = mockBuilder->getMultiDispatchInfo(); + EXPECT_EQ(1u, mdi->size()); + + auto di = 
mdi->begin(); + size_t middleElSize = 4 * sizeof(uint32_t); + EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); + + auto kernel = mdi->begin()->getKernel(); + EXPECT_TRUE(kernel->isAnyKernelArgumentUsingSystemMemory()); +} + +HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSvmMemcpyWhenSvmGpuThenBuiltinKernelNotUsesSystemMemory) { + if (!pDevice->isFullRangeSvm()) { + return; + } + auto builtIns = new MockBuiltins(); + pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); + // retrieve original builder + auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + EBuiltInOps::CopyBufferToBuffer, + pCmdQ->getClDevice()); + ASSERT_NE(nullptr, &origBuilder); + + // substitute original builder with mock builder + auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + EBuiltInOps::CopyBufferToBuffer, + std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); + EXPECT_EQ(&origBuilder, oldBuilder.get()); + + srcSvmAlloc->setAllocationType(NEO::AllocationType::SVM_GPU); + dstSvmAlloc->setAllocationType(NEO::AllocationType::SVM_GPU); + + // call enqueue on mock builder + auto retVal = pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + dstSvmPtr, // void *dst_ptr + srcSvmPtr, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_event *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + EBuiltInOps::CopyBufferToBuffer, + std::move(oldBuilder)); + EXPECT_NE(nullptr, newBuilder); + + // check if original builder is restored correctly + auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + EBuiltInOps::CopyBufferToBuffer, + 
pCmdQ->getClDevice()); + EXPECT_EQ(&origBuilder, &restoredBuilder); + + // use mock builder to validate builder's input / output + auto mockBuilder = static_cast(newBuilder.get()); + + // validate builder's input - builtin ops + auto params = mockBuilder->getBuiltinOpParams(); + EXPECT_EQ(srcSvmPtr, params->srcPtr); + EXPECT_EQ(dstSvmPtr, params->dstPtr); + EXPECT_EQ(nullptr, params->srcMemObj); + EXPECT_EQ(nullptr, params->dstMemObj); + EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); + EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); + EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); + EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); + EXPECT_EQ(Vec3(256, 0, 0), params->size); + + // validate builder's output - multi dispatch info + auto mdi = mockBuilder->getMultiDispatchInfo(); + EXPECT_EQ(1u, mdi->size()); + + auto di = mdi->begin(); + size_t middleElSize = 4 * sizeof(uint32_t); + EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); + + auto kernel = mdi->begin()->getKernel(); + EXPECT_FALSE(kernel->isAnyKernelArgumentUsingSystemMemory()); +} diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index d7e8e58fd9..c333fc689c 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -888,14 +888,14 @@ TEST_F(EnqueueSvmTest, givenEnqueueSVMMemFillWhenPatternAllocationIsObtainedThen TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); - GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); + GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, ptrSVM); std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context, 
context->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context->getDevice(0), &retVal)); - kernel->setSvmKernelExecInfo(pSvmAlloc); + kernel->setSvmKernelExecInfo(svmAllocation); size_t offset = 0; size_t size = 1; @@ -916,7 +916,7 @@ TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); - GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); + GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, ptrSVM); auto program = clUniquePtr(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); @@ -927,7 +927,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur kernel->getResidency(allSurfaces); EXPECT_EQ(1u, allSurfaces.size()); - kernel->setSvmKernelExecInfo(pSvmAlloc); + kernel->setSvmKernelExecInfo(svmAllocation); auto uEvent = makeReleaseable(); cl_event eventWaitList[] = {uEvent.get()}; diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index c21784549e..a7ff2d86a0 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -137,7 +137,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferCo BuiltinOpParams dc; dc.srcMemObj = srcBuffer; - dc.srcMemObj = dstBuffer; + dc.dstMemObj = dstBuffer; dc.srcOffset = 
{EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index 99209442a4..6302935e73 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -459,16 +459,21 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnGfxAllocationThenContextProvidesProperHint) { + auto device = castToObject(devices[0]); + const ClDeviceInfo &devInfo = device->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); - auto pDevice = castToObject(devices[0]); - MockKernelWithInternals mockKernel(*pDevice, context); + MockKernelWithInternals mockKernel(*device, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); - MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmHelper(), true); + MockBuffer::setAllocationType(&gfxAllocation, device->getRootDeviceEnvironment().getGmmHelper(), true); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); @@ -558,16 +563,21 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKernelObjectWithGraphicsAllocationAccessedStatefullyOnlyThenDontReportAnyHint) { + auto device = castToObject(devices[0]); + const ClDeviceInfo &devInfo = device->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + DebugManagerStateRestore dbgRestore; 
DebugManager.flags.PrintDriverDiagnostics.set(1); - auto pDevice = castToObject(devices[0]); - MockKernelWithInternals mockKernel(*pDevice, context); + MockKernelWithInternals mockKernel(*device, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); - MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmHelper(), true); + MockBuffer::setAllocationType(&gfxAllocation, device->getRootDeviceEnvironment().getGmmHelper(), true); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); @@ -588,13 +598,17 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKerne } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCallFillWithKernelObjsForAuxTranslationOnGfxAllocationThenDontReportAnyHint) { - auto pDevice = castToObject(devices[0]); - MockKernelWithInternals mockKernel(*pDevice, context); + auto device = castToObject(devices[0]); + const ClDeviceInfo &devInfo = device->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + MockKernelWithInternals mockKernel(*device, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); - MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmHelper(), true); + MockBuffer::setAllocationType(&gfxAllocation, device->getRootDeviceEnvironment().getGmmHelper(), true); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); @@ -614,8 +628,12 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCall } TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNullGfxAllocationThenDontReportAnyHint) { - auto pDevice = castToObject(devices[0]); - MockKernelWithInternals mockKernel(*pDevice, context); + auto device = castToObject(devices[0]); + const ClDeviceInfo &devInfo = device->getDeviceInfo(); + 
if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + MockKernelWithInternals mockKernel(*device, context); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); diff --git a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp index 6d55004c2c..4a204a8467 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp @@ -832,6 +832,10 @@ TEST_F(DispatchInfoBuilderTest, GivenSplit3dWhenSettingDispatchGeometryThenMdiSi } TEST_F(DispatchInfoBuilderTest, WhenSettingKernelArgThenAddressesAreCorrect) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; @@ -957,6 +961,11 @@ TEST_F(DispatchInfoBuilderTest, GivenInvalidInputWhenSettingKernelArgThenInvalid } TEST_F(DispatchInfoBuilderTest, GivenNullKernelWhenSettingKernelArgThenSuccessIsReturned) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index d691456ea3..829cbbed4b 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -471,6 +471,11 @@ TEST_F(CloneKernelTest, givenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { } TEST_F(CloneKernelTest, givenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect) { + const ClDeviceInfo &devInfo = device1->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *)); char memory[100] = {}; diff --git 
a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index 1cd2d37258..c08de6b6ab 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -372,6 +372,11 @@ TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessT } TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); @@ -388,6 +393,11 @@ TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHost } TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); @@ -548,6 +558,11 @@ TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccess } TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); @@ -569,6 +584,11 @@ TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccess } TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if 
(devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); @@ -762,3 +782,49 @@ HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenPatchingSurfac EXPECT_NE(0xdeadu, *patchLocation); } + +TEST_F(KernelArgBufferTest, givenBufferAsHostMemoryWhenSettingKernelArgThenKernelUsesSystemMemory) { + MockBuffer buffer; + buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::BUFFER_HOST_MEMORY); + + auto memVal = (cl_mem)&buffer; + auto val = &memVal; + + EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + auto retVal = pKernel->setArg(0, sizeof(cl_mem *), val); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(pKernel->isAnyKernelArgumentUsingSystemMemory()); +} + +TEST_F(KernelArgBufferTest, givenBufferAsDeviceMemoryWhenSettingKernelArgThenKernelNotUsesSystemMemory) { + MockBuffer buffer; + buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::BUFFER); + + auto memVal = (cl_mem)&buffer; + auto val = &memVal; + + EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + auto retVal = pKernel->setArg(0, sizeof(cl_mem *), val); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); +} + +TEST_F(KernelArgBufferTest, givenBufferAsDeviceMemoryAndKernelIsAlreadySetToUseSystemWhenSettingKernelArgThenKernelUsesSystemMemory) { + MockBuffer buffer; + buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::BUFFER); + + auto memVal = (cl_mem)&buffer; + auto val = &memVal; + + EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + pKernel->anyKernelArgumentUsingSystemMemory = true; + + auto retVal = pKernel->setArg(0, sizeof(cl_mem *), val); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(pKernel->isAnyKernelArgumentUsingSystemMemory()); +} diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp 
b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index c1f2fe7923..9c48a57d44 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -9,6 +9,7 @@ #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" +#include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" @@ -73,6 +74,10 @@ class KernelArgSvmFixture : public ContextFixture, public ClDeviceFixture { typedef Test KernelArgSvmTest; TEST_F(KernelArgSvmTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } char *svmPtr = new char[256]; auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); @@ -86,6 +91,10 @@ TEST_F(KernelArgSvmTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect } HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } char *svmPtr = new char[256]; pKernelInfo->argAsPtr(0).bindful = 0; @@ -106,6 +115,10 @@ HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsA } TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreSetCorrectly) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); @@ -121,6 +134,10 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreS } TEST_F(KernelArgSvmTest, GivenSvmAllocWithUncacheableWhenSettingKernelArgThenKernelHasUncacheableArgs) { + const ClDeviceInfo &devInfo = 
pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } auto svmPtr = std::make_unique(256); MockGraphicsAllocation svmAlloc(svmPtr.get(), 256); @@ -133,6 +150,10 @@ TEST_F(KernelArgSvmTest, GivenSvmAllocWithUncacheableWhenSettingKernelArgThenKer } TEST_F(KernelArgSvmTest, GivenSvmAllocWithoutUncacheableAndKenelWithUncachebleArgWhenSettingKernelArgThenKernelDoesNotHaveUncacheableArgs) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } auto svmPtr = std::make_unique(256); MockGraphicsAllocation svmAlloc(svmPtr.get(), 256); @@ -150,6 +171,10 @@ TEST_F(KernelArgSvmTest, GivenSvmAllocWithoutUncacheableAndKenelWithUncachebleAr } HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); @@ -172,6 +197,10 @@ HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArg } HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThenProperSvmAddressIsPatched) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } std::unique_ptr svmPtr(new char[256]); auto offsetedPtr = svmPtr.get() + 4; @@ -191,6 +220,11 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen } HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetArgSvmIsCalledWithSurfaceStateThenSizeIsMaxAndAddressIsProgrammed) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + this->pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | 
CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; auto systemPointer = reinterpret_cast(0xfeedbac); @@ -211,11 +245,21 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr } TEST_F(KernelArgSvmTest, WhenSettingKernelArgImmediateThenInvalidArgValueErrorIsReturned) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + auto retVal = pKernel->setArgImmediate(0, 256, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); } HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); @@ -264,6 +308,11 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { } TEST_F(KernelArgSvmTest, WhenPatchingBufferOffsetThenPatchIsApplied) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + std::vector svmPtr; svmPtr.resize(256); @@ -353,6 +402,11 @@ using SetArgHandlers = ::testing::TypesgetDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); @@ -413,6 +467,11 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN } TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); @@ 
-428,6 +487,11 @@ TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpe } TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); @@ -443,6 +507,11 @@ TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectA } TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); @@ -458,6 +527,11 @@ TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotEx } TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenDoNotExpectSvmFlushFlagTrue) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); @@ -472,6 +546,11 @@ TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThen } TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); @@ -486,6 +565,11 @@ TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoTh } 
TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); @@ -500,6 +584,11 @@ TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernel } TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvmArgUseGpuAddress) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + char svmPtr[256]; pKernelInfo->argAsPtr(0).bufferOffset = 0u; @@ -515,6 +604,11 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvm } TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBufferOffsetWithGpuAddress) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + std::vector svmPtr; svmPtr.resize(256); @@ -534,3 +628,82 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBuff EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); } + +TEST_F(KernelArgSvmTest, GivenZeroCopySvmPtrWhenSettingKernelArgThenKernelUsesSystemMemory) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + + void *alloc = pContext->getSVMAllocsManager()->createSVMAlloc( + 4096, + MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_ONLY), + pContext->getRootDeviceIndices(), + pContext->getDeviceBitfields()); + + auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(alloc); + auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(*pContext->getRootDeviceIndices().begin()); + gpuAllocation->setAllocationType(NEO::AllocationType::SVM_ZERO_COPY); + + 
EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + auto retVal = pKernel->setArgSvmAlloc(0, alloc, gpuAllocation, 0u); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + pContext->getSVMAllocsManager()->freeSVMAlloc(alloc); +} + +TEST_F(KernelArgSvmTest, GivenGpuSvmPtrWhenSettingKernelArgThenKernelNotUsesSystemMemory) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + + void *alloc = pContext->getSVMAllocsManager()->createSVMAlloc( + 4096, + MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_ONLY), + pContext->getRootDeviceIndices(), + pContext->getDeviceBitfields()); + + auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(alloc); + auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(*pContext->getRootDeviceIndices().begin()); + gpuAllocation->setAllocationType(NEO::AllocationType::SVM_GPU); + + EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + auto retVal = pKernel->setArgSvmAlloc(0, alloc, gpuAllocation, 0u); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + pContext->getSVMAllocsManager()->freeSVMAlloc(alloc); +} + +TEST_F(KernelArgSvmTest, GivenGpuSvmPtrAndKernelIsAlreadySetToUseSystemWhenSettingKernelArgThenKernelUsesSystemMemory) { + const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + + void *alloc = pContext->getSVMAllocsManager()->createSVMAlloc( + 4096, + MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_ONLY), + pContext->getRootDeviceIndices(), + pContext->getDeviceBitfields()); + + auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(alloc); + auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(*pContext->getRootDeviceIndices().begin()); + gpuAllocation->setAllocationType(NEO::AllocationType::SVM_GPU); + + 
EXPECT_FALSE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + pKernel->anyKernelArgumentUsingSystemMemory = true; + + auto retVal = pKernel->setArgSvmAlloc(0, alloc, gpuAllocation, 0u); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(pKernel->isAnyKernelArgumentUsingSystemMemory()); + + pContext->getSVMAllocsManager()->freeSVMAlloc(alloc); +} diff --git a/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp b/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp index 2ea3e09f19..583af32b7b 100644 --- a/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp @@ -65,6 +65,10 @@ TEST_F(PatchedKernelTest, givenKernelWithoutAllArgsSetWhenIsPatchedIsCalledThenR } TEST_F(PatchedKernelTest, givenArgSvmAllocWhenArgIsSetThenArgIsPatched) { + const ClDeviceInfo &devInfo = device->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } EXPECT_FALSE(kernel->getKernelArguments()[0].isPatched); kernel->setArgSvmAlloc(0, nullptr, nullptr, 0u); EXPECT_TRUE(kernel->getKernelArguments()[0].isPatched); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 9864d26b11..ebd42c725f 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -67,24 +67,24 @@ class KernelTests : public ProgramFromBinaryFixture { ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel - pKernel = Kernel::create( + kernel = Kernel::create( pProgram, pProgram->getKernelInfoForKernel(kernelName), *pClDevice, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_NE(nullptr, pKernel); + ASSERT_NE(nullptr, kernel); } void TearDown() override { - delete pKernel; - pKernel = nullptr; + delete kernel; + kernel = nullptr; knownSource.reset(); ProgramFromBinaryFixture::TearDown(); } - MockKernel *pKernel = nullptr; + MockKernel *kernel = nullptr; cl_int retVal = CL_SUCCESS; }; @@ -100,15 +100,15 @@ TEST(KernelTest, 
WhenKernelIsCreatedThenCorrectMembersAreMemObjects) { } TEST_F(KernelTests, WhenKernelIsCreatedThenKernelHeapIsCorrect) { - EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); - EXPECT_EQ(pKernel->getKernelInfo().heapInfo.KernelHeapSize, pKernel->getKernelHeapSize()); + EXPECT_EQ(kernel->getKernelInfo().heapInfo.pKernelHeap, kernel->getKernelHeap()); + EXPECT_EQ(kernel->getKernelInfo().heapInfo.KernelHeapSize, kernel->getKernelHeapSize()); } TEST_F(KernelTests, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( 0, 0, nullptr, @@ -121,7 +121,7 @@ TEST_F(KernelTests, GivenInvalidParametersWhenGettingInfoThenValueSizeRetIsNotUp size_t paramValueSizeRet = 0x1234; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( 0, 0, nullptr, @@ -138,7 +138,7 @@ TEST_F(KernelTests, GivenKernelFunctionNameWhenGettingInfoThenKernelFunctionName size_t paramValueSizeRet = 0; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, nullptr, @@ -152,7 +152,7 @@ TEST_F(KernelTests, GivenKernelFunctionNameWhenGettingInfoThenKernelFunctionName // get the name paramValueSize = paramValueSizeRet; - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, paramValue, @@ -170,11 +170,11 @@ TEST_F(KernelTests, GivenKernelBinaryProgramIntelWhenGettingInfoThenKernelBinary size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; - const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap()); + const char *pKernelData = reinterpret_cast(kernel->getKernelHeap()); EXPECT_NE(nullptr, pKernelData); // get size of kernel binary - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, nullptr, @@ -187,7 +187,7 @@ TEST_F(KernelTests, GivenKernelBinaryProgramIntelWhenGettingInfoThenKernelBinary // get kernel binary 
paramValueSize = paramValueSizeRet; - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, paramValue, @@ -206,7 +206,7 @@ TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIs size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, &paramValue, @@ -215,7 +215,7 @@ TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIs EXPECT_EQ(CL_SUCCESS, retVal); auto gmmHelper = pDevice->getGmmHelper(); - auto expectedGpuAddress = gmmHelper->decanonize(pKernel->getKernelInfo().kernelAllocation->getGpuAddress()); + auto expectedGpuAddress = gmmHelper->decanonize(kernel->getKernelInfo().kernelAllocation->getGpuAddress()); EXPECT_EQ(expectedGpuAddress, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } @@ -227,7 +227,7 @@ TEST_F(KernelTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKernelArgsIsRet size_t paramValueSizeRet = 0; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, &paramValue, @@ -246,7 +246,7 @@ TEST_F(KernelTests, GivenKernelProgramWhenGettingInfoThenProgramIsReturned) { cl_program prog = pProgram; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, &paramValue, @@ -265,7 +265,7 @@ TEST_F(KernelTests, GivenKernelContextWhenGettingInfoThenKernelContextIsReturned cl_context context = pContext; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( paramName, paramValueSize, &paramValue, @@ -283,9 +283,9 @@ TEST_F(KernelTests, GivenKernelWorkGroupSizeWhenGettingWorkGroupInfoThenWorkGrou size_t paramValueSizeRet = 0; auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1; - pKernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); + kernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); - retVal = pKernel->getWorkGroupInfo( + retVal = 
kernel->getWorkGroupInfo( paramName, paramValueSize, &paramValue, @@ -302,7 +302,7 @@ TEST_F(KernelTests, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenC size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; - retVal = pKernel->getWorkGroupInfo( + retVal = kernel->getWorkGroupInfo( paramName, paramValueSize, &paramValue, @@ -368,7 +368,7 @@ TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhe TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0x1234u; - retVal = pKernel->getWorkGroupInfo( + retVal = kernel->getWorkGroupInfo( 0, 0, nullptr, @@ -379,15 +379,15 @@ TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValu } TEST_F(KernelTests, WhenIsSingleSubdevicePreferredIsCalledThenCorrectValuesAreReturned) { - std::unique_ptr pKernel{MockKernel::create(pClDevice->getDevice(), pProgram)}; + std::unique_ptr kernel{MockKernel::create(pClDevice->getDevice(), pProgram)}; for (auto usesSyncBuffer : ::testing::Bool()) { - pKernel->getAllocatedKernelInfo()->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer = usesSyncBuffer; + kernel->getAllocatedKernelInfo()->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer = usesSyncBuffer; for (auto singleSubdevicePreferredInCurrentEnqueue : ::testing::Bool()) { - pKernel->singleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue; + kernel->singleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue; - EXPECT_EQ(usesSyncBuffer, pKernel->usesSyncBuffer()); + EXPECT_EQ(usesSyncBuffer, kernel->usesSyncBuffer()); auto expectedSingleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue || usesSyncBuffer; - EXPECT_EQ(expectedSingleSubdevicePreferredInCurrentEnqueue, pKernel->isSingleSubdevicePreferred()); + EXPECT_EQ(expectedSingleSubdevicePreferredInCurrentEnqueue, kernel->isSingleSubdevicePreferred()); } } } 
@@ -417,7 +417,7 @@ TEST_F(KernelFromBinaryTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKerne auto &kernelInfo = pProgram->getKernelInfoForKernel("test"); // create a kernel - auto pKernel = Kernel::create( + auto kernel = Kernel::create( pProgram, kernelInfo, *pClDevice, @@ -429,7 +429,7 @@ TEST_F(KernelFromBinaryTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKerne size_t paramValueSizeRet = 0; // get size - retVal = pKernel->getInfo( + retVal = kernel->getInfo( CL_KERNEL_NUM_ARGS, sizeof(cl_uint), &paramValue, @@ -439,7 +439,7 @@ TEST_F(KernelFromBinaryTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKerne EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(3u, paramValue); - delete pKernel; + delete kernel; } TEST_F(KernelFromBinaryTests, WhenRegularKernelIsCreatedThenItIsNotBuiltIn) { @@ -456,21 +456,21 @@ TEST_F(KernelFromBinaryTests, WhenRegularKernelIsCreatedThenItIsNotBuiltIn) { auto &kernelInfo = pProgram->getKernelInfoForKernel("simple_kernel_0"); // create a kernel - auto pKernel = Kernel::create( + auto kernel = Kernel::create( pProgram, kernelInfo, *pClDevice, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_NE(nullptr, pKernel); + ASSERT_NE(nullptr, kernel); // get builtIn property - bool isBuiltIn = pKernel->isBuiltIn; + bool isBuiltIn = kernel->isBuiltIn; EXPECT_FALSE(isBuiltIn); - delete pKernel; + delete kernel; } TEST_F(KernelFromBinaryTests, givenArgumentDeclaredAsConstantWhenKernelIsCreatedThenArgumentIsMarkedAsReadOnly) { @@ -597,8 +597,8 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); // Test it auto executionEnvironment = 
pDevice->getExecutionEnvironment(); @@ -607,13 +607,13 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda csr->residency.clear(); EXPECT_EQ(0u, csr->residency.size()); - pKernel->makeResident(*csr.get()); + kernel->makeResident(*csr.get()); EXPECT_EQ(1u, csr->residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations(), true); EXPECT_EQ(0u, csr->residency.size()); - delete pKernel; + delete kernel; } TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWhenKernelIsBeingDestroyedThenAllocationIsAddedToDeferredFreeList) { @@ -624,18 +624,18 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - pKernel->initialize(); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + kernel->initialize(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); - auto privateSurface = pKernel->privateSurface; + auto privateSurface = kernel->privateSurface; auto tagAddress = csr.getTagAddress(); privateSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); - pKernel.reset(nullptr); + kernel.reset(nullptr); EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), privateSurface); @@ -653,14 +653,14 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) { - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); if (MemoryManagement::nonfailingAllocation == failureIndex) { - 
EXPECT_EQ(CL_SUCCESS, pKernel->initialize()); + EXPECT_EQ(CL_SUCCESS, kernel->initialize()); } else { - EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); + EXPECT_EQ(CL_OUT_OF_RESOURCES, kernel->initialize()); } - delete pKernel; + delete kernel; }; auto f = new MemoryManagementFixture(); f->SetUp(); @@ -681,13 +681,13 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_TRUE(pKernel->privateSurface->is32BitAllocation()); + EXPECT_TRUE(kernel->privateSurface->is32BitAllocation()); - delete pKernel; + delete kernel; } } @@ -702,28 +702,28 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, kernel->getSurfaceStateHeapSize()); - auto bufferAddress = pKernel->privateSurface->getGpuAddress(); + auto bufferAddress = kernel->privateSurface->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(kernel->getSurfaceStateHeap(), 
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); - delete pKernel; + delete kernel; } TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsNotPatched) { @@ -742,15 +742,15 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); + EXPECT_EQ(0u, kernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, kernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); - delete pKernel; + delete kernel; } TEST_F(KernelPrivateSurfaceTest, givenNullDataParameterStreamWhenGettingConstantBufferSizeThenZeroIsReturned) { @@ -773,12 +773,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(false); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; - EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); + EXPECT_EQ(CL_OUT_OF_RESOURCES, kernel->initialize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) { @@ 
-788,12 +788,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; - EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); + EXPECT_EQ(CL_OUT_OF_RESOURCES, kernel->initialize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) { @@ -803,12 +803,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 8; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; - EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); + EXPECT_EQ(CL_OUT_OF_RESOURCES, kernel->initialize()); } TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithCpuAddress) { @@ -826,16 +826,16 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new 
MockKernel(&program, *pKernelInfo, *pClDevice); - pKernel->isBuiltIn = true; + kernel->isBuiltIn = true; - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); + EXPECT_EQ(bufferAddress, *(uint64_t *)kernel->getCrossThreadData()); program.setGlobalSurface(nullptr); - delete pKernel; + delete kernel; } TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithBaseAddressOffset) { @@ -853,15 +853,15 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); + EXPECT_EQ(bufferAddress, *(uint64_t *)kernel->getCrossThreadData()); program.setGlobalSurface(nullptr); - delete pKernel; + delete kernel; } HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsPatchedWithCpuAddress) { @@ -883,27 +883,27 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob program.setGlobalSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, 
kernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(kernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); program.setGlobalSurface(nullptr); - delete pKernel; + delete kernel; } TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsNotPatched) { @@ -921,15 +921,15 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba program.setGlobalSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); + EXPECT_EQ(0u, kernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, kernel->getSurfaceStateHeap()); program.setGlobalSurface(nullptr); - delete pKernel; + delete kernel; } TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithCpuAddress) { @@ -946,16 +946,16 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - pKernel->isBuiltIn = true; + kernel->isBuiltIn = true; - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t 
*)pKernel->getCrossThreadData()); + EXPECT_EQ(bufferAddress, *(uint64_t *)kernel->getCrossThreadData()); program.setConstantSurface(nullptr); - delete pKernel; + delete kernel; } TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithBaseAddressOffset) { @@ -973,15 +973,15 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); + EXPECT_EQ(bufferAddress, *(uint64_t *)kernel->getCrossThreadData()); program.setConstantSurface(nullptr); - delete pKernel; + delete kernel; } HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsPatchedWithCpuAddress) { @@ -1000,27 +1000,27 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, kernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(kernel->getSurfaceStateHeap(), 
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); program.setConstantSurface(nullptr); - delete pKernel; + delete kernel; } TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsNotPatched) { @@ -1038,15 +1038,15 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); + MockKernel *kernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); + EXPECT_EQ(0u, kernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, kernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); - delete pKernel; + delete kernel; } typedef Test KernelResidencyTest; @@ -1073,14 +1073,14 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + kernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); - pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); + kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size()); - 
EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation())); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(kernel->getKernelInfo().getGraphicsAllocation())); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -1100,16 +1100,16 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun program.buildInfos[pDevice->getRootDeviceIndex()].exportedFunctionsSurface = exportedFunctionsSurface.get(); MockContext ctx; program.setContext(&ctx); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); - pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); + kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].exportedFunctionsSurface)); // check getResidency as well std::vector residencySurfaces; - pKernel->getResidency(residencySurfaces); + kernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv = std::make_unique(); mockCsrExecEnv->prepareRootDeviceEnvironments(1); mockCsrExecEnv->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); @@ -1141,15 +1141,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); 
- pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); + kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); std::vector residencySurfaces; - pKernel->getResidency(residencySurfaces); + kernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv = std::make_unique(); mockCsrExecEnv->prepareRootDeviceEnvironments(1); mockCsrExecEnv->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); @@ -1314,7 +1314,13 @@ HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndNotRequiredMemSyncWhenM EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; + mockKernel.mockKernel->kernelArguments[0] = { + sizeof(uintptr_t), + gpuAllocation, + unifiedMemoryAllocation, + 4096u, + gpuAllocation, + Kernel::kernelArgType::SVM_ALLOC_OBJ}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false); mockKernel.mockKernel->makeResident(commandStreamReceiver); @@ -1345,7 +1351,13 @@ HWTEST_F(KernelResidencyTest, givenSvmArgWhenKernelDoesNotRequireUnifiedMemorySy mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; + mockKernel.mockKernel->kernelArguments[0] = { + sizeof(uintptr_t), + gpuAllocation, + unifiedMemoryAllocation, + 4096u, + 
gpuAllocation, + Kernel::kernelArgType::SVM_ALLOC_OBJ}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false); std::vector residencySurfaces; mockKernel.mockKernel->getResidency(residencySurfaces); @@ -1368,7 +1380,13 @@ HWTEST_F(KernelResidencyTest, givenSvmArgWhenKernelRequireUnifiedMemorySyncThenS mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; + mockKernel.mockKernel->kernelArguments[0] = { + sizeof(uintptr_t), + gpuAllocation, + unifiedMemoryAllocation, + 4096u, + gpuAllocation, + Kernel::kernelArgType::SVM_ALLOC_OBJ}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(true); std::vector residencySurfaces; mockKernel.mockKernel->getResidency(residencySurfaces); @@ -1393,7 +1411,13 @@ HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryRequiredMemSyncWhenMakeRes auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); - mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; + mockKernel.mockKernel->kernelArguments[0] = { + sizeof(uintptr_t), + gpuAllocation, + unifiedMemoryAllocation, + 4096u, + gpuAllocation, + Kernel::kernelArgType::SVM_ALLOC_OBJ}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(true); mockKernel.mockKernel->makeResident(commandStreamReceiver); @@ -1668,10 +1692,10 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor MockContext ctx; 
program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_FALSE(pKernel->getHasIndirectAccess()); + EXPECT_FALSE(kernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -1693,10 +1717,10 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_TRUE(pKernel->getHasIndirectAccess()); + EXPECT_TRUE(kernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -1718,10 +1742,10 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_TRUE(pKernel->getHasIndirectAccess()); + EXPECT_TRUE(kernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -1743,10 +1767,10 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndir MockContext ctx; 
program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); - EXPECT_TRUE(pKernel->getHasIndirectAccess()); + EXPECT_TRUE(kernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -2045,11 +2069,11 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor auto program = std::make_unique(toClDeviceVector(*pClDevice)); program->setContext(&context); - std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(program.get(), *pKernelInfo, *pClDevice)); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); - pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + kernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); + kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(imageNV12->isImageFromImage()); EXPECT_TRUE(imageY->isImageFromImage()); @@ -2066,17 +2090,17 @@ struct KernelExecutionEnvironmentTest : public Test { pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); } void TearDown() override { - delete pKernel; + delete kernel; ClDeviceFixture::TearDown(); } - MockKernel *pKernel; + MockKernel *kernel; std::unique_ptr program; std::unique_ptr pKernelInfo; 
SPatchExecutionEnvironment executionEnvironment = {}; @@ -2995,25 +3019,25 @@ TEST(ArgTypeTraits, GivenDefaultInitializedArgTypeMetadataThenAddressSpaceIsGlob TEST_F(KernelTests, givenKernelWithSimdGreaterThan1WhenKernelCreatedThenMaxWorgGroupSizeEqualDeviceProperty) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - std::unique_ptr pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; - EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), kernelMaxWorkGroupSize); + EXPECT_EQ(kernel->getMaxKernelWorkGroupSize(), kernelMaxWorkGroupSize); } TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSizeExualMaxHwThreadsPerWG) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; - std::unique_ptr pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto deviceInfo = pClDevice->getDevice().getDeviceInfo(); - auto &hwInfoConfig = *HwInfoConfig::get(pKernel->getHardwareInfo().platform.eProductFamily); - auto maxThreadsPerWG = hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); + auto &hwInfoConfig = *HwInfoConfig::get(kernel->getHardwareInfo().platform.eProductFamily); + auto maxThreadsPerWG = hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(kernel->getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); - EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize); - EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG); + 
EXPECT_LT(kernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize); + EXPECT_EQ(kernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG); } struct KernelLargeGrfTests : Test { @@ -3082,18 +3106,18 @@ HWTEST2_F(KernelConstantSurfaceTest, givenKernelWithConstantSurfaceWhenKernelIsC program.setConstantSurface(&gfxAlloc); // create kernel - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(kernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); auto actualMocs = surfaceState->getMemoryObjectControlState(); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); @@ -3233,3 +3257,25 @@ TEST_F(KernelImplicitArgsTest, givenKernelWithoutImplicitArgsWhenSettingKernelPa EXPECT_EQ(nullptr, kernel.getImplicitArgs()); } + +TEST_F(KernelTests, GivenCorrectAllocationTypeThenFunctionCheckingSystemMemoryReturnsTrue) { + std::vector systemMemoryAllocationType = { + NEO::AllocationType::BUFFER_HOST_MEMORY, + NEO::AllocationType::EXTERNAL_HOST_PTR, + NEO::AllocationType::SVM_CPU, + NEO::AllocationType::SVM_ZERO_COPY}; + + for (uint32_t allocationTypeIndex = static_cast(NEO::AllocationType::UNKNOWN); + allocationTypeIndex < static_cast(NEO::AllocationType::COUNT); + allocationTypeIndex++) { + auto currentAllocationType = static_cast(allocationTypeIndex); + bool ret = 
kernel->graphicsAllocationTypeUseSystemMemory(currentAllocationType); + if (std::find(systemMemoryAllocationType.begin(), + systemMemoryAllocationType.end(), + currentAllocationType) != systemMemoryAllocationType.end()) { + EXPECT_TRUE(ret); + } else { + EXPECT_FALSE(ret); + } + } +} diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp index 8dd304a9c2..bf92d2f4b7 100644 --- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp @@ -267,13 +267,13 @@ TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIs auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); - GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - EXPECT_NE(nullptr, pSvmAlloc); + GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); + EXPECT_NE(nullptr, svmAllocation); retVal = pKernel->setArgSvmAlloc( 0, ptrSVM, - pSvmAlloc, + svmAllocation, 0u); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 71b0865d96..0532a62a02 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -97,12 +97,14 @@ class MockKernel : public Kernel { public: using Kernel::addAllocationToCacheFlushVector; using Kernel::allBufferArgsStateful; + using Kernel::anyKernelArgumentUsingSystemMemory; using Kernel::auxTranslationRequired; using Kernel::containsStatelessWrites; using Kernel::dataParameterSimdSize; using Kernel::executionType; using Kernel::getDevice; using Kernel::getHardwareInfo; + using Kernel::graphicsAllocationTypeUseSystemMemory; using Kernel::hasDirectStatelessAccessToHostMemory; using Kernel::hasDirectStatelessAccessToSharedBuffer; using 
Kernel::hasIndirectStatelessAccessToHostMemory;