diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 2145a87bcc..bafd8574b4 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -4790,7 +4790,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } - GraphicsAllocation *pSvmAlloc = nullptr; + MultiGraphicsAllocation *pSvmAllocs = nullptr; if (argValue != nullptr) { auto svmManager = pKernel->getContext().getSVMAllocsManager(); auto svmData = svmManager->getSVMAlloc(argValue); @@ -4803,11 +4803,11 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } } else { - pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pKernel->getDevices()[0]->getRootDeviceIndex()); + pSvmAllocs = &svmData->gpuAllocations; } } - retVal = pKernel->setArgSvmAlloc(argIndex, const_cast(argValue), pSvmAlloc); + retVal = pKernel->setArgMultiDeviceSvmAlloc(argIndex, const_cast(argValue), pSvmAllocs); TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } diff --git a/opencl/source/built_ins/builtins_dispatch_builder.cpp b/opencl/source/built_ins/builtins_dispatch_builder.cpp index f90d37216b..4cff48a7f6 100644 --- a/opencl/source/built_ins/builtins_dispatch_builder.cpp +++ b/opencl/source/built_ins/builtins_dispatch_builder.cpp @@ -313,8 +313,9 @@ class BuiltInOp : public BuiltinDispatchInfoBuilder { kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 1, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes)); // Set-up srcMemObj with pattern - auto graphicsAllocation = operationParams.srcMemObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); - kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation, CL_MEM_READ_ONLY); + auto multiGraphicsAllocation = &operationParams.srcMemObj->getMultiGraphicsAllocation(); + auto graphicsAllocation = multiGraphicsAllocation->getDefaultGraphicsAllocation(); + kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), graphicsAllocation->getUnderlyingBuffer(), multiGraphicsAllocation, CL_MEM_READ_ONLY); // Set-up patternSizeInEls kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast(operationParams.srcMemObj->getSize())); diff --git a/opencl/source/kernel/CMakeLists.txt b/opencl/source/kernel/CMakeLists.txt index 20d9c2337e..3115e53296 100644 --- a/opencl/source/kernel/CMakeLists.txt +++ b/opencl/source/kernel/CMakeLists.txt @@ -16,6 +16,8 @@ set(RUNTIME_SRCS_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_objects_for_aux_translation.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_extra.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/svm_object_arg.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/svm_object_arg.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_KERNEL ${RUNTIME_SRCS_KERNEL}) diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 47f074111a..f02e216fd9 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -42,6 +42,7 @@ #include "opencl/source/kernel/image_transformer.h" #include "opencl/source/kernel/kernel.inl" #include "opencl/source/kernel/kernel_info_cl.h" +#include "opencl/source/kernel/svm_object_arg.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/pipe.h" @@ -101,11 +102,14 @@ Kernel::~Kernel() { } for (uint32_t i = 0; i < patchedArgumentsNum; i++) { - if (SAMPLER_OBJ == getKernelArguments()[i].type) { - auto sampler = castToObject(kernelArguments.at(i).object); + if (SAMPLER_OBJ == kernelArguments[i].type) { + auto sampler = castToObject(kernelArguments[i].object); if (sampler) { sampler->decRefInternal(); } + } else if (SVM_ALLOC_OBJ == kernelArguments[i].type) { + auto svmObjectArg = reinterpret_cast(kernelArguments[i].object); + delete svmObjectArg; } } @@ -375,6 +379,7 @@ cl_int Kernel::initialize() { } auto numArgs = kernelInfo.kernelArgInfo.size(); kernelDeviceInfo.slmSizes.resize(numArgs); + kernelDeviceInfo.kernelArgRequiresCacheFlush.resize(numArgs); isDeviceInitialized.set(rootDeviceIndex); } @@ -388,7 +393,6 @@ cl_int Kernel::initialize() { auto numArgs = defaultKernelInfo.kernelArgInfo.size(); kernelArguments.resize(numArgs); kernelArgHandlers.resize(numArgs); - kernelArgRequiresCacheFlush.resize(numArgs); for (uint32_t i = 0; i < numArgs; ++i) { storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0); @@ -452,11 +456,11 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) { break; case SVM_OBJ: setArgSvm(i, pSourceKernel->getKernelArgInfo(i).size, const_cast(pSourceKernel->getKernelArgInfo(i).value), - pSourceKernel->getKernelArgInfo(i).pSvmAlloc, pSourceKernel->getKernelArgInfo(i).svmFlags); + pSourceKernel->getKernelArgInfo(i).pSvmAllocs, pSourceKernel->getKernelArgInfo(i).svmFlags); break; case SVM_ALLOC_OBJ: - setArgSvmAlloc(i, const_cast(pSourceKernel->getKernelArgInfo(i).value), - (GraphicsAllocation *)pSourceKernel->getKernelArgInfo(i).object); + setArgMultiDeviceSvmAlloc(i, const_cast(pSourceKernel->getKernelArgInfo(i).value), + reinterpret_cast(pSourceKernel->getKernelArgInfo(i).object)->getMultiDeviceSvmAlloc()); break; default: setArg(i, pSourceKernel->getKernelArgInfo(i).size, pSourceKernel->getKernelArgInfo(i).value); @@ -926,37 +930,41 @@ void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, Grap return ptrToPatch; } -cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) { - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - auto &kernelInfo = getKernelInfo(rootDeviceIndex); - void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex); +cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, const MultiGraphicsAllocation *svmAllocs, cl_mem_flags svmFlags) { setArgImmediate(argIndex, sizeof(void *), &svmPtr); + storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAllocs, svmFlags); - storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags); + std::bitset<64> isArgSet{}; - if (requiresSshForBuffers(rootDeviceIndex)) { - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); - Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0); + for (const auto &pClDevice : getDevices()) { + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); + if (isArgSet.test(rootDeviceIndex)) { + continue; + } + + auto svmAlloc = svmAllocs ? svmAllocs->getGraphicsAllocation(rootDeviceIndex) : nullptr; + auto &kernelInfo = getKernelInfo(rootDeviceIndex); + void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex); + + if (requiresSshForBuffers(rootDeviceIndex)) { + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0); + } + addAllocationToCacheFlushVector(argIndex, svmAlloc, rootDeviceIndex); + isArgSet.set(rootDeviceIndex); } if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } - addAllocationToCacheFlushVector(argIndex, svmAlloc); - return CL_SUCCESS; } - -cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { - DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc); - - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); +void Kernel::setArgSvmAllocForSingleDevice(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, const Device &device) { + auto rootDeviceIndex = device.getRootDeviceIndex(); auto &kernelInfo = getKernelInfo(rootDeviceIndex); const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; - storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); - void *ptrToPatch = patchBufferOffset(kernelArgInfo, svmPtr, svmAlloc, rootDeviceIndex); auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex), @@ -991,27 +999,62 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch()); allocSize -= offset; } - Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0); + Buffer::setSurfaceState(&device, surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0); } + addAllocationToCacheFlushVector(argIndex, svmAlloc, rootDeviceIndex); +} +cl_int Kernel::setArgMultiDeviceSvmAlloc(uint32_t argIndex, void *svmPtr, MultiGraphicsAllocation *svmAllocs) { + DBG_LOG_INPUTS("setArgMultiDeviceSvmAlloc svm_allocs", svmAllocs); + if (kernelArguments[argIndex].object) { + delete reinterpret_cast(kernelArguments[argIndex].object); + } + storeKernelArg(argIndex, SVM_ALLOC_OBJ, new SvmObjectArg(svmAllocs), svmPtr, sizeof(uintptr_t)); + + std::bitset<64> isArgSet{}; + + for (const auto &pClDevice : getDevices()) { + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); + if (isArgSet.test(rootDeviceIndex)) { + continue; + } + auto pSvmAlloc = svmAllocs ? svmAllocs->getGraphicsAllocation(rootDeviceIndex) : nullptr; + setArgSvmAllocForSingleDevice(argIndex, svmPtr, pSvmAlloc, pClDevice->getDevice()); + isArgSet.set(rootDeviceIndex); + } + if (!kernelArguments[argIndex].isPatched) { + patchedArgumentsNum++; + kernelArguments[argIndex].isPatched = true; + } + + return CL_SUCCESS; +} + +cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { + DBG_LOG_INPUTS("setArgSvmAlloc svm_alloc", svmAlloc); + if (kernelArguments[argIndex].object && SVM_ALLOC_OBJ == kernelArguments[argIndex].type) { + delete reinterpret_cast(kernelArguments[argIndex].object); + } + storeKernelArg(argIndex, SVM_ALLOC_OBJ, new SvmObjectArg(svmAlloc), svmPtr, sizeof(uintptr_t)); + + setArgSvmAllocForSingleDevice(argIndex, svmPtr, svmAlloc, getDevice().getDevice()); + if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } - addAllocationToCacheFlushVector(argIndex, svmAlloc); - return CL_SUCCESS; } void Kernel::storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argObject, const void *argValue, size_t argSize, - GraphicsAllocation *argSvmAlloc, cl_mem_flags argSvmFlags) { + const MultiGraphicsAllocation *argSvmAllocs, cl_mem_flags argSvmFlags) { kernelArguments[argIndex].type = argType; kernelArguments[argIndex].object = argObject; kernelArguments[argIndex].value = argValue; kernelArguments[argIndex].size = argSize; - kernelArguments[argIndex].pSvmAlloc = argSvmAlloc; + kernelArguments[argIndex].pSvmAllocs = argSvmAllocs; kernelArguments[argIndex].svmFlags = argSvmFlags; } @@ -1135,11 +1178,12 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local } inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) { - auto numArgs = kernelInfos[commandStreamReceiver.getRootDeviceIndex()]->kernelArgInfo.size(); + auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex(); + auto numArgs = kernelInfos[rootDeviceIndex]->kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { - auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; + auto pSVMAlloc = reinterpret_cast(kernelArguments[argIndex].object)->getGraphicsAllocation(rootDeviceIndex); auto pageFaultManager = executionEnvironment.memoryManager->getPageFaultManager(); if (pageFaultManager && this->isUnifiedMemorySyncRequired) { @@ -1153,8 +1197,8 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive if (image && image->isImageFromImage()) { commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); } - memObj->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*executionEnvironment.memoryManager, commandStreamReceiver.getRootDeviceIndex()); - commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex())); + memObj->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*executionEnvironment.memoryManager, rootDeviceIndex); + commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation(rootDeviceIndex)); if (memObj->getMcsAllocation()) { commandStreamReceiver.makeResident(*memObj->getMcsAllocation()); } @@ -1326,7 +1370,7 @@ void Kernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { - auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; + auto pSVMAlloc = reinterpret_cast(kernelArguments[argIndex].object)->getGraphicsAllocation(rootDeviceIndex); dst.push_back(new GeneralSurface(pSVMAlloc)); } else if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); @@ -1351,8 +1395,7 @@ bool Kernel::requiresCoherency() { for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { - auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; - if (pSVMAlloc->isCoherent()) { + if (reinterpret_cast(kernelArguments[argIndex].object)->isCoherent()) { return true; } } @@ -1501,7 +1544,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, allocationForCacheFlush = nullptr; } - addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush); + addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush, rootDeviceIndex); isArgSet.set(rootDeviceIndex); } return CL_SUCCESS; @@ -1646,7 +1689,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, patch((imageDesc.image_height * pixelSize) - 1, crossThreadData, kernelArgInfo.offsetFlatHeight); patch(imageDesc.image_row_pitch - 1, crossThreadData, kernelArgInfo.offsetFlatPitch); - addAllocationToCacheFlushVector(argIndex, graphicsAllocation); + addAllocationToCacheFlushVector(argIndex, graphicsAllocation, rootDeviceIndex); retVal = CL_SUCCESS; } @@ -2587,7 +2630,7 @@ void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &ke } } if (SVM_ALLOC_OBJ == getKernelArguments().at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) { - auto svmAlloc = reinterpret_cast(const_cast(getKernelArg(i))); + auto svmAlloc = reinterpret_cast(kernelArguments[i].object)->getGraphicsAllocation(rootDeviceIndex); if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, svmAlloc}); auto &context = this->program->getContext(); @@ -2609,8 +2652,8 @@ bool Kernel::hasDirectStatelessAccessToHostMemory() const { } } if (SVM_ALLOC_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) { - auto svmAlloc = reinterpret_cast(getKernelArg(i)); - if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) { + auto svmObjectArg = reinterpret_cast(kernelArguments[i].object); + if (svmObjectArg && svmObjectArg->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) { return true; } } @@ -2640,7 +2683,7 @@ void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo(rootDeviceIndex))) { return; } - for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) { + for (GraphicsAllocation *alloc : this->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush) { if (nullptr == alloc) { continue; } @@ -2666,7 +2709,8 @@ bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) const { return argAllocation->isFlushL3Required(); } -void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation) { +void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation, uint32_t rootDeviceIndex) { + auto &kernelArgRequiresCacheFlush = kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush; if (argAllocation == nullptr) { kernelArgRequiresCacheFlush[argIndex] = nullptr; } else { diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index a23e205e5d..f90e7dbff3 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -70,7 +70,7 @@ class Kernel : public BaseObject<_cl_kernel> { void *object; const void *value; size_t size; - GraphicsAllocation *pSvmAlloc; + const MultiGraphicsAllocation *pSvmAllocs; cl_mem_flags svmFlags; bool isPatched = false; bool isStatelessUncacheable = false; @@ -148,8 +148,9 @@ class Kernel : public BaseObject<_cl_kernel> { // API entry points cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal); - cl_int setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags); + cl_int setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, const MultiGraphicsAllocation *svmAlloc, cl_mem_flags svmFlags); cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc); + cl_int setArgMultiDeviceSvmAlloc(uint32_t argIndex, void *svmPtr, MultiGraphicsAllocation *svmAlloc); void setSvmKernelExecInfo(GraphicsAllocation *argValue); void clearSvmKernelExecInfo(); @@ -293,7 +294,7 @@ class Kernel : public BaseObject<_cl_kernel> { void *argObject, const void *argValue, size_t argSize, - GraphicsAllocation *argSvmAlloc = nullptr, + const MultiGraphicsAllocation *argSvmAllocs = nullptr, cl_mem_flags argSvmFlags = 0); const void *getKernelArg(uint32_t argIndex) const; const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const; @@ -509,9 +510,11 @@ class Kernel : public BaseObject<_cl_kernel> { bool hasDirectStatelessAccessToHostMemory() const; bool hasIndirectStatelessAccessToHostMemory() const; - void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); + void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation, uint32_t rootDeviceIndex); bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; + void setArgSvmAllocForSingleDevice(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, const Device &device); + const HardwareInfo &getHardwareInfo(uint32_t rootDeviceIndex) const; const ClDevice &getDevice() const { @@ -547,7 +550,6 @@ class Kernel : public BaseObject<_cl_kernel> { bool specialPipelineSelectMode = false; bool svmAllocationsRequireCacheFlush = false; - std::vector kernelArgRequiresCacheFlush; UnifiedMemoryControls unifiedMemoryControls{}; bool isUnifiedMemorySyncRequired = true; bool debugEnabled = false; @@ -598,6 +600,7 @@ class Kernel : public BaseObject<_cl_kernel> { GraphicsAllocation *privateSurface = nullptr; uint64_t privateSurfaceSize = 0u; + std::vector kernelArgRequiresCacheFlush; }; std::vector kernelDeviceInfos; const uint32_t defaultRootDeviceIndex; diff --git a/opencl/source/kernel/svm_object_arg.cpp b/opencl/source/kernel/svm_object_arg.cpp new file mode 100644 index 0000000000..ed5a57c918 --- /dev/null +++ b/opencl/source/kernel/svm_object_arg.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/kernel/svm_object_arg.h" + +#include "shared/source/memory_manager/multi_graphics_allocation.h" + +namespace NEO { +SvmObjectArg::SvmObjectArg(GraphicsAllocation *graphicsAllocation) : type(SvmObjectArgType::SingleDeviceSvm), singleDeviceSvmAlloc(graphicsAllocation) {} +SvmObjectArg::SvmObjectArg(MultiGraphicsAllocation *multiGraphicsAllocation) : type(SvmObjectArgType::MultiDeviceSvm), multiDeviceSvmAlloc(multiGraphicsAllocation) {} + +GraphicsAllocation *SvmObjectArg::getGraphicsAllocation(uint32_t rootDeviceIndex) const { + if (SvmObjectArgType::SingleDeviceSvm == type) { + DEBUG_BREAK_IF(singleDeviceSvmAlloc && rootDeviceIndex != singleDeviceSvmAlloc->getRootDeviceIndex()); + return singleDeviceSvmAlloc; + } + UNRECOVERABLE_IF(!multiDeviceSvmAlloc); + return multiDeviceSvmAlloc->getGraphicsAllocation(rootDeviceIndex); +} + +bool SvmObjectArg::isCoherent() const { + if (SvmObjectArgType::SingleDeviceSvm == type) { + return singleDeviceSvmAlloc->isCoherent(); + } + return multiDeviceSvmAlloc->isCoherent(); +} +GraphicsAllocation::AllocationType SvmObjectArg::getAllocationType() const { + if (SvmObjectArgType::SingleDeviceSvm == type) { + return singleDeviceSvmAlloc->getAllocationType(); + } + return multiDeviceSvmAlloc->getAllocationType(); +} +} // namespace NEO diff --git a/opencl/source/kernel/svm_object_arg.h b/opencl/source/kernel/svm_object_arg.h new file mode 100644 index 0000000000..a3bcfaf743 --- /dev/null +++ b/opencl/source/kernel/svm_object_arg.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/graphics_allocation.h" + +namespace NEO { +class GraphicsAllocation; +class MultiGraphicsAllocation; +class SvmObjectArg { + public: + SvmObjectArg(GraphicsAllocation *graphicsAllocation); + SvmObjectArg(MultiGraphicsAllocation *multiGraphicsAllocation); + + GraphicsAllocation *getGraphicsAllocation(uint32_t rootDeviceIndex) const; + bool isCoherent() const; + GraphicsAllocation::AllocationType getAllocationType() const; + MultiGraphicsAllocation *getMultiDeviceSvmAlloc() const { return multiDeviceSvmAlloc; } + + protected: + enum class SvmObjectArgType { + SingleDeviceSvm, + MultiDeviceSvm + }; + + const SvmObjectArgType type; + GraphicsAllocation *singleDeviceSvmAlloc = nullptr; + MultiGraphicsAllocation *multiDeviceSvmAlloc = nullptr; +}; +} // namespace NEO diff --git a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp index 78e7761949..ffcd6e0697 100644 --- a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -376,17 +376,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersTha retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheableInSurfaceState); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); + auto &kernelArgRequiresCacheFlush = kernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush; + + EXPECT_EQ(nullptr, kernelArgRequiresCacheFlush[0]); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, kernel->kernelArgRequiresCacheFlush[0]); + EXPECT_NE(nullptr, kernelArgRequiresCacheFlush[0]); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, kernelArgRequiresCacheFlush[0]); clReleaseMemObject(bufferUncacheableInSurfaceState); clReleaseMemObject(bufferUncacheable); diff --git a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl index d18238daa4..6817f35c52 100644 --- a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl +++ b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl @@ -9,6 +9,7 @@ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" +#include "opencl/source/kernel/svm_object_arg.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" @@ -648,8 +649,8 @@ TEST(clUnifiedSharedMemoryTests, whenClSetKernelArgMemPointerINTELisCalledWithVa retVal = clSetKernelArgMemPointerINTEL(mockKernel.mockKernel, 0, unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); auto svmAlloc = mockContext->getSVMAllocsManager()->getSVMAlloc(unfiedMemoryDeviceAllocation); - EXPECT_EQ(mockKernel.mockKernel->kernelArguments[0].object, - svmAlloc->gpuAllocations.getGraphicsAllocation(mockContext->getDevice(0)->getRootDeviceIndex())); + auto multiGraphicsAllocation = reinterpret_cast(mockKernel.mockKernel->kernelArguments[0].object)->getMultiDeviceSvmAlloc(); + EXPECT_EQ(multiGraphicsAllocation, &svmAlloc->gpuAllocations); retVal = clMemFreeINTEL(mockContext.get(), unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index 15bf350a8f..97a9f925db 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -22,6 +22,7 @@ #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel.h" +#include "opencl/source/kernel/svm_object_arg.h" #include "opencl/test/unit_test/built_ins/built_ins_file_names.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" @@ -278,14 +279,16 @@ HWTEST_P(AuxBuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTr Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } else { - auto gfxAllocation = static_cast(kernel->getKernelArguments().at(0).object); + auto gfxAllocation = reinterpret_cast(kernel->getKernelArguments().at(0).object)->getGraphicsAllocation(rootDeviceIndex); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); - EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(0).object); - EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(1).object); + auto svmArg0 = reinterpret_cast(kernel->getKernelArguments().at(0).object); + auto svmArg1 = reinterpret_cast(kernel->getKernelArguments().at(1).object); + EXPECT_EQ(gfxAllocation, svmArg0->getGraphicsAllocation(rootDeviceIndex)); + EXPECT_EQ(gfxAllocation, svmArg1->getGraphicsAllocation(rootDeviceIndex)); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(gfxAllocation->getUnderlyingBufferSize(), 512) / 16; @@ -344,14 +347,16 @@ HWTEST_P(AuxBuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTrans Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } else { - auto gfxAllocation = static_cast(kernel->getKernelArguments().at(0).object); + auto gfxAllocation = reinterpret_cast(kernel->getKernelArguments().at(0).object)->getGraphicsAllocation(rootDeviceIndex); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); - EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(0).object); - EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(1).object); + auto svmArg0 = reinterpret_cast(kernel->getKernelArguments().at(0).object); + auto svmArg1 = reinterpret_cast(kernel->getKernelArguments().at(1).object); + EXPECT_EQ(gfxAllocation, svmArg0->getGraphicsAllocation(rootDeviceIndex)); + EXPECT_EQ(gfxAllocation, svmArg1->getGraphicsAllocation(rootDeviceIndex)); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(gfxAllocation->getUnderlyingBufferSize(), 512) / 16; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 5da89d3d97..6d049c3089 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -470,9 +470,10 @@ HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWi pDevice->getUltCommandStreamReceiver().multiOsContextCapable = true; MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get()); - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); + auto &kernelArgRequiresCacheFlush = mockKernelWithInternals.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush; + kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel})); @@ -486,10 +487,11 @@ HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWi MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr); MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get()); + auto &kernelArgRequiresCacheFlush = mockKernelWithInternals.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush; - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); + kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel})); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index f6cfcfec85..0f3da43b46 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -1151,10 +1151,12 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq DebugManager.flags.EnableCacheFlushAfterWalker.set(0); MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); + + auto &kernelArgRequiresCacheFlush = kernel1.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - kernel1.kernelArgRequiresCacheFlush.resize(1); + kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; - kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1})); // create commandStream @@ -1188,11 +1190,11 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); - kernel1.kernelArgRequiresCacheFlush.resize(1); - kernel2.kernelArgRequiresCacheFlush.resize(1); + kernel1.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); + kernel2.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; - kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernel1.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernel2.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); // create commandStream @@ -1226,11 +1228,11 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); - kernel1.kernelArgRequiresCacheFlush.resize(1); - kernel2.kernelArgRequiresCacheFlush.resize(1); + kernel1.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); + kernel2.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; - kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernel1.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernel2.kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo1(pClDevice, std::vector({&kernel1})); MockMultiDispatchInfo multiDispatchInfo2(pClDevice, std::vector({&kernel2})); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index ae5f6c00c1..8e0c0ffe74 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -57,7 +57,7 @@ void HardwareCommandsTest::addSpaceForSingleKernelArg() { mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0; mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t); mockKernelWithInternal->mockKernel->setKernelArguments(kernelArguments); - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(1); + mockKernelWithInternal->mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenProgramInterfaceDescriptorDataIsCreatedThenOnlyRequiredSpaceOnIndirectHeapIsAllocated) { @@ -1278,8 +1278,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab addSpaceForSingleKernelArg(); MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + auto &kernelArgRequiresCacheFlush = mockKernelWithInternal->mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush; + kernelArgRequiresCacheFlush.resize(2); + kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); diff --git a/opencl/test/unit_test/kernel/CMakeLists.txt b/opencl/test/unit_test/kernel/CMakeLists.txt index 127283c2a3..c2470aeb3d 100644 --- a/opencl/test/unit_test/kernel/CMakeLists.txt +++ b/opencl/test/unit_test/kernel/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2017-2020 Intel Corporation +# Copyright (C) 2017-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -29,6 +29,7 @@ set(IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/svm_object_arg_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_kernel}) diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 4c0c2264e0..65fcd3e1a0 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,6 +12,7 @@ #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/kernel/kernel.h" +#include "opencl/source/kernel/svm_object_arg.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" @@ -458,8 +459,10 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); + MultiGraphicsAllocation multiGraphicsAllocation(svmAlloc.getRootDeviceIndex()); + multiGraphicsAllocation.addAllocation(&svmAlloc); - retVal = pSourceKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); + retVal = pSourceKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); @@ -473,7 +476,10 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); - EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); + EXPECT_NE(nullptr, pSourceKernel->getKernelArgInfo(0).object); + auto srcSvm = reinterpret_cast(pSourceKernel->getKernelArgInfo(0).object); + auto clonedSvm = reinterpret_cast(pClonedKernel->getKernelArgInfo(0).object); + EXPECT_EQ(srcSvm->getMultiDeviceSvmAlloc(), clonedSvm->getMultiDeviceSvmAlloc()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index 9ad5cfd49f..0811b71d43 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -324,7 +324,7 @@ TEST_F(KernelArgBufferTest, givenWritableBufferWhenSettingAsArgThenDoNotExpectAl auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { @@ -337,7 +337,7 @@ TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAlloc auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) { @@ -350,7 +350,7 @@ TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpect auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) { diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 8488cc2e84..eb1c041ea2 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -9,6 +9,7 @@ #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" +#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" @@ -339,7 +340,7 @@ class KernelArgSvmTestTyped : public KernelArgSvmTest { }; struct SetArgHandlerSetArgSvm { - static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { + static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, MultiGraphicsAllocation &alloc) { kernel.setArgSvm(argNum, allocSize, ptrToPatch, &alloc, 0u); } @@ -349,8 +350,8 @@ struct SetArgHandlerSetArgSvm { }; struct SetArgHandlerSetArgSvmAlloc { - static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { - kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc); + static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, MultiGraphicsAllocation &alloc) { + kernel.setArgMultiDeviceSvmAlloc(argNum, ptrToPatch, &alloc); } static constexpr bool supportsOffsets() { @@ -359,8 +360,8 @@ struct SetArgHandlerSetArgSvmAlloc { }; struct SetArgHandlerSetArgBuffer { - static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { - MockBuffer mb{alloc}; + static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, MultiGraphicsAllocation &alloc) { + MockBuffer mb{*alloc.getDefaultGraphicsAllocation()}; cl_mem memObj = &mb; kernel.setArgBuffer(argNum, sizeof(cl_mem), &memObj); } @@ -410,7 +411,10 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap(rootDeviceIndex)); memset(surfState, 0, rendSurfSize); - TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, svmAlloc); + MultiGraphicsAllocation multiGraphicsAllocation(svmAlloc.getRootDeviceIndex()); + multiGraphicsAllocation.addAllocation(&svmAlloc); + + TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, multiGraphicsAllocation); // surface state for comparison RENDER_SURFACE_STATE expectedSurfaceState; @@ -447,7 +451,7 @@ TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpe auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } @@ -462,7 +466,7 @@ TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectA auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(&svmAlloc, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(&svmAlloc, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } @@ -477,7 +481,7 @@ TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotEx auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } @@ -559,3 +563,140 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBuff EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); } +struct KernelArgSvmMultiDeviceTest : public MultiRootDeviceWithSubDevicesFixture { + void SetUp() override { + MultiRootDeviceWithSubDevicesFixture::SetUp(); + program = std::make_unique(context.get(), false, context->getDevices()); + + KernelInfoContainer kernelInfos; + kernelInfos.resize(3); + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + pKernelInfo[rootDeviceIndex] = std::make_unique(); + pKernelInfo[rootDeviceIndex]->kernelDescriptor.kernelAttributes.simdSize = 1; + + // setup kernel arg offsets + KernelArgPatchInfo kernelArgPatchInfo; + + pKernelInfo[rootDeviceIndex]->heapInfo.pSsh = pSshLocal[rootDeviceIndex]; + pKernelInfo[rootDeviceIndex]->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal[rootDeviceIndex]); + pKernelInfo[rootDeviceIndex]->usesSsh = true; + pKernelInfo[rootDeviceIndex]->requiresSshForBuffers = true; + + pKernelInfo[rootDeviceIndex]->kernelArgInfo.resize(1); + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); + + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); + + kernelInfos[rootDeviceIndex] = pKernelInfo[rootDeviceIndex].get(); + } + + pKernel = new MockKernel(program.get(), kernelInfos); + ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + pKernel->setCrossThreadDataForRootDeviceIndex(rootDeviceIndex, &pCrossThreadData[rootDeviceIndex], sizeof(pCrossThreadData[rootDeviceIndex])); + } + } + + void TearDown() override { + delete pKernel; + + MultiRootDeviceWithSubDevicesFixture::TearDown(); + } + + cl_int retVal = CL_SUCCESS; + std::unique_ptr program; + MockKernel *pKernel = nullptr; + SKernelBinaryHeaderCommon kernelHeader; + std::unique_ptr pKernelInfo[3]; + char pCrossThreadData[3][0x60]; + char pSshLocal[3][64]; +}; + +TEST_F(KernelArgSvmMultiDeviceTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect) { + char svmPtr[256] = {}; + + auto retVal = pKernel->setArgSvm(0, 256, &svmPtr, nullptr, 0u); + EXPECT_EQ(CL_SUCCESS, retVal); + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(svmPtr, *pKernelArg); + } +} +TEST_F(KernelArgSvmMultiDeviceTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreSetCorrectly) { + char svmPtr[256] = {}; + + GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u}; + GraphicsAllocation graphicsAllocation2{2u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u}; + + MultiGraphicsAllocation multiGraphicsAllocation(2); + multiGraphicsAllocation.addAllocation(&graphicsAllocation1); + multiGraphicsAllocation.addAllocation(&graphicsAllocation2); + + auto retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation); + EXPECT_EQ(CL_SUCCESS, retVal); + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(svmPtr, *pKernelArg); + } +} + +TEST_F(KernelArgSvmMultiDeviceTest, whenSettingArgTwiceThenOverrideWithCorrectValue) { + char svmPtr[256] = {}; + char svmPtr2[256] = {}; + + GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u}; + GraphicsAllocation graphicsAllocation2{2u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u}; + + MultiGraphicsAllocation multiGraphicsAllocation(2); + multiGraphicsAllocation.addAllocation(&graphicsAllocation1); + multiGraphicsAllocation.addAllocation(&graphicsAllocation2); + + auto retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation); + EXPECT_EQ(CL_SUCCESS, retVal); + retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr2, &multiGraphicsAllocation); + EXPECT_EQ(CL_SUCCESS, retVal); + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(svmPtr2, *pKernelArg); + } +} + +HWTEST_F(KernelArgSvmMultiDeviceTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { + char svmPtr[256] = {}; + + GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u}; + GraphicsAllocation graphicsAllocation2{2u, GraphicsAllocation::AllocationType::BUFFER, &svmPtr, sizeof(svmPtr), 0, MemoryPool::MemoryNull, 1u}; + + MultiGraphicsAllocation multiGraphicsAllocation(2); + multiGraphicsAllocation.addAllocation(&graphicsAllocation1); + multiGraphicsAllocation.addAllocation(&graphicsAllocation2); + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + pKernelInfo[rootDeviceIndex]->usesSsh = true; + pKernelInfo[rootDeviceIndex]->requiresSshForBuffers = true; + } + + auto retVal = pKernel->setArgMultiDeviceSvmAlloc(0, svmPtr, &multiGraphicsAllocation); + EXPECT_EQ(CL_SUCCESS, retVal); + + typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; + + for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + auto surfaceState = reinterpret_cast( + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].offsetHeap)); + + void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); + EXPECT_EQ(svmPtr, surfaceAddress); + } +} diff --git a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp index e3ac703b71..739f593851 100644 --- a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -320,7 +320,7 @@ TEST_F(KernelImageArgTest, givenWritableImageWhenSettingAsArgThenDoNotExpectAllo pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST_F(KernelImageArgTest, givenCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { @@ -332,7 +332,7 @@ TEST_F(KernelImageArgTest, givenCacheFlushImageWhenSettingAsArgThenExpectAllocat pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(image.graphicsAllocation, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { @@ -344,7 +344,7 @@ TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAlloc pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); + EXPECT_EQ(nullptr, pKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } class KernelImageArgTestBindless : public KernelImageArgTest { diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 7d85a4da43..210c8a3ecd 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -24,6 +24,7 @@ #include "opencl/source/helpers/memory_properties_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/kernel.h" +#include "opencl/source/kernel/svm_object_arg.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" @@ -1767,7 +1768,8 @@ HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndNotRequiredMemSyncWhenM EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; + SvmObjectArg svmObjectArg{gpuAllocation}; + mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, &svmObjectArg, unifiedMemoryAllocation, 4096u, nullptr, sizeof(uintptr_t)}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false); mockKernel.mockKernel->makeResident(commandStreamReceiver); @@ -1795,7 +1797,8 @@ HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryRequiredMemSyncWhenMakeRes auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); - mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; + SvmObjectArg svmObjectArg{gpuAllocation}; + mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, &svmObjectArg, unifiedMemoryAllocation, 4096u, nullptr, sizeof(uintptr_t)}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(true); mockKernel.mockKernel->makeResident(commandStreamReceiver); @@ -2773,12 +2776,13 @@ TEST(KernelTest, givenKernelWithPairArgumentWhenItIsInitializedThenPatchImmediat TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); + kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); + kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); - kernel.mockKernel->addAllocationToCacheFlushVector(0, nullptr); - EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); + kernel.mockKernel->addAllocationToCacheFlushVector(0, nullptr, rootDeviceIndex); + EXPECT_EQ(nullptr, kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializingThenReturnError) { @@ -2809,14 +2813,15 @@ TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); + kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); mockAllocation.setMemObjectsAllocationWithWritableFlags(false); mockAllocation.setFlushL3Required(true); - kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); - EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); + kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation, rootDeviceIndex); + EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { @@ -2840,28 +2845,30 @@ TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfte TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); + kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); mockAllocation.setMemObjectsAllocationWithWritableFlags(true); mockAllocation.setFlushL3Required(false); - kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); - EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); + kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation, rootDeviceIndex); + EXPECT_EQ(nullptr, kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); + kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush.resize(1); + kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); mockAllocation.setMemObjectsAllocationWithWritableFlags(false); mockAllocation.setFlushL3Required(false); - kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); - EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); + kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation, rootDeviceIndex); + EXPECT_EQ(nullptr, kernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) { diff --git a/opencl/test/unit_test/kernel/svm_object_arg_tests.cpp b/opencl/test/unit_test/kernel/svm_object_arg_tests.cpp new file mode 100644 index 0000000000..01a3f69bb1 --- /dev/null +++ b/opencl/test/unit_test/kernel/svm_object_arg_tests.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/multi_graphics_allocation.h" + +#include "opencl/source/kernel/svm_object_arg.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +TEST(SvmObjectArgTest, givenSingleGraphicsAllocationWhenCreatingSvmObjectArgThenProperPropertiesAreStored) { + GraphicsAllocation graphicsAllocation{0u, GraphicsAllocation::AllocationType::BUFFER, nullptr, 0u, 0, MemoryPool::MemoryNull, 1u}; + + SvmObjectArg svmObjectArg(&graphicsAllocation); + + EXPECT_EQ(&graphicsAllocation, svmObjectArg.getGraphicsAllocation(0u)); + EXPECT_EQ(graphicsAllocation.isCoherent(), svmObjectArg.isCoherent()); + EXPECT_EQ(graphicsAllocation.getAllocationType(), svmObjectArg.getAllocationType()); + EXPECT_EQ(nullptr, svmObjectArg.getMultiDeviceSvmAlloc()); +} +TEST(SvmObjectArgTest, givenMultiGraphicsAllocationWhenCreatingSvmObjectArgThenProperPropertiesAreStored) { + GraphicsAllocation graphicsAllocation0{0u, GraphicsAllocation::AllocationType::BUFFER, nullptr, 0u, 0, MemoryPool::MemoryNull, 1u}; + GraphicsAllocation graphicsAllocation1{1u, GraphicsAllocation::AllocationType::BUFFER, nullptr, 0u, 0, MemoryPool::MemoryNull, 1u}; + + MultiGraphicsAllocation multiGraphicsAllocation(1); + multiGraphicsAllocation.addAllocation(&graphicsAllocation0); + multiGraphicsAllocation.addAllocation(&graphicsAllocation1); + + SvmObjectArg svmObjectArg(&multiGraphicsAllocation); + + EXPECT_EQ(&graphicsAllocation0, svmObjectArg.getGraphicsAllocation(0u)); + EXPECT_EQ(&graphicsAllocation1, svmObjectArg.getGraphicsAllocation(1u)); + EXPECT_EQ(multiGraphicsAllocation.isCoherent(), svmObjectArg.isCoherent()); + EXPECT_EQ(multiGraphicsAllocation.getAllocationType(), svmObjectArg.getAllocationType()); + EXPECT_EQ(&multiGraphicsAllocation, svmObjectArg.getMultiDeviceSvmAlloc()); +} diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index b6522777cb..ff5ff1973c 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -66,7 +66,6 @@ class MockKernel : public Kernel { using Kernel::hasIndirectStatelessAccessToHostMemory; using Kernel::isSchedulerKernel; using Kernel::kernelArgHandlers; - using Kernel::kernelArgRequiresCacheFlush; using Kernel::kernelArguments; using Kernel::KernelConfig; using Kernel::kernelDeviceInfos; @@ -364,7 +363,9 @@ class MockKernelWithInternals { kernelInfo.kernelArgInfo[1].metadata.accessQualifier = NEO::KernelArgMetadata::AccessReadWrite; mockKernel->setKernelArguments(defaultKernelArguments); - mockKernel->kernelArgRequiresCacheFlush.resize(2); + for (const auto &pClDevice : deviceVector) { + mockKernel->kernelDeviceInfos[pClDevice->getRootDeviceIndex()].kernelArgRequiresCacheFlush.resize(2); + } mockKernel->kernelArgHandlers.resize(2); mockKernel->kernelArgHandlers[0] = &Kernel::setArgBuffer; mockKernel->kernelArgHandlers[1] = &Kernel::setArgBuffer; diff --git a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp index 861227d041..1c38e652e6 100644 --- a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp +++ b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,6 +10,7 @@ #include "shared/test/unit_test/mocks/mock_graphics_allocation.h" #include "shared/test/unit_test/utilities/base_object_utils.h" +#include "opencl/source/kernel/svm_object_arg.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" @@ -151,7 +152,8 @@ TEST(SchedulerKernelTest, WhenSettingArgsForSchedulerKernelThenAllocationsAreCor allocs[8].get()); for (uint32_t i = 0; i < 9; i++) { - EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i)); + auto graphicsAllocation = reinterpret_cast(scheduler->getKernelArg(i))->getGraphicsAllocation(device->getRootDeviceIndex()); + EXPECT_EQ(allocs[i].get(), graphicsAllocation); } } @@ -180,7 +182,8 @@ TEST(SchedulerKernelTest, GivenNullDebugQueueWhenSettingArgsForSchedulerKernelTh allocs[7].get()); for (uint32_t i = 0; i < 8; i++) { - EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i)); + auto graphicsAllocation = reinterpret_cast(scheduler->getKernelArg(i))->getGraphicsAllocation(device->getRootDeviceIndex()); + EXPECT_EQ(allocs[i].get(), graphicsAllocation); } EXPECT_EQ(nullptr, scheduler->getKernelArg(8)); }