diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index a82d417375..760a9e52a5 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -88,7 +88,7 @@ inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef void *addressToPatch = reinterpret_cast(allocation.getUnderlyingBuffer()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, - &allocation, 0, 0, false, 1u); + &allocation, 0, 0, false, device.getNumAvailableDevices() > 1); } } diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 10ae732764..2e639f8e7f 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -544,7 +544,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) { Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0, kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, - kernel->getTotalNumDevicesInContext()); + kernel->areMultipleSubDevicesInContext()); return true; } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index bbd1d1d81a..59c832e752 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -806,7 +806,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( usePerDssBackedBuffer, //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred(), //useSingleSubdevice useGlobalAtomics, //useGlobalAtomics - kernel->getTotalNumDevicesInContext() //numDevicesInContext + kernel->areMultipleSubDevicesInContext() //areMultipleSubDevicesInContext ); dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 469b978452..4dd685cb7b 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -103,7 +103,7 @@ void HardwareInterface::dispatchWalker( Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh), false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0, mainKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, - mainKernel->getTotalNumDevicesInContext()); + mainKernel->areMultipleSubDevicesInContext()); } auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices(); diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index b65546dbcc..79f5b293ae 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -295,12 +295,8 @@ size_t Context::getNumDevices() const { return devices.size(); } -size_t Context::getTotalNumDevices() const { - size_t numAvailableDevices = 0u; - for (auto &device : devices) { - numAvailableDevices += device->getNumAvailableDevices(); - } - return numAvailableDevices; +bool Context::containsMultipleSubDevices(uint32_t rootDeviceIndex) const { + return deviceBitfields.at(rootDeviceIndex).count() > 1; } ClDevice *Context::getDevice(size_t deviceOrdinal) const { diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index 453a93938d..8127650980 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -80,7 +80,7 @@ class Context : public BaseObject<_cl_context> { cl_image_format *imageFormats, cl_uint *numImageFormats); size_t getNumDevices() const; - size_t getTotalNumDevices() const; + bool containsMultipleSubDevices(uint32_t rootDeviceIndex) const; ClDevice *getDevice(size_t deviceOrdinal) const; MemoryManager *getMemoryManager() const { diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 0577c61347..ab21b23e29 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -243,7 +243,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate kernel->requiresPerDssBackedBuffer(), //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred(), //useSingleSubdevice kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics - kernel->getTotalNumDevicesInContext()); //numDevicesInContext + kernel->areMultipleSubDevicesInContext()); //areMultipleSubDevicesInContext if (timestampPacketDependencies) { eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 78eff90eb5..7d51d0924e 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -146,7 +146,7 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } @@ -172,7 +172,7 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } @@ -347,13 +347,13 @@ cl_int Kernel::initialize() { if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful); - Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext()); + Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, areMultipleSubDevicesInContext()); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful); - Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext()); + Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, areMultipleSubDevicesInContext()); } setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy()); @@ -930,7 +930,7 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; @@ -983,7 +983,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio allocSize -= offset; } Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } if (!kernelArguments[argIndex].isPatched) { @@ -1465,7 +1465,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(), - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable(); @@ -1492,7 +1492,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } return CL_SUCCESS; @@ -1544,7 +1544,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex, Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, pipe->getSize(), pipe->getCpuAddress(), 0, graphicsAllocation, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } return CL_SUCCESS; @@ -2389,7 +2389,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), defaultQueueSurfaceAddress.bindful); Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(), (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } @@ -2407,7 +2407,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) { auto eventPoolBuffer = devQueue->getEventPoolBuffer(); Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, eventPoolBuffer->getUnderlyingBufferSize(), (void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } @@ -2439,7 +2439,7 @@ void Kernel::patchSyncBuffer(GraphicsAllocation *gfxAllocation, size_t bufferOff auto addressToPatch = gfxAllocation->getUnderlyingBuffer(); auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } @@ -2747,9 +2747,9 @@ uint32_t Kernel::getSlmTotalSize() const { return slmTotalSize; } -size_t Kernel::getTotalNumDevicesInContext() const { +bool Kernel::areMultipleSubDevicesInContext() const { auto context = program->getContextPtr(); - return context ? context->getTotalNumDevices() : 1u; + return context ? context->containsMultipleSubDevices(clDevice.getRootDeviceIndex()) : false; } } // namespace NEO diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 0381ccc680..16d723cd4f 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -399,7 +399,7 @@ class Kernel : public ReferenceTrackedObject { MultiDeviceKernel *getMultiDeviceKernel() const { return pMultiDeviceKernel; } void setMultiDeviceKernel(MultiDeviceKernel *pMultiDeviceKernelToSet) { pMultiDeviceKernel = pMultiDeviceKernelToSet; } - size_t getTotalNumDevicesInContext() const; + bool areMultipleSubDevicesInContext() const; protected: struct ObjectCounts { diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index 074ca09568..5eec1f49f3 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -767,13 +767,13 @@ void Buffer::setSurfaceState(const Device *device, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, bool useGlobalAtomics, - size_t numAvailableDevices) { + bool areMultipleSubDevicesInContext) { auto multiGraphicsAllocation = MultiGraphicsAllocation(device->getRootDeviceIndex()); if (gfxAlloc) { multiGraphicsAllocation.addAllocation(gfxAlloc); } auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false); - buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, useGlobalAtomics, numAvailableDevices); + buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, useGlobalAtomics, areMultipleSubDevicesInContext); delete buffer; } diff --git a/opencl/source/mem_obj/buffer.h b/opencl/source/mem_obj/buffer.h index 35df786349..d2e5180602 100644 --- a/opencl/source/mem_obj/buffer.h +++ b/opencl/source/mem_obj/buffer.h @@ -129,7 +129,7 @@ class Buffer : public MemObj { cl_mem_flags flags, cl_mem_flags_intel flagsIntel, bool useGlobalAtomics, - size_t numDevicesInContext); + bool areMultipleSubDevicesInContext); static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType, Context *context, @@ -140,7 +140,7 @@ class Buffer : public MemObj { bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing); virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, - bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) = 0; + bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) = 0; bool bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, @@ -213,7 +213,7 @@ class BufferHw : public Buffer { zeroCopy, isHostPtrSVM, isObjectRedescribed) {} void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, - bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override; + bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override; void appendSurfaceStateExt(void *memory); static Buffer *create(Context *context, diff --git a/opencl/source/mem_obj/buffer_base.inl b/opencl/source/mem_obj/buffer_base.inl index 2f05e1ce62..7177f2b716 100644 --- a/opencl/source/mem_obj/buffer_base.inl +++ b/opencl/source/mem_obj/buffer_base.inl @@ -35,7 +35,7 @@ union SURFACE_STATE_BUFFER_LENGTH { template void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, - bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) { + bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument; @@ -43,7 +43,7 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, boo getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex), getMocsValue(disableL3, isReadOnly, rootDeviceIndex), true, forceNonAuxMode, isReadOnly, device.getNumAvailableDevices(), - graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, numDevicesInContext); + graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, areMultipleSubDevicesInContext); appendSurfaceStateExt(memory); } } // namespace NEO diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index c643c1b662..faf840fb30 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -67,7 +67,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) size_t sizeToPatch = printfSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0, kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, - kernel->getTotalNumDevicesInContext()); + kernel->areMultipleSubDevicesInContext()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 7204d0830a..88d75d3693 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -1349,7 +1349,7 @@ HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsIsNotSetWhenEnqueueKernelThenDi EXPECT_FALSE(mockCsr->passedDispatchFlags.useGlobalAtomics); } -HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenDispatchFlagsiHasCorrectNumDevicesValue) { +HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenDispatchFlagsHaveCorrectInfoAboutMultipleSubDevicesInContext) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); @@ -1357,15 +1357,12 @@ HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenD MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, mockCsr->passedDispatchFlags.numDevicesInContext); + EXPECT_FALSE(mockCsr->passedDispatchFlags.areMultipleSubDevicesInContext); - MockDevice subDevice; - context->devices.push_back(pClDevice); - context->devices.push_back(pClDevice); + context->deviceBitfields[rootDeviceIndex].set(7, true); clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(3u, mockCsr->passedDispatchFlags.numDevicesInContext); - context->devices.pop_back(); - context->devices.pop_back(); + EXPECT_TRUE(mockCsr->passedDispatchFlags.areMultipleSubDevicesInContext); + context->deviceBitfields[rootDeviceIndex].set(7, false); } HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) { diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 09a3e600b7..88e2c299ca 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -628,7 +628,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMultiOsContextCapableSetAndDi commandStreamReceiver.multiOsContextCapable = true; flushTaskFlags.useGlobalAtomics = true; - flushTaskFlags.numDevicesInContext = 1; + flushTaskFlags.areMultipleSubDevicesInContext = false; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); @@ -657,7 +657,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMultiOsContextCapableSetAndDi commandStreamReceiver.multiOsContextCapable = false; flushTaskFlags.useGlobalAtomics = true; - flushTaskFlags.numDevicesInContext = 2; + flushTaskFlags.areMultipleSubDevicesInContext = true; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); diff --git a/opencl/test/unit_test/context/context_tests.cpp b/opencl/test/unit_test/context/context_tests.cpp index 3314801fa0..bbdb685b47 100644 --- a/opencl/test/unit_test/context/context_tests.cpp +++ b/opencl/test/unit_test/context/context_tests.cpp @@ -337,22 +337,21 @@ TEST(Context, whenCreateContextThenSpecialQueueUsesInternalEngine) { EXPECT_EQ(internalEngine.commandStreamReceiver, specialQueueEngine.commandStreamReceiver); } -TEST(MultiDeviceContextTest, givenContextWithMultipleDevicesWhenGettingTotalNumberOfDevicesThenNumberOfAllAvailableDevicesIsReturned) { - DebugManagerStateRestore restorer; - const uint32_t numRootDevices = 1u; - const uint32_t numSubDevices = 3u; - DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); - initPlatform(); - auto device = platform()->getClDevice(0); +TEST(MultiDeviceContextTest, givenContextWithMultipleDevicesWhenGettingInfoAboutSubDevicesThenCorrectValueIsReturned) { + MockSpecializedContext context1; + MockUnrestrictiveContext context2; + MockDefaultContext context3; - cl_device_id clDevice = device; - ClDeviceVector deviceVector(&clDevice, numRootDevices); - cl_int retVal = CL_OUT_OF_HOST_MEMORY; - auto context = std::unique_ptr(Context::create(nullptr, deviceVector, nullptr, nullptr, retVal)); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(numSubDevices, device->getNumAvailableDevices()); - EXPECT_EQ(numRootDevices, context->getNumDevices()); - EXPECT_EQ(numRootDevices * numSubDevices, context->getTotalNumDevices()); + EXPECT_EQ(2u, context1.getNumDevices()); + EXPECT_TRUE(context1.containsMultipleSubDevices(0)); + + EXPECT_EQ(3u, context2.getNumDevices()); + EXPECT_TRUE(context2.containsMultipleSubDevices(0)); + + EXPECT_EQ(3u, context3.getNumDevices()); + EXPECT_FALSE(context3.containsMultipleSubDevices(0)); + EXPECT_FALSE(context3.containsMultipleSubDevices(1)); + EXPECT_FALSE(context3.containsMultipleSubDevices(2)); } class ContextWithAsyncDeleterTest : public ::testing::WithParamInterface, diff --git a/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl index 987d5f5461..6ae85392e3 100644 --- a/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl @@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyAndL1ForceEnabledWhenProgramm ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -133,7 +133,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); diff --git a/opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl index 61b2edb83e..ee0891afdd 100644 --- a/opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl @@ -21,6 +21,7 @@ struct ProfilingTestsGen12LP : public CommandEnqueueFixture, } void TearDown() override { + mockKernelWithInternals.reset(); CommandEnqueueFixture::TearDown(); } diff --git a/opencl/test/unit_test/helpers/base_object_tests.cpp b/opencl/test/unit_test/helpers/base_object_tests.cpp index b051c621a6..8de74faaaa 100644 --- a/opencl/test/unit_test/helpers/base_object_tests.cpp +++ b/opencl/test/unit_test/helpers/base_object_tests.cpp @@ -80,7 +80,7 @@ class MockObject : public MockObjectBase {}; template <> class MockObject : public MockObjectBase { public: - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {} + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override {} }; template <> @@ -295,7 +295,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { } }; diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 445c5f4e81..436752bee9 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -271,7 +271,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { void *addressToPatch = svmAlloc.getUnderlyingBuffer(); size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize(); Buffer::setSurfaceState(pDevice, &expectedSurfaceState, false, false, - sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0, false, 1u); + sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0, false, false); } // verify ssh was properly patched @@ -426,7 +426,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN } Buffer::setSurfaceState(device.get(), &expectedSurfaceState, false, false, svmAlloc.getUnderlyingBufferSize(), - svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0, false, 1u); + svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0, false, false); // verify ssh was properly patched int32_t cmpResult = memcmp(&expectedSurfaceState, surfState, rendSurfSize); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index cbb25c67c8..8a02aeeee4 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2727,7 +2727,7 @@ TEST(KernelInfoTest, GivenArgNameWhenGettingArgNumberByNameThenCorrectValueIsRet } TEST(KernelTest, GivenNormalKernelWhenGettingInstructionHeapSizeForExecutionModelThenZeroIsReturned) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); EXPECT_EQ(0u, kernel.mockKernel->getInstructionHeapSizeForExecutionModel()); @@ -2748,7 +2748,7 @@ TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) { mutable std::vector> receivedArgs; }; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.kernelInfo.resizeKernelArgInfoAndRegisterParameter(1); kernel.mockKernel->initialize(); @@ -2977,7 +2977,7 @@ TEST(KernelTest, givenKernelWithPairArgumentWhenItIsInitializedThenPatchImmediat } TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); @@ -2987,7 +2987,7 @@ TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) { } TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializingThenReturnError) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); auto minSimd = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).getMinimalSIMDSize(); MockKernelWithInternals kernel(*device); kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8; @@ -3002,7 +3002,7 @@ TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializin } TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1; @@ -3013,7 +3013,7 @@ TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); @@ -3026,7 +3026,7 @@ TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToC TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { MockGraphicsAllocation mockAllocation; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.mockKernel->svmAllocationsRequireCacheFlush = true; @@ -3044,7 +3044,7 @@ TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfte TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); @@ -3057,7 +3057,7 @@ TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheF TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); @@ -3070,7 +3070,7 @@ TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCa } TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); auto executionEnvironment = device->getExecutionEnvironment(); auto mockKernel = std::make_unique(*device); @@ -3078,13 +3078,12 @@ TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenK mockKernel->mockKernel->setPrivateSurface(privateSurface, 10); executionEnvironment->incRefInternal(); - device.reset(nullptr); mockKernel.reset(nullptr); executionEnvironment->decRefInternal(); } TEST(KernelTest, givenAllArgumentsAreStatefulBuffersWhenInitializingThenAllBufferArgsStatefulIsTrue) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; @@ -3100,7 +3099,7 @@ TEST(KernelTest, givenAllArgumentsAreStatefulBuffersWhenInitializingThenAllBuffe } TEST(KernelTest, givenAllArgumentsAreBuffersButNotAllAreStatefulWhenInitializingThenAllBufferArgsStatefulIsFalse) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; @@ -3116,7 +3115,7 @@ TEST(KernelTest, givenAllArgumentsAreBuffersButNotAllAreStatefulWhenInitializing } TEST(KernelTest, givenNotAllArgumentsAreBuffersButAllBuffersAreStatefulWhenInitializingThenAllBufferArgsStatefulIsTrue) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; @@ -3132,7 +3131,7 @@ TEST(KernelTest, givenNotAllArgumentsAreBuffersButAllBuffersAreStatefulWhenIniti } TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernel(*device); @@ -3148,7 +3147,7 @@ TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivat } TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernel(*device); @@ -3159,7 +3158,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitS DebugManagerStateRestore restore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; @@ -3170,7 +3169,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitS } TEST(KernelTest, givenKernelWithPatchInfoCollecitonEnabledAndArgumentWithInvalidCrossThreadDataOffsetWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; ArgDescPointer arg; @@ -3183,7 +3182,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledAndValidArgumentWhenPa DebugManagerStateRestore restore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; ArgDescPointer arg; @@ -3195,7 +3194,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledAndValidArgumentWhenPa } TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; @@ -3206,13 +3205,13 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicit } TEST(KernelTest, givenDefaultKernelWhenItIsCreatedThenItReportsStatelessWrites) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); EXPECT_TRUE(kernel.mockKernel->areStatelessWritesUsed()); } TEST(KernelTest, givenPolicyWhensetKernelThreadArbitrationPolicyThenExpectedClValueIsReturned) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL)); EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL)); @@ -3222,7 +3221,7 @@ TEST(KernelTest, givenPolicyWhensetKernelThreadArbitrationPolicyThenExpectedClVa } TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorrectValueIsSet) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernelWithInternals(*device); auto &kernel = *mockKernelWithInternals.mockKernel; cl_int retVal; @@ -3247,7 +3246,7 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3265,7 +3264,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3283,7 +3282,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3304,14 +3303,14 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsSetThenKerne DebugManagerStateRestore restore; DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true); - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); EXPECT_TRUE(kernel.mockKernel->requiresPerDssBackedBuffer()); } TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKernelDoesntRequirePerDssBackedBuffer) { - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device); EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer()); diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index 348d19d6ff..8c4a94c134 100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -1307,7 +1307,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenBufferSetSurfaceThatMemo using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1326,7 +1326,7 @@ HWTEST_F(BufferSetSurfaceTests, givenDebugVariableToDisableCachingForStatefulBuf using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1346,7 +1346,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsUnalignedToC using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1365,7 +1365,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTo using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1384,7 +1384,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCach using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1403,7 +1403,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTh using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, false); auto width = surfaceState.getWidth(); EXPECT_EQ(alignUp(width, 4), width); @@ -1421,7 +1421,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceWhenOffsetIsSpecifiedForSvm using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0, false, false); auto baseAddress = surfaceState.getSurfaceBaseAddress(); EXPECT_EQ(svmAlloc.getGpuAddress() + offset, baseAddress); @@ -1437,7 +1437,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNotNullThenB using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType); @@ -1450,7 +1450,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNullThenNull using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, false, false); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfType); @@ -1479,7 +1479,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); @@ -1515,7 +1515,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); + subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); @@ -1544,7 +1544,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice(), false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1572,7 +1572,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumen buffer->getGraphicsAllocation(rootDeviceIndex)->setSize(127); - buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice(), false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1597,7 +1597,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclB EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -1622,7 +1622,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenAlignedCacheableNonReadO EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -1647,7 +1647,7 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState graphicsAllocation->setDefaultGmm(gmm); gmm->isRenderCompressed = true; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, gmm)); @@ -1668,7 +1668,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt buffer->getGraphicsAllocation(rootDeviceIndex)->setDefaultGmm(gmm); gmm->isRenderCompressed = false; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); @@ -1684,7 +1684,7 @@ HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgramm uintptr_t ptr = 0xfffff000; void *svmPtr = reinterpret_cast(ptr); - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, false); EXPECT_EQ(castToUint64(svmPtr), surfaceState.getSurfaceBaseAddress()); SURFACE_STATE_BUFFER_LENGTH length = {}; @@ -1701,7 +1701,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedWhenSurfaceStateIsBei MockContext context; void *svmPtr = reinterpret_cast(0x1005); - Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, 1u); + Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, false); EXPECT_EQ(0u, surfaceState.getMemoryObjectControlState()); } diff --git a/opencl/test/unit_test/mocks/mock_buffer.h b/opencl/test/unit_test/mocks/mock_buffer.h index 7e111eddb1..a06e1436af 100644 --- a/opencl/test/unit_test/mocks/mock_buffer.h +++ b/opencl/test/unit_test/mocks/mock_buffer.h @@ -58,8 +58,8 @@ class MockBuffer : public MockBufferStorage, public Buffer { this->multiGraphicsAllocation.addAllocation(&this->mockGfxAllocation); } } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { - Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0, false, 1u); + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { + Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0, false, false); } GraphicsAllocation *externalAlloc = nullptr; }; @@ -79,8 +79,8 @@ class AlignedBuffer : public MockBufferStorage, public Buffer { CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { - Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, 1u); + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { + Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, false); } }; @@ -99,8 +99,8 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer { CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { - Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, 1u); + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { + Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, false); } }; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index f26e0afbdc..31bbe60f11 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -372,7 +372,7 @@ class MockKernelWithInternals { populateKernelDescriptor(kernelInfo.kernelDescriptor, mediaVfeStateSlot1, 1); if (context == nullptr) { - mockContext = new MockContext; + mockContext = new MockContext(deviceVector); context = mockContext; } else { context->incRefInternal(); diff --git a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp index 3cea401365..56c0bbda57 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp @@ -1537,7 +1537,7 @@ class DrmMockBuffer : public MockBufferStorage, public Buffer { gfxAllocation(alloc) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { } protected: diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index a9d531f3ec..9829069f80 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -233,9 +233,9 @@ struct EncodeSurfaceState { static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices, - GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext); + GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, bool areMultipleSubDevicesInContext); static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext); + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext); static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo); static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() { diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 1cf1836a88..f858424fbe 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -318,7 +318,7 @@ void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_ template void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices, - GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext) { + GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { auto surfaceState = reinterpret_cast(dst); UNRECOVERABLE_IF(!isAligned(size)); @@ -356,7 +356,7 @@ void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_ surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } - EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, numDevicesInContext); + EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, areMultipleSubDevicesInContext); } template diff --git a/shared/source/command_container/command_encoder_bdw_plus.inl b/shared/source/command_container/command_encoder_bdw_plus.inl index bd46e7985b..8c960d4b10 100644 --- a/shared/source/command_container/command_encoder_bdw_plus.inl +++ b/shared/source/command_container/command_encoder_bdw_plus.inl @@ -409,7 +409,7 @@ inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &devic template void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) { + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo()); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 44c4020ee3..dbf4c68a9f 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -375,7 +375,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( latestSentStatelessMocsConfig = mocsIndex; } - if ((isMultiOsContextCapable() || (dispatchFlags.numDevicesInContext > 1)) && (dispatchFlags.useGlobalAtomics != lastSentUseGlobalAtomics)) { + if ((isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext) && (dispatchFlags.useGlobalAtomics != lastSentUseGlobalAtomics)) { isStateBaseAddressDirty = true; lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics; } @@ -427,7 +427,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( isMultiOsContextCapable(), memoryCompressionState, dispatchFlags.useGlobalAtomics, - dispatchFlags.numDevicesInContext); + dispatchFlags.areMultipleSubDevicesInContext); *pCmd = cmd; if (sshDirty) { diff --git a/shared/source/command_stream/csr_definitions.h b/shared/source/command_stream/csr_definitions.h index 67c6cac7bf..779464da5d 100644 --- a/shared/source/command_stream/csr_definitions.h +++ b/shared/source/command_stream/csr_definitions.h @@ -57,33 +57,33 @@ struct DispatchFlags { KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP, uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP, bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP, - bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, size_t numDevicesInContextP) : csrDependencies(csrDependenciesP), - barrierTimestampPacketNodes(barrierTimestampPacketNodesP), - pipelineSelectArgs(pipelineSelectArgsP), - flushStampReference(flushStampReferenceP), - throttle(throttleP), - preemptionMode(preemptionModeP), - numGrfRequired(numGrfRequiredP), - l3CacheSettings(l3CacheSettingsP), - threadArbitrationPolicy(threadArbitrationPolicyP), - additionalKernelExecInfo(additionalKernelExecInfoP), - kernelExecutionType(kernelExecutionTypeP), - memoryCompressionState(memoryCompressionStateP), - sliceCount(sliceCountP), - blocking(blockingP), - dcFlush(dcFlushP), - useSLM(useSLMP), - guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP), - gsba32BitRequired(gsba32BitRequiredP), - requiresCoherency(requiresCoherencyP), - lowPriority(lowPriorityP), - implicitFlush(implicitFlushP), - outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP), - epilogueRequired(epilogueRequiredP), - usePerDssBackedBuffer(usePerDSSbackedBufferP), - useSingleSubdevice(useSingleSubdeviceP), - useGlobalAtomics(useGlobalAtomicsP), - numDevicesInContext(numDevicesInContextP){}; + bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, size_t areMultipleSubDevicesInContextP) : csrDependencies(csrDependenciesP), + barrierTimestampPacketNodes(barrierTimestampPacketNodesP), + pipelineSelectArgs(pipelineSelectArgsP), + flushStampReference(flushStampReferenceP), + throttle(throttleP), + preemptionMode(preemptionModeP), + numGrfRequired(numGrfRequiredP), + l3CacheSettings(l3CacheSettingsP), + threadArbitrationPolicy(threadArbitrationPolicyP), + additionalKernelExecInfo(additionalKernelExecInfoP), + kernelExecutionType(kernelExecutionTypeP), + memoryCompressionState(memoryCompressionStateP), + sliceCount(sliceCountP), + blocking(blockingP), + dcFlush(dcFlushP), + useSLM(useSLMP), + guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP), + gsba32BitRequired(gsba32BitRequiredP), + requiresCoherency(requiresCoherencyP), + lowPriority(lowPriorityP), + implicitFlush(implicitFlushP), + outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP), + epilogueRequired(epilogueRequiredP), + usePerDssBackedBuffer(usePerDSSbackedBufferP), + useSingleSubdevice(useSingleSubdeviceP), + useGlobalAtomics(useGlobalAtomicsP), + areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP){}; CsrDependencies csrDependencies; TimestampPacketContainer *barrierTimestampPacketNodes = nullptr; @@ -112,7 +112,7 @@ struct DispatchFlags { bool usePerDssBackedBuffer = false; bool useSingleSubdevice = false; bool useGlobalAtomics = false; - size_t numDevicesInContext = 1u; + bool areMultipleSubDevicesInContext = false; }; struct CsrSizeRequestFlags { diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index af46fad5db..f2c0f25117 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -52,7 +52,7 @@ void EncodeWA::encodeAdditionalPipelineSelect(Device &device, LinearStre template <> void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) { + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); if (isL3Allowed) { const bool isConstantSurface = allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE; diff --git a/shared/source/gen8/state_base_address_gen8.cpp b/shared/source/gen8/state_base_address_gen8.cpp index 18254c1d90..736fea6f5a 100644 --- a/shared/source/gen8/state_base_address_gen8.cpp +++ b/shared/source/gen8/state_base_address_gen8.cpp @@ -29,7 +29,7 @@ void StateBaseAddressHelper::programStateBaseAddress( bool isMultiOsContextCapable, MemoryCompressionState memoryCompressionState, bool useGlobalAtomics, - size_t numDevicesInContext) { + bool areMultipleSubDevicesInContext) { *stateBaseAddress = BDWFamily::cmdInitStateBaseAddress; @@ -77,7 +77,7 @@ void StateBaseAddressHelper::programStateBaseAddress( stateBaseAddress->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex); appendStateBaseAddressParameters(stateBaseAddress, ssh, setGeneralStateBaseAddress, indirectObjectHeapBaseAddress, - gmmHelper, isMultiOsContextCapable, memoryCompressionState, true, useGlobalAtomics, numDevicesInContext); + gmmHelper, isMultiOsContextCapable, memoryCompressionState, true, useGlobalAtomics, areMultipleSubDevicesInContext); } template struct StateBaseAddressHelper; } // namespace NEO diff --git a/shared/source/helpers/state_base_address.h b/shared/source/helpers/state_base_address.h index 90a94a1d41..9f046401de 100644 --- a/shared/source/helpers/state_base_address.h +++ b/shared/source/helpers/state_base_address.h @@ -38,7 +38,7 @@ struct StateBaseAddressHelper { bool isMultiOsContextCapable, MemoryCompressionState memoryCompressionState, bool useGlobalAtomics, - size_t numDevicesInContext); + bool areMultipleSubDevicesInContext); static void appendStateBaseAddressParameters( STATE_BASE_ADDRESS *stateBaseAddress, @@ -50,7 +50,7 @@ struct StateBaseAddressHelper { MemoryCompressionState memoryCompressionState, bool overrideBindlessSurfaceStateBase, bool useGlobalAtomics, - size_t nnumDevicesInContext); + bool areMultipleSubDevicesInContext); static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, GmmHelper *gmmHelper); diff --git a/shared/source/helpers/state_base_address_base.inl b/shared/source/helpers/state_base_address_base.inl index 63d0fe7faf..548c9bc5ac 100644 --- a/shared/source/helpers/state_base_address_base.inl +++ b/shared/source/helpers/state_base_address_base.inl @@ -33,7 +33,7 @@ void StateBaseAddressHelper::programStateBaseAddress( bool isMultiOsContextCapable, MemoryCompressionState memoryCompressionState, bool useGlobalAtomics, - size_t numDevicesInContext) { + bool areMultipleSubDevicesInContext) { *stateBaseAddress = GfxFamily::cmdInitStateBaseAddress; bool overrideBindlessSurfaceStateBase = true; @@ -102,7 +102,7 @@ void StateBaseAddressHelper::programStateBaseAddress( stateBaseAddress->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex); appendStateBaseAddressParameters(stateBaseAddress, ssh, setGeneralStateBaseAddress, indirectObjectHeapBaseAddress, gmmHelper, - isMultiOsContextCapable, memoryCompressionState, overrideBindlessSurfaceStateBase, useGlobalAtomics, numDevicesInContext); + isMultiOsContextCapable, memoryCompressionState, overrideBindlessSurfaceStateBase, useGlobalAtomics, areMultipleSubDevicesInContext); } template diff --git a/shared/source/helpers/state_base_address_bdw.inl b/shared/source/helpers/state_base_address_bdw.inl index 4200da4e31..cafa4e348e 100644 --- a/shared/source/helpers/state_base_address_bdw.inl +++ b/shared/source/helpers/state_base_address_bdw.inl @@ -20,7 +20,7 @@ void StateBaseAddressHelper::appendStateBaseAddressParameters( MemoryCompressionState memoryCompressionState, bool overrideBindlessSurfaceStateBase, bool useGlobalAtomics, - size_t numDevicesInContext) { + bool areMultipleSubDevicesInContext) { appendExtraCacheSettings(stateBaseAddress, gmmHelper); } diff --git a/shared/source/helpers/state_base_address_skl_plus.inl b/shared/source/helpers/state_base_address_skl_plus.inl index ce47d101a4..813537ef3b 100644 --- a/shared/source/helpers/state_base_address_skl_plus.inl +++ b/shared/source/helpers/state_base_address_skl_plus.inl @@ -20,7 +20,7 @@ void StateBaseAddressHelper::appendStateBaseAddressParameters( MemoryCompressionState memoryCompressionState, bool overrideBindlessSurfaceStateBase, bool useGlobalAtomics, - size_t numDevicesInContext) { + bool areMultipleSubDevicesInContext) { if (overrideBindlessSurfaceStateBase && ssh) { stateBaseAddress->setBindlessSurfaceStateBaseAddressModifyEnable(true); diff --git a/shared/test/common/helpers/dispatch_flags_helper.h b/shared/test/common/helpers/dispatch_flags_helper.h index d7aa5b738f..87b546c17f 100644 --- a/shared/test/common/helpers/dispatch_flags_helper.h +++ b/shared/test/common/helpers/dispatch_flags_helper.h @@ -41,6 +41,6 @@ struct DispatchFlagsHelper { false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics - 1u); //numDevicesInContext + false); //areMultipleSubDevicesInContext } };