diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 3cb0228884..6d2abd4a2d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -99,7 +99,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z NEO::EncodeSurfaceState::encodeBuffer(surfaceState, debugSurface->getGpuAddress(), debugSurface->getUnderlyingBufferSize(), mocs, false, false, false, neoDevice->getNumAvailableDevices(), - debugSurface, neoDevice->getGmmHelper()); + debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u); } appendSignalEventPostWalker(hEvent); diff --git a/level_zero/core/source/kernel/kernel_hw.h b/level_zero/core/source/kernel/kernel_hw.h index fb6e4dfda1..ed12be8f38 100644 --- a/level_zero/core/source/kernel/kernel_hw.h +++ b/level_zero/core/source/kernel/kernel_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -68,7 +68,8 @@ struct KernelHw : public KernelImp { NEO::Device *neoDevice = module->getDevice()->getNEODevice(); NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs, false, false, false, neoDevice->getNumAvailableDevices(), - alloc, neoDevice->getGmmHelper()); + alloc, neoDevice->getGmmHelper(), + kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u); } std::unique_ptr clone() const override { diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index b56dca29bf..fe14f6780c 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -142,7 +142,8 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); - pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device); + pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device, + pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); } } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 96039d5d8d..f438d0ce18 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1490,7 +1490,10 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers(rootDeviceIndex)) { auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); - buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice()); + auto context = program->getContextPtr(); + size_t numDevicesInContext = context ? context->getNumDevices() : 1u; + buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(), + getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, numDevicesInContext); } kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable(); diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index 382336aaf5..cab64fd17a 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -752,7 +752,7 @@ void Buffer::setSurfaceState(const Device *device, multiGraphicsAllocation.addAllocation(gfxAlloc); } auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false); - buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device); + buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, false, 1u); delete buffer; } diff --git a/opencl/source/mem_obj/buffer.h b/opencl/source/mem_obj/buffer.h index bb51674def..6a1ee8224c 100644 --- a/opencl/source/mem_obj/buffer.h +++ b/opencl/source/mem_obj/buffer.h @@ -137,7 +137,8 @@ class Buffer : public MemObj { bool isSubBuffer(); bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing); - virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) = 0; + virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, + bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) = 0; bool bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, @@ -208,7 +209,8 @@ class BufferHw : public Buffer { : Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation), zeroCopy, isHostPtrSVM, isObjectRedescribed) {} - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) override; + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, + bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override; void appendSurfaceStateExt(void *memory); static Buffer *create(Context *context, diff --git a/opencl/source/mem_obj/buffer_base.inl b/opencl/source/mem_obj/buffer_base.inl index b994fcd159..2f05e1ce62 100644 --- a/opencl/source/mem_obj/buffer_base.inl +++ b/opencl/source/mem_obj/buffer_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -34,7 +34,8 @@ union SURFACE_STATE_BUFFER_LENGTH { }; template -void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) { +void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, + bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument; @@ -42,7 +43,7 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, boo getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex), getMocsValue(disableL3, isReadOnly, rootDeviceIndex), true, forceNonAuxMode, isReadOnly, device.getNumAvailableDevices(), - graphicsAllocation, device.getGmmHelper()); + graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, numDevicesInContext); appendSurfaceStateExt(memory); } } // namespace NEO diff --git a/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl index 0ca02383ef..ab5733d9bc 100644 --- a/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyAndL1ForceEnabledWhenProgramm ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -133,7 +133,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); diff --git a/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp index e14f55e209..104d53040d 100644 --- a/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp +++ b/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); diff --git a/opencl/test/unit_test/helpers/base_object_tests.cpp b/opencl/test/unit_test/helpers/base_object_tests.cpp index c37f7dbe55..b051c621a6 100644 --- a/opencl/test/unit_test/helpers/base_object_tests.cpp +++ b/opencl/test/unit_test/helpers/base_object_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -80,7 +80,7 @@ class MockObject : public MockObjectBase {}; template <> class MockObject : public MockObjectBase { public: - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {} + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {} }; template <> @@ -295,7 +295,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { } }; diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index c0fd89a7fb..9a1704dd8b 100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -1382,7 +1382,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); @@ -1418,7 +1418,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice()); + subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); @@ -1447,7 +1447,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice(), false, 1u); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1475,7 +1475,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumen buffer->getGraphicsAllocation(rootDeviceIndex)->setSize(127); - buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice(), false, 1u); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1500,7 +1500,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclB EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -1525,7 +1525,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenAlignedCacheableNonReadO EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -1550,7 +1550,7 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState graphicsAllocation->setDefaultGmm(gmm); gmm->isRenderCompressed = true; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, gmm)); @@ -1571,7 +1571,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt buffer->getGraphicsAllocation(rootDeviceIndex)->setDefaultGmm(gmm); gmm->isRenderCompressed = false; - buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice()); + buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); diff --git a/opencl/test/unit_test/mocks/mock_buffer.h b/opencl/test/unit_test/mocks/mock_buffer.h index 8357caf41f..32afe105ec 100644 --- a/opencl/test/unit_test/mocks/mock_buffer.h +++ b/opencl/test/unit_test/mocks/mock_buffer.h @@ -58,7 +58,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { this->multiGraphicsAllocation.addAllocation(&this->mockGfxAllocation); } } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0); } GraphicsAllocation *externalAlloc = nullptr; @@ -79,7 +79,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer { CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0); } }; @@ -99,7 +99,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer { CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0); } }; diff --git a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp index 50188dbb3a..686fc2e8c5 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp @@ -1489,7 +1489,7 @@ class DrmMockBuffer : public MockBufferStorage, public Buffer { gfxAllocation(alloc) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override { } protected: diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 537ae6d5dd..b84cd2873d 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -230,9 +230,9 @@ struct EncodeSurfaceState { static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices, - GraphicsAllocation *allocation, GmmHelper *gmmHelper); + GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext); static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - bool isReadOnly, uint32_t numAvailableDevices); + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext); static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo); static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() { diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index c4db38916c..b34a3566da 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -318,7 +318,7 @@ void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_ template void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs, bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices, - GraphicsAllocation *allocation, GmmHelper *gmmHelper) { + GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext) { auto surfaceState = reinterpret_cast(dst); UNRECOVERABLE_IF(!isAligned(size)); @@ -356,7 +356,7 @@ void EncodeSurfaceState::encodeBuffer(void *dst, uint64_t address, size_ surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } - EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices); + EncodeSurfaceState::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, numDevicesInContext); } template diff --git a/shared/source/command_container/command_encoder_bdw_plus.inl b/shared/source/command_container/command_encoder_bdw_plus.inl index 5332ec2d18..66001d9f66 100644 --- a/shared/source/command_container/command_encoder_bdw_plus.inl +++ b/shared/source/command_container/command_encoder_bdw_plus.inl @@ -400,7 +400,7 @@ inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &devic template void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - bool isReadOnly, uint32_t numAvailableDevices) { + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) { encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo()); } diff --git a/shared/source/command_stream/csr_definitions.h b/shared/source/command_stream/csr_definitions.h index 90899a1db1..67c6cac7bf 100644 --- a/shared/source/command_stream/csr_definitions.h +++ b/shared/source/command_stream/csr_definitions.h @@ -51,39 +51,39 @@ constexpr uint32_t NotApplicable = 3u; struct DispatchFlags { DispatchFlags() = delete; - DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs, - FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired, - uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, - KernelExecutionType kernelExecutionType, MemoryCompressionState memoryCompressionState, - uint64_t sliceCount, bool blocking, bool dcFlush, bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired, - bool requiresCoherency, bool lowPriority, bool implicitFlush, bool outOfOrderExecutionAllowed, bool epilogueRequired, - bool usePerDSSbackedBuffer, bool useSingleSubdevice, bool useGlobalAtomics, size_t numDevicesInContext) : csrDependencies(csrDependencies), - barrierTimestampPacketNodes(barrierTimestampPacketNodes), - pipelineSelectArgs(pipelineSelectArgs), - flushStampReference(flushStampReference), - throttle(throttle), - preemptionMode(preemptionMode), - numGrfRequired(numGrfRequired), - l3CacheSettings(l3CacheSettings), - threadArbitrationPolicy(threadArbitrationPolicy), - additionalKernelExecInfo(additionalKernelExecInfo), - kernelExecutionType(kernelExecutionType), - memoryCompressionState(memoryCompressionState), - sliceCount(sliceCount), - blocking(blocking), - dcFlush(dcFlush), - useSLM(useSLM), - guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl), - gsba32BitRequired(gsba32BitRequired), - requiresCoherency(requiresCoherency), - lowPriority(lowPriority), - implicitFlush(implicitFlush), - outOfOrderExecutionAllowed(outOfOrderExecutionAllowed), - epilogueRequired(epilogueRequired), - usePerDssBackedBuffer(usePerDSSbackedBuffer), - useSingleSubdevice(useSingleSubdevice), - useGlobalAtomics(useGlobalAtomics), - numDevicesInContext(numDevicesInContext){}; + DispatchFlags(CsrDependencies csrDependenciesP, TimestampPacketContainer *barrierTimestampPacketNodesP, PipelineSelectArgs pipelineSelectArgsP, + FlushStampTrackingObj *flushStampReferenceP, QueueThrottle throttleP, PreemptionMode preemptionModeP, uint32_t numGrfRequiredP, + uint32_t l3CacheSettingsP, uint32_t threadArbitrationPolicyP, uint32_t additionalKernelExecInfoP, + KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP, + uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP, + bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP, + bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, size_t numDevicesInContextP) : csrDependencies(csrDependenciesP), + barrierTimestampPacketNodes(barrierTimestampPacketNodesP), + pipelineSelectArgs(pipelineSelectArgsP), + flushStampReference(flushStampReferenceP), + throttle(throttleP), + preemptionMode(preemptionModeP), + numGrfRequired(numGrfRequiredP), + l3CacheSettings(l3CacheSettingsP), + threadArbitrationPolicy(threadArbitrationPolicyP), + additionalKernelExecInfo(additionalKernelExecInfoP), + kernelExecutionType(kernelExecutionTypeP), + memoryCompressionState(memoryCompressionStateP), + sliceCount(sliceCountP), + blocking(blockingP), + dcFlush(dcFlushP), + useSLM(useSLMP), + guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP), + gsba32BitRequired(gsba32BitRequiredP), + requiresCoherency(requiresCoherencyP), + lowPriority(lowPriorityP), + implicitFlush(implicitFlushP), + outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP), + epilogueRequired(epilogueRequiredP), + usePerDssBackedBuffer(usePerDSSbackedBufferP), + useSingleSubdevice(useSingleSubdeviceP), + useGlobalAtomics(useGlobalAtomicsP), + numDevicesInContext(numDevicesInContextP){}; CsrDependencies csrDependencies; TimestampPacketContainer *barrierTimestampPacketNodes = nullptr; diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index d1babd1ebd..af46fad5db 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -52,7 +52,7 @@ void EncodeWA::encodeAdditionalPipelineSelect(Device &device, LinearStre template <> void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, - bool isReadOnly, uint32_t numAvailableDevices) { + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) { const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); if (isL3Allowed) { const bool isConstantSurface = allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE; diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index d532c604b0..2e2b425b97 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -155,7 +155,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1); EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, false, false, false, 1u, - &allocation, pDevice->getGmmHelper()); + &allocation, pDevice->getGmmHelper(), false, 1u); EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight()); @@ -182,7 +182,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, true, false, false, 1u, - nullptr, pDevice->getGmmHelper()); + nullptr, pDevice->getGmmHelper(), false, 1u); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType()); @@ -208,7 +208,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency EncodeSurfaceState::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1, false, false, false, 1u, - nullptr, pDevice->getGmmHelper()); + nullptr, pDevice->getGmmHelper(), false, 1u); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());