From be90b9ff932754b24cc5553e6f231cd622ece91c Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Tue, 17 Nov 2020 08:37:44 +0000 Subject: [PATCH] Add support for ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED Add support for device and shared allocations that use the ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED flag, whether the kernel using the memory is stateless or stateful. Signed-off-by: Jaime Arteaga --- level_zero/core/source/cmdlist/cmdlist.h | 2 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 1 + .../core/source/cmdlist/cmdlist_hw_base.inl | 8 +- level_zero/core/source/kernel/kernel_hw.h | 10 +- level_zero/core/source/kernel/kernel_imp.cpp | 5 + level_zero/core/source/kernel/kernel_imp.h | 2 + level_zero/core/source/memory/memory.cpp | 9 + .../sources/cmdlist/test_cmdlist_1.cpp | 14 + .../sources/cmdqueue/test_cmdqueue.cpp | 24 ++ .../unit_tests/sources/kernel/test_kernel.cpp | 52 ++++ .../unit_tests/sources/memory/test_memory.cpp | 43 +++ .../unit_tests/sources/module/test_module.cpp | 84 ++++++ .../command_container/command_encoder.h | 11 +- .../command_encoder_bdw_plus.inl | 21 +- .../encoders/test_encode_dispatch_kernel.cpp | 262 ++++++++++++++++-- 15 files changed, 518 insertions(+), 30 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 2c3578d59d..c357566acb 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -186,6 +186,7 @@ struct CommandList : _ze_command_list_handle_t { virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType) = 0; virtual ~CommandList(); NEO::CommandContainer commandContainer; + bool getContainsStatelessUncachedResource() { return containsStatelessUncachedResource; } protected: std::map hostPtrMap; @@ -197,6 +198,7 @@ struct CommandList : _ze_command_list_handle_t { bool internalUsage = false; NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize); 
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, size_t *offset); + bool containsStatelessUncachedResource = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 6dc70bf40a..cacfa4c6fa 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -66,6 +66,7 @@ ze_result_t CommandListCoreFamily::reset() { removeDeallocationContainerData(); removeHostPtrAllocations(); commandContainer.reset(); + containsStatelessUncachedResource = false; if (!isCopyOnly()) { programStateBaseAddress(commandContainer, true); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 92da65a144..b2ead82e5d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -16,6 +16,8 @@ #include "shared/source/memory_manager/residency_container.h" #include "shared/source/unified_memory/unified_memory.h" +#include "level_zero/core/source/kernel/kernel_imp.h" + #include "pipe_control_args.h" #include @@ -70,6 +72,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z this->indirectAllocationsAllowed = true; } + KernelImp *kernelImp = static_cast(kernel); + this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); + NEO::EncodeDispatchKernel::encode(commandContainer, reinterpret_cast(pThreadGroupDimensions), isIndirect, @@ -77,7 +82,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z kernel, 0, device->getNEODevice(), - commandListPreemptionMode); + commandListPreemptionMode, + this->containsStatelessUncachedResource); if (device->getNEODevice()->getDebugger()) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); diff --git a/level_zero/core/source/kernel/kernel_hw.h 
b/level_zero/core/source/kernel/kernel_hw.h index bb002e3e30..fb6e4dfda1 100644 --- a/level_zero/core/source/kernel/kernel_hw.h +++ b/level_zero/core/source/kernel/kernel_hw.h @@ -8,6 +8,8 @@ #pragma once #include "shared/source/command_container/command_encoder.h" +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" @@ -56,7 +58,13 @@ struct KernelHw : public KernelImp { bufferSizeForSsh += sizeTillEndOfSurface; // take address alignment offset into account bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment); - auto mocs = this->module->getDevice()->getMOCS(true, false); + bool l3Enabled = true; + auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(alloc->getGpuAddress())); + if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) { + l3Enabled = false; + } + auto mocs = this->module->getDevice()->getMOCS(l3Enabled, false); + NEO::Device *neoDevice = module->getDevice()->getNEODevice(); NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs, false, false, false, neoDevice->getNumAvailableDevices(), diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index f67b28b20d..5935e75a0a 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -475,6 +475,11 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, val); if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) { setBufferSurfaceState(argIndex, reinterpret_cast(val), allocation); + } else { + auto allocData = 
this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(allocation->getGpuAddress())); + if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) { + kernelRequiresUncachedMocs = true; + } } residencyContainer[argIndex] = allocation; diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 39e14d8a86..7d3c6a6599 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -103,6 +103,7 @@ struct KernelImp : Kernel { uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; } bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; } + bool getKernelRequiresUncachedMocs() { return kernelRequiresUncachedMocs; } protected: KernelImp() = default; @@ -147,6 +148,7 @@ struct KernelImp : Kernel { uint32_t requiredWorkgroupOrder = 0u; bool kernelRequiresGenerationOfLocalIdsByRuntime = true; + bool kernelRequiresUncachedMocs = false; }; } // namespace L0 diff --git a/level_zero/core/source/memory/memory.cpp b/level_zero/core/source/memory/memory.cpp index 8e3307b55f..a55e5bc103 100644 --- a/level_zero/core/source/memory/memory.cpp +++ b/level_zero/core/source/memory/memory.cpp @@ -164,6 +164,11 @@ ze_result_t DriverHandleImp::allocDeviceMem(ze_device_handle_t hDevice, const ze Device::fromHandle(hDevice)->getNEODevice()->getDeviceBitfield()); unifiedMemoryProperties.allocationFlags.flags.shareable = 1u; unifiedMemoryProperties.device = Device::fromHandle(hDevice)->getNEODevice(); + + if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) { + unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1; + } + void *usmPtr = svmAllocsManager->createUnifiedMemoryAllocation(Device::fromHandle(hDevice)->getRootDeviceIndex(), size, unifiedMemoryProperties); @@ -191,6 +196,10 @@ ze_result_t 
DriverHandleImp::allocSharedMem(ze_device_handle_t hDevice, const ze NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, Device::fromHandle(device)->getNEODevice()->getDeviceBitfield()); unifiedMemoryProperties.device = unifiedMemoryPropertiesDevice; + if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) { + unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1; + } + if (size > this->devices[0]->getDeviceInfo().maxMemAllocSize) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 721a61ec19..fd57a84651 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -394,6 +394,20 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlus EXPECT_NE(cmdList.end(), itor); } +HWTEST_F(CommandListCreate, whenCommandListIsResetThenContainsStatelessUncachedResourceIsSetToFalse) { + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, + device, + NEO::EngineGroupType::Compute, + returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + returnValue = commandList->reset(); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + EXPECT_FALSE(commandList->getContainsStatelessUncachedResource()); +} + HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenResetThenStateBaseAddressNotProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp index d7fe8015c9..e9c019c771 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" @@ -154,6 +156,28 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize commandQueue->destroy(); } +HWTEST2_F(CommandQueueProgramSBATest, + whenProgrammingStateBaseAddressWithcontainsStatelessUncachedResourceThenCorrectMocsAreSet, CommandQueueSBASupport) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + ze_command_queue_desc_t desc = {}; + auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); + auto commandQueue = new MockCommandQueueHw(device, csr.get(), &desc); + commandQueue->initialize(false, false); + + uint32_t alignedSize = 4096u; + NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize); + + commandQueue->programGeneralStateBaseAddress(0u, true, child); + auto pSbaCmd = static_cast(commandQueue->commandStream->getSpace(sizeof(STATE_BASE_ADDRESS))); + uint32_t statelessMocsIndex = pSbaCmd->getStatelessDataPortAccessMemoryObjectControlState(); + + auto gmmHelper = device->getNEODevice()->getGmmHelper(); + uint32_t expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + EXPECT_EQ(statelessMocsIndex, expectedMocs); + + commandQueue->destroy(); +} + TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingNonCopyBlitCommandListThenWrongCommandListStatusReturned) { const ze_command_queue_desc_t desc = {}; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 9d5b6e07b8..81f68c0458 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ 
b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -759,6 +759,58 @@ TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetA EXPECT_FALSE(mockKernel.setSurfaceStateCalled); } + +using KernelBindlessUncachedMemoryTests = Test; + +TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet) { + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + MyMockKernel mockKernel; + + mockKernel.module = module.get(); + mockKernel.initialize(&desc); + + auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as()); + arg.bindless = undefined; + arg.bindful = undefined; + + NEO::MockGraphicsAllocation alloc; + + mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc); + EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs()); +} + +TEST_F(KernelBindlessUncachedMemoryTests, givenDeviceAllocationWithUncachedFlagThenKernelRequiresUncachedMocsIsSet) { + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + MyMockKernel mockKernel; + + mockKernel.module = module.get(); + mockKernel.initialize(&desc); + + auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as()); + arg.bindless = undefined; + arg.bindful = undefined; + + void *devicePtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; + ze_result_t res = device->getDriverHandle()->allocDeviceMem(device->toHandle(), + &deviceDesc, + 16384u, + 0u, + &devicePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + EXPECT_NE(nullptr, alloc); + + mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc); + EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs()); + + device->getDriverHandle()->freeMem(devicePtr); 
+} + template struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily> { //MyMockImage() : WhiteBox<::L0::ImageCoreFamily>(); diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index d2d26f172a..d8579af502 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -41,6 +41,49 @@ TEST_F(MemoryTest, givenDevicePointerThenDriverGetAllocPropertiesReturnsDeviceHa result = driverHandle->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } +TEST_F(MemoryTest, whenAllocatingDeviceMemoryWithUncachedFlagThenLocallyUncachedResourceIsSet) { + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; + ze_result_t result = driverHandle->allocDeviceMem(device->toHandle(), + &deviceDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); + EXPECT_NE(nullptr, allocData); + EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u); + + result = driverHandle->freeMem(ptr); + ASSERT_EQ(result, ZE_RESULT_SUCCESS); +} + +TEST_F(MemoryTest, whenAllocatingSharedMemoryWithUncachedFlagThenLocallyUncachedResourceIsSet) { + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = driverHandle->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); + EXPECT_NE(nullptr, allocData); + 
EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u); + + result = driverHandle->freeMem(ptr); + ASSERT_EQ(result, ZE_RESULT_SUCCESS); +} struct DriverHandleGetFdMock : public DriverHandleImp { void *importFdHandle(ze_device_handle_t hDevice, uint64_t handle) override { diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 722b340661..847e6f7d3d 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -5,8 +5,11 @@ * */ +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" +#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/source/program/kernel_info.h" @@ -17,6 +20,7 @@ #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" +#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" using ::testing::Return; @@ -134,6 +138,86 @@ HWTEST2_F(ModuleTest, givenNonPatchedTokenThenSurfaceBaseAddressIsCorrectlySet, device->getDriverHandle()->freeMem(devicePtr); } +using ModuleUncachedBufferTest = Test; + +HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithNonUncachedArgumentThenUncachedMocsNotRequired, ModuleTestSupport) { + ze_kernel_handle_t kernelHandle; + + ze_kernel_desc_t kernelDesc = {}; + kernelDesc.pKernelName = kernelName.c_str(); + + ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); + + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + auto kernelImp = 
reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); + + void *devicePtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + res = device->getDriverHandle()->allocDeviceMem(device->toHandle(), + &deviceDesc, + 16384u, + 0u, + &devicePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + EXPECT_NE(nullptr, gpuAlloc); + + uint32_t argIndex = 0u; + kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); + EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); + + Kernel::fromHandle(kernelHandle)->destroy(); + + device->getDriverHandle()->freeMem(devicePtr); +} + +HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithUncachedArgumentThenCorrectMocsAreSet, ModuleTestSupport) { + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + ze_kernel_handle_t kernelHandle; + + ze_kernel_desc_t kernelDesc = {}; + kernelDesc.pKernelName = kernelName.c_str(); + + ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); + + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); + + void *devicePtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; + res = device->getDriverHandle()->allocDeviceMem(device->toHandle(), + &deviceDesc, + 16384u, + 0u, + &devicePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + EXPECT_NE(nullptr, gpuAlloc); + + uint32_t argIndex = 0u; + kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); + EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); + + auto argInfo = 
kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); + auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); + auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); + EXPECT_EQ(devicePtr, reinterpret_cast(surfaceStateAddress->getSurfaceBaseAddress())); + + auto gmmHelper = device->getNEODevice()->getGmmHelper(); + uint32_t expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + EXPECT_EQ(expectedMocs, surfaceStateAddress->getMemoryObjectControlStateReserved()); + + Kernel::fromHandle(kernelHandle)->destroy(); + + device->getDriverHandle()->freeMem(devicePtr); +} + HWTEST_F(ModuleTest, givenKernelCreateWithIncorrectKernelNameReturnsFailure) { ze_kernel_handle_t kernelHandle; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 3b236003e6..bb56c71dd9 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -29,7 +29,15 @@ struct EncodeDispatchKernel { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; static void encode(CommandContainer &container, - const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface, uint64_t eventAddress, Device *device, PreemptionMode preemptionMode); + const void *pThreadGroupDimensions, + bool isIndirect, + bool isPredicate, + DispatchKernelEncoderI *dispatchInterface, + uint64_t eventAddress, + Device *device, + PreemptionMode preemptionMode, + bool &requiresUncachedMocs); + static void encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd); static void appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize); @@ -182,6 +190,7 @@ template struct 
EncodeStateBaseAddress { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd); + static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex); }; template diff --git a/shared/source/command_container/command_encoder_bdw_plus.inl b/shared/source/command_container/command_encoder_bdw_plus.inl index 227ed57ec3..83d7692316 100644 --- a/shared/source/command_container/command_encoder_bdw_plus.inl +++ b/shared/source/command_container/command_encoder_bdw_plus.inl @@ -24,7 +24,7 @@ namespace NEO { template void EncodeDispatchKernel::encode(CommandContainer &container, const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface, - uint64_t eventAddress, Device *device, PreemptionMode preemptionMode) { + uint64_t eventAddress, Device *device, PreemptionMode preemptionMode, bool &requiresUncachedMocs) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; @@ -150,7 +150,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, auto slmSizeNew = dispatchInterface->getSlmTotalSize(); bool dirtyHeaps = container.isAnyHeapDirty(); - bool flush = container.slmSize != slmSizeNew || dirtyHeaps; + bool flush = container.slmSize != slmSizeNew || dirtyHeaps || requiresUncachedMocs; if (flush) { PipeControlArgs args(true); @@ -159,10 +159,14 @@ void EncodeDispatchKernel::encode(CommandContainer &container, } MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), args); - if (dirtyHeaps) { + if (dirtyHeaps || requiresUncachedMocs) { STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(container, sba); + auto gmmHelper = container.getDevice()->getGmmHelper(); + uint32_t statelessMocsIndex = + requiresUncachedMocs ? 
(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + EncodeStateBaseAddress::encode(container, sba, statelessMocsIndex); container.setDirtyStateForAllHeaps(false); + requiresUncachedMocs = false; } if (container.slmSize != slmSizeNew) { @@ -326,6 +330,13 @@ size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device template void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) { + auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); + uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex); +} + +template +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex) { EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true); auto gmmHelper = container.getDevice()->getGmmHelper(); @@ -337,7 +348,7 @@ void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_B container.isHeapDirty(HeapType::SURFACE_STATE) ? 
container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr, 0, false, - (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1), + statelessMocsIndex, container.getIndirectObjectHeapBaseAddress(), container.getInstructionHeapBaseAddress(), false, diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index a084b1e071..2342379b49 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/kernel/kernel_descriptor.h" @@ -43,7 +45,8 @@ TEST_F(CommandEncodeStatesTest, givenCommandConatinerCreatedWithMaxNumAggregateI HWTEST_F(CommandEncodeStatesTest, givenenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) { uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -53,6 +56,171 @@ HWTEST_F(CommandEncodeStatesTest, givenenDispatchInterfaceWhenDispatchKernelThen ASSERT_NE(itorPC, commands.end()); } +using CommandEncodeStatesUncachedMocsTests = Test; + +HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) { + DebugManagerStateRestore restore; + 
DebugManager.flags.ForceL1Caching.set(0u); + + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + cmdContainer->setDirtyStateForAllHeaps(true); + bool requiresUncachedMocs = true; + EncodeDispatchKernel::encode(*cmdContainer.get(), + dims, + false, + false, + dispatchInterface.get(), + 0, + pDevice, + NEO::PreemptionMode::Disabled, + requiresUncachedMocs); + EXPECT_FALSE(requiresUncachedMocs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, + ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), + cmdContainer->getCommandStream()->getUsed()); + + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto itor = find(commands.begin(), commands.end()); + ASSERT_NE(commands.end(), itor); + + auto cmdSba = genCmdCast(*itor); + auto gmmHelper = cmdContainer->getDevice()->getGmmHelper(); + EXPECT_EQ(cmdSba->getStatelessDataPortAccessMemoryObjectControlState(), + (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED))); +} + +HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndNonDirtyHeapsThenCorrectMocsIsSet) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceL1Caching.set(0u); + + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + cmdContainer->setDirtyStateForAllHeaps(false); + bool requiresUncachedMocs = true; + EncodeDispatchKernel::encode(*cmdContainer.get(), + dims, + false, + false, + dispatchInterface.get(), + 0, + pDevice, + NEO::PreemptionMode::Disabled, + requiresUncachedMocs); + EXPECT_FALSE(requiresUncachedMocs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, + ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), + cmdContainer->getCommandStream()->getUsed()); + + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto itor = find(commands.begin(), commands.end()); + ASSERT_NE(commands.end(), itor); + + 
auto cmdSba = genCmdCast(*itor); + auto gmmHelper = cmdContainer->getDevice()->getGmmHelper(); + EXPECT_EQ(cmdSba->getStatelessDataPortAccessMemoryObjectControlState(), + (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED))); +} + +HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndDirtyHeapsThenSbaIsNotProgrammed) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceL1Caching.set(0u); + + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + cmdContainer->setDirtyStateForAllHeaps(true); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), + dims, + false, + false, + dispatchInterface.get(), + 0, + pDevice, + NEO::PreemptionMode::Disabled, + requiresUncachedMocs); + EXPECT_FALSE(requiresUncachedMocs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, + ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), + cmdContainer->getCommandStream()->getUsed()); + + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto itor = find(commands.begin(), commands.end()); + ASSERT_NE(commands.end(), itor); + + auto cmdSba = genCmdCast(*itor); + auto gmmHelper = cmdContainer->getDevice()->getGmmHelper(); + EXPECT_EQ(cmdSba->getStatelessDataPortAccessMemoryObjectControlState(), + (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); +} + +HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsThenSbaIsNotProgrammed) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceL1Caching.set(0u); + + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + cmdContainer->setDirtyStateForAllHeaps(false); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), + dims, + false, + false, + dispatchInterface.get(), + 0, + pDevice, + 
NEO::PreemptionMode::Disabled, + requiresUncachedMocs); + EXPECT_FALSE(requiresUncachedMocs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, + ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), + cmdContainer->getCommandStream()->getUsed()); + + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto itor = find(commands.begin(), commands.end()); + ASSERT_EQ(commands.end(), itor); +} + +HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsAndSlmSizeThenSbaIsNotProgrammed) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceL1Caching.set(0u); + + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + uint32_t slmTotalSize = 1; + EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(slmTotalSize)); + + cmdContainer->setDirtyStateForAllHeaps(false); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), + dims, + false, + false, + dispatchInterface.get(), + 0, + pDevice, + NEO::PreemptionMode::Disabled, + requiresUncachedMocs); + EXPECT_FALSE(requiresUncachedMocs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, + ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), + cmdContainer->getCommandStream()->getUsed()); + + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto itor = find(commands.begin(), commands.end()); + ASSERT_EQ(commands.end(), itor); +} + HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhenDispatchKernelThenNextCommandBufferIsAdded) { uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -61,7 +229,8 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen cmdContainer->getCommandStream()->getSpace(cmdContainer->getCommandStream()->getAvailableSpace() - sizeof(typename 
FamilyType::MI_BATCH_BUFFER_END)); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto cmdBuffersCountAfter = cmdContainer->getCmdBufferAllocations().size(); @@ -74,7 +243,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); uint32_t slmTotalSize = 1; EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(slmTotalSize)); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -90,7 +261,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); uint32_t slmTotalSize = 0; EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(slmTotalSize)); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -122,7 +295,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh 
EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData)); EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast(sizeof(BINDING_TABLE_STATE)))); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); EXPECT_EQ(interfaceDescriptorData->getBindingTablePointer(), expectedOffset); @@ -149,7 +323,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData)); EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast(sizeof(BINDING_TABLE_STATE)))); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); EXPECT_EQ(interfaceDescriptorData->getBindingTablePointer(), 0u); @@ -174,7 +349,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa const uint8_t *dshData = reinterpret_cast(&samplerState); EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeapData()).WillRepeatedly(::testing::Return(dshData)); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool 
requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); auto borderColorOffsetInDsh = usedBefore; @@ -205,7 +381,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp const uint8_t *dshData = reinterpret_cast(&samplerState); EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeapData()).WillRepeatedly(::testing::Return(dshData)); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); auto borderColorOffsetInDsh = usedBefore; @@ -225,7 +402,9 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = offsets[0]; dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = offsets[1]; dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = offsets[2]; - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -249,7 +428,9 @@ HWTEST_F(CommandEncodeStatesTest, 
givenIndarectOffsetsSizeWhenDispatchingKernelT dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = offsets[0]; dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[1] = offsets[1]; dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = offsets[2]; - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -286,7 +467,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe DebugManager.flags.ForceBtpPrefetchMode.set(-1); cmdContainer.reset(new MyMockCommandContainer()); cmdContainer->initialize(pDevice); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); @@ -314,7 +496,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe DebugManager.flags.ForceBtpPrefetchMode.set(0); cmdContainer.reset(new MyMockCommandContainer()); cmdContainer->initialize(pDevice); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, 
dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); @@ -337,7 +521,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe DebugManager.flags.ForceBtpPrefetchMode.set(1); cmdContainer.reset(new MyMockCommandContainer()); cmdContainer->initialize(pDevice); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); @@ -365,7 +551,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize)); cmdContainer->setDirtyStateForAllHeaps(false); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -379,6 +566,29 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha } } +HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) { + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + cmdContainer->slmSize = 1; + EXPECT_CALL(*dispatchInterface.get(), 
getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize)); + cmdContainer->setDirtyStateForAllHeaps(false); + + bool requiresUncachedMocs = true; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); + + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto itor = find(commands.begin(), commands.end()); + ASSERT_NE(commands.end(), itor); + + auto cmdSba = genCmdCast(*itor); + auto gmmHelper = cmdContainer->getDevice()->getGmmHelper(); + EXPECT_EQ(cmdSba->getStatelessDataPortAccessMemoryObjectControlState(), + (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED))); +} + HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; @@ -388,7 +598,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize)); cmdContainer->setDirtyStateForAllHeaps(true); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -408,7 +619,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch 
EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize)); cmdContainer->setDirtyStateForAllHeaps(true); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList cmdList; CmdParse::parseCommandBuffer(cmdList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -445,7 +657,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange auto slmSizeBefore = cmdContainer->slmSize; - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -466,7 +679,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeorWhenD cmdContainer->setIddBlock(cmdContainer->getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, sizeof(INTERFACE_DESCRIPTOR_DATA) * cmdContainer->getNumIddPerBlock())); cmdContainer->nextIddInBlock = 0; - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); GenCmdList commands; 
CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -527,7 +741,8 @@ HWTEST_F(EncodeDispatchKernelTest, givenBindlessBufferArgWhenDispatchingKernelTh EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData)); EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast(sizeof(BINDING_TABLE_STATE)))); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); DataPortBindlessSurfaceExtendedMessageDescriptor extMessageDesc; extMessageDesc.setBindlessSurfaceOffset(surfaceStateOffsetOnHeap); @@ -585,7 +800,8 @@ HWTEST_F(EncodeDispatchKernelTest, givenBindlessImageArgWhenDispatchingKernelThe EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData)); EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast(sizeof(BINDING_TABLE_STATE)))); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); DataPortBindlessSurfaceExtendedMessageDescriptor extMessageDesc; extMessageDesc.setBindlessSurfaceOffset(surfaceStateOffsetOnHeap); @@ -644,7 +860,8 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching EXPECT_CALL(*dispatchInterface.get(), 
getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData)); EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast(sizeof(BINDING_TABLE_STATE)))); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + bool requiresUncachedMocs = false; + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); EXPECT_EQ(pattern, *patchLocation); iOpenCL::SPatchSamplerKernelArgument samplerArg = {}; @@ -668,7 +885,8 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching ioh->replaceBuffer(ioh->getCpuBase(), ioh->getMaxAvailableSpace()); memset(ioh->getCpuBase(), 0, ioh->getMaxAvailableSpace()); - EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled); + + EncodeDispatchKernel::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs); EXPECT_THAT(ptrOffset(ioh->getCpuBase(), iohOffset), MemoryZeroed(ioh->getMaxAvailableSpace() - iohOffset)); }