From 7b95dc4be45a9e0b4d07119891748243dbce9ad2 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Mon, 27 Sep 2021 23:27:46 +0000 Subject: [PATCH] Add partitioned post sync for completion fence of direct submission Related-To: NEO-6244 Signed-off-by: Zbigniew Zdanowicz --- .../direct_submission/direct_submission_hw.h | 14 ++-- .../direct_submission_hw.inl | 12 ++-- .../dispatchers/blitter_dispatcher.h | 3 +- .../dispatchers/blitter_dispatcher.inl | 3 +- .../dispatchers/render_dispatcher.h | 3 +- .../dispatchers/render_dispatcher.inl | 4 +- .../linux/drm_direct_submission.inl | 18 +++-- shared/test/common/mocks/mock_device.h | 1 + .../common/mocks/mock_direct_submission_hw.h | 2 + .../windows/mock_wddm_direct_submission.h | 1 + .../direct_submission_tests.cpp | 67 ++++++++++++++++++- .../dispatchers/blitter_dispatcher_tests.cpp | 2 +- .../dispatchers/render_dispatcher_tests.cpp | 34 +++++++++- .../linux/drm_direct_submission_tests.cpp | 45 +++++++++++++ 14 files changed, 187 insertions(+), 22 deletions(-) diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 14ec8dfa9a..67bc85286b 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -18,15 +18,17 @@ namespace NEO { #pragma pack(1) struct RingSemaphoreData { uint32_t QueueWorkCount; - uint8_t ReservedCacheline[60]; + uint8_t ReservedCacheline0[60]; uint32_t tagAllocation; - uint8_t ReservedCacheline2[60]; + uint8_t ReservedCacheline1[60]; uint32_t DiagnosticModeCounter; uint32_t Reserved0Uint32; - uint64_t Reserved0Uint64; - uint8_t ReservedCacheline3[48]; + uint64_t Reserved1Uint64; + uint8_t ReservedCacheline2[48]; + uint64_t miFlushSpace; + uint8_t ReservedCacheline3[56]; }; -static_assert((64u * 3) == sizeof(RingSemaphoreData), "Invalid size for RingSemaphoreData"); +static_assert((64u * 4) == sizeof(RingSemaphoreData), "Invalid size for RingSemaphoreData"); #pragma pack() using DirectSubmissionAllocations = StackVec; @@ -144,10 +146,12 @@ class DirectSubmissionHw { RingBufferUse currentRingBuffer = RingBufferUse::FirstBuffer; uint32_t workloadMode = 0; uint32_t workloadModeOneExpectedValue = 0u; + uint32_t activeTiles = 1u; bool ringStart = false; bool disableCpuCacheFlush = true; bool disableCacheFlush = false; bool disableMonitorFence = false; + bool partitionedMode = false; }; } // namespace NEO diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 6c45076bde..49445a3484 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -96,7 +96,7 @@ bool DirectSubmissionHw::allocateResources() { workloadModeOneStoreAddress = static_cast(&semaphoreData->DiagnosticModeCounter); *static_cast(workloadModeOneStoreAddress) = 0u; - this->gpuVaForMiFlush = this->semaphoreGpuVa + 2 * MemoryConstants::cacheLineSize; + this->gpuVaForMiFlush = this->semaphoreGpuVa + offsetof(RingSemaphoreData, miFlushSpace); auto ret = makeResourcesResident(allocations); @@ -185,7 +185,7 @@ bool DirectSubmissionHw::stopRingBuffer() { if (disableMonitorFence) { TagData currentTagData = {}; getTagAddressValue(currentTagData); - Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false); + Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false, this->partitionedMode); } Dispatcher::dispatchStopCommandBuffer(ringCommandStream); @@ -240,7 +240,7 @@ inline void DirectSubmissionHw::dispatchSwitchRingBufferS if (disableMonitorFence) { TagData currentTagData = {}; getTagAddressValue(currentTagData); - Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false); + Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false, this->partitionedMode); } Dispatcher::dispatchStartCommandBuffer(ringCommandStream, nextBufferGpuAddress); } @@ -321,7 +321,7 @@ void *DirectSubmissionHw::dispatchWorkloadSection(BatchBu if (!disableMonitorFence) { TagData currentTagData = {}; getTagAddressValue(currentTagData); - Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false); + Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false, this->partitionedMode); } dispatchSemaphoreSection(currentQueueWorkCount + 1); @@ -340,10 +340,10 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(); bool buffersSwitched = false; - uint64_t startGpuVa = getCommandBufferPositionGpuAddress(ringCommandStream.getSpace(0)); + getCommandBufferPositionGpuAddress(ringCommandStream.getSpace(0)); if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) { - startGpuVa = switchRingBuffers(); + switchRingBuffers(); buffersSwitched = true; } diff --git a/shared/source/direct_submission/dispatchers/blitter_dispatcher.h b/shared/source/direct_submission/dispatchers/blitter_dispatcher.h index 001697d6b9..a4cee09516 100644 --- a/shared/source/direct_submission/dispatchers/blitter_dispatcher.h +++ b/shared/source/direct_submission/dispatchers/blitter_dispatcher.h @@ -20,7 +20,8 @@ class BlitterDispatcher : public Dispatcher { uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, - bool useNotifyEnable); + bool useNotifyEnable, + bool partitionedWorkload); static size_t getSizeMonitorFence(const HardwareInfo &hwInfo); static void dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo, uint64_t address); diff --git a/shared/source/direct_submission/dispatchers/blitter_dispatcher.inl b/shared/source/direct_submission/dispatchers/blitter_dispatcher.inl index feaab27eab..994685c022 100644 --- a/shared/source/direct_submission/dispatchers/blitter_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/blitter_dispatcher.inl @@ -27,7 +27,8 @@ inline void BlitterDispatcher::dispatchMonitorFence(LinearStream &cmd uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, - bool useNotifyEnable) { + bool useNotifyEnable, + bool partitionedWorkload) { MiFlushArgs args; args.commandWithPostSync = true; args.notifyEnable = useNotifyEnable; diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.h b/shared/source/direct_submission/dispatchers/render_dispatcher.h index 297de14039..983b544ad8 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.h +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.h @@ -20,7 +20,8 @@ class RenderDispatcher : public Dispatcher { uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, - bool useNotifyEnable); + bool useNotifyEnable, + bool partitionedWorkload); static size_t getSizeMonitorFence(const HardwareInfo &hwInfo); static void dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo, uint64_t address); diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.inl b/shared/source/direct_submission/dispatchers/render_dispatcher.inl index d2e4d20ed8..556b4de62c 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.inl @@ -30,9 +30,11 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, - bool useNotifyEnable) { + bool useNotifyEnable, + bool partitionedWorkload) { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; PipeControlArgs args(true); + args.workloadPartitionOffset = partitionedWorkload; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdBuffer, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index f3804f3ef3..1ac542001d 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/linux/drm_direct_submission.h" @@ -12,6 +13,7 @@ #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_context_linux.h" +#include "shared/source/utilities/wait_util.h" #include @@ -27,7 +29,11 @@ DrmDirectSubmission::DrmDirectSubmission(Device &device, if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) { this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get(); } - + auto subDevices = device.getDeviceBitfield(); + this->activeTiles = ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true) + ? static_cast(subDevices.count()) + : 1u; + this->partitionedMode = this->activeTiles > 1u; auto osContextLinux = static_cast(&this->osContext); osContextLinux->getDrm().setDirectSubmissionActive(true); }; @@ -41,9 +47,9 @@ inline DrmDirectSubmission::~DrmDirectSubmission() { template bool DrmDirectSubmission::allocateOsResources() { - this->currentTagData.tagAddress = this->semaphoreGpuVa + MemoryConstants::cacheLineSize; + this->currentTagData.tagAddress = this->semaphoreGpuVa + offsetof(RingSemaphoreData, tagAllocation); this->currentTagData.tagValue = 0u; - this->tagAddress = reinterpret_cast(reinterpret_cast(this->semaphorePtr) + MemoryConstants::cacheLineSize); + this->tagAddress = reinterpret_cast(reinterpret_cast(this->semaphorePtr) + offsetof(RingSemaphoreData, tagAllocation)); return true; } @@ -162,7 +168,11 @@ void DrmDirectSubmission::getTagAddressValue(TagData &tag template void DrmDirectSubmission::wait(uint32_t taskCountToWait) { - while (taskCountToWait > *this->tagAddress) { + auto pollAddress = this->tagAddress; + for (uint32_t i = 0; i < this->activeTiles; i++) { + while (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { + } + pollAddress = ptrOffset(pollAddress, CommonConstants::partitionAddressOffset); } } diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index 9014596188..d3b3844815 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -51,6 +51,7 @@ class MockDevice : public RootDevice { using Device::createDeviceInternals; using Device::createEngine; using Device::createSubDevices; + using Device::deviceBitfield; using Device::deviceInfo; using Device::engineGroups; using Device::engineInstanced; diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index 4f74572379..ef3ae89eef 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -15,6 +15,7 @@ namespace NEO { template struct MockDirectSubmissionHw : public DirectSubmissionHw { using BaseClass = DirectSubmissionHw; + using BaseClass::activeTiles; using BaseClass::allocateResources; using BaseClass::completionRingBuffers; using BaseClass::cpuCachelineFlush; @@ -44,6 +45,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::getSizeSwitchRingBufferSection; using BaseClass::hwInfo; using BaseClass::osContext; + using BaseClass::partitionedMode; using BaseClass::performDiagnosticMode; using BaseClass::ringBuffer; using BaseClass::ringBuffer2; diff --git a/shared/test/common/mocks/windows/mock_wddm_direct_submission.h b/shared/test/common/mocks/windows/mock_wddm_direct_submission.h index 826a19a94c..20dcc56903 100644 --- a/shared/test/common/mocks/windows/mock_wddm_direct_submission.h +++ b/shared/test/common/mocks/windows/mock_wddm_direct_submission.h @@ -13,6 +13,7 @@ namespace NEO { template struct MockWddmDirectSubmission : public WddmDirectSubmission { using BaseClass = WddmDirectSubmission; + using BaseClass::activeTiles; using BaseClass::allocateOsResources; using BaseClass::allocateResources; using BaseClass::commandBufferHeader; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp index dd894941a8..8d9e7e6ac8 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp @@ -595,7 +595,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchTagUpdateSection bool ret = directSubmission.initialize(false); EXPECT_TRUE(ret); - Dispatcher::dispatchMonitorFence(directSubmission.ringCommandStream, 0ull, 0ull, *directSubmission.hwInfo, false); + Dispatcher::dispatchMonitorFence(directSubmission.ringCommandStream, 0ull, 0ull, *directSubmission.hwInfo, false, false); EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed()); EXPECT_EQ(Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo), directSubmission.ringCommandStream.getUsed()); } @@ -1270,3 +1270,68 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(expectedVfprintfCall, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_EQ(2u, NEO::IoFunctions::mockFcloseCalled); } + +HWCMDTEST_F(IGFX_GEN12_CORE, DirectSubmissionDispatchBufferTest, + givenDirectSubmissionInPartitionModeWhenDispatchingCommandBufferThenExpectDispatchPartitonedPipeControlInCommandBuffer) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; + + FlushStampTracker flushStamp(true); + + MockDirectSubmissionHw> directSubmission(*pDevice, + *osContext.get()); + directSubmission.disableMonitorFence = false; + directSubmission.partitionedMode = true; + + bool ret = directSubmission.initialize(true); + EXPECT_TRUE(ret); + EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed()); + GraphicsAllocation *oldRingAllocation = directSubmission.ringCommandStream.getGraphicsAllocation(); + + EXPECT_EQ(0u, directSubmission.semaphoreData->QueueWorkCount); + EXPECT_EQ(1u, directSubmission.currentQueueWorkCount); + EXPECT_EQ(1u, directSubmission.submitCount); + size_t submitSize = RenderDispatcher::getSizePreemption() + + directSubmission.getSizeSemaphoreSection(); + EXPECT_EQ(submitSize, directSubmission.submitSize); + EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); + EXPECT_EQ(1u, directSubmission.handleResidencyCount); + + directSubmission.ringCommandStream.getSpace(directSubmission.ringCommandStream.getAvailableSpace() - + directSubmission.getSizeSwitchRingBufferSection()); + + directSubmission.tagValueSetValue = 0x4343123ull; + directSubmission.tagAddressSetValue = 0xBEEF00000ull; + ret = directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); + EXPECT_TRUE(ret); + EXPECT_NE(oldRingAllocation, directSubmission.ringCommandStream.getGraphicsAllocation()); + EXPECT_EQ(1u, directSubmission.semaphoreData->QueueWorkCount); + EXPECT_EQ(2u, directSubmission.currentQueueWorkCount); + EXPECT_EQ(1u, directSubmission.submitCount); + EXPECT_EQ(2u, directSubmission.handleResidencyCount); + + EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed()); + EXPECT_TRUE(directSubmission.ringStart); + + HardwareParse hwParse; + hwParse.parsePipeControl = true; + hwParse.parseCommands(directSubmission.ringCommandStream, 0); + hwParse.findHardwareCommands(); + + bool foundFenceUpdate = false; + for (auto &it : hwParse.pipeControlList) { + PIPE_CONTROL *pipeControl = reinterpret_cast(it); + if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + foundFenceUpdate = true; + uint32_t addressHigh = pipeControl->getAddressHigh(); + uint32_t address = pipeControl->getAddress(); + uint64_t actualAddress = (static_cast(addressHigh) << 32ull) | address; + EXPECT_EQ(directSubmission.tagAddressSetValue, actualAddress); + uint64_t data = pipeControl->getImmediateData(); + EXPECT_EQ(directSubmission.tagValueSetValue, data); + EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable()); + break; + } + } + EXPECT_TRUE(foundFenceUpdate); +} diff --git a/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp index 7293c6ffe0..e0e9caf18c 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp @@ -37,7 +37,7 @@ HWTEST_F(BlitterDispatcheTest, givenBlitterWhenDispatchingMonitorFenceCmdThenDis uint64_t expectedGpuAddress = 0x5100ull; uint64_t expectedValue = 0x1234ull; - BlitterDispatcher::dispatchMonitorFence(cmdBuffer, expectedGpuAddress, expectedValue, pDevice->getHardwareInfo(), false); + BlitterDispatcher::dispatchMonitorFence(cmdBuffer, expectedGpuAddress, expectedValue, pDevice->getHardwareInfo(), false, false); EXPECT_EQ(expectedSize, cmdBuffer.getUsed()); diff --git a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp index 7ac7d95240..01af2f5571 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp @@ -64,7 +64,7 @@ HWTEST_F(RenderDispatcherTest, givenRenderWhenAddingMonitorFenceCmdThenExpectPip uint32_t gpuVaLow = static_cast(gpuVa & 0x0000FFFFFFFFull); uint32_t gpuVaHigh = static_cast(gpuVa >> 32); - RenderDispatcher::dispatchMonitorFence(cmdBuffer, gpuVa, value, hardwareInfo, false); + RenderDispatcher::dispatchMonitorFence(cmdBuffer, gpuVa, value, hardwareInfo, false, false); HardwareParse hwParse; hwParse.parseCommands(cmdBuffer); @@ -118,3 +118,35 @@ HWTEST_F(RenderDispatcherTest, givenRenderWhenAddingCacheFlushCmdThenExpectPipeC } EXPECT_TRUE(foundCacheFlush); } + +HWCMDTEST_F(IGFX_GEN12_CORE, RenderDispatcherTest, + givenRenderDispatcherPartitionedWorkloadFlagTrueWhenAddingMonitorFenceCmdThenExpectPipeControlWithProperAddressAndValueAndPartitionParameter) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; + + uint64_t gpuVa = 0xBADA550000ull; + uint64_t value = 0x102030; + uint32_t gpuVaLow = static_cast(gpuVa & 0x0000FFFFFFFFull); + uint32_t gpuVaHigh = static_cast(gpuVa >> 32); + + RenderDispatcher::dispatchMonitorFence(cmdBuffer, gpuVa, value, hardwareInfo, false, true); + + HardwareParse hwParse; + hwParse.parsePipeControl = true; + hwParse.parseCommands(cmdBuffer); + hwParse.findHardwareCommands(); + + bool foundMonitorFence = false; + for (auto &it : hwParse.pipeControlList) { + PIPE_CONTROL *pipeControl = reinterpret_cast(it); + if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + foundMonitorFence = true; + EXPECT_EQ(gpuVaLow, pipeControl->getAddress()); + EXPECT_EQ(gpuVaHigh, pipeControl->getAddressHigh()); + EXPECT_EQ(value, pipeControl->getImmediateData()); + EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable()); + break; + } + } + EXPECT_TRUE(foundMonitorFence); +} diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index 8121b999a8..815126a735 100644 --- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/direct_submission/linux/drm_direct_submission.h" #include "shared/source/os_interface/linux/os_context_linux.h" @@ -12,6 +13,7 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" +#include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/mock_device.h" @@ -20,6 +22,10 @@ #include +namespace CpuIntrinsicsTests { +extern std::atomic pauseCounter; +} + struct DrmDirectSubmissionTest : public DrmMemoryManagerBasic { void SetUp() override { DrmMemoryManagerBasic::SetUp(); @@ -47,6 +53,7 @@ struct DrmDirectSubmissionTest : public DrmMemoryManagerBasic { template struct MockDrmDirectSubmission : public DrmDirectSubmission { using BaseClass = DrmDirectSubmission; + using BaseClass::activeTiles; using BaseClass::allocateResources; using BaseClass::currentTagData; using BaseClass::disableMonitorFence; @@ -58,11 +65,13 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission(device, osContext) { this->disableMonitorFence = false; @@ -281,3 +290,39 @@ HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionNewResourceTlbFlusZeroAnd EXPECT_EQ(directSubmission.getSizeNewResourceHandler(), 0u); } + +HWTEST_F(DrmDirectSubmissionTest, givenMultipleActiveTilesWhenWaitingForTagUpdateThenQueryAllActiveTiles) { + using Dispatcher = RenderDispatcher; + + MockDrmDirectSubmission directSubmission(*device.get(), + *osContext.get()); + + bool ret = directSubmission.allocateResources(); + EXPECT_TRUE(ret); + directSubmission.activeTiles = 2; + + auto pollAddress = directSubmission.tagAddress; + *pollAddress = 10; + pollAddress = ptrOffset(pollAddress, 8); + *pollAddress = 10; + + CpuIntrinsicsTests::pauseCounter = 0; + directSubmission.wait(9); + EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter); +} + +HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) { + using Dispatcher = RenderDispatcher; + + VariableBackup backup(&ImplicitScaling::apiSupport, true); + device->deviceBitfield.set(0b11); + + MockDrmDirectSubmission directSubmission(*device.get(), + *osContext.get()); + + EXPECT_EQ(2u, directSubmission.activeTiles); + EXPECT_TRUE(directSubmission.partitionedMode); + + bool ret = directSubmission.allocateResources(); + EXPECT_TRUE(ret); +}