diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 51725cfb93..a53195ed9a 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -20,12 +20,13 @@ struct RingSemaphoreData { uint32_t QueueWorkCount; uint8_t ReservedCacheline[60]; uint32_t tagAllocation; - uint32_t Reserved2Uint32; - uint32_t Reserved3Uint32; - uint32_t Reserved4Uint32; - uint64_t Reserved1Uint64; - uint64_t Reserved2Uint64; + uint8_t ReservedCacheline2[60]; + uint32_t DiagnosticModeCounter; + uint32_t Reserved0Uint32; + uint64_t Reserved0Uint64; + uint8_t ReservedCacheline3[48]; }; +static_assert((64u * 3) == sizeof(RingSemaphoreData), "Invalid size for RingSemaphoreData"); #pragma pack() using DirectSubmissionAllocations = StackVec; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 457106ec0a..5062523165 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -86,7 +86,7 @@ bool DirectSubmissionHw::allocateResources() { memset(semaphorePtr, 0, sizeof(RingSemaphoreData)); semaphoreData->QueueWorkCount = 0; cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); - workloadModeOneStoreAddress = static_cast(&semaphoreData->Reserved4Uint32); + workloadModeOneStoreAddress = static_cast(&semaphoreData->DiagnosticModeCounter); *static_cast(workloadModeOneStoreAddress) = 0u; auto ret = makeResourcesResident(allocations); @@ -130,10 +130,11 @@ bool DirectSubmissionHw::initialize(bool submitOnInit) { size_t startBufferSize = Dispatcher::getSizePreemption() + getSizeSemaphoreSection(); Dispatcher::dispatchPreemption(ringCommandStream); - dispatchSemaphoreSection(currentQueueWorkCount); if (workloadMode == 1) { dispatchDiagnosticModeSection(); + startBufferSize += getDiagnosticModeSection(); } + dispatchSemaphoreSection(currentQueueWorkCount); ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize); performDiagnosticMode(); diff --git a/shared/source/generated/gen11/hw_cmds_generated_gen11.inl b/shared/source/generated/gen11/hw_cmds_generated_gen11.inl index 4b737b91eb..f0057828a3 100644 --- a/shared/source/generated/gen11/hw_cmds_generated_gen11.inl +++ b/shared/source/generated/gen11/hw_cmds_generated_gen11.inl @@ -5206,8 +5206,7 @@ typedef struct tagMI_STORE_DATA_IMM { uint32_t CommandType : BITFIELD_RANGE(29, 31); uint64_t CoreModeEnable : BITFIELD_RANGE(0, 0); uint64_t Reserved_33 : BITFIELD_RANGE(1, 1); - uint64_t Address_Graphicsaddress39_2 : BITFIELD_RANGE(2, 39); - uint64_t Address_Reserved : BITFIELD_RANGE(40, 63); + uint64_t Address : BITFIELD_RANGE(2, 63); uint32_t DataDword0; uint32_t DataDword1; } Common; @@ -5262,25 +5261,15 @@ typedef struct tagMI_STORE_DATA_IMM { inline uint64_t getCoreModeEnable(void) const { return (TheStructure.Common.CoreModeEnable); } - typedef enum tagADDRESS_GRAPHICSADDRESS39_2 { - ADDRESS_GRAPHICSADDRESS39_2_BIT_SHIFT = 0x2, - ADDRESS_GRAPHICSADDRESS39_2_ALIGN_SIZE = 0x4, - } ADDRESS_GRAPHICSADDRESS39_2; + typedef enum tagADDRESS { + ADDRESS_BIT_SHIFT = 0x2, + ADDRESS_ALIGN_SIZE = 0x4, + } ADDRESS; inline void setAddress(const uint64_t value) { - TheStructure.Common.Address_Graphicsaddress39_2 = value >> ADDRESS_GRAPHICSADDRESS39_2_BIT_SHIFT; + TheStructure.Common.Address = value >> ADDRESS_BIT_SHIFT; } inline uint64_t getAddress(void) const { - return (TheStructure.Common.Address_Graphicsaddress39_2 << ADDRESS_GRAPHICSADDRESS39_2_BIT_SHIFT); - } - typedef enum tagADDRESS_RESERVED { - ADDRESS_RESERVED_BIT_SHIFT = 0x2, - ADDRESS_RESERVED_ALIGN_SIZE = 0x4, - } ADDRESS_RESERVED; - inline void setAddressReserved(const uint64_t value) { - TheStructure.Common.Address_Reserved = value >> ADDRESS_RESERVED_BIT_SHIFT; - } - inline uint64_t getAddressReserved(void) const { - return (TheStructure.Common.Address_Reserved << ADDRESS_RESERVED_BIT_SHIFT); + return (TheStructure.Common.Address << ADDRESS_BIT_SHIFT); } inline void setDataDword0(const uint32_t value) { TheStructure.Common.DataDword0 = value; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp index 8b9b6e22b3..d6023a54f7 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp @@ -465,7 +465,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(0x40u + 1u, storeData->getDataDword0()); uint64_t expectedGpuVa = directSubmission.semaphoreGpuVa; auto semaphore = static_cast(directSubmission.semaphorePtr); - expectedGpuVa += ptrDiff(&semaphore->Reserved4Uint32, directSubmission.semaphorePtr); + expectedGpuVa += ptrDiff(&semaphore->DiagnosticModeCounter, directSubmission.semaphorePtr); EXPECT_EQ(expectedGpuVa, storeData->getAddress()); } @@ -1023,6 +1023,9 @@ HWTEST_F(DirectSubmissionTest, HWTEST_F(DirectSubmissionTest, givenDirectSubmissionDiagnosticAvailableWhenDiagnosticRegistryUsedThenDoPerformDiagnosticRun) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using COMPARE_OPERATION = typename FamilyType::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + using WAIT_MODE = typename FamilyType::MI_SEMAPHORE_WAIT::WAIT_MODE; using Dispatcher = RenderDispatcher; if (!NEO::directSubmissionDiagnosticAvailable) { @@ -1079,13 +1082,33 @@ HWTEST_F(DirectSubmissionTest, execCount += 1; ASSERT_EQ(execCount, storeDataCmdList.size()); + uint64_t expectedStoreAddress = directSubmission.semaphoreGpuVa; + expectedStoreAddress += ptrDiff(directSubmission.workloadModeOneStoreAddress, directSubmission.semaphorePtr); + uint32_t expectedData = 1u; for (auto &storeCmdData : storeDataCmdList) { MI_STORE_DATA_IMM *storeCmd = static_cast(storeCmdData); auto storeData = storeCmd->getDataDword0(); EXPECT_EQ(expectedData, storeData); expectedData++; + EXPECT_EQ(expectedStoreAddress, storeCmd->getAddress()); } + + uint8_t *cmdBufferPosition = static_cast(directSubmission.ringCommandStream.getCpuBase()) + Dispatcher::getSizePreemption(); + MI_STORE_DATA_IMM *storeDataCmdAtPosition = genCmdCast(cmdBufferPosition); + ASSERT_NE(nullptr, storeDataCmdAtPosition); + EXPECT_EQ(1u, storeDataCmdAtPosition->getDataDword0()); + EXPECT_EQ(expectedStoreAddress, storeDataCmdAtPosition->getAddress()); + + cmdBufferPosition += sizeof(MI_STORE_DATA_IMM); + cmdBufferPosition += directSubmission.getSizeDisablePrefetcher(); + MI_SEMAPHORE_WAIT *semaphoreWaitCmdAtPosition = genCmdCast(cmdBufferPosition); + ASSERT_NE(nullptr, semaphoreWaitCmdAtPosition); + EXPECT_EQ(COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, + semaphoreWaitCmdAtPosition->getCompareOperation()); + EXPECT_EQ(1u, semaphoreWaitCmdAtPosition->getSemaphoreDataDword()); + EXPECT_EQ(directSubmission.semaphoreGpuVa, semaphoreWaitCmdAtPosition->getSemaphoreGraphicsAddress()); + EXPECT_EQ(WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreWaitCmdAtPosition->getWaitMode()); } HWTEST_F(DirectSubmissionTest, diff --git a/shared/test/unit_test/mocks/mock_direct_submission_hw.h b/shared/test/unit_test/mocks/mock_direct_submission_hw.h index 1cf49d7e5e..f69234cbc3 100644 --- a/shared/test/unit_test/mocks/mock_direct_submission_hw.h +++ b/shared/test/unit_test/mocks/mock_direct_submission_hw.h @@ -29,14 +29,18 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::disableCacheFlush; using BaseClass::disableCpuCacheFlush; using BaseClass::disableMonitorFence; + using BaseClass::dispatchDisablePrefetcher; + using BaseClass::dispatchPrefetchMitigation; using BaseClass::dispatchSemaphoreSection; using BaseClass::dispatchStartSection; using BaseClass::dispatchSwitchRingBufferSection; using BaseClass::dispatchWorkloadSection; using BaseClass::getCommandBufferPositionGpuAddress; using BaseClass::getDiagnosticModeSection; + using BaseClass::getSizeDisablePrefetcher; using BaseClass::getSizeDispatch; using BaseClass::getSizeEnd; + using BaseClass::getSizePrefetchMitigation; using BaseClass::getSizeSemaphoreSection; using BaseClass::getSizeStartSection; using BaseClass::getSizeSwitchRingBufferSection;