// Patch summary (unified git diff over C++ sources; every hunk below is left untouched).
//
// What the hunks do:
//  - CommandStreamReceiverHw::programVFEState keeps the value returned by
//    PreambleHelper::programVFEState and, when
//    DebugManager.flags.AddPatchInfoCommentsForAUBDump is set, passes it together with
//    getScratchPatchAddress() and the stream to
//    flatBatchBufferHelper->collectScratchSpacePatchInfo().
//  - PatchInfoAllocationType gains a ScratchSpace enumerator.
//  - FlatBatchBufferHelper declares the pure virtual collectScratchSpacePatchInfo();
//    FlatBatchBufferHelperHw overrides it. For a non-zero scratchAddress the override reads
//    the value stored at csr.getCpuBase() + commandOffset, masks it with 0x3FF, and appends
//    a PatchInfoData (ScratchSpace -> Default) to patchInfoCollection. A zero address
//    records nothing.
//  - PreambleHelper::programVFEState now returns uint64_t: pCommandStream->getUsed(),
//    sampled after addPipeControlBeforeVfeCmd() and before space for MEDIA_VFE_STATE is
//    taken, plus MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET.
//  - Kernel::patchWithImplicitSurface, under the same debug flag, pushes a PatchInfoData
//    (KernelArg -> IndirectObjectHeap, with pointerSize) onto patchInfoDataList.
//  - MockCsrHw2 exposes programVFEState and requiredScratchSize through using-declarations;
//    unit tests are added for the enabled, disabled and empty-scratch cases.
//
// NOTE(review): collectScratchSpacePatchInfo dereferences csr.getGraphicsAllocation()
//   without a null check - confirm every LinearStream reaching this path has an allocation.
// NOTE(review): two of the new CSR tests pass stateBaseAddressDirty / vfeStateDirty to
//   setRequiredScratchSpace uninitialized; presumably they are pure out-parameters -
//   confirm, or initialize them to false.
// NOTE(review): the "Disabled" tests do not set the debug flag and so rely on its default
//   value - presumably false; verify.
// NOTE(review): the new PreambleTest constructs `helper` but the visible test body never
//   uses it, and it only checks EXPECT_NE(0u, offset) rather than the exact offset.
// NOTE(review): preamble_bdw_plus.inl gains an include of flat_batch_buffer_helper.h, but
//   no name from that header is used in the visible hunk - verify it is needed.
// NOTE(review): this copy of the patch has its line breaks collapsed, and template argument
//   lists appear to be missing (e.g. "std::vector &", "reinterpret_cast(",
//   "template inline void"); regenerate it from the original commit before applying.
diff --git a/runtime/command_stream/command_stream_receiver_hw_base.inl b/runtime/command_stream/command_stream_receiver_hw_base.inl index dd3455f151..4e684379b1 100644 --- a/runtime/command_stream/command_stream_receiver_hw_base.inl +++ b/runtime/command_stream/command_stream_receiver_hw_base.inl @@ -670,7 +670,10 @@ inline void CommandStreamReceiverHw::programPreamble(LinearStream &cs template inline void CommandStreamReceiverHw::programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads) { if (mediaVfeStateDirty) { - PreambleHelper::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads); + auto commandOffset = PreambleHelper::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads); + if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { + flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr); + } setMediaVFEStateDirty(false); } } diff --git a/runtime/helpers/address_patch.h b/runtime/helpers/address_patch.h index 70c6ed911f..ebd58020b8 100644 --- a/runtime/helpers/address_patch.h +++ b/runtime/helpers/address_patch.h @@ -21,6 +21,7 @@ enum PatchInfoAllocationType { TagAddress, TagValue, GUCStartMessage, + ScratchSpace }; struct PatchInfoData { diff --git a/runtime/helpers/flat_batch_buffer_helper.h b/runtime/helpers/flat_batch_buffer_helper.h index 10256a9247..7bb20b5e4b 100644 --- a/runtime/helpers/flat_batch_buffer_helper.h +++ b/runtime/helpers/flat_batch_buffer_helper.h @@ -32,6 +32,7 @@ class FlatBatchBufferHelper { virtual GraphicsAllocation *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) = 0; virtual char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector &indirectPatchInfo) = 0; virtual void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) = 0; + virtual void 
collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) = 0; static void fixCrossThreadDataInfo(std::vector &data, size_t offsetCrossThreadData, uint64_t gpuAddress); std::vector &getCommandChunkList() { return commandChunkList; } diff --git a/runtime/helpers/flat_batch_buffer_helper_hw.h b/runtime/helpers/flat_batch_buffer_helper_hw.h index f1b2e7d7b7..0d66fce61f 100644 --- a/runtime/helpers/flat_batch_buffer_helper_hw.h +++ b/runtime/helpers/flat_batch_buffer_helper_hw.h @@ -18,6 +18,7 @@ class FlatBatchBufferHelperHw : public FlatBatchBufferHelper { GraphicsAllocation *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) override; char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector &indirectPatchInfo) override; void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) override; + void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) override; }; } // namespace NEO diff --git a/runtime/helpers/flat_batch_buffer_helper_hw.inl b/runtime/helpers/flat_batch_buffer_helper_hw.inl index cbb531e379..32deefcebe 100644 --- a/runtime/helpers/flat_batch_buffer_helper_hw.inl +++ b/runtime/helpers/flat_batch_buffer_helper_hw.inl @@ -190,4 +190,13 @@ void FlatBatchBufferHelperHw::removePipeControlData(size_t pipeContro } } +template +void FlatBatchBufferHelperHw::collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) { + if (scratchAddress) { + auto scratchOffset = reinterpret_cast(reinterpret_cast(csr.getCpuBase()) + commandOffset)[0] & 0x3FF; + PatchInfoData patchInfoData(scratchAddress, scratchOffset, PatchInfoAllocationType::ScratchSpace, csr.getGraphicsAllocation()->getGpuAddress(), commandOffset, PatchInfoAllocationType::Default); + patchInfoCollection.push_back(patchInfoData); + } +} + }; // namespace NEO diff --git 
a/runtime/helpers/preamble.h b/runtime/helpers/preamble.h index 9274f486ce..359a6f6471 100644 --- a/runtime/helpers/preamble.h +++ b/runtime/helpers/preamble.h @@ -32,11 +32,11 @@ struct PreambleHelper { static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr); static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo); - static void programVFEState(LinearStream *pCommandStream, - const HardwareInfo &hwInfo, - int scratchSize, - uint64_t scratchAddress, - uint32_t maxFrontEndThreads); + static uint64_t programVFEState(LinearStream *pCommandStream, + const HardwareInfo &hwInfo, + int scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads); static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config, uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr); static void programKernelDebugging(LinearStream *pCommandStream); diff --git a/runtime/helpers/preamble_bdw_plus.inl b/runtime/helpers/preamble_bdw_plus.inl index 5b73fe759e..e6799ed078 100644 --- a/runtime/helpers/preamble_bdw_plus.inl +++ b/runtime/helpers/preamble_bdw_plus.inl @@ -5,6 +5,7 @@ * */ +#include "runtime/helpers/flat_batch_buffer_helper.h" #include "runtime/helpers/hw_helper.h" #include "runtime/helpers/preamble_base.inl" @@ -25,15 +26,16 @@ uint32_t PreambleHelper::getUrbEntryAllocationSize() { } template -void PreambleHelper::programVFEState(LinearStream *pCommandStream, - const HardwareInfo &hwInfo, - int scratchSize, - uint64_t scratchAddress, - uint32_t maxFrontEndThreads) { +uint64_t PreambleHelper::programVFEState(LinearStream *pCommandStream, + const HardwareInfo &hwInfo, + int scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads) { using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; 
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo); + auto scratchSpaceAddressOffset = static_cast(pCommandStream->getUsed() + MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET); auto pMediaVfeState = reinterpret_cast(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE))); *pMediaVfeState = GfxFamily::cmdInitMediaVfeState; pMediaVfeState->setMaximumNumberOfThreads(maxFrontEndThreads); @@ -45,6 +47,8 @@ void PreambleHelper::programVFEState(LinearStream *pCommandStream, uint32_t highAddress = static_cast(0xFFFFFFFF & (scratchAddress >> 32)); pMediaVfeState->setScratchSpaceBasePointer(lowAddress); pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress); + + return scratchSpaceAddressOffset; } template diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 5ee5a2995f..9104217b37 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -159,6 +159,10 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic auto pp = ptrOffset(crossThreadData, crossThreadDataOffset); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); patchWithRequiredSize(pp, pointerSize, addressToPatch); + if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { + PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast(getCrossThreadData()), crossThreadDataOffset, PatchInfoAllocationType::IndirectObjectHeap, pointerSize); + this->patchInfoDataList.push_back(patchInfoData); + } } if (ssh) { diff --git a/unit_tests/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp b/unit_tests/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp index e49bd37673..e7de7ce86b 100644 --- a/unit_tests/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp @@ -14,6 +14,7 @@ #include "runtime/command_stream/command_stream_receiver.h" #include 
"runtime/command_stream/linear_stream.h" #include "runtime/command_stream/preemption.h" +#include "runtime/command_stream/scratch_space_controller.h" #include "runtime/event/user_event.h" #include "runtime/helpers/cache_policy.h" #include "runtime/helpers/preamble.h" @@ -253,3 +254,60 @@ HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCsrWhenCollectStateB EXPECT_EQ(gshPatch.sourceAllocation, generalStateBase); EXPECT_EQ(gshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET); } + +HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionEnabledWhenScratchSpaceIsProgrammedThenPatchInfoIsCollected) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); + + CommandQueueHw commandQueue(nullptr, pDevice, 0); + auto &commandStream = commandQueue.getCS(4096u); + + std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment)); + mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); + + bool stateBaseAddressDirty; + bool vfeStateDirty; + mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 10u, 0u, 1u, 0u, stateBaseAddressDirty, vfeStateDirty); + + DispatchFlags flags; + mockCsr->requiredScratchSize = 0x200000; + + mockCsr->programVFEState(commandStream, flags, 10); + ASSERT_EQ(1u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); + EXPECT_EQ(mockCsr->getScratchSpaceController()->getScratchPatchAddress(), mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().at(0).sourceAllocation); +} + +HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionDisabledWhenScratchSpaceIsProgrammedThenPatchInfoIsNotCollected) { + CommandQueueHw commandQueue(nullptr, pDevice, 0); + auto &commandStream = commandQueue.getCS(4096u); + + std::unique_ptr> mockCsr(new 
MockCsrHw2(*pDevice->executionEnvironment)); + mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); + + bool stateBaseAddressDirty; + bool vfeStateDirty; + mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 10u, 0u, 1u, 0u, stateBaseAddressDirty, vfeStateDirty); + + DispatchFlags flags; + mockCsr->requiredScratchSize = 0x200000; + + mockCsr->programVFEState(commandStream, flags, 10); + EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); +} + +HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionEnabledWhenMediaVfeStateIsProgrammedWithEmptyScratchThenPatchInfoIsNotCollected) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); + + CommandQueueHw commandQueue(nullptr, pDevice, 0); + auto &commandStream = commandQueue.getCS(4096u); + + std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment)); + mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); + + DispatchFlags flags; + mockCsr->requiredScratchSize = 0x200000; + + mockCsr->programVFEState(commandStream, flags, 10); + EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); +} diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index 9e84f8154a..b7c168fe2e 100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -2697,6 +2697,31 @@ TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateSc EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize()); } +TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) { + DebugManagerStateRestore restore; + DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); + + std::unique_ptr 
device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + MockKernelWithInternals kernel(*device); + MockGraphicsAllocation mockAllocation; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; + uint64_t crossThreadData = 0; + EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); + kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, patchToken); + EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); +} + +TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { + std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + MockKernelWithInternals kernel(*device); + MockGraphicsAllocation mockAllocation; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; + uint64_t crossThreadData = 0; + EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); + kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, patchToken); + EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); +} + namespace NEO { template diff --git a/unit_tests/mocks/mock_csr.h b/unit_tests/mocks/mock_csr.h index 2447f2db89..ddb544a191 100644 --- a/unit_tests/mocks/mock_csr.h +++ b/unit_tests/mocks/mock_csr.h @@ -157,12 +157,14 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiverHw::flushStamp; using CommandStreamReceiverHw::programL3; using CommandStreamReceiverHw::csrSizeRequestFlags; + using CommandStreamReceiverHw::programVFEState; using CommandStreamReceiver::commandStream; using CommandStreamReceiver::dispatchMode; using CommandStreamReceiver::isPreambleSent; using CommandStreamReceiver::lastSentCoherencyRequest; using CommandStreamReceiver::mediaVfeStateDirty; using CommandStreamReceiver::nTo1SubmissionModelEnabled; + using CommandStreamReceiver::requiredScratchSize; using CommandStreamReceiver::taskCount; using 
CommandStreamReceiver::taskLevel; using CommandStreamReceiver::timestampPacketWriteEnabled; diff --git a/unit_tests/preamble/preamble_tests.cpp b/unit_tests/preamble/preamble_tests.cpp index 5285263f95..6803795ac4 100644 --- a/unit_tests/preamble/preamble_tests.cpp +++ b/unit_tests/preamble/preamble_tests.cpp @@ -5,7 +5,9 @@ * */ +#include "core/unit_tests/helpers/debug_manager_state_restore.h" #include "runtime/command_stream/preemption.h" +#include "runtime/helpers/flat_batch_buffer_helper_hw.h" #include "runtime/helpers/preamble.h" #include "runtime/utilities/stackvec.h" #include "test.h" @@ -187,3 +189,15 @@ HWTEST_F(PreambleTest, givenDefaultPreambleWhenGetThreadsMaxNumberIsCalledThenMa uint32_t expected = hwInfo.gtSystemInfo.EUCount * threadsPerEU; EXPECT_EQ(expected, value); } + +HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenPreambleHelperWhenMediaVfeStateIsProgrammedThenOffsetToCommandIsReturned) { + char buffer[64]; + MockGraphicsAllocation graphicsAllocation(buffer, sizeof(buffer)); + LinearStream preambleStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize()); + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + FlatBatchBufferHelperHw helper(*mockDevice->getExecutionEnvironment()); + uint64_t addressToPatch = 0xC0DEC0DE; + + auto offset = PreambleHelper::programVFEState(&preambleStream, mockDevice->getHardwareInfo(), 1024u, addressToPatch, 10u); + EXPECT_NE(0u, offset); +}