diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 1d43d3aa86..1717f2684e 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -161,62 +161,63 @@ class CommandStreamReceiver { disableL3Cache = val; } - bool timestampPacketWriteEnabled = false; - - // taskCount - # of tasks submitted - uint32_t taskCount = 0; - // current taskLevel. Used for determining if a PIPE_CONTROL is needed. - std::atomic taskLevel{0}; - - std::atomic latestSentTaskCount{0}; - std::atomic latestFlushedTaskCount{0}; - std::unique_ptr flushStamp; + std::unique_ptr submissionAggregator; + std::unique_ptr flatBatchBufferHelper; + std::unique_ptr experimentalCmdBuffer; + std::unique_ptr internalAllocationStorage; + std::unique_ptr kmdNotifyHelper; - volatile uint32_t *tagAddress = nullptr; - GraphicsAllocation *tagAllocation = nullptr; - - bool isPreambleSent = false; - bool GSBAFor32BitProgrammed = false; - bool mediaVfeStateDirty = true; - bool lastVmeSubslicesConfig = false; - - uint32_t lastSentL3Config = 0; - int8_t lastSentCoherencyRequest = -1; - int8_t lastMediaSamplerConfig = -1; - PreemptionMode lastPreemptionMode = PreemptionMode::Initial; - uint32_t latestSentStatelessMocsConfig = 0; - uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber; + ResidencyContainer residencyAllocations; + ResidencyContainer evictionAllocations; + MutexType ownershipMutex; + ExecutionEnvironment &executionEnvironment; LinearStream commandStream; - bool stallingPipeControlOnNextFlushRequired = false; - uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; - uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + volatile uint32_t *tagAddress = nullptr; + GraphicsAllocation *tagAllocation = nullptr; GraphicsAllocation *scratchAllocation = nullptr; GraphicsAllocation *preemptionCsrAllocation = nullptr; GraphicsAllocation *debugSurface = nullptr; OSInterface *osInterface = nullptr; - std::unique_ptr submissionAggregator; - ResidencyContainer residencyAllocations; - ResidencyContainer evictionAllocations; - - bool nTo1SubmissionModelEnabled = false; - DispatchMode dispatchMode = DispatchMode::ImmediateDispatch; - bool disableL3Cache = false; - uint32_t requiredScratchSize = 0; - uint64_t totalMemoryUsed = 0u; - SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; IndirectHeap *indirectHeap[IndirectHeap::NUM_TYPES]; - std::unique_ptr flatBatchBufferHelper; - std::unique_ptr experimentalCmdBuffer; - MutexType ownershipMutex; - std::unique_ptr kmdNotifyHelper; - ExecutionEnvironment &executionEnvironment; + + // current taskLevel. Used for determining if a PIPE_CONTROL is needed. + std::atomic taskLevel{0}; + std::atomic latestSentTaskCount{0}; + std::atomic latestFlushedTaskCount{0}; + + DispatchMode dispatchMode = DispatchMode::ImmediateDispatch; + SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; + PreemptionMode lastPreemptionMode = PreemptionMode::Initial; + uint64_t totalMemoryUsed = 0u; + uint32_t deviceIndex = 0u; - std::unique_ptr internalAllocationStorage; + // taskCount - # of tasks submitted + uint32_t taskCount = 0; + uint32_t lastSentL3Config = 0; + uint32_t latestSentStatelessMocsConfig = 0; + uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber; + uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; + uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + + uint32_t requiredScratchSize = 0; + + int8_t lastSentCoherencyRequest = -1; + int8_t lastMediaSamplerConfig = -1; + + bool isPreambleSent = false; + bool isStateSipSent = false; + bool GSBAFor32BitProgrammed = false; + bool mediaVfeStateDirty = true; + bool lastVmeSubslicesConfig = false; + bool disableL3Cache = false; + bool stallingPipeControlOnNextFlushRequired = false; + bool timestampPacketWriteEnabled = false; + bool nTo1SubmissionModelEnabled = false; }; typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump, ExecutionEnvironment &executionEnvironment); diff --git a/runtime/command_stream/command_stream_receiver_hw.h b/runtime/command_stream/command_stream_receiver_hw.h index fed3f5bd10..646463a02c 100644 --- a/runtime/command_stream/command_stream_receiver_hw.h +++ b/runtime/command_stream/command_stream_receiver_hw.h @@ -77,6 +77,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config); void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags); void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags); + void programStateSip(LinearStream &cmdStream, Device &device); void handleEventsTimestampPacketTags(LinearStream &csr, DispatchFlags &dispatchFlags, Device ¤tDevice); virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags); virtual void initPageTableManagerRegisters(LinearStream &csr){}; diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index 9df26a5194..1cec2403e2 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -335,6 +335,8 @@ CompletionStamp CommandStreamReceiverHw::flushTask( device.getGmmHelper()); } + programStateSip(commandStreamCSR, device); + latestSentStatelessMocsConfig = requiredL3Index; if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { @@ -641,6 +643,9 @@ template size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device) { size_t size = getRequiredCmdSizeForPreamble(device); size += getRequiredStateBaseAddressSize(); + if (!this->isStateSipSent) { + size += PreemptionHelper::getRequiredStateSipCmdSize(device); + } size += getRequiredPipeControlSize(); size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); @@ -704,6 +709,14 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForPreemption(const return PreemptionHelper::getRequiredCmdStreamSize(dispatchFlags.preemptionMode, this->lastPreemptionMode); } +template +inline void CommandStreamReceiverHw::programStateSip(LinearStream &cmdStream, Device &device) { + if (!this->isStateSipSent) { + PreemptionHelper::programStateSip(cmdStream, device); + this->isStateSipSent = true; + } +} + template inline void CommandStreamReceiverHw::programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config) { typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; diff --git a/runtime/command_stream/preemption.h b/runtime/command_stream/preemption.h index ff9dda42e8..b9a2a288b4 100644 --- a/runtime/command_stream/preemption.h +++ b/runtime/command_stream/preemption.h @@ -28,9 +28,14 @@ class PreemptionHelper { template static size_t getRequiredPreambleSize(const Device &device); + template + static size_t getRequiredStateSipCmdSize(const Device &device); template - static void programPreamble(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); + static void programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); + + template + static void programStateSip(LinearStream &preambleCmdStream, Device &device); template static size_t getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); diff --git a/runtime/command_stream/preemption.inl b/runtime/command_stream/preemption.inl index cd2684cd69..328794e384 100644 --- a/runtime/command_stream/preemption.inl +++ b/runtime/command_stream/preemption.inl @@ -59,19 +59,25 @@ void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, co } template -void PreemptionHelper::programPreamble(LinearStream &preambleCmdStream, Device &device, - const GraphicsAllocation *preemptionCsr) { +void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr) { using GPGPU_CSR_BASE_ADDRESS = typename GfxFamily::GPGPU_CSR_BASE_ADDRESS; - using STATE_SIP = typename GfxFamily::STATE_SIP; - bool sourceLevelDebuggerActive = device.isSourceLevelDebuggerActive(); - if (device.getPreemptionMode() == PreemptionMode::MidThread) { + bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; + if (isMidThreadPreemption) { UNRECOVERABLE_IF(nullptr == preemptionCsr); auto csr = reinterpret_cast(preambleCmdStream.getSpace(sizeof(GPGPU_CSR_BASE_ADDRESS))); csr->init(); csr->setGpgpuCsrBaseAddress(preemptionCsr->getGpuAddressToPatch()); } - if (device.getPreemptionMode() == PreemptionMode::MidThread || sourceLevelDebuggerActive) { +} + +template +void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { + using STATE_SIP = typename GfxFamily::STATE_SIP; + bool sourceLevelDebuggerActive = device.isSourceLevelDebuggerActive(); + bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; + + if (isMidThreadPreemption || sourceLevelDebuggerActive) { auto sip = reinterpret_cast(preambleCmdStream.getSpace(sizeof(STATE_SIP))); sip->init(); auto sipType = SipKernel::getSipKernelType(device.getHardwareInfo().pPlatform->eRenderCoreFamily, sourceLevelDebuggerActive); @@ -109,14 +115,20 @@ size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMo template size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) { - size_t size = 0; if (device.getPreemptionMode() == PreemptionMode::MidThread) { - size += sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS); + return sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS); } - if (device.getPreemptionMode() == PreemptionMode::MidThread || device.isSourceLevelDebuggerActive()) { + return 0; +} + +template +size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device) { + size_t size = 0; + bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; + + if (isMidThreadPreemption || device.isSourceLevelDebuggerActive()) { size += sizeof(typename GfxFamily::STATE_SIP); } - return size; } diff --git a/runtime/gen10/preamble_gen10.cpp b/runtime/gen10/preamble_gen10.cpp index a848cada4a..a7448f99b0 100644 --- a/runtime/gen10/preamble_gen10.cpp +++ b/runtime/gen10/preamble_gen10.cpp @@ -49,13 +49,6 @@ size_t PreambleHelper::getThreadArbitrationCommandsSize() { return sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); } -template <> -size_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { - size_t size = PreemptionHelper::getRequiredPreambleSize(device); - size += getKernelDebuggingCommandsSize(device.isSourceLevelDebuggerActive()); - return size; -} - template <> uint32_t PreambleHelper::getUrbEntryAllocationSize() { return 1024; diff --git a/runtime/gen10/preemption_gen10.cpp b/runtime/gen10/preemption_gen10.cpp index 2afe9ae892..c670065d24 100644 --- a/runtime/gen10/preemption_gen10.cpp +++ b/runtime/gen10/preemption_gen10.cpp @@ -42,8 +42,11 @@ void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pComman template void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr, Device &device); -template void PreemptionHelper::programPreamble(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); + template size_t PreemptionHelper::getRequiredPreambleSize(const Device &device); +template void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); +template void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device); +template size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device); template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); template <> diff --git a/runtime/gen8/hw_cmds_generated_patched.h b/runtime/gen8/hw_cmds_generated_patched.h index c52eb5f4b5..71c9070051 100644 --- a/runtime/gen8/hw_cmds_generated_patched.h +++ b/runtime/gen8/hw_cmds_generated_patched.h @@ -487,4 +487,129 @@ typedef struct tagMI_STORE_DATA_IMM { } } MI_STORE_DATA_IMM; STATIC_ASSERT(20 == sizeof(MI_STORE_DATA_IMM)); + +typedef struct tagSTATE_SIP { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint64_t Reserved_32 : BITFIELD_RANGE(0, 3); + uint64_t SystemInstructionPointer : BITFIELD_RANGE(4, 63); + } Common; + uint32_t RawData[3]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x1, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_STATE_SIP = 0x2, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_COMMON = 0x0, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + typedef enum tagPATCH_CONSTANTS { + SYSTEMINSTRUCTIONPOINTER_BYTEOFFSET = 0x4, + SYSTEMINSTRUCTIONPOINTER_INDEX = 0x1, + } PATCH_CONSTANTS; + void init() { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_STATE_SIP; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_COMMON; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + } + static tagSTATE_SIP sInit() { + STATE_SIP state; + state.init(); + return state; + } + inline uint32_t &getRawData(uint32_t const index) { + DEBUG_BREAK_IF(index >= 3); + return TheStructure.RawData[index]; + } + typedef enum tagSYSTEMINSTRUCTIONPOINTER { + SYSTEMINSTRUCTIONPOINTER_BIT_SHIFT = 0x4, + SYSTEMINSTRUCTIONPOINTER_ALIGN_SIZE = 0x10, + } SYSTEMINSTRUCTIONPOINTER; + inline uint64_t getSystemInstructionPointer() const { + return (uint64_t)TheStructure.Common.SystemInstructionPointer << SYSTEMINSTRUCTIONPOINTER_BIT_SHIFT; + } + inline void setSystemInstructionPointer(uint64_t value) { + TheStructure.Common.SystemInstructionPointer = value >> SYSTEMINSTRUCTIONPOINTER_BIT_SHIFT; + } +} STATE_SIP; +STATIC_ASSERT(12 == sizeof(STATE_SIP)); +typedef struct tagGPGPU_CSR_BASE_ADDRESS { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint64_t Reserved_32 : BITFIELD_RANGE(0, 11); + uint64_t GpgpuCsrBaseAddress : BITFIELD_RANGE(12, 63); + } Common; + uint32_t RawData[3]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_UNNAMED_1 = 0x1, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_GPGPU_CSR_BASE_ADDRESS = 0x4, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_COMMON = 0x0, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + typedef enum tagPATCH_CONSTANTS { + GPGPUCSRBASEADDRESS_BYTEOFFSET = 0x4, + GPGPUCSRBASEADDRESS_INDEX = 0x1, + } PATCH_CONSTANTS; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_UNNAMED_1; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_GPGPU_CSR_BASE_ADDRESS; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_COMMON; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + } + static tagGPGPU_CSR_BASE_ADDRESS sInit(void) { + GPGPU_CSR_BASE_ADDRESS state; + state.init(); + return state; + } + inline uint32_t &getRawData(uint32_t const index) { + DEBUG_BREAK_IF(index >= 3); + return TheStructure.RawData[index]; + } + typedef enum tagGPGPUCSRBASEADDRESS { + GPGPUCSRBASEADDRESS_BIT_SHIFT = 0xC, + GPGPUCSRBASEADDRESS_ALIGN_SIZE = 0x1000, + } GPGPUCSRBASEADDRESS; + inline uint64_t getGpgpuCsrBaseAddress() const { + return (uint64_t)TheStructure.Common.GpgpuCsrBaseAddress << GPGPUCSRBASEADDRESS_BIT_SHIFT; + } + inline void setGpgpuCsrBaseAddress(uint64_t value) { + TheStructure.Common.GpgpuCsrBaseAddress = value >> GPGPUCSRBASEADDRESS_BIT_SHIFT; + } +} GPGPU_CSR_BASE_ADDRESS; +STATIC_ASSERT(12 == sizeof(GPGPU_CSR_BASE_ADDRESS)); #pragma pack() diff --git a/runtime/gen8/preamble_gen8.cpp b/runtime/gen8/preamble_gen8.cpp index 6775721dea..607915de2c 100644 --- a/runtime/gen8/preamble_gen8.cpp +++ b/runtime/gen8/preamble_gen8.cpp @@ -40,5 +40,10 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStre pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); } +template <> +size_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { + return getKernelDebuggingCommandsSize(device.isSourceLevelDebuggerActive()); +} + template struct PreambleHelper; } // namespace OCLRT diff --git a/runtime/gen8/preemption_gen8.cpp b/runtime/gen8/preemption_gen8.cpp index 8c1520d61c..592ae62c0c 100644 --- a/runtime/gen8/preemption_gen8.cpp +++ b/runtime/gen8/preemption_gen8.cpp @@ -47,8 +47,16 @@ size_t PreemptionHelper::getRequiredPreambleSize(const Device &device } template <> -void PreemptionHelper::programPreamble(LinearStream &preambleCmdStream, Device &device, - const GraphicsAllocation *preemptionCsr) { +size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device) { + return 0; +} + +template <> +void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr) { +} + +template <> +void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { } template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); diff --git a/runtime/gen9/preamble_gen9.cpp b/runtime/gen9/preamble_gen9.cpp index 2c47df7f7d..541e4efcf8 100644 --- a/runtime/gen9/preamble_gen9.cpp +++ b/runtime/gen9/preamble_gen9.cpp @@ -76,12 +76,5 @@ size_t PreambleHelper::getThreadArbitrationCommandsSize() { return sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); } -template <> -size_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { - size_t totalSize = PreemptionHelper::getRequiredPreambleSize(device); - totalSize += getKernelDebuggingCommandsSize(device.isSourceLevelDebuggerActive()); - return totalSize; -} - template struct PreambleHelper; } // namespace OCLRT diff --git a/runtime/gen9/preemption_gen9.cpp b/runtime/gen9/preemption_gen9.cpp index 3e9277d267..5c754a6726 100644 --- a/runtime/gen9/preemption_gen9.cpp +++ b/runtime/gen9/preemption_gen9.cpp @@ -32,8 +32,11 @@ template void PreemptionHelper::programCmdStream(LinearStream &cmdStr PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr, Device &device); -template void PreemptionHelper::programPreamble(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); + template size_t PreemptionHelper::getRequiredPreambleSize(const Device &device); +template void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); +template void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device); +template size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device); template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); template size_t PreemptionHelper::getPreemptionWaCsSize(const Device &device); template void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device); diff --git a/runtime/helpers/preamble.inl b/runtime/helpers/preamble.inl index 36e1ee1073..0ae5184fab 100644 --- a/runtime/helpers/preamble.inl +++ b/runtime/helpers/preamble.inl @@ -37,7 +37,8 @@ void PreambleHelper::programGenSpecificPreambleWorkArounds(LinearStre template size_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { - size_t totalSize = getKernelDebuggingCommandsSize(device.isSourceLevelDebuggerActive()); + size_t totalSize = PreemptionHelper::getRequiredPreambleSize(device); + totalSize += getKernelDebuggingCommandsSize(device.isSourceLevelDebuggerActive()); return totalSize; } @@ -83,7 +84,7 @@ void PreambleHelper::programPreamble(LinearStream *pCommandStream, De template void PreambleHelper::programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr) { - PreemptionHelper::programPreamble(*pCommandStream, device, preemptionCsr); + PreemptionHelper::programCsrBaseAddress(*pCommandStream, device, preemptionCsr); } template diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index a2dd700f46..329bc51dd5 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -50,6 +50,74 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenPreambleSentAndTh EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice)); } +HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenNotSentStateSipWhenFirstTaskIsFlushedThenStateSipCmdIsAddedAndIsStateSipSentSetToTrue) { + using STATE_SIP = typename FamilyType::STATE_SIP; + + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { + mockDevice->setPreemptionMode(PreemptionMode::MidThread); + + auto &csr = mockDevice->getUltCommandStreamReceiver(); + csr.isPreambleSent = true; + + CommandQueueHw commandQueue(nullptr, mockDevice.get(), 0); + auto &commandStream = commandQueue.getCS(4096u); + + DispatchFlags dispatchFlags; + dispatchFlags.preemptionMode = PreemptionMode::MidThread; + + void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); + + std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); + std::unique_ptr heap(new IndirectHeap(allocation.get())); + + csr.flushTask(commandStream, + 0, + *heap.get(), + *heap.get(), + *heap.get(), + 0, + dispatchFlags, + *mockDevice); + + EXPECT_TRUE(csr.isStateSipSent); + + HardwareParse hwParser; + hwParser.parseCommands(csr.getCS(0)); + + auto stateSipItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + EXPECT_NE(hwParser.cmdList.end(), stateSipItor); + + alignedFree(buffer); + } +} + +HWTEST_F(UltCommandStreamReceiverTest, givenCsrWhenProgramStateSipIsCalledThenIsStateSipCalledIsSetToTrue) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + + auto requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice); + StackVec buffer(requiredSize); + LinearStream cmdStream(buffer.begin(), buffer.size()); + + commandStreamReceiver.programStateSip(cmdStream, *pDevice); + EXPECT_TRUE(commandStreamReceiver.isStateSipSent); +} + +HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsNotIncluded) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + DispatchFlags dispatchFlags; + + commandStreamReceiver.isStateSipSent = false; + auto sizeWithStateSipIsNotSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); + + commandStreamReceiver.isStateSipSent = true; + auto sizeWhenSipIsSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); + + auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice); + EXPECT_EQ(sizeForStateSip, sizeWithStateSipIsNotSent - sizeWhenSipIsSent); +} + HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingCmdSize) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; diff --git a/unit_tests/gen10/preamble_tests_gen10.cpp b/unit_tests/gen10/preamble_tests_gen10.cpp index 2400047f4b..9dcc9bafad 100644 --- a/unit_tests/gen10/preamble_tests_gen10.cpp +++ b/unit_tests/gen10/preamble_tests_gen10.cpp @@ -16,8 +16,8 @@ typedef CNLFamily GfxFamily; using PreambleTestGen10 = ::testing::Test; -GEN10TEST_F(PreambleTestGen10, givenMidThreadPreemptionAndDebuggingActiveWhenPreambleIsPrograamedThenCorrectSipKernelIsUsed) { - SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebuggingActiveWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest(); +GEN10TEST_F(PreambleTestGen10, givenMidThreadPreemptionAndDebuggingActiveWhenStateSipIsProgrammedThenCorrectSipKernelIsUsed) { + SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebuggingActiveWhenStateSipIsProgrammedThenCorrectSipKernelIsUsedTest(); } GEN10TEST_F(PreambleTestGen10, givenMidThreadPreemptionAndDebuggingActiveWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturned) { @@ -32,8 +32,8 @@ GEN10TEST_F(PreambleTestGen10, givenPreemptionDisabledAndDebuggingActiveWhenPrea SourceLevelDebuggerPreambleTest::givenPreemptionDisabledAndDebuggingActiveWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); } -GEN10TEST_F(PreambleTestGen10, givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsPrograamedThenCorrectSipKernelIsUsed) { - SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest(); +GEN10TEST_F(PreambleTestGen10, givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsProgrammedThenCorrectSipKernelIsUsed) { + SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsProgrammedThenCorrectSipKernelIsUsedTest(); } GEN10TEST_F(PreambleTestGen10, givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturned) { @@ -45,17 +45,5 @@ GEN10TEST_F(PreambleTestGen10, givenDisabledPreemptionAndDisabledDebuggingWhenPr } GEN10TEST_F(PreambleTestGen10, givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAndStateSipAreInlcuded) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); - auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); - - mockDevice->setSourceLevelDebuggerActive(false); - size_t withoutDebugging = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - mockDevice->setSourceLevelDebuggerActive(true); - size_t withDebugging = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - EXPECT_LT(withoutDebugging, withDebugging); - - size_t diff = withDebugging - withoutDebugging; - size_t sizeExpected = sizeof(typename FamilyType::STATE_SIP) + 2 * sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM); - EXPECT_EQ(sizeExpected, diff); + SourceLevelDebuggerPreambleTest::givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAreInlcudedTest(); } diff --git a/unit_tests/gen10/test_preamble_gen10.cpp b/unit_tests/gen10/test_preamble_gen10.cpp index f2b2d7b2a6..cdfc4bc89e 100644 --- a/unit_tests/gen10/test_preamble_gen10.cpp +++ b/unit_tests/gen10/test_preamble_gen10.cpp @@ -145,10 +145,7 @@ GEN10TEST_F(PreemptionWatermarkGen10, givenPreambleThenPreambleWorkAroundsIsNotP parseCommands(linearStream); auto cmd = findMmioCmd(cmdList.begin(), cmdList.end(), FfSliceCsChknReg2::address); - ASSERT_EQ(nullptr, cmd); - - size_t expectedSize = PreemptionHelper::getRequiredPreambleSize(MockDevice(*platformDevices[0])); - EXPECT_EQ(expectedSize, PreambleHelper::getAdditionalCommandsSize(MockDevice(*platformDevices[0]))); + EXPECT_EQ(nullptr, cmd); } typedef PreambleFixture ThreadArbitrationGen10; @@ -181,38 +178,3 @@ GEN10TEST_F(ThreadArbitrationGen10, givenPreambleWhenItIsProgrammedThenThreadArb GEN10TEST_F(ThreadArbitrationGen10, defaultArbitrationPolicy) { EXPECT_EQ(ThreadArbitrationPolicy::RoundRobinAfterDependency, PreambleHelper::getDefaultThreadArbitrationPolicy()); } - -using PreambleTestGen10 = ::testing::Test; - -GEN10TEST_F(PreambleTestGen10, givenProgrammingPreambleWhenPreemptionIsTakenIntoAccountThenCSRBaseAddressIsEqualCSRGpuAddress) { - using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; - auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); - - mockDevice->setPreemptionMode(PreemptionMode::MidThread); - auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); - std::array preambleBuffer{}; - LinearStream preambleStream(&preambleBuffer, preambleBuffer.size()); - StackVec preemptionBuffer; - preemptionBuffer.resize(cmdSizePreemptionMidThread); - LinearStream preemptionStream(&*preemptionBuffer.begin(), preemptionBuffer.size()); - - uintptr_t csrGpuAddr = 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface(reinterpret_cast(csrGpuAddr), 1024); - - PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, - ThreadArbitrationPolicy::RoundRobin, &csrSurface); - - PreemptionHelper::programPreamble(preemptionStream, *mockDevice, &csrSurface); - - HardwareParse hwParserFullPreamble; - hwParserFullPreamble.parseCommands(preambleStream, 0); - auto cmd = hwParserFullPreamble.getCommand(); - EXPECT_NE(nullptr, cmd); - EXPECT_EQ(static_cast(csrGpuAddr), cmd->getGpgpuCsrBaseAddress()); - - HardwareParse hwParserOnlyPreemption; - hwParserOnlyPreemption.parseCommands(preemptionStream, 0); - cmd = hwParserOnlyPreemption.getCommand(); - EXPECT_NE(nullptr, cmd); - EXPECT_EQ(static_cast(csrGpuAddr), cmd->getGpgpuCsrBaseAddress()); -} diff --git a/unit_tests/gen10/test_preemption_gen10.cpp b/unit_tests/gen10/test_preemption_gen10.cpp index 923588f87b..e46cf496b4 100644 --- a/unit_tests/gen10/test_preemption_gen10.cpp +++ b/unit_tests/gen10/test_preemption_gen10.cpp @@ -45,44 +45,35 @@ PreemptionTestHwDetails GetPreemptionTestHwDetails() { return ret; } -GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsNotAvailableThenDoesNotProgramPreamble) { +GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsNotAvailableThenDoesNotProgramStateSip) { device->setPreemptionMode(PreemptionMode::ThreadGroup); - size_t requiredSize = PreemptionHelper::getRequiredPreambleSize(*device); + size_t requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*device); EXPECT_EQ(0U, requiredSize); LinearStream cmdStream{nullptr, 0}; - PreemptionHelper::programPreamble(cmdStream, *device, nullptr); + PreemptionHelper::programStateSip(cmdStream, *device); EXPECT_EQ(0U, cmdStream.getUsed()); } -GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsAvailableThenProgramsPreamble) { - using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; +GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsAvailableThenStateSipIsProgrammed) { using STATE_SIP = typename FamilyType::STATE_SIP; device->setPreemptionMode(PreemptionMode::MidThread); executionEnvironment->DisableMidThreadPreemption = 0; - size_t minCsrSize = device->getHardwareInfo().pSysInfo->CsrSizeInMb * MemoryConstants::megaByte; - uint64_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface((void *)minCsrAlignment, minCsrSize); + size_t requiredCmdStreamSize = PreemptionHelper::getRequiredStateSipCmdSize(*device); + size_t expectedPreambleSize = sizeof(STATE_SIP); + EXPECT_EQ(expectedPreambleSize, requiredCmdStreamSize); - // verify preamble programming - size_t requiredPreambleSize = PreemptionHelper::getRequiredPreambleSize(*device); - size_t expectedPreambleSize = sizeof(GPGPU_CSR_BASE_ADDRESS) + sizeof(STATE_SIP); - EXPECT_EQ(expectedPreambleSize, requiredPreambleSize); + StackVec streamStorage(requiredCmdStreamSize); + ASSERT_LE(requiredCmdStreamSize, streamStorage.size()); - StackVec preambleStorage(requiredPreambleSize); - ASSERT_LE(requiredPreambleSize, preambleStorage.size()); - LinearStream preambleCmdStream{preambleStorage.begin(), preambleStorage.size()}; - PreemptionHelper::programPreamble(preambleCmdStream, *device, &csrSurface); + LinearStream cmdStream{streamStorage.begin(), streamStorage.size()}; + PreemptionHelper::programStateSip(cmdStream, *device); HardwareParse hwParsePreamble; - hwParsePreamble.parseCommands(preambleCmdStream); - - auto csrBaseAddressCmd = hwParsePreamble.getCommand(); - ASSERT_NE(nullptr, csrBaseAddressCmd); - EXPECT_EQ(csrSurface.getGpuAddressToPatch(), csrBaseAddressCmd->getGpgpuCsrBaseAddress()); + hwParsePreamble.parseCommands(cmdStream); auto stateSipCmd = hwParsePreamble.getCommand(); ASSERT_NE(nullptr, stateSipCmd); @@ -294,3 +285,24 @@ GEN10TEST_F(Gen10PreemptionTests, givenInterfaceDescriptorDataWhenNoMidThreadPre PreemptionHelper::programInterfaceDescriptorDataPreemption(&iddArg, PreemptionMode::ThreadGroup); EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable()); } + +GEN10TEST_F(Gen10PreemptionTests, givenMidThreadPreemptionModeWhenStateSipIsProgrammedThenSipEqualsSipAllocationGpuAddressToPatch) { + using STATE_SIP = typename FamilyType::STATE_SIP; + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + mockDevice->setPreemptionMode(PreemptionMode::MidThread); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + + StackVec preemptionBuffer; + preemptionBuffer.resize(cmdSizePreemptionMidThread); + LinearStream preemptionStream(&*preemptionBuffer.begin(), preemptionBuffer.size()); + + PreemptionHelper::programStateSip(preemptionStream, *mockDevice); + + HardwareParse hwParserOnlyPreemption; + hwParserOnlyPreemption.parseCommands(preemptionStream, 0); + auto cmd = hwParserOnlyPreemption.getCommand(); + + EXPECT_NE(nullptr, cmd); + auto sipType = SipKernel::getSipKernelType(mockDevice->getHardwareInfo().pPlatform->eRenderCoreFamily, mockDevice->isSourceLevelDebuggerActive()); + EXPECT_EQ(mockDevice->getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, *mockDevice).getSipAllocation()->getGpuAddressToPatch(), cmd->getSystemInstructionPointer()); +} diff --git a/unit_tests/gen8/cmd_parse_gen8.cpp b/unit_tests/gen8/cmd_parse_gen8.cpp index f94dbdbed3..8569fe7f7c 100644 --- a/unit_tests/gen8/cmd_parse_gen8.cpp +++ b/unit_tests/gen8/cmd_parse_gen8.cpp @@ -12,6 +12,7 @@ using GenGfxFamily = OCLRT::BDWFamily; #include "unit_tests/gen_common/cmd_parse_base.inl" #include "unit_tests/gen_common/cmd_parse_base_mi_arb.inl" #include "unit_tests/gen_common/cmd_parse_gpgpu_walker.inl" +#include "unit_tests/gen_common/cmd_parse_sip.inl" #include "unit_tests/helpers/hw_parse.h" #include "unit_tests/helpers/hw_parse.inl" diff --git a/unit_tests/gen8/test_preemption_gen8.cpp b/unit_tests/gen8/test_preemption_gen8.cpp index ed7dc925f1..91d8add075 100644 --- a/unit_tests/gen8/test_preemption_gen8.cpp +++ b/unit_tests/gen8/test_preemption_gen8.cpp @@ -35,12 +35,12 @@ GEN8TEST_F(Gen8PreemptionTests, allowThreadGroupPreemptionReturnsTrue) { EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(kernel.get(), waTable)); } -GEN8TEST_F(Gen8PreemptionTests, doesNotProgramPreamble) { - size_t requiredSize = PreemptionHelper::getRequiredPreambleSize(*device); +GEN8TEST_F(Gen8PreemptionTests, whenProgramStateSipIsCalledThenNoCmdsAreProgrammed) { + size_t requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*device); EXPECT_EQ(0U, requiredSize); LinearStream cmdStream{nullptr, 0}; - PreemptionHelper::programPreamble(cmdStream, *device, nullptr); + PreemptionHelper::programStateSip(cmdStream, *device); EXPECT_EQ(0U, cmdStream.getUsed()); } diff --git a/unit_tests/gen9/command_stream_receiver_hw_tests_gen9.cpp b/unit_tests/gen9/command_stream_receiver_hw_tests_gen9.cpp index 5c3d8f21ae..419543f61c 100644 --- a/unit_tests/gen9/command_stream_receiver_hw_tests_gen9.cpp +++ b/unit_tests/gen9/command_stream_receiver_hw_tests_gen9.cpp @@ -30,71 +30,27 @@ using namespace OCLRT; using CommandStreamReceiverHwTestGen9 = CommandStreamReceiverHwTest; -GEN9TEST_F(UltCommandStreamReceiverTest, givenNotSentPreambleAndMidThreadPreemptionWhenPreambleIsProgrammedThenCorrectSipKernelGpuAddressIsProgrammed) { +GEN9TEST_F(UltCommandStreamReceiverTest, whenPreambleIsProgrammedThenStateSipCmdIsNotPresentInPreambleCmdStream) { using STATE_SIP = typename FamilyType::STATE_SIP; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; - size_t minCsrSize = pDevice->getHardwareInfo().pSysInfo->CsrSizeInMb * MemoryConstants::megaByte; - uint64_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface((void *)minCsrAlignment, minCsrSize); - commandStreamReceiver.setPreemptionCsrAllocation(&csrSurface); - - pDevice->setPreemptionMode(PreemptionMode::MidThread); - uint32_t newL3Config; - DispatchFlags dispatchFlags; - - auto cmdSizePreambleMidThread = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); - StackVec preemptionBuffer; - preemptionBuffer.resize(cmdSizePreambleMidThread); - LinearStream preambleStream(&*preemptionBuffer.begin(), preemptionBuffer.size()); - auto sipAllocation = pDevice->getExecutionEnvironment()->getBuiltIns()->getSipKernel(SipKernelType::Csr, *pDevice).getSipAllocation(); - commandStreamReceiver.programPreamble(preambleStream, *pDevice, dispatchFlags, newL3Config); - - this->parseCommands(preambleStream); - auto itorStateSip = find(this->cmdList.begin(), this->cmdList.end()); - ASSERT_NE(this->cmdList.end(), itorStateSip); - - STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip; - auto sipAddress = stateSipCmd->getSystemInstructionPointer(); - EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); -} - -GEN9TEST_F(UltCommandStreamReceiverTest, givenNotSentPreambleAndKernelDebuggingActiveWhenPreambleIsProgrammedThenCorrectSipKernelGpuAddressIsProgrammed) { - using STATE_SIP = typename FamilyType::STATE_SIP; - auto &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns(); - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.isPreambleSent = false; - size_t minCsrSize = pDevice->getHardwareInfo().pSysInfo->CsrSizeInMb * MemoryConstants::megaByte; - uint64_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface((void *)minCsrAlignment, minCsrSize); - commandStreamReceiver.setPreemptionCsrAllocation(&csrSurface); - pDevice->setPreemptionMode(PreemptionMode::Disabled); pDevice->setSourceLevelDebuggerActive(true); uint32_t newL3Config; DispatchFlags dispatchFlags; - auto cmdSizePreambleMidThread = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); - StackVec preemptionBuffer; - preemptionBuffer.resize(cmdSizePreambleMidThread); - LinearStream preambleStream(&*preemptionBuffer.begin(), preemptionBuffer.size()); - auto dbgLocalSipAllocation = builtIns.getSipKernel(SipKernelType::DbgCsrLocal, *pDevice).getSipAllocation(); - auto sipAllocation = builtIns.getSipKernel(SipKernelType::Csr, *pDevice).getSipAllocation(); + auto cmdSizePreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); + StackVec preambleBuffer; + preambleBuffer.resize(cmdSizePreamble); - ASSERT_NE(builtIns.getSipKernel(SipKernelType::DbgCsrLocal, *pDevice).getType(), builtIns.getSipKernel(SipKernelType::Csr, *pDevice).getType()); - ASSERT_NE(dbgLocalSipAllocation, nullptr); - ASSERT_NE(sipAllocation, nullptr); + LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); commandStreamReceiver.programPreamble(preambleStream, *pDevice, dispatchFlags, newL3Config); this->parseCommands(preambleStream); auto itorStateSip = find(this->cmdList.begin(), this->cmdList.end()); - ASSERT_NE(this->cmdList.end(), itorStateSip); - - STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip; - auto sipAddress = stateSipCmd->getSystemInstructionPointer(); - EXPECT_EQ(dbgLocalSipAllocation->getGpuAddressToPatch(), sipAddress); + EXPECT_EQ(this->cmdList.end(), itorStateSip); } GEN9TEST_F(CommandStreamReceiverHwTestGen9, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { diff --git a/unit_tests/gen9/preamble_tests_gen9.cpp b/unit_tests/gen9/preamble_tests_gen9.cpp index 855105d9f6..150c3e4751 100644 --- a/unit_tests/gen9/preamble_tests_gen9.cpp +++ b/unit_tests/gen9/preamble_tests_gen9.cpp @@ -16,8 +16,8 @@ typedef SKLFamily GfxFamily; using PreambleTestGen9 = ::testing::Test; -GEN9TEST_F(PreambleTestGen9, givenMidThreadPreemptionAndDebuggingActiveWhenPreambleIsPrograamedThenCorrectSipKernelIsUsed) { - SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebuggingActiveWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest(); +GEN9TEST_F(PreambleTestGen9, givenMidThreadPreemptionAndDebuggingActiveWhenStateSipIsProgrammedThenCorrectSipKernelIsUsed) { + SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebuggingActiveWhenStateSipIsProgrammedThenCorrectSipKernelIsUsedTest(); } GEN9TEST_F(PreambleTestGen9, givenMidThreadPreemptionAndDebuggingActiveWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturned) { @@ -32,8 +32,8 @@ GEN9TEST_F(PreambleTestGen9, givenPreemptionDisabledAndDebuggingActiveWhenPreamb SourceLevelDebuggerPreambleTest::givenPreemptionDisabledAndDebuggingActiveWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); } -GEN9TEST_F(PreambleTestGen9, givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsPrograamedThenCorrectSipKernelIsUsed) { - SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest(); +GEN9TEST_F(PreambleTestGen9, givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsProgrammedThenCorrectSipKernelIsUsed) { + SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsProgrammedThenCorrectSipKernelIsUsedTest(); } GEN9TEST_F(PreambleTestGen9, givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturned) { @@ -44,52 +44,6 @@ GEN9TEST_F(PreambleTestGen9, givenDisabledPreemptionAndDisabledDebuggingWhenPrea SourceLevelDebuggerPreambleTest::givenDisabledPreemptionAndDisabledDebuggingWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); } -GEN9TEST_F(PreambleTestGen9, givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAndStateSipAreInlcuded) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); - auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); - - mockDevice->setSourceLevelDebuggerActive(false); - size_t withoutDebugging = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - mockDevice->setSourceLevelDebuggerActive(true); - size_t withDebugging = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - EXPECT_LT(withoutDebugging, withDebugging); - - size_t diff = withDebugging - withoutDebugging; - size_t sizeExpected = sizeof(typename FamilyType::STATE_SIP) + 2 * sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM); - EXPECT_EQ(sizeExpected, diff); -} - -GEN9TEST_F(PreambleTestGen9, givenProgrammingPreambleWhenPreemptionIsTakenIntoAccountThenCSRBaseAddressIsEqualCSRGpuAddress) { - using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; - auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); - - mockDevice->setPreemptionMode(PreemptionMode::MidThread); - auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); - std::array preambleBuffer{}; - LinearStream preambleStream(&preambleBuffer, preambleBuffer.size()); - - StackVec preemptionBuffer; - preemptionBuffer.resize(cmdSizePreemptionMidThread); - LinearStream preemptionStream(&*preemptionBuffer.begin(), preemptionBuffer.size()); - - uintptr_t csrGpuAddr = 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface(reinterpret_cast(csrGpuAddr), 1024); - - PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, - ThreadArbitrationPolicy::RoundRobin, &csrSurface); - - PreemptionHelper::programPreamble(preemptionStream, *mockDevice, &csrSurface); - - HardwareParse hwParserFullPreamble; - hwParserFullPreamble.parseCommands(preambleStream, 0); - auto cmd = hwParserFullPreamble.getCommand(); - EXPECT_NE(nullptr, cmd); - EXPECT_EQ(static_cast(csrGpuAddr), cmd->getGpgpuCsrBaseAddress()); - - HardwareParse hwParserOnlyPreemption; - hwParserOnlyPreemption.parseCommands(preemptionStream, 0); - cmd = hwParserOnlyPreemption.getCommand(); - EXPECT_NE(nullptr, cmd); - EXPECT_EQ(static_cast(csrGpuAddr), cmd->getGpgpuCsrBaseAddress()); +GEN9TEST_F(PreambleTestGen9, givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAreInlcuded) { + SourceLevelDebuggerPreambleTest::givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAreInlcudedTest(); } diff --git a/unit_tests/gen9/test_preemption_gen9.cpp b/unit_tests/gen9/test_preemption_gen9.cpp index 3f5ab3e7ee..5f146f49b4 100644 --- a/unit_tests/gen9/test_preemption_gen9.cpp +++ b/unit_tests/gen9/test_preemption_gen9.cpp @@ -49,16 +49,15 @@ PreemptionTestHwDetails GetPreemptionTestHwDetails() { GEN9TEST_F(Gen9PreemptionTests, whenMidThreadPreemptionIsNotAvailableThenDoesNotProgramPreamble) { device->setPreemptionMode(PreemptionMode::ThreadGroup); - size_t requiredSize = PreemptionHelper::getRequiredPreambleSize(*device); + size_t requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*device); EXPECT_EQ(0U, requiredSize); LinearStream cmdStream{nullptr, 0}; - PreemptionHelper::programPreamble(cmdStream, *device, nullptr); + PreemptionHelper::programStateSip(cmdStream, *device); EXPECT_EQ(0U, cmdStream.getUsed()); } -GEN9TEST_F(Gen9PreemptionTests, whenMidThreadPreemptionIsAvailableThenProgramsPreamble) { - using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; +GEN9TEST_F(Gen9PreemptionTests, whenMidThreadPreemptionIsAvailableThenStateSipIsProgrammed) { using STATE_SIP = typename FamilyType::STATE_SIP; device->setPreemptionMode(PreemptionMode::MidThread); @@ -68,22 +67,18 @@ GEN9TEST_F(Gen9PreemptionTests, whenMidThreadPreemptionIsAvailableThenProgramsPr uint64_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; MockGraphicsAllocation csrSurface((void *)minCsrAlignment, minCsrSize); - // verify preamble programming - size_t requiredPreambleSize = PreemptionHelper::getRequiredPreambleSize(*device); - size_t expectedPreambleSize = sizeof(GPGPU_CSR_BASE_ADDRESS) + sizeof(STATE_SIP); - EXPECT_EQ(expectedPreambleSize, requiredPreambleSize); + size_t requiredCmdStreamSize = PreemptionHelper::getRequiredStateSipCmdSize(*device); + size_t expectedPreambleSize = sizeof(STATE_SIP); + EXPECT_EQ(expectedPreambleSize, requiredCmdStreamSize); - StackVec preambleStorage(requiredPreambleSize); - ASSERT_LE(requiredPreambleSize, preambleStorage.size()); - LinearStream preambleCmdStream{preambleStorage.begin(), preambleStorage.size()}; - PreemptionHelper::programPreamble(preambleCmdStream, *device, &csrSurface); + StackVec streamStorage(requiredCmdStreamSize); + ASSERT_LE(requiredCmdStreamSize, streamStorage.size()); + + LinearStream cmdStream{streamStorage.begin(), streamStorage.size()}; + PreemptionHelper::programStateSip(cmdStream, *device); HardwareParse hwParsePreamble; - hwParsePreamble.parseCommands(preambleCmdStream); - - auto csrBaseAddressCmd = hwParsePreamble.getCommand(); - ASSERT_NE(nullptr, csrBaseAddressCmd); - EXPECT_EQ(csrSurface.getGpuAddressToPatch(), csrBaseAddressCmd->getGpgpuCsrBaseAddress()); + hwParsePreamble.parseCommands(cmdStream); auto stateSipCmd = hwParsePreamble.getCommand(); ASSERT_NE(nullptr, stateSipCmd); @@ -527,3 +522,25 @@ GEN9TEST_F(Gen9PreemptionTests, givenInterfaceDescriptorDataWhenAnyPreemptionMod ret = memcmp(&idd, &iddArg, sizeof(INTERFACE_DESCRIPTOR_DATA)); EXPECT_EQ(0, ret); } + +GEN9TEST_F(Gen9PreemptionTests, givenMidThreadPreemptionModeWhenStateSipIsProgrammedThenSipEqualsSipAllocationGpuAddressToPatch) { + using STATE_SIP = typename FamilyType::STATE_SIP; + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + mockDevice->setPreemptionMode(PreemptionMode::MidThread); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + + StackVec preemptionBuffer; + preemptionBuffer.resize(cmdSizePreemptionMidThread); + LinearStream preemptionStream(&*preemptionBuffer.begin(), preemptionBuffer.size()); + + PreemptionHelper::programStateSip(preemptionStream, *mockDevice); + + HardwareParse hwParserOnlyPreemption; + hwParserOnlyPreemption.parseCommands(preemptionStream, 0); + auto cmd = hwParserOnlyPreemption.getCommand(); + EXPECT_NE(nullptr, cmd); + + auto sipType = SipKernel::getSipKernelType(mockDevice->getHardwareInfo().pPlatform->eRenderCoreFamily, mockDevice->isSourceLevelDebuggerActive()); + EXPECT_EQ(mockDevice->getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, *mockDevice).getSipAllocation()->getGpuAddressToPatch(), cmd->getSystemInstructionPointer()); +} diff --git a/unit_tests/libult/mock_gfx_family.cpp b/unit_tests/libult/mock_gfx_family.cpp index 4dbb8ffa12..a4f9f95fa6 100644 --- a/unit_tests/libult/mock_gfx_family.cpp +++ b/unit_tests/libult/mock_gfx_family.cpp @@ -105,8 +105,12 @@ size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) { } template <> -void PreemptionHelper::programPreamble(LinearStream &preambleCmdStream, Device &device, - const GraphicsAllocation *preemptionCsr) { +size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device) { + return 0; +} + +template <> +void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { } template <> diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index e03b2a78f4..178db0a66c 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -27,6 +27,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::indirectHeap; using BaseClass::iohState; using BaseClass::programPreamble; + using BaseClass::programStateSip; using BaseClass::sshState; using BaseClass::CommandStreamReceiver::cleanupResources; using BaseClass::CommandStreamReceiver::commandStream; @@ -36,6 +37,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::CommandStreamReceiver::experimentalCmdBuffer; using BaseClass::CommandStreamReceiver::flushStamp; using BaseClass::CommandStreamReceiver::isPreambleSent; + using BaseClass::CommandStreamReceiver::isStateSipSent; using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest; diff --git a/unit_tests/preamble/preamble_tests.cpp b/unit_tests/preamble/preamble_tests.cpp index c1a36cf5dd..9ae9c37ea2 100644 --- a/unit_tests/preamble/preamble_tests.cpp +++ b/unit_tests/preamble/preamble_tests.cpp @@ -22,25 +22,43 @@ using PreambleTest = ::testing::Test; using namespace OCLRT; -HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) { +HWTEST_F(PreambleTest, givenDisabledPreemptioWhenPreambleAdditionalCommandsSizeIsQueriedThenZeroIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); - - mockDevice->setPreemptionMode(PreemptionMode::MidThread); - auto cmdSizePreambleMidThread = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); - mockDevice->setPreemptionMode(PreemptionMode::Disabled); - auto cmdSizePreambleDisabled = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - auto cmdSizePreemptionDisabled = PreemptionHelper::getRequiredPreambleSize(*mockDevice); - EXPECT_LE(cmdSizePreemptionMidThread, cmdSizePreambleMidThread); - EXPECT_LE(cmdSizePreemptionDisabled, cmdSizePreambleDisabled); + auto cmdSize = PreambleHelper::getAdditionalCommandsSize(*mockDevice); + EXPECT_EQ(PreemptionHelper::getRequiredPreambleSize(*mockDevice), cmdSize); + EXPECT_EQ(0u, cmdSize); +} - EXPECT_LE(cmdSizePreemptionDisabled, cmdSizePreemptionMidThread); - EXPECT_LE((cmdSizePreemptionMidThread - cmdSizePreemptionDisabled), (cmdSizePreambleMidThread - cmdSizePreambleDisabled)); +HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidthreadPreemptionWhenPreambleAdditionalCommandsSizeIsQueriedThenSizeForPreemptionPreambleIsReturned) { + using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { mockDevice->setPreemptionMode(PreemptionMode::MidThread); + + auto cmdSize = PreambleHelper::getAdditionalCommandsSize(*mockDevice); + EXPECT_EQ(PreemptionHelper::getRequiredPreambleSize(*mockDevice), cmdSize); + EXPECT_EQ(sizeof(GPGPU_CSR_BASE_ADDRESS), cmdSize); + } +} + +HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidThreadPreemptionWhenPreambleIsProgrammedThenStateSipAndCsrBaseAddressCmdsAreAdded) { + using STATE_SIP = typename FamilyType::STATE_SIP; + using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; + + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + mockDevice->setPreemptionMode(PreemptionMode::Disabled); + auto cmdSizePreemptionDisabled = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + EXPECT_EQ(0u, cmdSizePreemptionDisabled); + + if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { + mockDevice->setPreemptionMode(PreemptionMode::MidThread); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + EXPECT_LT(cmdSizePreemptionDisabled, cmdSizePreemptionMidThread); + StackVec preambleBuffer(8192); LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); @@ -54,13 +72,20 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) { PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, ThreadArbitrationPolicy::RoundRobin, &csrSurface); - PreemptionHelper::programPreamble(preemptionStream, *mockDevice, &csrSurface); + PreemptionHelper::programStateSip(preemptionStream, *mockDevice); - ASSERT_LE(preemptionStream.getUsed(), preambleStream.getUsed()); + HardwareParse hwParserPreamble; + hwParserPreamble.parseCommands(preambleStream, 0); - auto it = std::search(&preambleBuffer[0], &preambleBuffer[preambleStream.getUsed()], - &preemptionBuffer[0], &preemptionBuffer[preemptionStream.getUsed()]); - EXPECT_NE(&preambleBuffer[preambleStream.getUsed()], it); + auto csrCmd = hwParserPreamble.getCommand(); + EXPECT_NE(nullptr, csrCmd); + EXPECT_EQ(csrSurface.getGpuAddress(), csrCmd->getGpgpuCsrBaseAddress()); + + HardwareParse hwParserPreemption; + hwParserPreemption.parseCommands(preemptionStream, 0); + + auto stateSipCmd = hwParserPreemption.getCommand(); + EXPECT_NE(nullptr, stateSipCmd); } } diff --git a/unit_tests/preemption/preemption_tests.cpp b/unit_tests/preemption/preemption_tests.cpp index 117809554b..52b4637da4 100644 --- a/unit_tests/preemption/preemption_tests.cpp +++ b/unit_tests/preemption/preemption_tests.cpp @@ -6,6 +6,7 @@ */ #include "runtime/command_stream/preemption.h" +#include "runtime/command_queue/command_queue_hw.h" #include "runtime/helpers/options.h" #include "runtime/helpers/dispatch_info.h" #include "unit_tests/fixtures/preemption_fixture.h" @@ -389,6 +390,58 @@ INSTANTIATE_TEST_CASE_P( PreemptionHwTest, ::testing::Values(PreemptionMode::Disabled, PreemptionMode::MidBatch, PreemptionMode::ThreadGroup, PreemptionMode::MidThread)); +struct PreemptionTest : ::testing::Test, ::testing::WithParamInterface { +}; + +HWTEST_P(PreemptionTest, whenInNonMidThreadModeThenSizeForStateSipIsZero) { + PreemptionMode mode = GetParam(); + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + mockDevice->setPreemptionMode(mode); + + auto size = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + EXPECT_EQ(0u, size); +} + +HWTEST_P(PreemptionTest, whenInNonMidThreadModeThenStateSipIsNotProgrammed) { + PreemptionMode mode = GetParam(); + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + mockDevice->setPreemptionMode(mode); + + auto requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + StackVec buffer(requiredSize); + LinearStream cmdStream(buffer.begin(), buffer.size()); + + PreemptionHelper::programStateSip(cmdStream, *mockDevice); + EXPECT_EQ(0u, cmdStream.getUsed()); +} + +HWTEST_P(PreemptionTest, whenInNonMidThreadModeThenSizeForCsrBaseAddressIsZero) { + PreemptionMode mode = GetParam(); + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + mockDevice->setPreemptionMode(mode); + + auto size = PreemptionHelper::getRequiredPreambleSize(*mockDevice); + EXPECT_EQ(0u, size); +} + +HWTEST_P(PreemptionTest, whenInNonMidThreadModeThenCsrBaseAddressIsNotProgrammed) { + PreemptionMode mode = GetParam(); + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + mockDevice->setPreemptionMode(mode); + + auto requiredSize = PreemptionHelper::getRequiredPreambleSize(*mockDevice); + StackVec buffer(requiredSize); + LinearStream cmdStream(buffer.begin(), buffer.size()); + + PreemptionHelper::programCsrBaseAddress(cmdStream, *mockDevice, nullptr); + EXPECT_EQ(0u, cmdStream.getUsed()); +} + +INSTANTIATE_TEST_CASE_P( + NonMidThread, + PreemptionTest, + ::testing::Values(PreemptionMode::Disabled, PreemptionMode::MidBatch, PreemptionMode::ThreadGroup)); + HWTEST_F(MidThreadPreemptionTests, createCsrSurfaceNoWa) { const WorkaroundTable *waTable = platformDevices[0]->pWaTable; WorkaroundTable tmpWaTable; @@ -428,3 +481,51 @@ HWTEST_F(MidThreadPreemptionTests, createCsrSurfaceWa) { const_cast(platformDevices[0])->pWaTable = waTable; } + +HWCMDTEST_F(IGFX_GEN8_CORE, MidThreadPreemptionTests, givenDirtyCsrStateWhenStateBaseAddressIsProgrammedThenStateSipIsAdded) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + using STATE_SIP = typename FamilyType::STATE_SIP; + + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { + mockDevice->setPreemptionMode(PreemptionMode::MidThread); + + auto &csr = mockDevice->getUltCommandStreamReceiver(); + csr.isPreambleSent = true; + + CommandQueueHw commandQueue(nullptr, device.get(), 0); + auto &commandStream = commandQueue.getCS(4096u); + + DispatchFlags dispatchFlags; + dispatchFlags.preemptionMode = PreemptionMode::MidThread; + + void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); + + std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); + std::unique_ptr heap(new IndirectHeap(allocation.get())); + + csr.flushTask(commandStream, + 0, + *heap.get(), + *heap.get(), + *heap.get(), + 0, + dispatchFlags, + *mockDevice); + + HardwareParse hwParser; + hwParser.parseCommands(csr.getCS(0)); + + auto stateBaseAddressItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + EXPECT_NE(hwParser.cmdList.end(), stateBaseAddressItor); + + auto stateSipItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + EXPECT_NE(hwParser.cmdList.end(), stateSipItor); + + auto stateSipAfterSBA = ++stateBaseAddressItor; + EXPECT_EQ(*stateSipAfterSBA, *stateSipItor); + + alignedFree(buffer); + } +} diff --git a/unit_tests/source_level_debugger/source_level_debugger_csr_tests.cpp b/unit_tests/source_level_debugger/source_level_debugger_csr_tests.cpp index a350620103..010aeb961c 100644 --- a/unit_tests/source_level_debugger/source_level_debugger_csr_tests.cpp +++ b/unit_tests/source_level_debugger/source_level_debugger_csr_tests.cpp @@ -8,6 +8,7 @@ #include "runtime/command_queue/command_queue_hw.h" #include "runtime/source_level_debugger/source_level_debugger.h" #include "unit_tests/fixtures/device_fixture.h" +#include "unit_tests/helpers/hw_parse.h" #include "unit_tests/mocks/mock_builtins.h" #include "unit_tests/mocks/mock_csr.h" #include "unit_tests/mocks/mock_device.h" @@ -58,3 +59,59 @@ HWTEST_F(CommandStreamReceiverWithActiveDebuggerTest, givenCsrWithActiveDebugger EXPECT_TRUE(found); alignedFree(buffer); } + +HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverWithActiveDebuggerTest, givenCsrWithActiveDebuggerAndDisabledPreemptionWhenFlushTaskIsCalledThenStateSipCmdIsProgrammed) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + using STATE_SIP = typename FamilyType::STATE_SIP; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + if (device->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { + device->setSourceLevelDebuggerActive(true); + device->allocatePreemptionAllocationIfNotPresent(); + auto mockCsr = new MockCsrHw2(*platformDevices[0], *device->executionEnvironment); + + device->resetCommandStreamReceiver(mockCsr); + + CommandQueueHw commandQueue(nullptr, device.get(), 0); + auto &commandStream = commandQueue.getCS(4096u); + auto &preambleStream = mockCsr->getCS(0); + + DispatchFlags dispatchFlags; + dispatchFlags.preemptionMode = PreemptionMode::Disabled; + + void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); + + std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); + std::unique_ptr heap(new IndirectHeap(allocation.get())); + + mockCsr->flushTask(commandStream, + 0, + *heap.get(), + *heap.get(), + *heap.get(), + 0, + dispatchFlags, + *device); + + auto sipType = SipKernel::getSipKernelType(device->getHardwareInfo().pPlatform->eRenderCoreFamily, true); + auto sipAllocation = device->getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, *device.get()).getSipAllocation(); + + HardwareParse hwParser; + hwParser.parseCommands(preambleStream); + auto itorStateBaseAddr = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + auto itorStateSip = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + + ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr); + ASSERT_NE(hwParser.cmdList.end(), itorStateSip); + + STATE_BASE_ADDRESS *sba = (STATE_BASE_ADDRESS *)*itorStateBaseAddr; + STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip; + EXPECT_LT(reinterpret_cast(sba), reinterpret_cast(stateSipCmd)); + + auto sipAddress = stateSipCmd->getSystemInstructionPointer(); + + EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); + alignedFree(buffer); + } +} diff --git a/unit_tests/source_level_debugger/source_level_debugger_preamble_test.h b/unit_tests/source_level_debugger/source_level_debugger_preamble_test.h index 190aa50a47..342f424900 100644 --- a/unit_tests/source_level_debugger/source_level_debugger_preamble_test.h +++ b/unit_tests/source_level_debugger/source_level_debugger_preamble_test.h @@ -21,11 +21,12 @@ class SourceLevelDebuggerPreambleTest { public: SourceLevelDebuggerPreambleTest() {} - static void givenMidThreadPreemptionAndDebuggingActiveWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest(); - static void givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest(); + static void givenMidThreadPreemptionAndDebuggingActiveWhenStateSipIsProgrammedThenCorrectSipKernelIsUsedTest(); + static void givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsProgrammedThenCorrectSipKernelIsUsedTest(); static void givenPreemptionDisabledAndDebuggingActiveWhenPreambleIsProgrammedThenCorrectSipKernelIsUsedTest(); static void givenMidThreadPreemptionAndDebuggingActiveWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); static void givenPreemptionDisabledAndDebuggingActiveWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); static void givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); static void givenDisabledPreemptionAndDisabledDebuggingWhenPreambleSizeIsQueriedThenCorrecrSizeIsReturnedTest(); + static void givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAreInlcudedTest(); }; diff --git a/unit_tests/source_level_debugger/source_level_debugger_preamble_test.inl b/unit_tests/source_level_debugger/source_level_debugger_preamble_test.inl index a015face5a..1d5ae6a63c 100644 --- a/unit_tests/source_level_debugger/source_level_debugger_preamble_test.inl +++ b/unit_tests/source_level_debugger/source_level_debugger_preamble_test.inl @@ -8,22 +8,19 @@ using namespace OCLRT; template -void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebuggingActiveWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest() { +void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebuggingActiveWhenStateSipIsProgrammedThenCorrectSipKernelIsUsedTest() { using STATE_SIP = typename GfxFamily::STATE_SIP; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); mockDevice->setSourceLevelDebuggerActive(true); mockDevice->setPreemptionMode(PreemptionMode::MidThread); - auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); StackVec preambleBuffer; preambleBuffer.resize(cmdSizePreemptionMidThread); LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); - uintptr_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface(reinterpret_cast(minCsrAlignment), 1024); - - PreemptionHelper::programPreamble(preambleStream, *mockDevice, &csrSurface); + PreemptionHelper::programStateSip(preambleStream, *mockDevice); HardwareParse hwParser; hwParser.parseCommands(preambleStream); @@ -37,22 +34,19 @@ void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebu } template -void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsPrograamedThenCorrectSipKernelIsUsedTest() { +void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisabledDebuggingWhenPreambleIsProgrammedThenCorrectSipKernelIsUsedTest() { using STATE_SIP = typename GfxFamily::STATE_SIP; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); mockDevice->setSourceLevelDebuggerActive(false); mockDevice->setPreemptionMode(PreemptionMode::MidThread); - auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); StackVec preambleBuffer; preambleBuffer.resize(cmdSizePreemptionMidThread); LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); - uintptr_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface(reinterpret_cast(minCsrAlignment), 1024); - - PreemptionHelper::programPreamble(preambleStream, *mockDevice, &csrSurface); + PreemptionHelper::programStateSip(preambleStream, *mockDevice); HardwareParse hwParser; hwParser.parseCommands(preambleStream); @@ -72,16 +66,13 @@ void SourceLevelDebuggerPreambleTest::givenPreemptionDisabledAndDebug mockDevice->setSourceLevelDebuggerActive(true); mockDevice->setPreemptionMode(PreemptionMode::Disabled); - auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); StackVec preambleBuffer; preambleBuffer.resize(cmdSizePreemptionMidThread); LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); - uintptr_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte; - MockGraphicsAllocation csrSurface(reinterpret_cast(minCsrAlignment), 1024); - - PreemptionHelper::programPreamble(preambleStream, *mockDevice, &csrSurface); + PreemptionHelper::programStateSip(preambleStream, *mockDevice); HardwareParse hwParser; hwParser.parseCommands(preambleStream); @@ -99,8 +90,8 @@ void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDebu auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); mockDevice->setSourceLevelDebuggerActive(true); mockDevice->setPreemptionMode(PreemptionMode::MidThread); - size_t requiredPreambleSize = PreemptionHelper::getRequiredPreambleSize(*mockDevice); - auto sizeExpected = sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS) + sizeof(typename GfxFamily::STATE_SIP); + size_t requiredPreambleSize = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); + auto sizeExpected = sizeof(typename GfxFamily::STATE_SIP); EXPECT_EQ(sizeExpected, requiredPreambleSize); } @@ -109,7 +100,7 @@ void SourceLevelDebuggerPreambleTest::givenPreemptionDisabledAndDebug auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); mockDevice->setSourceLevelDebuggerActive(true); mockDevice->setPreemptionMode(PreemptionMode::Disabled); - size_t requiredPreambleSize = PreemptionHelper::getRequiredPreambleSize(*mockDevice); + size_t requiredPreambleSize = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice); auto sizeExpected = sizeof(typename GfxFamily::STATE_SIP); EXPECT_EQ(sizeExpected, requiredPreambleSize); } @@ -120,7 +111,7 @@ void SourceLevelDebuggerPreambleTest::givenMidThreadPreemptionAndDisa mockDevice->setSourceLevelDebuggerActive(false); mockDevice->setPreemptionMode(PreemptionMode::MidThread); size_t requiredPreambleSize = PreemptionHelper::getRequiredPreambleSize(*mockDevice); - auto sizeExpected = sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS) + sizeof(typename GfxFamily::STATE_SIP); + auto sizeExpected = sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS); EXPECT_EQ(sizeExpected, requiredPreambleSize); } @@ -134,4 +125,21 @@ void SourceLevelDebuggerPreambleTest::givenDisabledPreemptionAndDisab EXPECT_EQ(sizeExpected, requiredPreambleSize); } +template +void SourceLevelDebuggerPreambleTest::givenKernelDebuggingActiveAndDisabledPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAreInlcudedTest() { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); + auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + + mockDevice->setSourceLevelDebuggerActive(false); + size_t withoutDebugging = PreambleHelper::getAdditionalCommandsSize(*mockDevice); + mockDevice->setSourceLevelDebuggerActive(true); + size_t withDebugging = PreambleHelper::getAdditionalCommandsSize(*mockDevice); + EXPECT_LT(withoutDebugging, withDebugging); + + size_t diff = withDebugging - withoutDebugging; + size_t sizeExpected = 2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); + EXPECT_EQ(sizeExpected, diff); +} + template class SourceLevelDebuggerPreambleTest;