// Patch summary (leading commentary; sits before the first `diff --git` header).
//
// 1. command_stream_receiver_hw_base.inl / command_stream_receiver_hw_bdw_plus.inl
//    CommandStreamReceiverHw::getRequiredCmdSizeForPreamble(Device &) is removed from
//    the bdw_plus file and added to the base file. The added version differs in one
//    place: when mediaVfeStateDirty is set it adds PreambleHelper::getVFECommandsSize()
//    instead of the inline sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE) expression.
//    The other two terms are unchanged: getAdditionalCommandsSize(device) when the
//    preamble has not been sent, and getThreadArbitrationCommandsSize() when the
//    preamble has not been sent or the last sent thread arbitration policy differs
//    from the required one.
//
// 2. hw_helper.h / hw_helper.cpp / preamble.h / preamble_base.inl
//    getMaxThreadsForVfe(const HardwareInfo &) is removed from PreambleHelper and
//    added as a static member of HwHelper. The body is the same in both places:
//    (ThreadCount / EUCount + extraQuantityThreadsPerEU) * EUCount.
//    NOTE(review): the division uses gtSystemInfo.EUCount with no zero check visible
//    in this patch - presumably EUCount is always non-zero here; confirm with callers.
//
// 3. preamble_bdw_plus.inl
//    - includes runtime/helpers/hw_helper.h;
//    - programVFEState: typedef replaced by a `using` alias, C-style and functional
//      casts replaced by named casts, and the maximum thread count now comes from
//      HwHelper::getMaxThreadsForVfe(hwInfo);
//    - adds PreambleHelper::getVFECommandsSize(), which returns
//      sizeof(MEDIA_VFE_STATE) + sizeof(PIPE_CONTROL).
//    NOTE(review): getVFECommandsSize() aliases MEDIA_VFE_STATE locally but uses
//    PIPE_CONTROL without a local alias - presumably that name is declared elsewhere
//    (for example in the PreambleHelper class); confirm it resolves for every family.
//
// 4. unit_tests/preamble/preamble_tests.cpp
//    The existing max-threads test now calls HwHelper::getMaxThreadsForVfe(hwInfo).
//
// NOTE(review): in this copy of the patch the line breaks are collapsed and the
// template argument lists look stripped (e.g. `template` with no parameter list,
// `reinterpret_cast(`, `static_cast(`, `std::make_unique(`). Compare against the
// original commit before applying or quoting it.
diff --git a/runtime/command_stream/command_stream_receiver_hw_base.inl b/runtime/command_stream/command_stream_receiver_hw_base.inl index a44eee06ce..6e17bbd18e 100644 --- a/runtime/command_stream/command_stream_receiver_hw_base.inl +++ b/runtime/command_stream/command_stream_receiver_hw_base.inl @@ -93,6 +93,22 @@ inline void CommandStreamReceiverHw::alignToCacheLine(LinearStream &c } } +template +inline size_t CommandStreamReceiverHw::getRequiredCmdSizeForPreamble(Device &device) const { + size_t size = 0; + + if (mediaVfeStateDirty) { + size += PreambleHelper::getVFECommandsSize(); + } + if (!this->isPreambleSent) { + size += PreambleHelper::getAdditionalCommandsSize(device); + } + if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { + size += PreambleHelper::getThreadArbitrationCommandsSize(); + } + return size; +} + template inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlCmd(LinearStream &commandStream) { typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; diff --git a/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl b/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl index 1bcb684a71..85147327d2 100644 --- a/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl +++ b/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl @@ -61,22 +61,6 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForPipelineSelect() return 0; } -template -inline size_t CommandStreamReceiverHw::getRequiredCmdSizeForPreamble(Device &device) const { - size_t size = 0; - - if (mediaVfeStateDirty) { - size += sizeof(typename GfxFamily::PIPE_CONTROL) + sizeof(typename GfxFamily::MEDIA_VFE_STATE); - } - if (!this->isPreambleSent) { - size += PreambleHelper::getAdditionalCommandsSize(device); - } - if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { - size += 
PreambleHelper::getThreadArbitrationCommandsSize(); - } - return size; -} - template void CommandStreamReceiverHw::createScratchSpaceController() { scratchSpaceController = std::make_unique(executionEnvironment, *internalAllocationStorage.get()); diff --git a/runtime/helpers/hw_helper.cpp b/runtime/helpers/hw_helper.cpp index 86e739ff3f..bd000d48be 100644 --- a/runtime/helpers/hw_helper.cpp +++ b/runtime/helpers/hw_helper.cpp @@ -40,4 +40,9 @@ bool HwHelper::cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo) { return hwInfo.capabilityTable.supportCacheFlushAfterWalker; } +uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) { + uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU; + return hwInfo.gtSystemInfo.EUCount * threadsPerEU; +} + } // namespace NEO diff --git a/runtime/helpers/hw_helper.h b/runtime/helpers/hw_helper.h index afc7480d19..d4123cd715 100644 --- a/runtime/helpers/hw_helper.h +++ b/runtime/helpers/hw_helper.h @@ -63,6 +63,7 @@ class HwHelper { virtual const std::vector getGpgpuEngineInstances() const = 0; virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0; virtual std::string getExtensions() const = 0; + static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo); static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1; diff --git a/runtime/helpers/preamble.h b/runtime/helpers/preamble.h index 972fc5b377..4d9f55f217 100644 --- a/runtime/helpers/preamble.h +++ b/runtime/helpers/preamble.h @@ -39,10 +39,10 @@ struct PreambleHelper { static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM); static size_t getAdditionalCommandsSize(const Device &device); static size_t getThreadArbitrationCommandsSize(); + static size_t getVFECommandsSize(); static size_t getKernelDebuggingCommandsSize(bool debuggingActive); static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo 
&hwInfo); static uint32_t getUrbEntryAllocationSize(); - static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo); }; template diff --git a/runtime/helpers/preamble_base.inl b/runtime/helpers/preamble_base.inl index 12ab6db486..efb5a182b7 100644 --- a/runtime/helpers/preamble_base.inl +++ b/runtime/helpers/preamble_base.inl @@ -82,10 +82,4 @@ size_t PreambleHelper::getKernelDebuggingCommandsSize(bool debuggingA return 0; } -template -uint32_t PreambleHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) { - uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU; - return hwInfo.gtSystemInfo.EUCount * threadsPerEU; -} - } // namespace NEO diff --git a/runtime/helpers/preamble_bdw_plus.inl b/runtime/helpers/preamble_bdw_plus.inl index b24ef00d37..9d72236f3e 100644 --- a/runtime/helpers/preamble_bdw_plus.inl +++ b/runtime/helpers/preamble_bdw_plus.inl @@ -5,6 +5,7 @@ * */ +#include "runtime/helpers/hw_helper.h" #include "runtime/helpers/preamble_base.inl" namespace NEO { @@ -25,21 +26,27 @@ uint32_t PreambleHelper::getUrbEntryAllocationSize() { template void PreambleHelper::programVFEState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, int scratchSize, uint64_t scratchAddress) { - typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE; + using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo); - auto pMediaVfeState = (MEDIA_VFE_STATE *)pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE)); + auto pMediaVfeState = reinterpret_cast(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE))); *pMediaVfeState = GfxFamily::cmdInitMediaVfeState; - pMediaVfeState->setMaximumNumberOfThreads(PreambleHelper::getMaxThreadsForVfe(hwInfo)); + pMediaVfeState->setMaximumNumberOfThreads(HwHelper::getMaxThreadsForVfe(hwInfo)); pMediaVfeState->setNumberOfUrbEntries(1); 
pMediaVfeState->setUrbEntryAllocationSize(PreambleHelper::getUrbEntryAllocationSize()); pMediaVfeState->setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize)); pMediaVfeState->setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize)); - uint32_t lowAddress = uint32_t(0xFFFFFFFF & scratchAddress); - uint32_t highAddress = uint32_t(0xFFFFFFFF & (scratchAddress >> 32)); + uint32_t lowAddress = static_cast(0xFFFFFFFF & scratchAddress); + uint32_t highAddress = static_cast(0xFFFFFFFF & (scratchAddress >> 32)); pMediaVfeState->setScratchSpaceBasePointer(lowAddress); pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress); } +template +size_t PreambleHelper::getVFECommandsSize() { + using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; + return sizeof(MEDIA_VFE_STATE) + sizeof(PIPE_CONTROL); +} + } // namespace NEO diff --git a/unit_tests/preamble/preamble_tests.cpp b/unit_tests/preamble/preamble_tests.cpp index 887adccd1e..a99f6520e1 100644 --- a/unit_tests/preamble/preamble_tests.cpp +++ b/unit_tests/preamble/preamble_tests.cpp @@ -183,7 +183,7 @@ HWTEST_F(PreambleTest, givenKernelDebuggingActiveAndMidThreadPreemptionWhenGetAd HWTEST_F(PreambleTest, givenDefaultPreambleWhenGetThreadsMaxNumberIsCalledThenMaximumNumberOfThreadsIsReturned) { const HardwareInfo &hwInfo = **platformDevices; uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU; - uint32_t value = PreambleHelper::getMaxThreadsForVfe(hwInfo); + uint32_t value = HwHelper::getMaxThreadsForVfe(hwInfo); uint32_t expected = hwInfo.gtSystemInfo.EUCount * threadsPerEU; EXPECT_EQ(expected, value);