mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 07:14:10 +08:00
Remove RMW access patterns from functions programming on gfx memory
Related-To: NEO-4338
Change-Id: I8fe555525f937e75c5439702b328c734af9af1f9
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
719b22ee11
commit
5e98368dad
@@ -240,10 +240,12 @@ struct LriHelper {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
// Emits one MI_LOAD_REGISTER_IMM into cmdStream with `address` as the register
// offset and `value` as the data dword, and returns the pointer obtained from
// the stream.
// NOTE(review): this listing is a rendered diff with the +/- markers lost, so
// two versions of the body are interleaved below and `lri` appears twice.
static MI_LOAD_REGISTER_IMM *program(LinearStream *cmdStream, uint32_t address, uint32_t value) {
|
||||
// Presumably the removed rows: the command is initialised and then modified
// field by field directly through the pointer returned by getSpace().
auto lri = (MI_LOAD_REGISTER_IMM *)cmdStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*lri = GfxFamily::cmdInitLoadRegisterImm;
|
||||
lri->setRegisterOffset(address);
|
||||
lri->setDataDword(value);
|
||||
// Presumably the added rows: the command is assembled in the local `cmd` and
// stored to the stream space with a single assignment.
MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm;
|
||||
cmd.setRegisterOffset(address);
|
||||
cmd.setDataDword(value);
|
||||
|
||||
auto lri = cmdStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
*lri = cmd;
|
||||
return lri;
|
||||
}
|
||||
};
|
||||
@@ -268,10 +270,10 @@ struct MemorySynchronizationCommands {
|
||||
|
||||
static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream);
|
||||
static size_t getSizeForFullCacheFlush();
|
||||
static void setExtraCacheFlushFields(PIPE_CONTROL *pipeControl);
|
||||
static void setExtraCacheFlushFields(PIPE_CONTROL &pipeControl);
|
||||
|
||||
protected:
|
||||
static PIPE_CONTROL *obtainPipeControl(LinearStream &commandStream, bool dcFlush);
|
||||
static void setPipeControl(PIPE_CONTROL &pipeControl, bool dcFlush);
|
||||
};
|
||||
|
||||
union SURFACE_STATE_BUFFER_LENGTH {
|
||||
|
||||
@@ -183,18 +183,22 @@ bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo
|
||||
template <typename Family>
|
||||
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) {
|
||||
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
|
||||
addPipeControlWA(commandStream, gpuAddress, hwInfo);
|
||||
|
||||
auto pipeControl = obtainPipeControl(commandStream, dcFlush);
|
||||
pipeControl->setPostSyncOperation(operation);
|
||||
pipeControl->setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
|
||||
pipeControl->setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
|
||||
pipeControl->setDcFlushEnable(dcFlush);
|
||||
PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
|
||||
PIPE_CONTROL cmd = Family::cmdInitPipeControl;
|
||||
setPipeControl(cmd, dcFlush);
|
||||
cmd.setPostSyncOperation(operation);
|
||||
cmd.setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
|
||||
cmd.setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
|
||||
cmd.setDcFlushEnable(dcFlush);
|
||||
if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
pipeControl->setImmediateData(immediateData);
|
||||
cmd.setImmediateData(immediateData);
|
||||
}
|
||||
|
||||
setExtraPipeControlProperties(*pipeControl, hwInfo);
|
||||
setExtraPipeControlProperties(cmd, hwInfo);
|
||||
*pipeControl = cmd;
|
||||
|
||||
MemorySynchronizationCommands<Family>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
|
||||
|
||||
@@ -202,28 +206,31 @@ typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipe
|
||||
}
|
||||
|
||||
// NOTE(review): rendered diff with the +/- markers lost. Two signatures share
// this body: obtainPipeControl(LinearStream &, bool), which takes space from
// the stream and returns a PIPE_CONTROL pointer, and setPipeControl(PIPE_CONTROL &, bool),
// which only fills in a caller-supplied command. Rows using `pCmd` presumably
// belong to the former (removed) version, rows using `pipeControl` to the
// latter (added) version.
template <typename GfxFamily>
|
||||
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(LinearStream &commandStream, bool dcFlush) {
|
||||
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream.getSpace(sizeof(PIPE_CONTROL)));
|
||||
*pCmd = GfxFamily::cmdInitPipeControl;
|
||||
pCmd->setCommandStreamerStallEnable(true);
|
||||
pCmd->setDcFlushEnable(dcFlush);
|
||||
// setPipeControl variant: always enables the command streamer stall and sets
// DC flush from `dcFlush`; it does not touch the command stream itself.
void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, bool dcFlush) {
|
||||
pipeControl.setCommandStreamerStallEnable(true);
|
||||
pipeControl.setDcFlushEnable(dcFlush);
|
||||
|
||||
// Debug override: when the FlushAllCaches flag is set, DC flush is forced on
// regardless of `dcFlush`, together with every flush/invalidate bit listed below.
if (DebugManager.flags.FlushAllCaches.get()) {
|
||||
pCmd->setDcFlushEnable(true);
|
||||
pCmd->setRenderTargetCacheFlushEnable(true);
|
||||
pCmd->setInstructionCacheInvalidateEnable(true);
|
||||
pCmd->setTextureCacheInvalidationEnable(true);
|
||||
pCmd->setPipeControlFlushEnable(true);
|
||||
pCmd->setVfCacheInvalidationEnable(true);
|
||||
pCmd->setConstantCacheInvalidationEnable(true);
|
||||
pCmd->setStateCacheInvalidationEnable(true);
|
||||
pipeControl.setDcFlushEnable(true);
|
||||
pipeControl.setRenderTargetCacheFlushEnable(true);
|
||||
pipeControl.setInstructionCacheInvalidateEnable(true);
|
||||
pipeControl.setTextureCacheInvalidationEnable(true);
|
||||
pipeControl.setPipeControlFlushEnable(true);
|
||||
pipeControl.setVfCacheInvalidationEnable(true);
|
||||
pipeControl.setConstantCacheInvalidationEnable(true);
|
||||
pipeControl.setStateCacheInvalidationEnable(true);
|
||||
}
|
||||
// Only meaningful for the pointer-returning obtainPipeControl variant.
return pCmd;
|
||||
}
|
||||
|
||||
// Adds one PIPE_CONTROL to commandStream and returns a pointer to it.
// NOTE(review): rendered diff with the +/- markers lost; the body below holds
// two alternative implementations back to back.
template <typename GfxFamily>
|
||||
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
|
||||
// Presumably the removed row: plain delegation to obtainPipeControl().
return MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, dcFlush);
|
||||
// Presumably the added rows: build the command in the local `cmd` via
// setPipeControl(), then take space from the stream and store it with one
// assignment.
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
|
||||
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, dcFlush);
|
||||
auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pipeControl = cmd;
|
||||
|
||||
return pipeControl;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -321,16 +328,21 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
|
||||
|
||||
// Adds one PIPE_CONTROL with dcFlush = true plus render-target flush,
// instruction/texture/constant/state cache invalidation and pipe-control flush,
// applies setExtraCacheFlushFields(), and returns a pointer to the command in
// commandStream.
// NOTE(review): rendered diff with the +/- markers lost. Rows that write through
// `pipeControl->` (and pass the pointer to setExtraCacheFlushFields) are
// presumably the removed version; rows that operate on the local `cmd` are
// presumably the added version.
template <typename GfxFamily>
|
||||
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
|
||||
auto pipeControl = MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, true);
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
pipeControl->setRenderTargetCacheFlushEnable(true);
|
||||
pipeControl->setInstructionCacheInvalidateEnable(true);
|
||||
pipeControl->setTextureCacheInvalidationEnable(true);
|
||||
pipeControl->setPipeControlFlushEnable(true);
|
||||
pipeControl->setConstantCacheInvalidationEnable(true);
|
||||
pipeControl->setStateCacheInvalidationEnable(true);
|
||||
// In the `cmd`-based version the stream space is requested first and is only
// written once, by the final `*pipeControl = cmd` assignment.
PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
|
||||
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
|
||||
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, true);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(pipeControl);
|
||||
cmd.setRenderTargetCacheFlushEnable(true);
|
||||
cmd.setInstructionCacheInvalidateEnable(true);
|
||||
cmd.setTextureCacheInvalidationEnable(true);
|
||||
cmd.setPipeControlFlushEnable(true);
|
||||
cmd.setConstantCacheInvalidationEnable(true);
|
||||
cmd.setStateCacheInvalidationEnable(true);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(cmd);
|
||||
*pipeControl = cmd;
|
||||
|
||||
return pipeControl;
|
||||
}
|
||||
|
||||
@@ -80,7 +80,7 @@ void MemorySynchronizationCommands<GfxFamily>::setExtraPipeControlProperties(PIP
|
||||
}
|
||||
|
||||
// Hook for extra cache-flush fields; in this file the body is empty, so it
// sets nothing.
// NOTE(review): rendered diff with the +/- markers lost. The two signatures are
// alternatives: the PIPE_CONTROL * form is presumably the removed row and the
// PIPE_CONTROL & form the added row, matching the declaration change shown
// earlier in this diff.
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL *pipeControl) {
|
||||
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL &pipeControl) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -13,11 +13,13 @@ namespace NEO {
|
||||
|
||||
// Emits one MI_LOAD_REGISTER_IMM into pCommandStream that loads `l3Config`
// into the register at L3CNTLRegisterOffset<GfxFamily>::registerOffset.
// NOTE(review): rendered diff with the +/- markers lost; `pCmd` is declared
// twice because the old and new bodies are interleaved.
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
|
||||
// Presumably the removed rows: initialise the command in place in the stream.
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
|
||||
// Presumably the added rows: take the space, but fill a local `cmd` instead.
auto pCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm;
|
||||
|
||||
pCmd->setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
|
||||
pCmd->setDataDword(l3Config);
|
||||
cmd.setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
|
||||
cmd.setDataDword(l3Config);
|
||||
|
||||
// Single store of the finished command into the stream space.
*pCmd = cmd;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -37,19 +39,20 @@ uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream
|
||||
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);
|
||||
|
||||
auto scratchSpaceAddressOffset = static_cast<uint64_t>(pCommandStream->getUsed() + MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET);
|
||||
auto pMediaVfeState = reinterpret_cast<MEDIA_VFE_STATE *>(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE)));
|
||||
*pMediaVfeState = GfxFamily::cmdInitMediaVfeState;
|
||||
pMediaVfeState->setMaximumNumberOfThreads(maxFrontEndThreads);
|
||||
pMediaVfeState->setNumberOfUrbEntries(1);
|
||||
pMediaVfeState->setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
|
||||
pMediaVfeState->setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
|
||||
pMediaVfeState->setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
|
||||
auto pMediaVfeState = pCommandStream->getSpaceForCmd<MEDIA_VFE_STATE>();
|
||||
MEDIA_VFE_STATE cmd = GfxFamily::cmdInitMediaVfeState;
|
||||
cmd.setMaximumNumberOfThreads(maxFrontEndThreads);
|
||||
cmd.setNumberOfUrbEntries(1);
|
||||
cmd.setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
|
||||
cmd.setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
|
||||
cmd.setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
|
||||
uint32_t lowAddress = static_cast<uint32_t>(0xFFFFFFFF & scratchAddress);
|
||||
uint32_t highAddress = static_cast<uint32_t>(0xFFFFFFFF & (scratchAddress >> 32));
|
||||
pMediaVfeState->setScratchSpaceBasePointer(lowAddress);
|
||||
pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress);
|
||||
cmd.setScratchSpaceBasePointer(lowAddress);
|
||||
cmd.setScratchSpaceBasePointerHigh(highAddress);
|
||||
|
||||
programAdditionalFieldsInVfeState(pMediaVfeState, hwInfo);
|
||||
programAdditionalFieldsInVfeState(&cmd, hwInfo);
|
||||
*pMediaVfeState = cmd;
|
||||
|
||||
return scratchSpaceAddressOffset;
|
||||
}
|
||||
|
||||
@@ -28,41 +28,41 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
bool isMultiOsContextCapable) {
|
||||
|
||||
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
*pCmd = GfxFamily::cmdInitStateBaseAddress;
|
||||
STATE_BASE_ADDRESS cmd = GfxFamily::cmdInitStateBaseAddress;
|
||||
|
||||
if (dsh) {
|
||||
pCmd->setDynamicStateBaseAddressModifyEnable(true);
|
||||
pCmd->setDynamicStateBufferSizeModifyEnable(true);
|
||||
pCmd->setDynamicStateBaseAddress(dsh->getHeapGpuBase());
|
||||
pCmd->setDynamicStateBufferSize(dsh->getHeapSizeInPages());
|
||||
cmd.setDynamicStateBaseAddressModifyEnable(true);
|
||||
cmd.setDynamicStateBufferSizeModifyEnable(true);
|
||||
cmd.setDynamicStateBaseAddress(dsh->getHeapGpuBase());
|
||||
cmd.setDynamicStateBufferSize(dsh->getHeapSizeInPages());
|
||||
}
|
||||
|
||||
if (ioh) {
|
||||
pCmd->setIndirectObjectBaseAddressModifyEnable(true);
|
||||
pCmd->setIndirectObjectBufferSizeModifyEnable(true);
|
||||
pCmd->setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
|
||||
pCmd->setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
|
||||
cmd.setIndirectObjectBaseAddressModifyEnable(true);
|
||||
cmd.setIndirectObjectBufferSizeModifyEnable(true);
|
||||
cmd.setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
|
||||
cmd.setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
|
||||
}
|
||||
|
||||
if (ssh) {
|
||||
pCmd->setSurfaceStateBaseAddressModifyEnable(true);
|
||||
pCmd->setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
|
||||
cmd.setSurfaceStateBaseAddressModifyEnable(true);
|
||||
cmd.setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
|
||||
}
|
||||
|
||||
if (setInstructionStateBaseAddress) {
|
||||
pCmd->setInstructionBaseAddressModifyEnable(true);
|
||||
pCmd->setInstructionBaseAddress(internalHeapBase);
|
||||
pCmd->setInstructionBufferSizeModifyEnable(true);
|
||||
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
|
||||
pCmd->setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
|
||||
cmd.setInstructionBaseAddressModifyEnable(true);
|
||||
cmd.setInstructionBaseAddress(internalHeapBase);
|
||||
cmd.setInstructionBufferSizeModifyEnable(true);
|
||||
cmd.setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
|
||||
cmd.setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
|
||||
}
|
||||
|
||||
if (setGeneralStateBaseAddress) {
|
||||
pCmd->setGeneralStateBaseAddressModifyEnable(true);
|
||||
pCmd->setGeneralStateBufferSizeModifyEnable(true);
|
||||
cmd.setGeneralStateBaseAddressModifyEnable(true);
|
||||
cmd.setGeneralStateBufferSizeModifyEnable(true);
|
||||
// GSH must be set to 0 for stateless
|
||||
pCmd->setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
|
||||
pCmd->setGeneralStateBufferSize(0xfffff);
|
||||
cmd.setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
|
||||
cmd.setGeneralStateBufferSize(0xfffff);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.OverrideStatelessMocsIndex.get() != -1) {
|
||||
@@ -71,9 +71,11 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
|
||||
statelessMocsIndex = statelessMocsIndex << 1;
|
||||
|
||||
pCmd->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
|
||||
cmd.setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
|
||||
|
||||
appendStateBaseAddressParameters(pCmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
|
||||
appendStateBaseAddressParameters(&cmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
|
||||
|
||||
*pCmd = cmd;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user