Remove read-modify-write (RMW) access patterns from functions that program commands in gfx memory

Related-To: NEO-4338

Change-Id: I8fe555525f937e75c5439702b328c734af9af1f9
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-04-08 18:33:03 +02:00
committed by sys_ocldev
parent 719b22ee11
commit 5e98368dad
18 changed files with 247 additions and 181 deletions

View File

@@ -240,10 +240,12 @@ struct LriHelper {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
static MI_LOAD_REGISTER_IMM *program(LinearStream *cmdStream, uint32_t address, uint32_t value) {
auto lri = (MI_LOAD_REGISTER_IMM *)cmdStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*lri = GfxFamily::cmdInitLoadRegisterImm;
lri->setRegisterOffset(address);
lri->setDataDword(value);
MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm;
cmd.setRegisterOffset(address);
cmd.setDataDword(value);
auto lri = cmdStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
*lri = cmd;
return lri;
}
};
@@ -268,10 +270,10 @@ struct MemorySynchronizationCommands {
static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream);
static size_t getSizeForFullCacheFlush();
static void setExtraCacheFlushFields(PIPE_CONTROL *pipeControl);
static void setExtraCacheFlushFields(PIPE_CONTROL &pipeControl);
protected:
static PIPE_CONTROL *obtainPipeControl(LinearStream &commandStream, bool dcFlush);
static void setPipeControl(PIPE_CONTROL &pipeControl, bool dcFlush);
};
union SURFACE_STATE_BUFFER_LENGTH {

View File

@@ -183,18 +183,22 @@ bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo
template <typename Family>
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipeControlAndProgramPostSyncOperation(
LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) {
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
addPipeControlWA(commandStream, gpuAddress, hwInfo);
auto pipeControl = obtainPipeControl(commandStream, dcFlush);
pipeControl->setPostSyncOperation(operation);
pipeControl->setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
pipeControl->setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
pipeControl->setDcFlushEnable(dcFlush);
PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
PIPE_CONTROL cmd = Family::cmdInitPipeControl;
setPipeControl(cmd, dcFlush);
cmd.setPostSyncOperation(operation);
cmd.setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
cmd.setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
cmd.setDcFlushEnable(dcFlush);
if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
pipeControl->setImmediateData(immediateData);
cmd.setImmediateData(immediateData);
}
setExtraPipeControlProperties(*pipeControl, hwInfo);
setExtraPipeControlProperties(cmd, hwInfo);
*pipeControl = cmd;
MemorySynchronizationCommands<Family>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
@@ -202,28 +206,31 @@ typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipe
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(LinearStream &commandStream, bool dcFlush) {
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream.getSpace(sizeof(PIPE_CONTROL)));
*pCmd = GfxFamily::cmdInitPipeControl;
pCmd->setCommandStreamerStallEnable(true);
pCmd->setDcFlushEnable(dcFlush);
void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, bool dcFlush) {
pipeControl.setCommandStreamerStallEnable(true);
pipeControl.setDcFlushEnable(dcFlush);
if (DebugManager.flags.FlushAllCaches.get()) {
pCmd->setDcFlushEnable(true);
pCmd->setRenderTargetCacheFlushEnable(true);
pCmd->setInstructionCacheInvalidateEnable(true);
pCmd->setTextureCacheInvalidationEnable(true);
pCmd->setPipeControlFlushEnable(true);
pCmd->setVfCacheInvalidationEnable(true);
pCmd->setConstantCacheInvalidationEnable(true);
pCmd->setStateCacheInvalidationEnable(true);
pipeControl.setDcFlushEnable(true);
pipeControl.setRenderTargetCacheFlushEnable(true);
pipeControl.setInstructionCacheInvalidateEnable(true);
pipeControl.setTextureCacheInvalidationEnable(true);
pipeControl.setPipeControlFlushEnable(true);
pipeControl.setVfCacheInvalidationEnable(true);
pipeControl.setConstantCacheInvalidationEnable(true);
pipeControl.setStateCacheInvalidationEnable(true);
}
return pCmd;
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
return MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, dcFlush);
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, dcFlush);
auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
*pipeControl = cmd;
return pipeControl;
}
template <typename GfxFamily>
@@ -321,16 +328,21 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
auto pipeControl = MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, true);
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
pipeControl->setRenderTargetCacheFlushEnable(true);
pipeControl->setInstructionCacheInvalidateEnable(true);
pipeControl->setTextureCacheInvalidationEnable(true);
pipeControl->setPipeControlFlushEnable(true);
pipeControl->setConstantCacheInvalidationEnable(true);
pipeControl->setStateCacheInvalidationEnable(true);
PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, true);
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(pipeControl);
cmd.setRenderTargetCacheFlushEnable(true);
cmd.setInstructionCacheInvalidateEnable(true);
cmd.setTextureCacheInvalidationEnable(true);
cmd.setPipeControlFlushEnable(true);
cmd.setConstantCacheInvalidationEnable(true);
cmd.setStateCacheInvalidationEnable(true);
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(cmd);
*pipeControl = cmd;
return pipeControl;
}

View File

@@ -80,7 +80,7 @@ void MemorySynchronizationCommands<GfxFamily>::setExtraPipeControlProperties(PIP
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL *pipeControl) {
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL &pipeControl) {
}
} // namespace NEO

View File

@@ -13,11 +13,13 @@ namespace NEO {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
auto pCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm;
pCmd->setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
pCmd->setDataDword(l3Config);
cmd.setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
cmd.setDataDword(l3Config);
*pCmd = cmd;
}
template <typename GfxFamily>
@@ -37,19 +39,20 @@ uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);
auto scratchSpaceAddressOffset = static_cast<uint64_t>(pCommandStream->getUsed() + MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET);
auto pMediaVfeState = reinterpret_cast<MEDIA_VFE_STATE *>(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE)));
*pMediaVfeState = GfxFamily::cmdInitMediaVfeState;
pMediaVfeState->setMaximumNumberOfThreads(maxFrontEndThreads);
pMediaVfeState->setNumberOfUrbEntries(1);
pMediaVfeState->setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
pMediaVfeState->setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
pMediaVfeState->setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
auto pMediaVfeState = pCommandStream->getSpaceForCmd<MEDIA_VFE_STATE>();
MEDIA_VFE_STATE cmd = GfxFamily::cmdInitMediaVfeState;
cmd.setMaximumNumberOfThreads(maxFrontEndThreads);
cmd.setNumberOfUrbEntries(1);
cmd.setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
cmd.setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
cmd.setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
uint32_t lowAddress = static_cast<uint32_t>(0xFFFFFFFF & scratchAddress);
uint32_t highAddress = static_cast<uint32_t>(0xFFFFFFFF & (scratchAddress >> 32));
pMediaVfeState->setScratchSpaceBasePointer(lowAddress);
pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress);
cmd.setScratchSpaceBasePointer(lowAddress);
cmd.setScratchSpaceBasePointerHigh(highAddress);
programAdditionalFieldsInVfeState(pMediaVfeState, hwInfo);
programAdditionalFieldsInVfeState(&cmd, hwInfo);
*pMediaVfeState = cmd;
return scratchSpaceAddressOffset;
}

View File

@@ -28,41 +28,41 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
bool isMultiOsContextCapable) {
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
*pCmd = GfxFamily::cmdInitStateBaseAddress;
STATE_BASE_ADDRESS cmd = GfxFamily::cmdInitStateBaseAddress;
if (dsh) {
pCmd->setDynamicStateBaseAddressModifyEnable(true);
pCmd->setDynamicStateBufferSizeModifyEnable(true);
pCmd->setDynamicStateBaseAddress(dsh->getHeapGpuBase());
pCmd->setDynamicStateBufferSize(dsh->getHeapSizeInPages());
cmd.setDynamicStateBaseAddressModifyEnable(true);
cmd.setDynamicStateBufferSizeModifyEnable(true);
cmd.setDynamicStateBaseAddress(dsh->getHeapGpuBase());
cmd.setDynamicStateBufferSize(dsh->getHeapSizeInPages());
}
if (ioh) {
pCmd->setIndirectObjectBaseAddressModifyEnable(true);
pCmd->setIndirectObjectBufferSizeModifyEnable(true);
pCmd->setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
pCmd->setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
cmd.setIndirectObjectBaseAddressModifyEnable(true);
cmd.setIndirectObjectBufferSizeModifyEnable(true);
cmd.setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
cmd.setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
}
if (ssh) {
pCmd->setSurfaceStateBaseAddressModifyEnable(true);
pCmd->setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
cmd.setSurfaceStateBaseAddressModifyEnable(true);
cmd.setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
}
if (setInstructionStateBaseAddress) {
pCmd->setInstructionBaseAddressModifyEnable(true);
pCmd->setInstructionBaseAddress(internalHeapBase);
pCmd->setInstructionBufferSizeModifyEnable(true);
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
pCmd->setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
cmd.setInstructionBaseAddressModifyEnable(true);
cmd.setInstructionBaseAddress(internalHeapBase);
cmd.setInstructionBufferSizeModifyEnable(true);
cmd.setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
cmd.setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
}
if (setGeneralStateBaseAddress) {
pCmd->setGeneralStateBaseAddressModifyEnable(true);
pCmd->setGeneralStateBufferSizeModifyEnable(true);
cmd.setGeneralStateBaseAddressModifyEnable(true);
cmd.setGeneralStateBufferSizeModifyEnable(true);
// GSH must be set to 0 for stateless
pCmd->setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
pCmd->setGeneralStateBufferSize(0xfffff);
cmd.setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
cmd.setGeneralStateBufferSize(0xfffff);
}
if (DebugManager.flags.OverrideStatelessMocsIndex.get() != -1) {
@@ -71,9 +71,11 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
statelessMocsIndex = statelessMocsIndex << 1;
pCmd->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
cmd.setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
appendStateBaseAddressParameters(pCmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
appendStateBaseAddressParameters(&cmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
*pCmd = cmd;
}
} // namespace NEO