Do not use direct submission in internal and low priority contexts

Change-Id: Ifac52dd36737151ea4d84bec95750e1716cafa9a
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-03-03 23:33:31 +01:00
committed by sys_ocldev
parent 4181102ff1
commit ad4925aef9
51 changed files with 432 additions and 185 deletions

View File

@@ -124,7 +124,8 @@ bool Device::createEngine(uint32_t deviceCsrIndex, aub_stream::EngineType engine
bool lowPriority = (deviceCsrIndex == HwHelper::lowPriorityGpgpuEngineIndex);
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineType,
getDeviceBitfield(), preemptionMode, lowPriority);
getDeviceBitfield(), preemptionMode,
lowPriority, internalUsage, false);
commandStreamReceiver->setupContext(*osContext);
if (!commandStreamReceiver->initializeTagAllocation()) {

View File

@@ -98,7 +98,7 @@ void RootDevice::initializeRootCommandStreamReceiver() {
auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(hwInfo);
auto osContext = getMemoryManager()->createAndRegisterOsContext(rootCommandStreamReceiver.get(), defaultEngineType,
getDeviceBitfield(), preemptionMode, false);
getDeviceBitfield(), preemptionMode, false, false, true);
rootCommandStreamReceiver->setupContext(*osContext);
rootCommandStreamReceiver->initializeTagAllocation();

View File

@@ -89,7 +89,7 @@ bool DirectSubmissionHw<GfxFamily>::allocateResources() {
}
template <typename GfxFamily>
void DirectSubmissionHw<GfxFamily>::cpuCachelineFlush(void *ptr, size_t size) {
inline void DirectSubmissionHw<GfxFamily>::cpuCachelineFlush(void *ptr, size_t size) {
if (disableCpuCacheFlush) {
return;
}
@@ -156,7 +156,7 @@ bool DirectSubmissionHw<GfxFamily>::stopRingBuffer() {
}
template <typename GfxFamily>
void *DirectSubmissionHw<GfxFamily>::dispatchSemaphoreSection(uint32_t value) {
inline void *DirectSubmissionHw<GfxFamily>::dispatchSemaphoreSection(uint32_t value) {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
@@ -172,13 +172,13 @@ void *DirectSubmissionHw<GfxFamily>::dispatchSemaphoreSection(uint32_t value) {
}
// Size of the semaphore section: the value reported by
// EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait() plus prefetchSize.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeSemaphoreSection() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeSemaphoreSection() {
size_t semaphoreSize = EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
return (semaphoreSize + prefetchSize);
}
template <typename GfxFamily>
void DirectSubmissionHw<GfxFamily>::dispatchStartSection(uint64_t gpuStartAddress) {
inline void DirectSubmissionHw<GfxFamily>::dispatchStartSection(uint64_t gpuStartAddress) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
auto bbufferStart = ringCommandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
*bbufferStart = GfxFamily::cmdInitBatchBufferStart;
@@ -188,14 +188,14 @@ void DirectSubmissionHw<GfxFamily>::dispatchStartSection(uint64_t gpuStartAddres
}
// Size of the start section: sizeof the family's MI_BATCH_BUFFER_START command.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeStartSection() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeStartSection() {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
size_t size = sizeof(MI_BATCH_BUFFER_START);
return size;
}
template <typename GfxFamily>
void DirectSubmissionHw<GfxFamily>::dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress) {
inline void DirectSubmissionHw<GfxFamily>::dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
auto bbufferStart = ringCommandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
@@ -206,38 +206,38 @@ void DirectSubmissionHw<GfxFamily>::dispatchSwitchRingBufferSection(uint64_t nex
}
// Size of the switch-ring-buffer section: sizeof the family's MI_BATCH_BUFFER_START command.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeSwitchRingBufferSection() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeSwitchRingBufferSection() {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
return sizeof(MI_BATCH_BUFFER_START);
}
// Asks cmdDispatcher to write a cache flush into ringCommandStream.
// Returns the pointer obtained from ringCommandStream.getSpace(0) before the flush
// was dispatched (presumably the stream's write position at that moment — confirm
// against LinearStream::getSpace).
template <typename GfxFamily>
void *DirectSubmissionHw<GfxFamily>::dispatchFlushSection() {
inline void *DirectSubmissionHw<GfxFamily>::dispatchFlushSection() {
void *currentPosition = ringCommandStream.getSpace(0);
cmdDispatcher->dispatchCacheFlush(ringCommandStream, *hwInfo);
return currentPosition;
}
// Size of the flush section, as reported by cmdDispatcher->getSizeCacheFlush for *hwInfo.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeFlushSection() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeFlushSection() {
return cmdDispatcher->getSizeCacheFlush(*hwInfo);
}
// Asks cmdDispatcher to write a monitor fence for (address, value) into ringCommandStream.
// Returns the pointer obtained from ringCommandStream.getSpace(0) before the fence
// was dispatched (presumably the stream's write position at that moment — confirm
// against LinearStream::getSpace).
template <typename GfxFamily>
void *DirectSubmissionHw<GfxFamily>::dispatchTagUpdateSection(uint64_t address, uint64_t value) {
inline void *DirectSubmissionHw<GfxFamily>::dispatchTagUpdateSection(uint64_t address, uint64_t value) {
void *currentPosition = ringCommandStream.getSpace(0);
cmdDispatcher->dispatchMonitorFence(ringCommandStream, address, value, *hwInfo);
return currentPosition;
}
// Size of the tag update section, as reported by cmdDispatcher->getSizeMonitorFence for *hwInfo.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeTagUpdateSection() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeTagUpdateSection() {
size_t size = cmdDispatcher->getSizeMonitorFence(*hwInfo);
return size;
}
template <typename GfxFamily>
void DirectSubmissionHw<GfxFamily>::dispatchEndingSection() {
inline void DirectSubmissionHw<GfxFamily>::dispatchEndingSection() {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
auto bbufferEnd = ringCommandStream.getSpaceForCmd<MI_BATCH_BUFFER_END>();
@@ -245,13 +245,13 @@ void DirectSubmissionHw<GfxFamily>::dispatchEndingSection() {
}
// Size of the ending section: sizeof the family's MI_BATCH_BUFFER_END command.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeEndingSection() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeEndingSection() {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
return sizeof(MI_BATCH_BUFFER_END);
}
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeDispatch() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeDispatch() {
return getSizeStartSection() +
getSizeFlushSection() +
getSizeTagUpdateSection() +
@@ -259,13 +259,13 @@ size_t DirectSubmissionHw<GfxFamily>::getSizeDispatch() {
}
// Combined size of the ending section and the flush section.
template <typename GfxFamily>
size_t DirectSubmissionHw<GfxFamily>::getSizeEnd() {
inline size_t DirectSubmissionHw<GfxFamily>::getSizeEnd() {
return getSizeEndingSection() +
getSizeFlushSection();
}
template <typename GfxFamily>
uint64_t DirectSubmissionHw<GfxFamily>::getCommandBufferPositionGpuAddress(void *position) {
inline uint64_t DirectSubmissionHw<GfxFamily>::getCommandBufferPositionGpuAddress(void *position) {
void *currentBase = ringCommandStream.getCpuBase();
size_t offset = ptrDiff(position, currentBase);
@@ -325,7 +325,7 @@ bool DirectSubmissionHw<GfxFamily>::dispatchCommandBuffer(BatchBuffer &batchBuff
}
template <typename GfxFamily>
void DirectSubmissionHw<GfxFamily>::setReturnAddress(void *returnCmd, uint64_t returnAddress) {
inline void DirectSubmissionHw<GfxFamily>::setReturnAddress(void *returnCmd, uint64_t returnAddress) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
MI_BATCH_BUFFER_START *returnBBStart = static_cast<MI_BATCH_BUFFER_START *>(returnCmd);
@@ -333,7 +333,7 @@ void DirectSubmissionHw<GfxFamily>::setReturnAddress(void *returnCmd, uint64_t r
}
template <typename GfxFamily>
GraphicsAllocation *DirectSubmissionHw<GfxFamily>::switchRingBuffersAllocations() {
inline GraphicsAllocation *DirectSubmissionHw<GfxFamily>::switchRingBuffersAllocations() {
GraphicsAllocation *nextAllocation = nullptr;
if (currentRingBuffer == RingBufferUse::FirstBuffer) {
nextAllocation = ringBuffer2;

View File

@@ -15,6 +15,9 @@ namespace NEO {
// Direct submission settings kept per engine type (paired with aub_stream::EngineType
// in EngineDirectSubmissionInitVec). Every flag defaults to false.
struct DirectSubmissionProperties {
bool engineSupported = false;
bool submitOnInit = false;
// NOTE(review): the three use* flags presumably permit direct submission on
// internal, low-priority and root-device contexts respectively — confirm against
// the code that reads them.
bool useInternal = false;
bool useLowPriority = false;
bool useRootDevice = false;
};
using EngineDirectSubmissionInitVec = std::vector<std::pair<aub_stream::EngineType, DirectSubmissionProperties>>;

View File

@@ -13,21 +13,21 @@
namespace NEO {
// Programs preemption commands into cmdBuffer by forwarding to
// PreemptionHelper::programCmdStream with the fixed arguments
// (PreemptionMode::MidBatch, PreemptionMode::Disabled, nullptr).
// NOTE(review): which of the two modes is "new" vs "old" is defined by
// PreemptionHelper — confirm there.
template <typename GfxFamily>
void RenderDispatcher<GfxFamily>::dispatchPreemption(LinearStream &cmdBuffer) {
inline void RenderDispatcher<GfxFamily>::dispatchPreemption(LinearStream &cmdBuffer) {
PreemptionHelper::programCmdStream<GfxFamily>(cmdBuffer, PreemptionMode::MidBatch, PreemptionMode::Disabled, nullptr);
}
// Size reported by PreemptionHelper::getRequiredCmdStreamSize for the same fixed mode
// pair (MidBatch, Disabled) that dispatchPreemption passes to programCmdStream.
template <typename GfxFamily>
size_t RenderDispatcher<GfxFamily>::getSizePreemption() {
inline size_t RenderDispatcher<GfxFamily>::getSizePreemption() {
size_t size = PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(PreemptionMode::MidBatch, PreemptionMode::Disabled);
return size;
}
template <typename GfxFamily>
void RenderDispatcher<GfxFamily>::dispatchMonitorFence(LinearStream &cmdBuffer,
uint64_t gpuAddress,
uint64_t immediateData,
const HardwareInfo &hwInfo) {
inline void RenderDispatcher<GfxFamily>::dispatchMonitorFence(LinearStream &cmdBuffer,
uint64_t gpuAddress,
uint64_t immediateData,
const HardwareInfo &hwInfo) {
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
cmdBuffer,
@@ -39,18 +39,18 @@ void RenderDispatcher<GfxFamily>::dispatchMonitorFence(LinearStream &cmdBuffer,
}
// Size of the monitor fence, taken from
// MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo).
template <typename GfxFamily>
size_t RenderDispatcher<GfxFamily>::getSizeMonitorFence(const HardwareInfo &hwInfo) {
inline size_t RenderDispatcher<GfxFamily>::getSizeMonitorFence(const HardwareInfo &hwInfo) {
size_t size = MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
return size;
}
// Adds a full cache flush to cmdBuffer via MemorySynchronizationCommands::addFullCacheFlush.
// hwInfo is accepted but not used by this implementation.
template <typename GfxFamily>
void RenderDispatcher<GfxFamily>::dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo) {
inline void RenderDispatcher<GfxFamily>::dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo) {
MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(cmdBuffer);
}
// Size of a full cache flush, from MemorySynchronizationCommands::getSizeForFullCacheFlush().
// hwInfo is accepted but not used by this implementation.
template <typename GfxFamily>
size_t RenderDispatcher<GfxFamily>::getSizeCacheFlush(const HardwareInfo &hwInfo) {
inline size_t RenderDispatcher<GfxFamily>::getSizeCacheFlush(const HardwareInfo &hwInfo) {
size_t size = MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush();
return size;
}

View File

@@ -200,9 +200,12 @@ bool MemoryManager::isMemoryBudgetExhausted() const {
}
OsContext *MemoryManager::createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, aub_stream::EngineType engineType,
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode, bool lowPriority) {
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice) {
auto contextId = ++latestContextId;
auto osContext = OsContext::create(peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->osInterface.get(), contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
auto osContext = OsContext::create(peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->osInterface.get(),
contextId, deviceBitfield, engineType, preemptionMode,
lowPriority, internalEngine, rootDevice);
UNRECOVERABLE_IF(!osContext->isInitialized());
osContext->incRefInternal();

View File

@@ -147,7 +147,8 @@ class MemoryManager {
const ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; }
OsContext *createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, aub_stream::EngineType engineType,
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode, bool lowPriority);
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice);
uint32_t getRegisteredEnginesCount() const { return static_cast<uint32_t>(registeredEngines.size()); }
EngineControlContainer &getRegisteredEngines();
EngineControl *getRegisteredEngineForCsr(CommandStreamReceiver *commandStreamReceiver);

View File

@@ -14,16 +14,21 @@
namespace NEO {
// Factory for OS contexts (Linux variant).
// When osInterface is non-null, returns a new OsContextLinux built on the Drm obtained
// from osInterface; otherwise returns a new plain OsContext. All remaining arguments
// are forwarded unchanged to the chosen constructor. The object is allocated with `new`.
OsContext *OsContext::create(OSInterface *osInterface, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority) {
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice) {
if (osInterface) {
return new OsContextLinux(*osInterface->get()->getDrm(), contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
return new OsContextLinux(*osInterface->get()->getDrm(), contextId, deviceBitfield, engineType, preemptionMode,
lowPriority, internalEngine, rootDevice);
}
return new OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
return new OsContext(contextId, deviceBitfield, engineType, preemptionMode,
lowPriority, internalEngine, rootDevice);
}
OsContextLinux::OsContextLinux(Drm &drm, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority)
: OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority), drm(drm) {
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice)
: OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority, internalEngine, rootDevice),
drm(drm) {
for (auto deviceIndex = 0u; deviceIndex < deviceBitfield.size(); deviceIndex++) {
if (deviceBitfield.test(deviceIndex)) {
auto drmContextId = drm.createDrmContext();

View File

@@ -19,7 +19,8 @@ class OsContextLinux : public OsContext {
OsContextLinux() = delete;
~OsContextLinux() override;
OsContextLinux(Drm &drm, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority);
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice);
unsigned int getEngineFlag() const { return engineFlag; }
const std::vector<uint32_t> &getDrmContextIds() const { return drmContextIds; }

View File

@@ -22,29 +22,37 @@ class OsContext : public ReferenceTrackedObject<OsContext> {
OsContext() = delete;
static OsContext *create(OSInterface *osInterface, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority);
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice);
// Inline accessors returning the member of the matching name.
uint32_t getContextId() const { return contextId; }
uint32_t getNumSupportedDevices() const { return numSupportedDevices; }
DeviceBitfield getDeviceBitfield() const { return deviceBitfield; }
PreemptionMode getPreemptionMode() const { return preemptionMode; }
// Non-const accessor: hands out a mutable reference to engineType.
aub_stream::EngineType &getEngineType() { return engineType; }
bool isLowPriority() const { return lowPriority; }
bool isInternalEngine() const { return internalEngine; }
bool isRootDevice() const { return rootDevice; }
// Base implementation always reports true; virtual, so derived contexts may override it.
virtual bool isInitialized() const { return true; }
protected:
// Constructor: stores each argument in the member of the same name and derives
// numSupportedDevices from deviceBitfield.count().
OsContext(uint32_t contextId, DeviceBitfield deviceBitfield, aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority)
OsContext(uint32_t contextId, DeviceBitfield deviceBitfield, aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice)
: contextId(contextId),
deviceBitfield(deviceBitfield),
preemptionMode(preemptionMode),
numSupportedDevices(static_cast<uint32_t>(deviceBitfield.count())),
engineType(engineType),
lowPriority(lowPriority) {}
lowPriority(lowPriority),
internalEngine(internalEngine),
rootDevice(rootDevice) {}
const uint32_t contextId;
const DeviceBitfield deviceBitfield;
const PreemptionMode preemptionMode;
const uint32_t numSupportedDevices;
aub_stream::EngineType engineType = aub_stream::ENGINE_RCS;
const bool lowPriority;
const bool lowPriority = false;
const bool internalEngine = false;
const bool rootDevice = false;
};
} // namespace NEO

View File

@@ -14,16 +14,21 @@
namespace NEO {
// Factory for OS contexts (Windows variant).
// When osInterface is non-null, returns a new OsContextWin built on the Wddm obtained
// from osInterface; otherwise returns a new plain OsContext. All remaining arguments
// are forwarded unchanged to the chosen constructor. The object is allocated with `new`.
OsContext *OsContext::create(OSInterface *osInterface, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority) {
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice) {
if (osInterface) {
return new OsContextWin(*osInterface->get()->getWddm(), contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
return new OsContextWin(*osInterface->get()->getWddm(), contextId, deviceBitfield, engineType, preemptionMode,
lowPriority, internalEngine, rootDevice);
}
return new OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
return new OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority, internalEngine, rootDevice);
}
OsContextWin::OsContextWin(Wddm &wddm, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority)
: OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority), wddm(wddm), residencyController(wddm, contextId) {
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice)
: OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority, internalEngine, rootDevice),
wddm(wddm),
residencyController(wddm, contextId) {
auto wddmInterface = wddm.getWddmInterface();
if (!wddm.createContext(*this)) {

View File

@@ -26,7 +26,8 @@ class OsContextWin : public OsContext {
~OsContextWin() override;
OsContextWin(Wddm &wddm, uint32_t contextId, DeviceBitfield deviceBitfield,
aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority);
aub_stream::EngineType engineType, PreemptionMode preemptionMode,
bool lowPriority, bool internalEngine, bool rootDevice);
D3DKMT_HANDLE getWddmContextHandle() const { return wddmContextHandle; }
void setWddmContextHandle(D3DKMT_HANDLE wddmContextHandle) { this->wddmContextHandle = wddmContextHandle; }