/*
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/direct_submission/direct_submission_hw.h"
#include "shared/source/direct_submission/direct_submission_hw_diagnostic_mode.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/utilities/cpu_info.h"
#include "shared/source/utilities/cpuintrinsics.h"
#include "create_direct_submission_hw.inl"
#include <cinttypes>
#include <cstring>

namespace NEO {
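//Direct submission keeps a ring buffer running on the GPU: the ring ends in a semaphore wait
//on QueueWorkCount, and the CPU releases new work by writing the next work count to the
//semaphore page (see dispatchCommandBuffer below).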
template <typename GfxFamily, typename Dispatcher>
DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmissionInputParams &inputParams)
: osContext(inputParams.osContext), rootDeviceIndex(inputParams.rootDeviceIndex) {
memoryManager = inputParams.memoryManager;
globalFenceAllocation = inputParams.globalFenceAllocation;
hwInfo = inputParams.rootDeviceEnvironment.getHardwareInfo();
memoryOperationHandler = inputParams.rootDeviceEnvironment.memoryOperationsInterface.get();
auto hwInfoConfig = HwInfoConfig::get(hwInfo->platform.eProductFamily);
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
disableMonitorFence = UllsDefaults::defaultDisableMonitorFence;
if (DebugManager.flags.DirectSubmissionDisableCacheFlush.get() != -1) {
disableCacheFlush = !!DebugManager.flags.DirectSubmissionDisableCacheFlush.get();
}
miMemFenceRequired = hwInfoConfig->isGlobalFenceInCommandStreamRequired(*hwInfo);
if (DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 0) {
miMemFenceRequired = false;
}
if (DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() != -1) {
sfenceMode = static_cast<DirectSubmissionSfenceMode>(DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get());
}
int32_t disableCacheFlushKey = DebugManager.flags.DirectSubmissionDisableCpuCacheFlush.get();
if (disableCacheFlushKey != -1) {
disableCpuCacheFlush = (disableCacheFlushKey == 1);
}
UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush);
createDiagnostic();
setPostSyncOffset();
}
template <typename GfxFamily, typename Dispatcher>
DirectSubmissionHw<GfxFamily, Dispatcher>::~DirectSubmissionHw() = default;
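//Allocates both ring buffers and the semaphore page, zero-initializes them, binds the ring
//command stream to the first ring buffer and makes all allocations resident before creating
//the OS-specific resources. The semaphore page also hosts the diagnostic-mode counter and the
//scratch space used by the cache flush (gpuVaForMiFlush).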
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
DirectSubmissionAllocations allocations;
bool isMultiOsContextCapable = osContext.getNumSupportedDevices() > 1u;
constexpr size_t minimumRequiredSize = 256 * MemoryConstants::kiloByte;
constexpr size_t additionalAllocationSize = MemoryConstants::pageSize;
const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, MemoryConstants::pageSize64k);
const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex,
true, allocationSize,
AllocationType::RING_BUFFER,
isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
ringBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
UNRECOVERABLE_IF(ringBuffer == nullptr);
allocations.push_back(ringBuffer);
ringBuffer2 = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
UNRECOVERABLE_IF(ringBuffer2 == nullptr);
allocations.push_back(ringBuffer2);
const AllocationProperties semaphoreAllocationProperties{rootDeviceIndex,
true, MemoryConstants::pageSize,
AllocationType::SEMAPHORE_BUFFER,
isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
semaphores = memoryManager->allocateGraphicsMemoryWithProperties(semaphoreAllocationProperties);
UNRECOVERABLE_IF(semaphores == nullptr);
allocations.push_back(semaphores);
if (this->workPartitionAllocation != nullptr) {
allocations.push_back(workPartitionAllocation);
}
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer->getGpuAddress(),
ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBuffer(),
ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBufferSize());
printf("Ring buffer 2 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer2->getGpuAddress(),
ptrOffset(ringBuffer2->getGpuAddress(), ringBuffer2->getUnderlyingBufferSize()),
ringBuffer2->getUnderlyingBuffer(),
ptrOffset(ringBuffer2->getUnderlyingBuffer(), ringBuffer2->getUnderlyingBufferSize()),
ringBuffer2->getUnderlyingBufferSize());
}
handleResidency();
ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
ringCommandStream.replaceGraphicsAllocation(ringBuffer);
memset(ringBuffer->getUnderlyingBuffer(), 0, allocationSize);
memset(ringBuffer2->getUnderlyingBuffer(), 0, allocationSize);
semaphorePtr = semaphores->getUnderlyingBuffer();
semaphoreGpuVa = semaphores->getGpuAddress();
semaphoreData = static_cast<volatile RingSemaphoreData *>(semaphorePtr);
memset(semaphorePtr, 0, sizeof(RingSemaphoreData));
semaphoreData->QueueWorkCount = 0;
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
workloadModeOneStoreAddress = static_cast<volatile void *>(&semaphoreData->DiagnosticModeCounter);
*static_cast<volatile uint32_t *>(workloadModeOneStoreAddress) = 0u;
this->gpuVaForMiFlush = this->semaphoreGpuVa + offsetof(RingSemaphoreData, miFlushSpace);
auto ret = makeResourcesResident(allocations);
return ret && allocateOsResources();
}
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::makeResourcesResident(DirectSubmissionAllocations &allocations) {
auto ret = memoryOperationHandler->makeResidentWithinOsContext(&this->osContext, ArrayRef<GraphicsAllocation *>(allocations), false) == MemoryOperationsStatus::SUCCESS;
return ret;
}
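//Flushes the CPU cachelines covering [ptr, ptr + size): the range is aligned down/up to
//cacheline boundaries and flushed with CLFLUSH, unless DirectSubmissionDisableCpuCacheFlush
//turned the flush off.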
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::cpuCachelineFlush(void *ptr, size_t size) {
if (disableCpuCacheFlush) {
return;
}
constexpr size_t cachelineBit = 6;
static_assert(MemoryConstants::cacheLineSize == 1 << cachelineBit, "cachelineBit has invalid value");
char *flushPtr = reinterpret_cast<char *>(ptr);
char *flushEndPtr = reinterpret_cast<char *>(ptr) + size;
flushPtr = alignDown(flushPtr, MemoryConstants::cacheLineSize);
flushEndPtr = alignUp(flushEndPtr, MemoryConstants::cacheLineSize);
size_t cachelines = (flushEndPtr - flushPtr) >> cachelineBit;
for (size_t i = 0; i < cachelines; i++) {
CpuIntrinsics::clFlush(flushPtr);
flushPtr += MemoryConstants::cacheLineSize;
}
}
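//Allocates resources and, when submitOnInit is set, dispatches the initial ring content
//(preemption, optional partition registers and system memory fence address, optional
//diagnostic section, semaphore wait) and starts the ring.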
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bool useNotify) {
useNotifyForPostSync = useNotify;
bool ret = allocateResources();
initDiagnostic(submitOnInit);
if (ret && submitOnInit) {
size_t startBufferSize = Dispatcher::getSizePreemption() +
getSizeSemaphoreSection();
Dispatcher::dispatchPreemption(ringCommandStream);
if (this->partitionedMode) {
startBufferSize += getSizePartitionRegisterConfigurationSection();
dispatchPartitionRegisterConfiguration();
this->partitionConfigSet = true;
}
if (this->miMemFenceRequired) {
startBufferSize += getSizeSystemMemoryFenceAddress();
dispatchSystemMemoryFenceAddress();
this->systemMemoryFenceAddressSet = true;
}
if (workloadMode == 1) {
dispatchDiagnosticModeSection();
startBufferSize += getDiagnosticModeSection();
}
dispatchSemaphoreSection(currentQueueWorkCount);
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
performDiagnosticMode();
return ringStart;
}
return ret;
}
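//Starts a ring that is not running yet: emits any one-time sections skipped at initialize()
//(partition registers, system memory fence address), a semaphore wait for the incremented
//work count, and submits from the current ring position.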
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
if (ringStart) {
return true;
}
size_t startSize = getSizeSemaphoreSection();
if (!this->partitionConfigSet) {
startSize += getSizePartitionRegisterConfigurationSection();
}
if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
startSize += getSizeSystemMemoryFenceAddress();
}
size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd();
if (ringCommandStream.getAvailableSpace() < requiredSize) {
switchRingBuffers();
}
uint64_t gpuStartVa = getCommandBufferPositionGpuAddress(ringCommandStream.getSpace(0));
if (!this->partitionConfigSet) {
dispatchPartitionRegisterConfiguration();
this->partitionConfigSet = true;
}
if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
dispatchSystemMemoryFenceAddress();
this->systemMemoryFenceAddressSet = true;
}
currentQueueWorkCount++;
dispatchSemaphoreSection(currentQueueWorkCount);
ringStart = submit(gpuStartVa, startSize);
return ringStart;
}
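//Stops the ring: cache flush, a final monitor fence when per-workload fences are disabled,
//the stop command padded to the size of a start command, then QueueWorkCount is bumped so
//the GPU can reach the stop.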
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
if (!ringStart) {
return true;
}
void *flushPtr = ringCommandStream.getSpace(0);
Dispatcher::dispatchCacheFlush(ringCommandStream, *hwInfo, gpuVaForMiFlush);
if (disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, this->useNotifyForPostSync, this->partitionedMode);
}
Dispatcher::dispatchStopCommandBuffer(ringCommandStream);
auto bytesToPad = Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer();
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
cpuCachelineFlush(flushPtr, getSizeEnd());
semaphoreData->QueueWorkCount = currentQueueWorkCount;
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
this->handleStopRingBuffer();
this->ringStart = false;
return true;
}
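//Emits the semaphore wait that stalls the ring until QueueWorkCount reaches the given value,
//bracketed by prefetcher disable/enable and, when required, an additional memory fence
//synchronization.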
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value) {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
dispatchDisablePrefetcher(true);
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
semaphoreGpuVa,
value,
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
if (miMemFenceRequired) {
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(ringCommandStream, 0, true, *hwInfo);
}
dispatchPrefetchMitigation();
dispatchDisablePrefetcher(false);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection() {
size_t semaphoreSize = EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
semaphoreSize += getSizePrefetchMitigation();
semaphoreSize += 2 * getSizeDisablePrefetcher();
if (miMemFenceRequired) {
semaphoreSize += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronization(*hwInfo);
}
return semaphoreSize;
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStartSection(uint64_t gpuStartAddress) {
Dispatcher::dispatchStartCommandBuffer(ringCommandStream, gpuStartAddress);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeStartSection() {
return Dispatcher::getSizeStartCommandBuffer();
}
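//Tail of the current ring buffer: optional monitor fence (when per-workload fences are
//disabled) followed by a jump to the next ring buffer.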
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress) {
if (disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, this->useNotifyForPostSync, this->partitionedMode);
}
Dispatcher::dispatchStartCommandBuffer(ringCommandStream, nextBufferGpuAddress);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSwitchRingBufferSection() {
size_t size = Dispatcher::getSizeStartCommandBuffer();
if (disableMonitorFence) {
size += Dispatcher::getSizeMonitorFence(*hwInfo);
}
return size;
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
size_t size = Dispatcher::getSizeStopCommandBuffer() +
Dispatcher::getSizeCacheFlush(*hwInfo) +
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
MemoryConstants::cacheLineSize;
if (disableMonitorFence) {
size += Dispatcher::getSizeMonitorFence(*hwInfo);
}
return size;
}
template <typename GfxFamily, typename Dispatcher>
inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::getCommandBufferPositionGpuAddress(void *position) {
void *currentBase = ringCommandStream.getCpuBase();
size_t offset = ptrDiff(position, currentBase);
return ringCommandStream.getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(offset);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch() {
size_t size = getSizeSemaphoreSection();
if (workloadMode == 0) {
size += getSizeStartSection();
} else if (workloadMode == 1) {
size += getDiagnosticModeSection();
}
//mode 2 does not dispatch any commands
if (!disableCacheFlush) {
size += Dispatcher::getSizeCacheFlush(*hwInfo);
}
if (!disableMonitorFence) {
size += Dispatcher::getSizeMonitorFence(*hwInfo);
}
size += getSizeNewResourceHandler();
return size;
}
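//Dispatches one workload: in mode 0 jumps into the client command buffer and patches its
//return address back into the ring, in mode 1 emits the diagnostic store, mode 2 emits
//nothing; afterwards optional cache flush, optional monitor fence and the semaphore wait for
//the next work count.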
template <typename GfxFamily, typename Dispatcher>
void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBuffer &batchBuffer) {
void *currentPosition = ringCommandStream.getSpace(0);
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Client buffer:\n");
printf("Command buffer allocation - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
batchBuffer.commandBufferAllocation->getGpuAddress(),
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.commandBufferAllocation->getUnderlyingBufferSize()),
batchBuffer.commandBufferAllocation->getUnderlyingBuffer(),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.commandBufferAllocation->getUnderlyingBufferSize()),
batchBuffer.commandBufferAllocation->getUnderlyingBufferSize());
printf("Command buffer - start gpu address: %" PRIx64 " - %" PRIx64 ", start cpu address: %p - %p, start offset: %zu, used size: %zu \n",
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset),
ptrOffset(ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset), batchBuffer.usedSize),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset),
ptrOffset(ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset), batchBuffer.usedSize),
batchBuffer.startOffset,
batchBuffer.usedSize);
}
if (workloadMode == 0) {
auto commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
void *returnCmd = batchBuffer.endCmdPtr;
dispatchStartSection(commandStreamAddress);
void *returnPosition = ringCommandStream.getSpace(0);
setReturnAddress(returnCmd, getCommandBufferPositionGpuAddress(returnPosition));
} else if (workloadMode == 1) {
DirectSubmissionDiagnostics::diagnosticModeOneDispatch(diagnostic.get());
dispatchDiagnosticModeSection();
}
//mode 2 does not dispatch any commands
if (!disableCacheFlush) {
Dispatcher::dispatchCacheFlush(ringCommandStream, *hwInfo, gpuVaForMiFlush);
}
if (!disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, this->useNotifyForPostSync, this->partitionedMode);
}
dispatchSemaphoreSection(currentQueueWorkCount + 1);
return currentPosition;
}
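//Main submission path: makes sure the ring is started, switches ring buffers when space runs
//low, dispatches the workload section, flushes the touched CPU cachelines and finally bumps
//QueueWorkCount (with sfence around it, depending on sfenceMode) to release the GPU.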
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) {
//for now workloads requiring cache coherency are not supported
UNRECOVERABLE_IF(batchBuffer.requiresCoherency);
this->startRingBuffer();
size_t dispatchSize = getSizeDispatch();
size_t cycleSize = getSizeSwitchRingBufferSection();
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd();
bool buffersSwitched = false;
getCommandBufferPositionGpuAddress(ringCommandStream.getSpace(0));
if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) {
switchRingBuffers();
buffersSwitched = true;
}
handleNewResourcesSubmission();
2020-03-18 21:14:04 +08:00
void *currentPosition = dispatchWorkloadSection(batchBuffer);
2020-01-16 00:02:47 +08:00
2021-07-30 17:56:58 +08:00
cpuCachelineFlush(currentPosition, dispatchSize);
handleResidency();
2020-01-16 00:02:47 +08:00
2022-03-26 02:13:00 +08:00
if (DebugManager.flags.DirectSubmissionReadBackCommandBuffer.get() == 1) {
volatile auto cmdBufferStart = reinterpret_cast<uint32_t *>(batchBuffer.commandBufferAllocation->getUnderlyingBuffer());
reserved = *cmdBufferStart;
}
if (DebugManager.flags.DirectSubmissionReadBackRingBuffer.get() == 1) {
volatile auto ringBufferStart = reinterpret_cast<uint32_t *>(ringCommandStream.getSpace(0));
reserved = *ringBufferStart;
}
if (sfenceMode >= DirectSubmissionSfenceMode::BeforeSemaphoreOnly) {
CpuIntrinsics::sfence();
}
//unblock GPU
semaphoreData->QueueWorkCount = currentQueueWorkCount;
if (sfenceMode == DirectSubmissionSfenceMode::BeforeAndAfterSemaphore) {
CpuIntrinsics::sfence();
}
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
currentQueueWorkCount++;
DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get());
uint64_t flushValue = updateTagValue();
flushStamp.setStamp(flushValue);
return ringStart;
}
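//Overwrites the client's trailing command (endCmdPtr) with an MI_BATCH_BUFFER_START that
//returns execution to the given ring buffer position.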
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::setReturnAddress(void *returnCmd, uint64_t returnAddress) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
MI_BATCH_BUFFER_START cmd = GfxFamily::cmdInitBatchBufferStart;
cmd.setBatchBufferStartAddress(returnAddress);
cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
MI_BATCH_BUFFER_START *returnBBStart = static_cast<MI_BATCH_BUFFER_START *>(returnCmd);
*returnBBStart = cmd;
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::handleNewResourcesSubmission() {
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeNewResourceHandler() {
return 0u;
}
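//Switches to the other ring buffer allocation: while the ring is running, the switch section
//(optional monitor fence plus jump) is emitted into the old buffer; the ring command stream
//is then rebound to the new allocation and the GPU VA of the switch point in the old buffer
//is returned.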
template <typename GfxFamily, typename Dispatcher>
inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers() {
GraphicsAllocation *nextRingBuffer = switchRingBuffersAllocations();
void *flushPtr = ringCommandStream.getSpace(0);
uint64_t currentBufferGpuVa = getCommandBufferPositionGpuAddress(flushPtr);
if (ringStart) {
dispatchSwitchRingBufferSection(nextRingBuffer->getGpuAddress());
cpuCachelineFlush(flushPtr, getSizeSwitchRingBufferSection());
}
ringCommandStream.replaceBuffer(nextRingBuffer->getUnderlyingBuffer(), ringCommandStream.getMaxAvailableSpace());
ringCommandStream.replaceGraphicsAllocation(nextRingBuffer);
handleSwitchRingBuffers();
return currentBufferGpuVa;
}
template <typename GfxFamily, typename Dispatcher>
inline GraphicsAllocation *DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffersAllocations() {
GraphicsAllocation *nextAllocation = nullptr;
if (currentRingBuffer == RingBufferUse::FirstBuffer) {
nextAllocation = ringBuffer2;
currentRingBuffer = RingBufferUse::SecondBuffer;
} else {
nextAllocation = ringBuffer;
currentRingBuffer = RingBufferUse::FirstBuffer;
}
return nextAllocation;
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::deallocateResources() {
if (ringBuffer) {
memoryManager->freeGraphicsMemory(ringBuffer);
ringBuffer = nullptr;
}
if (ringBuffer2) {
memoryManager->freeGraphicsMemory(ringBuffer2);
ringBuffer2 = nullptr;
}
if (semaphores) {
memoryManager->freeGraphicsMemory(semaphores);
semaphores = nullptr;
}
}
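//Diagnostic mode, enabled via DirectSubmissionEnableDebugBuffer when
//directSubmissionDiagnosticAvailable is set: createDiagnostic() builds the collector from the
//debug flags, initDiagnostic() forces a submit on init, and performDiagnosticMode() submits
//empty batch buffers getExecutionsCount() times before restoring the ULLS defaults.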
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::createDiagnostic() {
if (directSubmissionDiagnosticAvailable) {
workloadMode = DebugManager.flags.DirectSubmissionEnableDebugBuffer.get();
if (workloadMode > 0) {
disableCacheFlush = DebugManager.flags.DirectSubmissionDisableCacheFlush.get();
disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
uint32_t executions = static_cast<uint32_t>(DebugManager.flags.DirectSubmissionDiagnosticExecutionCount.get());
diagnostic = std::make_unique<DirectSubmissionDiagnosticsCollector>(
executions,
workloadMode == 1,
DebugManager.flags.DirectSubmissionBufferPlacement.get(),
DebugManager.flags.DirectSubmissionSemaphorePlacement.get(),
workloadMode,
disableCacheFlush,
disableMonitorFence);
}
}
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::initDiagnostic(bool &submitOnInit) {
if (directSubmissionDiagnosticAvailable) {
if (diagnostic.get()) {
submitOnInit = true;
diagnostic->diagnosticModeAllocation();
}
}
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::performDiagnosticMode() {
if (directSubmissionDiagnosticAvailable) {
if (diagnostic.get()) {
diagnostic->diagnosticModeDiagnostic();
if (workloadMode == 1) {
diagnostic->diagnosticModeOneWait(workloadModeOneStoreAddress, workloadModeOneExpectedValue);
}
BatchBuffer dummyBuffer = {};
FlushStampTracker dummyTracker(true);
for (uint32_t execution = 0; execution < diagnostic->getExecutionsCount(); execution++) {
dispatchCommandBuffer(dummyBuffer, dummyTracker);
if (workloadMode == 1) {
2020-05-19 00:48:43 +08:00
diagnostic->diagnosticModeOneWaitCollect(execution, workloadModeOneStoreAddress, workloadModeOneExpectedValue);
2020-03-27 23:32:07 +08:00
}
}
workloadMode = 0;
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
disableMonitorFence = UllsDefaults::defaultDisableMonitorFence;
diagnostic.reset(nullptr);
}
}
}
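//Workload mode 1 stores an incrementing expected value into the semaphore page so the
//diagnostic collector can poll workloadModeOneStoreAddress after each dispatch.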
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchDiagnosticModeSection() {
workloadModeOneExpectedValue++;
uint64_t storeAddress = semaphoreGpuVa;
storeAddress += ptrDiff(workloadModeOneStoreAddress, semaphorePtr);
Dispatcher::dispatchStoreDwordCommand(ringCommandStream, storeAddress, workloadModeOneExpectedValue);
}
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getDiagnosticModeSection() {
return Dispatcher::getSizeStoreDwordCommand();
}
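//Programs the system memory fence address (required when miMemFenceRequired is set); emitted
//once, guarded by systemMemoryFenceAddressSet in the callers.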
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSystemMemoryFenceAddress() {
UNRECOVERABLE_IF(!this->globalFenceAllocation);
EncodeMemoryFence<GfxFamily>::encodeSystemMemoryFence(ringCommandStream, this->globalFenceAllocation);
}
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSystemMemoryFenceAddress() {
return EncodeMemoryFence<GfxFamily>::getSystemMemoryFenceSize();
}
} // namespace NEO