Move generic command stream receiver files to shared

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2021-09-22 22:03:07 +00:00
committed by Compute-Runtime-Automation
parent 479035c839
commit f8867e0b97
98 changed files with 246 additions and 159 deletions

View File

@@ -8,6 +8,9 @@ set(NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.h
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_status.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.h
@@ -15,6 +18,14 @@ set(NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tgllp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.inl
${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.h
${CMAKE_CURRENT_SOURCE_DIR}/csr_definitions.h
${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.h
@@ -50,10 +61,13 @@ set(NEO_CORE_COMMAND_STREAM
# Sources appended to NEO_CORE_COMMAND_STREAM only when SUPPORT_XEHP_AND_LATER is set.
if(SUPPORT_XEHP_AND_LATER)
list(APPEND NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_xehp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_xehp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_xehp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/preemption_xehp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_and_later.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_and_later.h
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_xehp_and_later.inl
)
endif()

View File

@@ -0,0 +1,123 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/aub/aub_center.h"
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/memory_manager/page_table.h"
#include "shared/source/memory_manager/physical_address_allocator.h"
#include "shared/source/utilities/spinlock.h"
#include "aub_mapper.h"
namespace NEO {
class AubSubCaptureManager;
// Command stream receiver that records submissions into an AUB capture for a
// given GfxFamily. It extends CommandStreamReceiverSimulatedHw<GfxFamily> and
// reports CommandStreamReceiverType::CSR_AUB from getType().
// The class is non-copyable (copy constructor and copy assignment are deleted).
template <typename GfxFamily>
class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFamily> {
protected:
typedef CommandStreamReceiverSimulatedHw<GfxFamily> BaseClass;
// Family-specific AUB helper type selected through AUBFamilyMapper.
using AUB = typename AUBFamilyMapper<GfxFamily>::AUB;
// Allocations registered through makeResidentExternal().
using ExternalAllocationsContainer = std::vector<AllocationView>;
using BaseClass::getParametersForWriteMemory;
using BaseClass::osContext;
public:
// Members of the dependent base class re-exported so they can be used
// unqualified inside this template.
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::aubManager;
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::hardwareContextController;
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::engineInfo;
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::stream;
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
// Registers / unregisters an allocation view in externalAllocations.
void makeResidentExternal(AllocationView &allocationView);
void makeNonResidentExternal(uint64_t gpuAddress);
// Returns the inherited `stream` pointer downcast to AubMemDump::AubFileStream.
AubMemDump::AubFileStream *getAubStream() const {
return static_cast<AubMemDump::AubFileStream *>(this->stream);
}
void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override;
bool writeMemory(GraphicsAllocation &gfxAllocation) override;
MOCKABLE_VIRTUAL bool writeMemory(AllocationView &allocationView);
void writeMMIO(uint32_t offset, uint32_t value) override;
void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue);
bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override;
AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override;
void addAubComment(const char *message) override;
// Family specific version
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
void pollForCompletion() override;
void pollForCompletionImpl() override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
uint32_t getDumpHandle();
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
void dumpAllocation(GraphicsAllocation &gfxAllocation) override;
// Factory returning a raw pointer to a newly created receiver.
static CommandStreamReceiver *create(const std::string &fileName,
bool standalone,
ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex,
const DeviceBitfield deviceBitfield);
AUBCommandStreamReceiverHw(const std::string &fileName,
bool standalone,
ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex,
const DeviceBitfield deviceBitfield);
~AUBCommandStreamReceiverHw() override;
AUBCommandStreamReceiverHw(const AUBCommandStreamReceiverHw &) = delete;
AUBCommandStreamReceiverHw &operator=(const AUBCommandStreamReceiverHw &) = delete;
// Capture file management.
MOCKABLE_VIRTUAL void openFile(const std::string &fileName);
MOCKABLE_VIRTUAL bool reopenFile(const std::string &fileName);
MOCKABLE_VIRTUAL void initFile(const std::string &fileName);
MOCKABLE_VIRTUAL void closeFile();
MOCKABLE_VIRTUAL bool isFileOpen() const;
MOCKABLE_VIRTUAL const std::string getFileName();
MOCKABLE_VIRTUAL void initializeEngine();
std::unique_ptr<AubSubCaptureManager> subCaptureManager;
// Device id for the capture; no in-class initializer, so it must be set by the constructor.
uint32_t aubDeviceId;
bool standalone;
// Per-process page table: PML4 when is64bit is true, PDPE otherwise.
std::unique_ptr<std::conditional<is64bit, PML4, PDPE>::type> ppgtt;
// Global page table.
std::unique_ptr<PDPE> ggtt;
// remap CPU VA -> GGTT VA
// Raw, non-initialized pointer here; ownership is not visible from this declaration.
AddressMapper *gttRemap;
MOCKABLE_VIRTUAL bool addPatchInfoComments();
void addGUCStartMessage(uint64_t batchBufferAddress);
uint32_t getGUCWorkQueueItemHeader();
CommandStreamReceiverType getType() override {
return CommandStreamReceiverType::CSR_AUB;
}
int getAddressSpaceFromPTEBits(uint64_t entryBits) const;
protected:
// Single value used both as mask and as expected value when polling for completion.
constexpr static uint32_t getMaskAndValueForPollForCompletion();
bool dumpAubNonWritable = false;
ExternalAllocationsContainer externalAllocations;
// Task count for which pollForCompletion() was last issued.
uint32_t pollForCompletionTaskCount = 0u;
SpinLock pollForCompletionLock;
};
} // namespace NEO

View File

@@ -0,0 +1,852 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub/aub_helper.h"
#include "shared/source/aub/aub_stream_provider.h"
#include "shared/source/aub/aub_subcapture.h"
#include "shared/source/aub_mem_dump/aub_alloc_dump.h"
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/hardware_context_controller.h"
#include "shared/source/helpers/hash.h"
#include "shared/source/helpers/neo_driver_version.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "third_party/aub_stream/headers/aub_manager.h"
#include "third_party/aub_stream/headers/aubstream.h"
#include <algorithm>
#include <cstring>
namespace NEO {
// Constructs the receiver and wires it to the AubCenter of the selected root
// device: sub-capture manager, AUB manager, page tables, address mapper and
// the AUB stream. Any missing AubCenter component is treated as unrecoverable.
// Dispatch mode defaults to BatchedDispatch and the AUB device id defaults to
// the hardware info value; both can be overridden through debug flags.
template <typename GfxFamily>
AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const std::string &fileName,
                                                                  bool standalone,
                                                                  ExecutionEnvironment &executionEnvironment,
                                                                  uint32_t rootDeviceIndex,
                                                                  const DeviceBitfield deviceBitfield)
    : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield),
      standalone(standalone) {
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
    rootDeviceEnvironment.initAubCenter(this->isLocalMemoryEnabled(), fileName, this->getType());

    auto center = rootDeviceEnvironment.aubCenter.get();
    UNRECOVERABLE_IF(nullptr == center);

    auto captureCommon = center->getSubCaptureCommon();
    UNRECOVERABLE_IF(nullptr == captureCommon);
    subCaptureManager = std::make_unique<AubSubCaptureManager>(fileName, *captureCommon, ApiSpecificConfig::getRegistryPath());

    aubManager = center->getAubManager();

    // The physical address allocator lives in the AubCenter; create it on first use.
    if (!center->getPhysicalAddressAllocator()) {
        center->initPhysicalAddressAllocator(this->createPhysicalAddressAllocator(&this->peekHwInfo()));
    }
    auto addressAllocator = center->getPhysicalAddressAllocator();
    UNRECOVERABLE_IF(nullptr == addressAllocator);

    ppgtt = std::make_unique<std::conditional<is64bit, PML4, PDPE>::type>(addressAllocator);
    ggtt = std::make_unique<PDPE>(addressAllocator);

    gttRemap = center->getAddressMapper();
    UNRECOVERABLE_IF(nullptr == gttRemap);

    auto provider = center->getStreamProvider();
    UNRECOVERABLE_IF(nullptr == provider);
    stream = provider->getStream();
    UNRECOVERABLE_IF(nullptr == stream);

    // A non-zero CsrDispatchMode debug flag replaces the batched default.
    this->dispatchMode = DispatchMode::BatchedDispatch;
    if (DebugManager.flags.CsrDispatchMode.get()) {
        this->dispatchMode = static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get());
    }

    // -1 means "no override": take the device id from the hardware info.
    const auto deviceIdOverride = DebugManager.flags.OverrideAubDeviceId.get();
    if (deviceIdOverride == -1) {
        this->aubDeviceId = this->peekHwInfo().capabilityTable.aubDeviceId;
    } else {
        this->aubDeviceId = static_cast<uint32_t>(deviceIdOverride);
    }

    this->defaultSshSize = 64 * KB;
}
// Polls for completion when an OS context is attached, then releases the
// engine info that was registered with the address mapper.
template <typename GfxFamily>
AUBCommandStreamReceiverHw<GfxFamily>::~AUBCommandStreamReceiverHw() {
    const bool hasOsContext = osContext ? true : false;
    if (hasOsContext) {
        pollForCompletion();
    }

    auto &addressMapper = *gttRemap;
    this->freeEngineInfo(addressMapper);
}
// Initializes the capture file named `fileName` while holding the AUB stream lock.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::openFile(const std::string &fileName) {
    auto streamGuard = getAubStream()->lockStream();

    initFile(fileName);
}
// Switches the capture to `fileName` under the AUB stream lock.
// An open capture with a different name is closed and its engine info freed.
// Returns true when a file was (re)initialized by this call, false when the
// capture was already open under the requested name.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::reopenFile(const std::string &fileName) {
    auto streamGuard = getAubStream()->lockStream();

    if (isFileOpen() && fileName != getFileName()) {
        closeFile();
        this->freeEngineInfo(*gttRemap);
    }

    if (isFileOpen()) {
        return false;
    }

    initFile(fileName);
    return true;
}
// Opens the capture file if it is not open yet.
// With an AUB manager the file is opened through it and a "driver version"
// comment is added. Otherwise the legacy stream is opened and its file header
// is written. Failure to open is unrecoverable in both paths.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::initFile(const std::string &fileName) {
    if (aubManager) {
        if (aubManager->isOpen()) {
            return;
        }

        aubManager->open(fileName);
        UNRECOVERABLE_IF(!aubManager->isOpen());

        std::ostringstream versionComment;
        versionComment << "driver version: " << driverVersion;
        aubManager->addComment(versionComment.str().c_str());
        return;
    }

    if (getAubStream()->isOpen()) {
        return;
    }

    // Open our file
    stream->open(fileName.c_str());

    // Hitting this most probably means the AUB tests are not being executed from
    // the correct current directory (the one containing the aub_out folder);
    // try adding <familycodename>_aub.
    UNRECOVERABLE_IF(!getAubStream()->isOpen());

    // Add the file header
    auto &hardwareInfo = this->peekHwInfo();
    const auto &productConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily);
    const auto stepping = productConfig.getAubStreamSteppingFromHwRevId(hardwareInfo);
    stream->init(stepping, aubDeviceId);
}
// Closes the capture through the AUB manager when present, otherwise through the stream.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::closeFile() {
    if (aubManager) {
        aubManager->close();
    } else {
        stream->close();
    }
}
// Reports whether the capture is open, asking the AUB manager when present
// and the AUB file stream otherwise.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::isFileOpen() const {
    if (aubManager) {
        return aubManager->isOpen();
    }
    return getAubStream()->isOpen();
}
// Returns the capture file name from the AUB manager when present, otherwise
// from the AUB file stream.
template <typename GfxFamily>
const std::string AUBCommandStreamReceiverHw<GfxFamily>::getFileName() {
    if (aubManager) {
        return aubManager->getFileName();
    }
    return getAubStream()->getFileName();
}
// Prepares the engine for submissions, holding the AUB stream lock throughout.
// With a hardware context controller the work is delegated to its initialize().
// Otherwise, unless engineInfo.pLRCA is already set, the legacy path programs
// the MMIO registers and allocates, GGTT-maps and records in the stream the
// global HW status page, the ring buffer and the LRCA, then adds a context token.
// NOTE(review): the alignedMalloc results are used without a null check.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine() {
auto streamLocked = getAubStream()->lockStream();
if (hardwareContextController) {
hardwareContextController->initialize();
return;
}
auto csTraits = this->getCsTraits(osContext->getEngineType());
// A non-null LRCA means this engine has already been initialized.
if (engineInfo.pLRCA) {
return;
}
this->initGlobalMMIO();
this->initEngineMMIO();
this->initAdditionalMMIO();
// Write driver version
{
std::ostringstream str;
str << "driver version: " << driverVersion;
getAubStream()->addComment(str.str().c_str());
}
// Global HW Status Page
{
const size_t sizeHWSP = 0x1000;
const size_t alignHWSP = 0x1000;
engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP);
// CPU pointer -> GGTT address -> physical address.
engineInfo.ggttHWSP = gttRemap->map(engineInfo.pGlobalHWStatusPage, sizeHWSP);
auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt());
// Write our GHWSP
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttHWSP;
getAubStream()->addComment(str.str().c_str());
}
AubGTTData data = {0};
this->getGTTData(reinterpret_cast<void *>(physHWSP), data);
AUB::reserveAddressGGTT(*stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data);
// Writes the GGTT address of the status page to the engine register at offset 0x2080.
stream->writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP);
}
// Allocate the LRCA
const size_t sizeLRCA = csTraits.sizeLRCA;
const size_t alignLRCA = csTraits.alignLRCA;
auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA);
engineInfo.pLRCA = pLRCABase;
// Initialize the LRCA to a known state
csTraits.initialize(pLRCABase);
// Reserve the ring buffer
engineInfo.sizeRingBuffer = 0x4 * 0x1000;
{
const size_t alignRingBuffer = 0x1000;
engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRingBuffer);
engineInfo.ggttRingBuffer = gttRemap->map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer);
auto physRingBuffer = ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, this->getGTTBits(), this->getMemoryBankForGtt());
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttRingBuffer;
getAubStream()->addComment(str.str().c_str());
}
AubGTTData data = {0};
this->getGTTData(reinterpret_cast<void *>(physRingBuffer), data);
AUB::reserveAddressGGTT(*stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer, data);
}
// Initialize the ring MMIO registers
{
// Head and tail both start at zero; the values are stored into the LRCA image.
uint32_t ringHead = 0x000;
uint32_t ringTail = 0x000;
auto ringBase = engineInfo.ggttRingBuffer;
auto ringCtrl = (uint32_t)((engineInfo.sizeRingBuffer - 0x1000) | 1);
csTraits.setRingHead(pLRCABase, ringHead);
csTraits.setRingTail(pLRCABase, ringTail);
csTraits.setRingBase(pLRCABase, ringBase);
csTraits.setRingCtrl(pLRCABase, ringCtrl);
}
// Write our LRCA
{
engineInfo.ggttLRCA = gttRemap->map(engineInfo.pLRCA, sizeLRCA);
auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt());
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA;
getAubStream()->addComment(str.str().c_str());
}
AubGTTData data = {0};
this->getGTTData(reinterpret_cast<void *>(lrcAddressPhys), data);
AUB::reserveAddressGGTT(*stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data);
// Record the initialized LRCA image in the stream.
AUB::addMemoryWrite(
*stream,
lrcAddressPhys,
pLRCABase,
sizeLRCA,
this->getAddressSpace(csTraits.aubHintLRCA),
csTraits.aubHintLRCA);
}
// Create a context to facilitate AUB dumping of memory using PPGTT
addContextToken(getDumpHandle());
DEBUG_BREAK_IF(!engineInfo.pLRCA);
}
// Factory: builds a receiver and, unless sub-capture mode is active, opens the
// capture file immediately. Ownership of the returned pointer passes to the caller.
template <typename GfxFamily>
CommandStreamReceiver *AUBCommandStreamReceiverHw<GfxFamily>::create(const std::string &fileName,
                                                                     bool standalone,
                                                                     ExecutionEnvironment &executionEnvironment,
                                                                     uint32_t rootDeviceIndex,
                                                                     const DeviceBitfield deviceBitfield) {
    auto receiver = std::make_unique<AUBCommandStreamReceiverHw<GfxFamily>>(fileName, standalone, executionEnvironment, rootDeviceIndex, deviceBitfield);

    const bool subCaptureMode = receiver->subCaptureManager->isSubCaptureMode();
    if (!subCaptureMode) {
        receiver->openFile(fileName);
    }

    return receiver.release();
}
// Records one batch buffer submission in the AUB capture.
// Returns true on every path. When sub-capture mode is active but capture is
// disabled, nothing is written; in standalone mode the tag is still advanced
// to the latest sent task count.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
if (subCaptureManager->isSubCaptureMode()) {
if (!subCaptureManager->isSubCaptureEnabled()) {
if (this->standalone) {
*this->tagAddress = this->peekLatestSentTaskCount();
}
return true;
}
}
initializeEngine();
// Write our batch buffer
auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
auto currentOffset = batchBuffer.usedSize;
DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset);
auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset;
// Holder for an optional flattened copy; the deleter frees it through the
// memory manager when this function returns.
std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>> flatBatchBuffer(
nullptr, [&](GraphicsAllocation *ptr) { this->getMemoryManager()->freeGraphicsMemory(ptr); });
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
flatBatchBuffer.reset(this->flatBatchBufferHelper->flattenBatchBuffer(this->rootDeviceIndex, batchBuffer, sizeBatchBuffer, this->dispatchMode, this->getOsContext().getDeviceBitfield()));
if (flatBatchBuffer.get() != nullptr) {
pBatchBuffer = flatBatchBuffer->getUnderlyingBuffer();
batchBufferGpuAddress = flatBatchBuffer->getGpuAddress();
// NOTE(review): the caller's batchBuffer now points at the flattened copy,
// which is freed when flatBatchBuffer goes out of scope — confirm callers
// do not use commandBufferAllocation after flush() returns.
batchBuffer.commandBufferAllocation = flatBatchBuffer.get();
}
}
// The command buffer is appended so that processResidency writes it as well;
// it is removed again unless running standalone without flattening.
allocationsForResidency.push_back(batchBuffer.commandBufferAllocation);
processResidency(allocationsForResidency, 0u);
if (!this->standalone || DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
allocationsForResidency.pop_back();
}
submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
if (this->standalone) {
*this->tagAddress = this->peekLatestSentTaskCount();
}
// In sub-capture mode each flush is followed by a poll and capture is switched off again.
if (subCaptureManager->isSubCaptureMode()) {
pollForCompletion();
subCaptureManager->disableSubCapture();
}
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
pollForCompletion();
}
getAubStream()->flush();
return true;
}
// Emits two comments into the AUB stream: a "PatchInfoData" list with one line
// per collected patch entry, and an "AllocationsList" mapping every referenced
// allocation address to the address returned by ppgtt->map().
// The patch info collection is cleared after the first comment is written.
// Returns false as soon as either comment cannot be added, true otherwise.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::addPatchInfoComments() {
    std::map<uint64_t, uint64_t> mappedAllocations;

    std::ostringstream patchInfoText;
    patchInfoText << "PatchInfoData" << std::endl;

    for (auto &entry : this->flatBatchBufferHelper->getPatchInfoCollection()) {
        patchInfoText << std::hex << entry.sourceAllocation << ";"
                      << std::hex << entry.sourceAllocationOffset << ";"
                      << std::hex << entry.sourceType << ";"
                      << std::hex << entry.targetAllocation << ";"
                      << std::hex << entry.targetAllocationOffset << ";"
                      << std::hex << entry.targetType << ";"
                      << std::endl;

        if (entry.sourceAllocation) {
            std::pair<uint64_t, uint64_t> sourceMapping(entry.sourceAllocation,
                                                        ppgtt->map(static_cast<uintptr_t>(entry.sourceAllocation), 1, 0, MemoryBanks::MainBank));
            mappedAllocations.insert(sourceMapping);
        }

        if (entry.targetAllocation) {
            // The target pair uses uintptr_t for the mapped value, unlike the source pair.
            std::pair<uint64_t, uintptr_t> targetMapping(entry.targetAllocation,
                                                         ppgtt->map(static_cast<uintptr_t>(entry.targetAllocation), 1, 0, MemoryBanks::MainBank));
            mappedAllocations.insert(targetMapping);
        }
    }

    const bool patchInfoWritten = getAubStream()->addComment(patchInfoText.str().c_str());
    this->flatBatchBufferHelper->getPatchInfoCollection().clear();
    if (!patchInfoWritten) {
        return false;
    }

    std::ostringstream allocationsText;
    allocationsText << "AllocationsList" << std::endl;
    for (auto &mapping : mappedAllocations) {
        allocationsText << std::hex << mapping.first << ";" << mapping.second << std::endl;
    }

    const bool allocationsWritten = getAubStream()->addComment(allocationsText.str().c_str());
    return allocationsWritten ? true : false;
}
// Submits a batch buffer to the AUB capture while holding the stream lock.
// With a hardware context controller the submission is delegated to it (and
// skipped entirely for a zero-sized batch buffer). Otherwise the legacy path:
//   1. maps the batch buffer in the PPGTT and records its contents,
//   2. appends a batch-buffer-start to the ring buffer (wrapping if needed),
//   3. records the new ring contents and the updated tail in the LRCA,
//   4. submits the context descriptor through submitLRCA().
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits) {
auto streamLocked = getAubStream()->lockStream();
if (hardwareContextController) {
if (batchBufferSize) {
hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, false);
}
return;
}
auto csTraits = this->getCsTraits(osContext->getEngineType());
{
{
// NOTE(review): this "ppgtt:" comment prints the CPU pointer `batchBuffer`,
// not batchBufferGpuAddress — confirm this is intended.
std::ostringstream str;
str << "ppgtt: " << std::hex << std::showbase << batchBuffer;
getAubStream()->addComment(str.str().c_str());
}
auto physBatchBuffer = ppgtt->map(static_cast<uintptr_t>(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank);
AubHelperHw<GfxFamily> aubHelperHw(this->isLocalMemoryEnabled());
AUB::reserveAddressPPGTT(*stream, static_cast<uintptr_t>(batchBufferGpuAddress), batchBufferSize, physBatchBuffer,
entryBits, aubHelperHw);
AUB::addMemoryWrite(
*stream,
physBatchBuffer,
batchBuffer,
batchBufferSize,
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary),
AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary);
}
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
addGUCStartMessage(static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(batchBuffer)));
addPatchInfoComments();
}
// Add a batch buffer start to the ring buffer
auto previousTail = engineInfo.tailRingBuffer;
{
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
typedef typename GfxFamily::MI_NOOP MI_NOOP;
auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer);
auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer);
// Worst-case space for this submission: one BBS plus one LRI, rounded up to
// the tail alignment.
auto sizeNeeded =
sizeof(MI_BATCH_BUFFER_START) +
sizeof(MI_LOAD_REGISTER_IMM);
auto tailAlignment = sizeof(uint64_t);
sizeNeeded = alignUp(sizeNeeded, tailAlignment);
if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) {
// Pad the remaining ring with NOOPs
auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer;
memset(pTail, 0, sizeToWrap);
// write remaining ring
auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt());
AUB::addMemoryWrite(
*stream,
physDumpStart,
pTail,
sizeToWrap,
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
AubMemDump::DataTypeHintValues::TraceCommandBuffer);
// Restart at the beginning of the ring.
previousTail = 0;
engineInfo.tailRingBuffer = 0;
pTail = engineInfo.pRingBuffer;
} else if (engineInfo.tailRingBuffer == 0) {
// Add a LRI if this is our first submission
auto lri = GfxFamily::cmdInitLoadRegisterImm;
lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244));
lri.setDataDword(0x00010000);
*(MI_LOAD_REGISTER_IMM *)pTail = lri;
pTail = ((MI_LOAD_REGISTER_IMM *)pTail) + 1;
}
// Add our BBS
auto bbs = GfxFamily::cmdInitBatchBufferStart;
bbs.setBatchBufferStartAddressGraphicsaddress472(static_cast<uint64_t>(batchBufferGpuAddress));
bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
*(MI_BATCH_BUFFER_START *)pTail = bbs;
pTail = ((MI_BATCH_BUFFER_START *)pTail) + 1;
// Compute our new ring tail.
engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer);
// Add NOOPs as needed as our tail needs to be aligned
while (engineInfo.tailRingBuffer % tailAlignment) {
*(MI_NOOP *)pTail = GfxFamily::cmdInitNoop;
pTail = ((MI_NOOP *)pTail) + 1;
engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer);
}
UNRECOVERABLE_IF((engineInfo.tailRingBuffer % tailAlignment) != 0);
// Only dump the new commands
auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail);
auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail);
auto dumpLength = engineInfo.tailRingBuffer - previousTail;
// write ring
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << ggttDumpStart;
getAubStream()->addComment(str.str().c_str());
}
auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt());
AUB::addMemoryWrite(
*stream,
physDumpStart,
dumpStart,
dumpLength,
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
AubMemDump::DataTypeHintValues::TraceCommandBuffer);
// update the ring mmio tail in the LRCA
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA + 0x101c;
getAubStream()->addComment(str.str().c_str());
}
// NOTE(review): only sizeof(tailRingBuffer) bytes at the LRCA base are mapped
// while the write lands at physLRCA + 0x101c; this presumably relies on the
// LRCA being physically contiguous — confirm.
auto physLRCA = ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt());
AUB::addMemoryWrite(
*stream,
physLRCA + 0x101c,
&engineInfo.tailRingBuffer,
sizeof(engineInfo.tailRingBuffer),
this->getAddressSpace(csTraits.aubHintLRCA));
DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer);
}
// Submit our execlist by submitting to the execlist submit ports
{
typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
contextDescriptor.sData.Valid = true;
contextDescriptor.sData.ForcePageDirRestore = false;
contextDescriptor.sData.ForceRestore = false;
contextDescriptor.sData.Legacy = true;
contextDescriptor.sData.FaultSupport = 0;
contextDescriptor.sData.PrivilegeAccessOrPPGTT = true;
contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32;
auto ggttLRCA = engineInfo.ggttLRCA;
// The descriptor stores the LRCA address in units of 4096 bytes.
contextDescriptor.sData.LogicalRingCtxAddress = ggttLRCA / 4096;
contextDescriptor.sData.ContextID = 0;
this->submitLRCA(contextDescriptor);
}
}
// Issues a poll for completion unless one was already issued for the latest
// sent task count. Serialized through pollForCompletionLock.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion() {
    std::unique_lock<decltype(pollForCompletionLock)> guard(pollForCompletionLock);

    const bool alreadyPolled = (this->pollForCompletionTaskCount == this->latestSentTaskCount);
    if (!alreadyPolled) {
        pollForCompletionImpl();
    }
}
// Records the latest sent task count as polled, then emits the actual poll:
// through the hardware context controller when present, otherwise as a
// register poll on the engine register at offset 0x2234.
// Nothing is emitted while sub-capture mode is active with capture disabled.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
    this->pollForCompletionTaskCount = this->latestSentTaskCount;

    if (subCaptureManager->isSubCaptureMode() && !subCaptureManager->isSubCaptureEnabled()) {
        return;
    }

    auto streamGuard = getAubStream()->lockStream();

    if (hardwareContextController) {
        hardwareContextController->pollForCompletion();
        return;
    }

    const auto engineMmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase;

    // The same constant serves as both the mask and the expected value.
    const uint32_t maskAndValue = getMaskAndValueForPollForCompletion();
    const bool pollUntilNotEqual = false;

    stream->registerPoll(
        AubMemDump::computeRegisterOffset(engineMmioBase, 0x2234), //EXECLIST_STATUS
        maskAndValue,
        maskAndValue,
        pollUntilNotEqual,
        AubMemDump::CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort);
}
// Runs the base-class wait first and then polls for completion.
template <typename GfxFamily>
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
    BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait,
                                                     flushStampToWait,
                                                     useQuickKmdSleep,
                                                     forcePowerSavingMode,
                                                     partitionCount,
                                                     offsetSize);

    pollForCompletion();
}
// Appends a copy of `allocationView` to the list of external allocations.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::makeResidentExternal(AllocationView &allocationView) {
    externalAllocations.emplace_back(allocationView);
}
// Removes the first external allocation whose address equals `gpuAddress`;
// does nothing when no such entry exists.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::makeNonResidentExternal(uint64_t gpuAddress) {
    auto match = std::find_if(externalAllocations.begin(), externalAllocations.end(),
                              [gpuAddress](const AllocationView &view) { return view.first == gpuAddress; });

    if (match != externalAllocations.end()) {
        externalAllocations.erase(match);
    }
}
// Writes `size` bytes starting at `cpuAddress` to the capture at PPGTT address
// `gpuAddress`. A comment describing the range is added first, then the page
// table is walked and every chunk reported by the walk is recorded.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) {
    std::ostringstream rangeComment;
    rangeComment << "ppgtt: " << std::hex << std::showbase << gpuAddress
                 << " end address: " << gpuAddress + size
                 << " cpu address: " << cpuAddress
                 << " size: " << std::dec << size;
    getAubStream()->addComment(rangeComment.str().c_str());

    AubHelperHw<GfxFamily> aubHelperHw(this->isLocalMemoryEnabled());

    // Invoked by pageWalk for each chunk; the chunk parameters come from the walk,
    // not from this function's arguments.
    PageWalker writeChunk = [&](uint64_t physAddress, size_t chunkSize, size_t chunkOffset, uint64_t chunkEntryBits) {
        AUB::reserveAddressGGTTAndWriteMmeory(*stream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, chunkSize, chunkOffset, chunkEntryBits,
                                              aubHelperHw);
    };

    ppgtt->pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, entryBits, writeChunk, memoryBank);
}
// Writes the contents of `gfxAllocation` to the capture.
// Returns false when the allocation is not AUB-writable or when no write
// parameters can be obtained for it; true after the write has been recorded.
// One-time-writable allocation types are marked non-writable afterwards.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) {
    if (!this->isAubWritable(gfxAllocation)) {
        return false;
    }

    // Sampled before the parameters are fetched: only a lock that did not exist
    // on entry is released at the end.
    const bool unlockedOnEntry = !gfxAllocation.isLocked();

    uint64_t gpuAddress;
    void *cpuAddress;
    size_t size;
    const bool haveParameters = this->getParametersForWriteMemory(gfxAllocation, gpuAddress, cpuAddress, size);
    if (!haveParameters) {
        return false;
    }

    auto streamGuard = getAubStream()->lockStream();
    if (aubManager) {
        this->writeMemoryWithAubManager(gfxAllocation);
    } else {
        const auto bank = this->getMemoryBank(&gfxAllocation);
        const auto additionalBits = this->getPPGTTAdditionalBits(&gfxAllocation);
        writeMemory(gpuAddress, cpuAddress, size, bank, additionalBits);
    }
    streamGuard.unlock();

    if (gfxAllocation.isLocked() && unlockedOnEntry) {
        this->getMemoryManager()->unlockResource(&gfxAllocation);
    }

    if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) {
        this->setAubWritable(false, gfxAllocation);
    }

    return true;
}
// Wraps the view (first = address, second = size) in a temporary
// GraphicsAllocation and forwards to the GraphicsAllocation overload.
// The view's address is used both as CPU pointer and as GPU address.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(AllocationView &allocationView) {
    GraphicsAllocation viewAllocation(this->rootDeviceIndex,
                                      GraphicsAllocation::AllocationType::UNKNOWN,
                                      reinterpret_cast<void *>(allocationView.first),
                                      allocationView.first,
                                      0llu,
                                      allocationView.second,
                                      MemoryPool::MemoryNull,
                                      0u);

    return writeMemory(viewAllocation);
}
// Forwards an MMIO write to the hardware context controller under the stream
// lock. Without a controller nothing is written.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::writeMMIO(uint32_t offset, uint32_t value) {
    auto streamGuard = getAubStream()->lockStream();

    if (!hardwareContextController) {
        return;
    }

    hardwareContextController->writeMMIO(offset, value);
}
// Records an MMIO expectation in the AUB file stream.
// With a hardware context controller this is currently a no-op.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) {
    if (!hardwareContextController) {
        this->getAubStream()->expectMMIO(mmioRegister, expectedValue);
    }
    // else: add support for expectMMIO to AubStream
}
// Records an expectation that `length` bytes at `gfxAddress` compare against
// `srcAddress` using `compareOperation`. Polls for completion first and holds
// the stream lock while recording. Always returns true.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress, const void *srcAddress,
                                                         size_t length, uint32_t compareOperation) {
    pollForCompletion();

    auto streamGuard = getAubStream()->lockStream();

    if (hardwareContextController) {
        hardwareContextController->expectMemory(reinterpret_cast<uint64_t>(gfxAddress), srcAddress, length, compareOperation);
    }
    // NOTE(review): there is no early return above, so the page walk below also
    // runs when a hardware context controller is present — confirm this is intended.

    // Invoked by pageWalk for each chunk of the range.
    PageWalker compareChunk = [&](uint64_t physAddress, size_t chunkSize, size_t chunkOffset, uint64_t chunkEntryBits) {
        UNRECOVERABLE_IF(chunkOffset > length);

        this->getAubStream()->expectMemory(physAddress,
                                           ptrOffset(srcAddress, chunkOffset),
                                           chunkSize,
                                           this->getAddressSpaceFromPTEBits(chunkEntryBits),
                                           compareOperation);
    };

    this->ppgtt->pageWalk(reinterpret_cast<uintptr_t>(gfxAddress), length, 0, PageTableEntry::nonValidBits, compareChunk, MemoryBanks::BankNotSpecified);
    return true;
}
// Writes all external allocations and every allocation in `allocationsForResidency`,
// then stamps each residency allocation with taskCount + 1 for this OS context.
// Returns early, doing nothing, when sub-capture mode is on but sub-capture is not enabled.
// While dumpAubNonWritable is set, each residency allocation is marked AUB-writable before
// being written; the flag is cleared at the end.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) {
    if (subCaptureManager->isSubCaptureMode() && !subCaptureManager->isSubCaptureEnabled()) {
        return;
    }

    for (auto &externalView : externalAllocations) {
        const bool written = writeMemory(externalView);
        if (!written) {
            DEBUG_BREAK_IF(externalView.second != 0);
        }
    }

    for (auto &residentAllocation : allocationsForResidency) {
        if (dumpAubNonWritable) {
            this->setAubWritable(true, *residentAllocation);
        }

        const bool written = writeMemory(*residentAllocation);
        if (!written) {
            // A failed write is only unexpected for a non-empty, AUB-writable allocation.
            DEBUG_BREAK_IF((residentAllocation->getUnderlyingBufferSize() != 0) &&
                           this->isAubWritable(*residentAllocation));
        }

        residentAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
    }

    dumpAubNonWritable = false;
}
// Dumps `gfxAllocation` to the AUB output.
// Skipped when the allocation's bcsDumpOnly flag does not match whether this receiver's
// engine is a BCS engine. When either AUBDumpAllocsOnEnqueue*Only debug flag is set, only
// allocations marked dumpable are dumped, and the mark is cleared afterwards.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::dumpAllocation(GraphicsAllocation &gfxAllocation) {
    const bool isBcsCsr = EngineHelpers::isBcs(this->osContext->getEngineType());
    if (isBcsCsr != gfxAllocation.getAubInfo().bcsDumpOnly) {
        return;
    }

    const bool dumpOnlyMarkedAllocations = DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() ||
                                           DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get();
    if (dumpOnlyMarkedAllocations) {
        if (!gfxAllocation.isAllocDumpable()) {
            return;
        }
        gfxAllocation.setAllocDumpable(false, isBcsCsr);
    }

    auto dumpFormat = AubAllocDump::getDumpFormat(gfxAllocation);
    if (dumpFormat > AubAllocDump::DumpFormat::NONE) {
        pollForCompletion();
    }

    auto streamLocked = getAubStream()->lockStream();

    if (!hardwareContextController) {
        AubAllocDump::dumpAllocation<GfxFamily>(dumpFormat, gfxAllocation, getAubStream(), getDumpHandle());
        return;
    }

    std::unique_ptr<aub_stream::SurfaceInfo> surfaceInfo(AubAllocDump::getDumpSurfaceInfo<GfxFamily>(gfxAllocation, dumpFormat));
    if (surfaceInfo != nullptr) {
        hardwareContextController->dumpSurface(*surfaceInfo);
    }
}
// Asks the sub-capture manager whether capture is active for `kernelName` and returns
// its status. When active, the sub-capture file is reopened; a successful reopen sets
// dumpAubNonWritable. Standalone receivers additionally call programForAubSubCapture.
template <typename GfxFamily>
AubSubCaptureStatus AUBCommandStreamReceiverHw<GfxFamily>::checkAndActivateAubSubCapture(const std::string &kernelName) {
    auto status = subCaptureManager->checkAndActivateSubCapture(kernelName);

    if (status.isActive) {
        auto &captureFileName = subCaptureManager->getSubCaptureFileName(kernelName);
        if (reopenFile(captureFileName)) {
            dumpAubNonWritable = true;
        }
    }

    if (this->standalone) {
        this->programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive);
    }
    return status;
}
// Adds `message` as a comment to the AUB output: through aubManager when one is set,
// otherwise directly on the AUB stream. The object returned by lockStream() is held
// for the duration of the call.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::addAubComment(const char *message) {
    auto streamLocked = getAubStream()->lockStream();

    if (!aubManager) {
        getAubStream()->addComment(message);
        return;
    }
    aubManager->addComment(message);
}
// Returns the dump handle for this receiver, derived from its own address via hashPtrToU32.
template <typename GfxFamily>
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getDumpHandle() {
    const uint32_t dumpHandle = hashPtrToU32(this);
    return dumpHandle;
}
// Builds a small page-aligned buffer holding a 32-bit work-queue-item header followed by
// an MI_BATCH_BUFFER_START command, maps and writes it into the AUB stream, and registers
// patch info for it with the flat batch buffer helper.
// `batchBufferAddress` is passed only to the PatchInfoData constructed at the end.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::addGUCStartMessage(uint64_t batchBufferAddress) {
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
// Buffer layout: [uint32_t header][MI_BATCH_BUFFER_START].
auto bufferSize = sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START);
AubHelperHw<GfxFamily> aubHelperHw(this->isLocalMemoryEnabled());
// The buffer is allocated through the memory manager wrapper and released by the custom deleter on scope exit.
std::unique_ptr<void, std::function<void(void *)>> buffer(this->getMemoryManager()->alignedMallocWrapper(bufferSize, MemoryConstants::pageSize), [&](void *ptr) { this->getMemoryManager()->alignedFreeWrapper(ptr); });
LinearStream linearStream(buffer.get(), bufferSize);
uint32_t *header = static_cast<uint32_t *>(linearStream.getSpace(sizeof(uint32_t)));
*header = getGUCWorkQueueItemHeader();
MI_BATCH_BUFFER_START *miBatchBufferStartSpace = linearStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
// After both reservations the linear stream is expected to be exactly full.
DEBUG_BREAK_IF(bufferSize != linearStream.getUsed());
// Start from the family's default command, set its address and PPGTT address space, then copy it into the buffer.
auto miBatchBufferStart = GfxFamily::cmdInitBatchBufferStart;
miBatchBufferStart.setBatchBufferStartAddressGraphicsaddress472(AUB::ptrToPPGTT(buffer.get()));
miBatchBufferStart.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
*miBatchBufferStartSpace = miBatchBufferStart;
// Map the buffer in the PPGTT (main bank), reserve the address in the stream, then write its contents.
auto physBufferAddres = ppgtt->map(reinterpret_cast<uintptr_t>(buffer.get()), bufferSize,
this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()),
MemoryBanks::MainBank);
AUB::reserveAddressPPGTT(*stream, reinterpret_cast<uintptr_t>(buffer.get()), bufferSize, physBufferAddres,
this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()),
aubHelperHw);
AUB::addMemoryWrite(
*stream,
physBufferAddres,
buffer.get(),
bufferSize,
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype));
// NOTE(review): the offset argument (buffer size minus sizeof(uint64_t)) presumably targets the
// trailing 64-bit address field of the command - confirm against PatchInfoData.
PatchInfoData patchInfoData(batchBufferAddress, 0u, PatchInfoAllocationType::Default, reinterpret_cast<uintptr_t>(buffer.get()), sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START) - sizeof(uint64_t), PatchInfoAllocationType::GUCStartMessage);
this->flatBatchBufferHelper->setPatchInfoData(patchInfoData);
}
} // namespace NEO

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl"
namespace NEO {
// Returns the constant 0x100 used as the mask-and-value for poll-for-completion
// in this family-range variant.
template <typename GfxFamily>
constexpr uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() {
    return uint32_t{0x100};
}
// Context-token hook; this variant intentionally does nothing and ignores `dumpHandle`.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken(uint32_t dumpHandle) {
// Some simulator versions don't support adding the context token.
// This hook allows specialization for those that do.
}
// Returns the fixed GuC work-queue-item header value (0x00030001) for this variant.
template <typename GfxFamily>
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getGUCWorkQueueItemHeader() {
    return 0x00030001;
}
// Maps page-table entry bits to an AUB address space. This variant ignores `entryBits`
// and always reports TraceNonlocal.
template <typename GfxFamily>
int AUBCommandStreamReceiverHw<GfxFamily>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
    const int addressSpace = AubMemDump::AddressSpaceValues::TraceNonlocal;
    return addressSpace;
}
} // namespace NEO

View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl"
#include "shared/source/helpers/engine_node_helper.h"
namespace NEO {
// Returns the constant 0x00008000 used as the mask-and-value for poll-for-completion
// in this family-range variant.
template <typename GfxFamily>
constexpr uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() {
    return uint32_t{0x00008000};
}
// Context-token hook: this variant emits a context for `dumpHandle` into the stream
// via AUB::createContext.
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken(uint32_t dumpHandle) {
    auto &aubStream = *stream;
    AUB::createContext(aubStream, dumpHandle);
}
// Returns the GuC work-queue-item header: 0x00030401 when this receiver's engine is a
// CCS engine, 0x00030001 otherwise.
template <typename GfxFamily>
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getGUCWorkQueueItemHeader() {
    const bool isCcsEngine = EngineHelpers::isCcs(osContext->getEngineType());
    return isCcsEngine ? 0x00030401 : 0x00030001;
}
// Maps page-table entry bits to an AUB address space: TraceLocal when the local-memory
// bit is set in `entryBits`, TraceNonlocal otherwise.
template <typename GfxFamily>
int AUBCommandStreamReceiverHw<GfxFamily>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
    const bool localMemoryBitSet = (entryBits & BIT(PageTableEntry::localMemoryBit)) != 0;
    if (!localMemoryBitSet) {
        return AubMemDump::AddressSpaceValues::TraceNonlocal;
    }
    return AubMemDump::AddressSpaceValues::TraceLocal;
}
} // namespace NEO

View File

@@ -0,0 +1,89 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "aub_mapper.h"
#include "third_party/aub_stream/headers/hardware_context.h"
namespace aub_stream {
class AubManager;
struct AubStream;
} // namespace aub_stream
namespace NEO {
class AddressMapper;
class GraphicsAllocation;
class HardwareContextController;
// Shared base class of the simulated command stream receivers for a given GfxFamily.
// It declares the MMIO / context-submission helpers and the pure-virtual hooks
// (memory writes, AUB/TBX writability, polling, allocation dumping) that concrete
// simulated receivers implement, and owns the optional HardwareContextController.
template <typename GfxFamily>
class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw<GfxFamily> {
  protected:
    using CommandStreamReceiverHw<GfxFamily>::osContext;
    using AUB = typename AUBFamilyMapper<GfxFamily>::AUB;
    using MiContextDescriptorReg = typename AUB::MiContextDescriptorReg;

    // Fills gpuAddress/cpuAddress/size for writing `graphicsAllocation`; returns false
    // when there is nothing to write.
    bool getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const;
    // Frees and unmaps the buffers tracked in engineInfo.
    void freeEngineInfo(AddressMapper &gttRemap);
    MOCKABLE_VIRTUAL uint32_t getDeviceIndex() const;

  public:
    CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment,
                                           uint32_t rootDeviceIndex,
                                           const DeviceBitfield deviceBitfield);
    ~CommandStreamReceiverSimulatedCommonHw() override;

    uint64_t getGTTBits() const {
        return 0u;
    }

    // MMIO initialisation helpers; implementations live in the per-family .inl files.
    void initGlobalMMIO();
    void initAdditionalMMIO();
    uint64_t getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation);
    void getGTTData(void *memory, AubGTTData &data);
    uint32_t getMemoryBankForGtt() const;
    static const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType);
    void initEngineMMIO();
    void submitLRCA(const MiContextDescriptorReg &contextDescriptor);
    void setupContext(OsContext &osContext) override;

    // Memory expectation helpers; each forwards to expectMemory with a fixed compare operation.
    virtual bool expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length);
    virtual bool expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length);
    virtual bool expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length);

    // Hooks implemented by the concrete simulated receivers.
    virtual void pollForCompletion() = 0;
    virtual void pollForCompletionImpl() {}
    virtual bool writeMemory(GraphicsAllocation &gfxAllocation) = 0;
    virtual void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) = 0;
    virtual void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) = 0;
    virtual void writeMMIO(uint32_t offset, uint32_t value) = 0;
    virtual void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0;
    virtual bool isAubWritable(GraphicsAllocation &graphicsAllocation) const = 0;
    virtual void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0;
    virtual bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const = 0;
    virtual void dumpAllocation(GraphicsAllocation &gfxAllocation) = 0;

    void makeNonResident(GraphicsAllocation &gfxAllocation) override;

    size_t getPreferredTagPoolSize() const override { return 1; }

    // Non-owning; when set (and the context is not low priority) setupContext creates
    // hardwareContextController from it.
    aub_stream::AubManager *aubManager = nullptr;
    std::unique_ptr<HardwareContextController> hardwareContextController;

    // Per-engine buffers and their GGTT offsets; value-initialised so all pointers start null.
    struct EngineInfo {
        void *pLRCA;
        uint32_t ggttLRCA;
        void *pGlobalHWStatusPage;
        uint32_t ggttHWSP;
        void *pRingBuffer;
        uint32_t ggttRingBuffer;
        size_t sizeRingBuffer;
        uint32_t tailRingBuffer;
    } engineInfo = {};

    // Stream used by the MMIO helpers. Defaulted to nullptr (like aubManager) so the
    // pointer never holds an indeterminate value before a derived class assigns it.
    AubMemDump::AubStream *stream = nullptr;
};
} // namespace NEO

View File

@@ -0,0 +1,120 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub/aub_helper.h"
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/hardware_context_controller.h"
#include "shared/source/memory_manager/address_mapper.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "third_party/aub_stream/headers/aub_manager.h"
namespace NEO {
// Writes every register/value pair returned by AubHelper::getAdditionalMmioList() to the
// stream, unless the AubDumpAddMmioRegistersList debug flag still equals "unk".
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO() {
    if (DebugManager.flags.AubDumpAddMmioRegistersList.get() == "unk") {
        return;
    }

    auto additionalRegisters = AubHelper::getAdditionalMmioList();
    for (auto &registerEntry : additionalRegisters) {
        stream->writeMMIO(registerEntry.first, registerEntry.second);
    }
}
// Runs the base-class context setup, then creates the HardwareContextController when an
// aubManager is available and the context is not low priority. The controller receives
// the context save/restore flags provided by the engine's traits.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::setupContext(OsContext &osContext) {
    CommandStreamReceiverHw<GfxFamily>::setupContext(osContext);

    uint32_t flags = 0;
    getCsTraits(osContext.getEngineType()).setContextSaveRestoreFlags(flags);

    if (!aubManager || osContext.isLowPriority()) {
        return;
    }
    hardwareContextController = std::make_unique<HardwareContextController>(*aubManager, osContext, flags);
}
// Collects the parameters needed to write `graphicsAllocation`:
//  - cpuAddress: the underlying buffer, or the locked resource when the buffer is null
//    and the allocation is lockable
//  - gpuAddress: the decanonized GPU address
//  - size: the underlying buffer size, or the GMM allocation size when compression is enabled
// Returns false (without locking) when the resulting size is zero, true otherwise.
template <typename GfxFamily>
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const {
    cpuAddress = graphicsAllocation.getUnderlyingBuffer();
    gpuAddress = GmmHelper::decanonize(graphicsAllocation.getGpuAddress());
    size = graphicsAllocation.getUnderlyingBufferSize();

    auto defaultGmm = graphicsAllocation.getDefaultGmm();
    if (defaultGmm && defaultGmm->isCompressionEnabled) {
        size = defaultGmm->gmmResourceInfo->getSizeAllocation();
    }

    if (size == 0) {
        return false;
    }

    const bool needsLock = (cpuAddress == nullptr) && graphicsAllocation.isAllocationLockable();
    if (needsLock) {
        cpuAddress = this->getMemoryManager()->lockResource(&graphicsAllocation);
    }
    return true;
}
// Forwards to expectMemory with the CompareEqual operation and returns its result.
template <typename GfxFamily>
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length) {
    auto compareOperation = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual;
    return this->expectMemory(gfxAddress, srcAddress, length, compareOperation);
}
// Forwards to expectMemory with the CompareNotEqual operation and returns its result.
template <typename GfxFamily>
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) {
    auto compareOperation = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual;
    return this->expectMemory(gfxAddress, srcAddress, length, compareOperation);
}
// Forwards to expectMemory with the CompareNotEqual operation and returns its result.
// NOTE(review): presumably compressed memory is expected to differ from the uncompressed
// source bytes, hence "not equal" - confirm the intent.
template <typename GfxFamily>
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length) {
    auto compareOperation = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual;
    return this->expectMemory(gfxAddress, srcAddress, length, compareOperation);
}
// Releases the three engine buffers (LRCA, global HW status page, ring buffer):
// each one is freed, unmapped from `gttRemap`, and its pointer reset to nullptr.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::freeEngineInfo(AddressMapper &gttRemap) {
    auto releaseBuffer = [&gttRemap](void *&engineBuffer) {
        alignedFree(engineBuffer);
        gttRemap.unmap(engineBuffer);
        engineBuffer = nullptr;
    };

    releaseBuffer(engineInfo.pLRCA);
    releaseBuffer(engineInfo.pGlobalHWStatusPage);
    releaseBuffer(engineInfo.pRingBuffer);
}
// If `gfxAllocation` is resident in this receiver's OS context: dumps it, queues it on
// the eviction list, and releases its residency for that context. Otherwise does nothing.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {
    if (!gfxAllocation.isResident(osContext->getContextId())) {
        return;
    }

    dumpAllocation(gfxAllocation);
    this->getEvictionAllocations().push_back(&gfxAllocation);
    gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId());
}
// Returns Math::log2 of the OS context's device bitfield (taken as a 32-bit value),
// or 0 when no bit is set in the bitfield.
template <typename GfxFamily>
uint32_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getDeviceIndex() const {
    if (!osContext->getDeviceBitfield().any()) {
        return 0u;
    }
    const auto bitfieldValue = static_cast<uint32_t>(osContext->getDeviceBitfield().to_ulong());
    return static_cast<uint32_t>(Math::log2(bitfieldValue));
}
// Forwards all arguments to the CommandStreamReceiverHw constructor and turns off both
// implicit-flush settings (GPU-idle and new-resource) for this receiver.
template <typename GfxFamily>
CommandStreamReceiverSimulatedCommonHw<GfxFamily>::CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment,
                                                                                          uint32_t rootDeviceIndex,
                                                                                          const DeviceBitfield deviceBitfield)
    : CommandStreamReceiverHw<GfxFamily>(executionEnvironment, rootDeviceIndex, deviceBitfield) {
    this->useGpuIdleImplicitFlush = false;
    this->useNewResourceImplicitFlush = false;
}
// Defaulted destructor, defined out of line in this .inl file.
template <typename GfxFamily>
CommandStreamReceiverSimulatedCommonHw<GfxFamily>::~CommandStreamReceiverSimulatedCommonHw() = default;
} // namespace NEO

View File

@@ -0,0 +1,59 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl"
namespace NEO {
// Writes every register/value pair of the family's global MMIO list to the stream.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initGlobalMMIO() {
    auto &globalRegisters = AUBFamilyMapper<GfxFamily>::globalMMIO;
    for (auto &registerEntry : globalRegisters) {
        stream->writeMMIO(registerEntry.first, registerEntry.second);
    }
}
// Returns the PPGTT entry bits used by this variant: present, writable and
// user/supervisor. `gfxAllocation` is not consulted.
template <typename GfxFamily>
uint64_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) {
    uint64_t entryBits = BIT(PageTableEntry::presentBit);
    entryBits |= BIT(PageTableEntry::writableBit);
    entryBits |= BIT(PageTableEntry::userSupervisorBit);
    return entryBits;
}
// Fills `data` for a GTT entry: always present and never in local memory in this variant.
// `memory` is not used.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getGTTData(void *memory, AubGTTData &data) {
    data.localMemory = false;
    data.present = true;
}
// Returns the memory bank for GTT data: MemoryBanks::getBank of this receiver's device index.
template <typename GfxFamily>
uint32_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getMemoryBankForGtt() const {
    const auto deviceIndex = getDeviceIndex();
    return MemoryBanks::getBank(deviceIndex);
}
// Returns the LRCA helper (command streamer traits) registered for `engineType` in the
// family's csTraits table. The table entry is dereferenced without a null check.
template <typename GfxFamily>
const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getCsTraits(aub_stream::EngineType engineType) {
    auto &traitsEntry = AUBFamilyMapper<GfxFamily>::csTraits[engineType];
    return *traitsEntry;
}
// Writes the per-engine MMIO list for this receiver's engine type to the stream.
// A missing list triggers DEBUG_BREAK_IF.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initEngineMMIO() {
    auto engineRegisters = AUBFamilyMapper<GfxFamily>::perEngineMMIO[osContext->getEngineType()];
    DEBUG_BREAK_IF(!engineRegisters);

    for (auto &registerEntry : *engineRegisters) {
        stream->writeMMIO(registerEntry.first, registerEntry.second);
    }
}
// Submits `contextDescriptor` by writing four values, in order, to the register at
// offset 0x2230 from the engine's MMIO base: 0, 0, ulData[1], ulData[0].
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::submitLRCA(const MiContextDescriptorReg &contextDescriptor) {
    auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase;
    const auto submitRegister = AubMemDump::computeRegisterOffset(mmioBase, 0x2230);

    stream->writeMMIO(submitRegister, 0);
    stream->writeMMIO(submitRegister, 0);
    stream->writeMMIO(submitRegister, contextDescriptor.ulData[1]);
    stream->writeMMIO(submitRegister, contextDescriptor.ulData[0]);
}
} // namespace NEO

View File

@@ -0,0 +1,99 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/memory_manager/memory_pool.h"
#include "shared/source/memory_manager/physical_address_allocator.h"
namespace NEO {
// Writes the family's global MMIO list to the stream. When local memory is enabled it
// additionally programs GU_CNTL, LMEM_CFG and the four tile address-range registers.
// Each tile register gets the running base address (in GB) shifted left by 1 and the
// bank size (in GB) shifted left by 8, OR-ed onto its initial value of 0x0001.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initGlobalMMIO() {
    auto &globalRegisters = AUBFamilyMapper<GfxFamily>::globalMMIO;
    for (auto &registerEntry : globalRegisters) {
        stream->writeMMIO(registerEntry.first, registerEntry.second);
    }

    if (!this->localMemoryEnabled) {
        return;
    }

    MMIOPair guCntl = {0x00101010, 0x00000080}; //GU_CNTL
    stream->writeMMIO(guCntl.first, guCntl.second);

    MMIOPair lmemCfg = {0x0000cf58, 0x80000000}; //LMEM_CFG
    stream->writeMMIO(lmemCfg.first, lmemCfg.second);

    MMIOPair tileAddrRange[] = {{0x00004900, 0x0001},
                                {0x00004904, 0x0001},
                                {0x00004908, 0x0001},
                                {0x0000490c, 0x0001}}; //XEHP_TILE_ADDR_RANGE

    const uint32_t localMemorySizeGB = static_cast<uint32_t>(AubHelper::getMemBankSize(&this->peekHwInfo()) / MemoryConstants::gigaByte);
    uint32_t localMemoryBaseAddressInGB = 0x0;

    // The array holds exactly the four tile registers, so iterate it directly.
    for (auto &tileRange : tileAddrRange) {
        tileRange.second |= localMemoryBaseAddressInGB << 1;
        tileRange.second |= localMemorySizeGB << 8;
        stream->writeMMIO(tileRange.first, tileRange.second);

        localMemoryBaseAddressInGB += localMemorySizeGB;
    }
}
// Returns the PPGTT entry bits for `gfxAllocation`: present and writable, plus the
// local-memory bit when the AUBDumpForceAllToLocalMemory debug flag is set or the
// (non-null) allocation lives in MemoryPool::LocalMemory.
template <typename GfxFamily>
uint64_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) {
    const bool useLocalMemory = DebugManager.flags.AUBDumpForceAllToLocalMemory.get() ||
                                (gfxAllocation && gfxAllocation->getMemoryPool() == MemoryPool::LocalMemory);

    uint64_t entryBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit);
    if (useLocalMemory) {
        entryBits |= BIT(PageTableEntry::localMemoryBit);
    }
    return entryBits;
}
// Fills `data` for a GTT entry: always present; the local-memory flag follows this
// receiver's localMemoryEnabled setting. `memory` is not used.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getGTTData(void *memory, AubGTTData &data) {
    data.localMemory = this->localMemoryEnabled;
    data.present = true;
}
// Returns the memory bank for GTT data of this receiver's device index: the local-memory
// bank when local memory is enabled, the regular bank otherwise.
template <typename GfxFamily>
uint32_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getMemoryBankForGtt() const {
    auto deviceIndex = getDeviceIndex();
    if (!this->localMemoryEnabled) {
        return MemoryBanks::getBank(deviceIndex);
    }
    return MemoryBanks::getBankForLocalMemory(deviceIndex);
}
// Returns the LRCA helper (command streamer traits) registered for `engineType` in the
// family's csTraits table. The table entry is dereferenced without a null check.
template <typename GfxFamily>
const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getCsTraits(aub_stream::EngineType engineType) {
    auto &traitsEntry = AUBFamilyMapper<GfxFamily>::csTraits[engineType];
    return *traitsEntry;
}
// Writes the per-engine MMIO list for this receiver's engine type to the stream.
// A missing list triggers DEBUG_BREAK_IF.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initEngineMMIO() {
    auto engineRegisters = AUBFamilyMapper<GfxFamily>::perEngineMMIO[osContext->getEngineType()];
    DEBUG_BREAK_IF(!engineRegisters);

    for (auto &registerEntry : *engineRegisters) {
        stream->writeMMIO(registerEntry.first, registerEntry.second);
    }
}
// Submits `contextDescriptor` through three register writes relative to the engine's
// MMIO base: ulData[0] to 0x2510, ulData[1] to 0x2514, then 1 to 0x2550.
template <typename GfxFamily>
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::submitLRCA(const MiContextDescriptorReg &contextDescriptor) {
    auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase;

    const auto descriptorLowRegister = AubMemDump::computeRegisterOffset(mmioBase, 0x2510);
    stream->writeMMIO(descriptorLowRegister, contextDescriptor.ulData[0]);

    const auto descriptorHighRegister = AubMemDump::computeRegisterOffset(mmioBase, 0x2514);
    stream->writeMMIO(descriptorHighRegister, contextDescriptor.ulData[1]);

    // Load our new exec list
    const auto execListLoadRegister = AubMemDump::computeRegisterOffset(mmioBase, 0x2550);
    stream->writeMMIO(execListLoadRegister, 1);
}
} // namespace NEO

View File

@@ -0,0 +1,154 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/aub/aub_helper.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hardware_context_controller.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/memory_manager/memory_pool.h"
#include "shared/source/memory_manager/physical_address_allocator.h"
#include "shared/source/os_interface/os_context.h"
#include "aub_mem_dump.h"
#include "third_party/aub_stream/headers/allocation_params.h"
#include "third_party/aub_stream/headers/aub_manager.h"
#include "third_party/aub_stream/headers/hardware_context.h"
namespace NEO {
class GraphicsAllocation;
// Adds memory-bank selection, address-space selection and AUB/TBX writability tracking
// on top of CommandStreamReceiverSimulatedCommonHw, and implements writing an allocation
// through the AubManager / hardware context controller.
template <typename GfxFamily>
class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCommonHw<GfxFamily> {
  protected:
    using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::CommandStreamReceiverSimulatedCommonHw;
    using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::osContext;
    using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getDeviceIndex;
    using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::aubManager;
    using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::hardwareContextController;
    using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::writeMemory;

  public:
    // Returns the memory bank value for `allocation`.
    // With an aubManager this is the allocation's bank bitfield as an integer; otherwise
    // it is a single bank derived from a device index (taken from the allocation's
    // storage info when any bank is set there, else from this receiver), using the
    // local-memory bank when the allocation is in MemoryPool::LocalMemory.
    uint32_t getMemoryBank(GraphicsAllocation *allocation) const {
        if (aubManager) {
            return static_cast<uint32_t>(getMemoryBanksBitfield(allocation).to_ulong());
        }

        uint32_t deviceIndexChosen = allocation->storageInfo.memoryBanks.any()
                                         ? getDeviceIndexFromStorageInfo(allocation->storageInfo)
                                         : getDeviceIndex();

        if (allocation->getMemoryPool() == MemoryPool::LocalMemory) {
            return MemoryBanks::getBankForLocalMemory(deviceIndexChosen);
        }
        return MemoryBanks::getBank(deviceIndexChosen);
    }

    // Returns the index of the lowest set bit in storageInfo.memoryBanks.
    // Returns 0 when no bank is set.
    static uint32_t getDeviceIndexFromStorageInfo(StorageInfo storageInfo) {
        // Without any bank set the scan below would never terminate (shifting an empty
        // bitfield never sets bit 0), so fall back to index 0 like getDeviceIndex() does.
        if (storageInfo.memoryBanks.none()) {
            return 0u;
        }

        uint32_t deviceIndex = 0;
        while (!storageInfo.memoryBanks.test(0)) {
            storageInfo.memoryBanks >>= 1;
            deviceIndex++;
        }
        return deviceIndex;
    }

    // Returns the bank bitfield for `allocation`: empty for non-local-memory allocations;
    // for local memory, the allocation's own banks when any are set and either page tables
    // are cloned or this receiver is multi-OS-context capable, else the OS context's
    // device bitfield.
    DeviceBitfield getMemoryBanksBitfield(GraphicsAllocation *allocation) const {
        if (allocation->getMemoryPool() == MemoryPool::LocalMemory) {
            if (allocation->storageInfo.memoryBanks.any()) {
                if (allocation->storageInfo.cloningOfPageTables || this->isMultiOsContextCapable()) {
                    return allocation->storageInfo.memoryBanks;
                }
            }
            return this->osContext->getDeviceBitfield();
        }
        return {};
    }

    // Maps a data-type hint to an AUB address space. TraceLocal is returned when the
    // AUBDumpForceAllToLocalMemory debug flag is set, or when local memory is enabled and
    // the hint is one of the logical-ring-context or command-buffer hints listed below.
    int getAddressSpace(int hint) {
        bool traceLocalAllowed = false;
        switch (hint) {
        case AubMemDump::DataTypeHintValues::TraceLogicalRingContextRcs:
        case AubMemDump::DataTypeHintValues::TraceLogicalRingContextCcs:
        case AubMemDump::DataTypeHintValues::TraceLogicalRingContextBcs:
        case AubMemDump::DataTypeHintValues::TraceLogicalRingContextVcs:
        case AubMemDump::DataTypeHintValues::TraceLogicalRingContextVecs:
        case AubMemDump::DataTypeHintValues::TraceCommandBuffer:
            traceLocalAllowed = true;
            break;
        default:
            break;
        }
        if ((traceLocalAllowed && this->localMemoryEnabled) || DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
            return AubMemDump::AddressSpaceValues::TraceLocal;
        }
        return AubMemDump::AddressSpaceValues::TraceNonlocal;
    }

    // Creates a physical address allocator sized from the memory bank size and the
    // sub-device count of `hwInfo`. The caller receives a raw pointer to a new object.
    PhysicalAddressAllocator *createPhysicalAddressAllocator(const HardwareInfo *hwInfo) {
        const auto bankSize = AubHelper::getMemBankSize(hwInfo);
        const auto devicesCount = HwHelper::getSubDevicesCount(hwInfo);
        return new PhysicalAddressAllocatorHw<GfxFamily>(bankSize, devicesCount);
    }

    // Writes `graphicsAllocation` through aub_stream. Allocations with cloned page tables
    // or outside the local memory pool go through aubManager; the rest go through the
    // hardware context controller. The result of getParametersForWriteMemory is not checked.
    void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override {
        uint64_t gpuAddress;
        void *cpuAddress;
        size_t size;
        this->getParametersForWriteMemory(graphicsAllocation, gpuAddress, cpuAddress, size);
        int hint = graphicsAllocation.getAllocationType() == GraphicsAllocation::AllocationType::COMMAND_BUFFER
                       ? AubMemDump::DataTypeHintValues::TraceBatchBuffer
                       : AubMemDump::DataTypeHintValues::TraceNotype;

        aub_stream::AllocationParams allocationParams(gpuAddress, cpuAddress, size, this->getMemoryBank(&graphicsAllocation),
                                                      hint, graphicsAllocation.getUsedPageSize());

        auto gmm = graphicsAllocation.getDefaultGmm();

        allocationParams.additionalParams.compressionEnabled = gmm ? gmm->isCompressionEnabled : false;

        if (graphicsAllocation.storageInfo.cloningOfPageTables || !graphicsAllocation.isAllocatedInLocalMemoryPool()) {
            aubManager->writeMemory2(allocationParams);
        } else {
            hardwareContextController->writeMemory(allocationParams);
        }
    }

    // The four writability accessors below share one rule for choosing the bank:
    // GraphicsAllocation::defaultBank is used when the computed bank is 0 or the
    // allocation clones page tables; otherwise the computed bank is used.
    void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) override {
        auto bank = getMemoryBank(&graphicsAllocation);
        if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
            bank = GraphicsAllocation::defaultBank;
        }

        graphicsAllocation.setAubWritable(writable, bank);
    }

    bool isAubWritable(GraphicsAllocation &graphicsAllocation) const override {
        auto bank = getMemoryBank(&graphicsAllocation);
        if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
            bank = GraphicsAllocation::defaultBank;
        }
        return graphicsAllocation.isAubWritable(bank);
    }

    void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) override {
        auto bank = getMemoryBank(&graphicsAllocation);
        if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
            bank = GraphicsAllocation::defaultBank;
        }
        graphicsAllocation.setTbxWritable(writable, bank);
    }

    bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const override {
        auto bank = getMemoryBank(&graphicsAllocation);
        if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
            bank = GraphicsAllocation::defaultBank;
        }
        return graphicsAllocation.isTbxWritable(bank);
    }
};
} // namespace NEO

View File

@@ -0,0 +1,53 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/command_stream_receiver.h"
#include <memory>
namespace NEO {
// Wraps a BaseCSR and optionally owns a second command stream receiver (aubCSR).
// The overrides declared here are defined in the matching .inl file.
// Copying is disabled.
template <typename BaseCSR>
class CommandStreamReceiverWithAUBDump : public BaseCSR {
  protected:
    using BaseCSR::osContext;

  public:
    CommandStreamReceiverWithAUBDump(const std::string &baseName,
                                     ExecutionEnvironment &executionEnvironment,
                                     uint32_t rootDeviceIndex,
                                     const DeviceBitfield deviceBitfield);

    CommandStreamReceiverWithAUBDump(const CommandStreamReceiverWithAUBDump &) = delete;
    CommandStreamReceiverWithAUBDump &operator=(const CommandStreamReceiverWithAUBDump &) = delete;

    bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
    void makeNonResident(GraphicsAllocation &gfxAllocation) override;

    AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override;
    void setupContext(OsContext &osContext) override;

    // Reports CSR_TBX_WITH_AUB when the base receiver is a TBX receiver,
    // CSR_HW_WITH_AUB for every other base type.
    CommandStreamReceiverType getType() override {
        const bool baseIsTbx = BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX;
        return baseIsTbx ? CommandStreamReceiverType::CSR_TBX_WITH_AUB
                         : CommandStreamReceiverType::CSR_HW_WITH_AUB;
    }

    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
                                               bool useQuickKmdSleep, bool forcePowerSavingMode,
                                               uint32_t partitionCount, uint32_t offsetSize) override;

    size_t getPreferredTagPoolSize() const override { return 1; }

    void addAubComment(const char *comment) override;

    // Secondary receiver; may be null, and every override checks it before use.
    std::unique_ptr<CommandStreamReceiver> aubCSR;
};
} // namespace NEO

View File

@@ -0,0 +1,89 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub/aub_center.h"
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
namespace NEO {
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
// Constructs the base receiver and, unless the base is a TBX receiver on a root device
// whose AubCenter already provides an AubManager, creates the secondary AUB receiver.
// The new receiver's tag allocation must initialize successfully, and its tag is then
// set to the maximum uint32_t value.
template <typename BaseCSR>
CommandStreamReceiverWithAUBDump<BaseCSR>::CommandStreamReceiverWithAUBDump(const std::string &baseName,
                                                                            ExecutionEnvironment &executionEnvironment,
                                                                            uint32_t rootDeviceIndex,
                                                                            const DeviceBitfield deviceBitfield)
    : BaseCSR(executionEnvironment, rootDeviceIndex, deviceBitfield) {
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];

    const bool isAubManager = rootDeviceEnvironment.aubCenter && rootDeviceEnvironment.aubCenter->getAubManager();
    const bool isTbxMode = BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX;
    if (isAubManager && isTbxMode) {
        return;
    }

    aubCSR.reset(AUBCommandStreamReceiver::create(baseName, false, executionEnvironment, rootDeviceIndex, deviceBitfield));
    UNRECOVERABLE_IF(!aubCSR->initializeTagAllocation());
    *aubCSR->getTagAddress() = std::numeric_limits<uint32_t>::max();
}
// Flushes the batch buffer on the secondary receiver first (when present), copies the
// base receiver's latest sent task count to it, then flushes on the base receiver.
// Only the base receiver's flush result is returned.
template <typename BaseCSR>
bool CommandStreamReceiverWithAUBDump<BaseCSR>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
    if (auto *secondaryCsr = aubCSR.get()) {
        secondaryCsr->flush(batchBuffer, allocationsForResidency);
        secondaryCsr->setLatestSentTaskCount(BaseCSR::peekLatestSentTaskCount());
    }
    return BaseCSR::flush(batchBuffer, allocationsForResidency);
}
// Makes `gfxAllocation` non-resident on the base receiver and then on the secondary one.
// The residency task count is captured beforehand and restored before the secondary call,
// so the secondary receiver sees the value that was in place before the base call.
template <typename BaseCSR>
void CommandStreamReceiverWithAUBDump<BaseCSR>::makeNonResident(GraphicsAllocation &gfxAllocation) {
    const auto savedResidencyTaskCount = gfxAllocation.getResidencyTaskCount(this->osContext->getContextId());

    BaseCSR::makeNonResident(gfxAllocation);

    if (!aubCSR) {
        return;
    }
    gfxAllocation.updateResidencyTaskCount(savedResidencyTaskCount, this->osContext->getContextId());
    aubCSR->makeNonResident(gfxAllocation);
}
// Checks sub-capture for `kernelName` on the base receiver and, when present, on the
// secondary receiver, whose status then replaces the base one. The final status is
// passed to the base receiver's programForAubSubCapture and returned.
template <typename BaseCSR>
AubSubCaptureStatus CommandStreamReceiverWithAUBDump<BaseCSR>::checkAndActivateAubSubCapture(const std::string &kernelName) {
    auto captureStatus = BaseCSR::checkAndActivateAubSubCapture(kernelName);
    if (auto *secondaryCsr = aubCSR.get()) {
        captureStatus = secondaryCsr->checkAndActivateAubSubCapture(kernelName);
    }

    BaseCSR::programForAubSubCapture(captureStatus.wasActiveInPreviousEnqueue, captureStatus.isActive);
    return captureStatus;
}
// Sets up `osContext` on the base receiver, then on the secondary receiver when present.
template <typename BaseCSR>
void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContext) {
    BaseCSR::setupContext(osContext);

    if (auto *secondaryCsr = aubCSR.get()) {
        secondaryCsr->setupContext(osContext);
    }
}
// Waits for the task count on the secondary receiver first (when present), then on the
// base receiver. Both calls receive the same arguments.
template <typename BaseCSR>
void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
                                                                                      bool useQuickKmdSleep, bool forcePowerSavingMode,
                                                                                      uint32_t partitionCount, uint32_t offsetSize) {
    if (auto *secondaryCsr = aubCSR.get()) {
        secondaryCsr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait,
                                                            useQuickKmdSleep, forcePowerSavingMode,
                                                            partitionCount, offsetSize);
    }

    BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait,
                                                   useQuickKmdSleep, forcePowerSavingMode,
                                                   partitionCount, offsetSize);
}
// Adds `comment` on the secondary receiver (when present) and then on the base receiver.
template <typename BaseCSR>
void CommandStreamReceiverWithAUBDump<BaseCSR>::addAubComment(const char *comment) {
    if (auto *secondaryCsr = aubCSR.get()) {
        secondaryCsr->addAubComment(comment);
    }
    BaseCSR::addAubComment(comment);
}
} // namespace NEO

View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/command_stream/tbx_command_stream_receiver.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/os_interface/device_factory.h"
namespace NEO {
// Table of CSR creation functions, declared here and defined outside this file.
// createCommandStreamImpl indexes it with the device's eRenderCoreFamily and treats a
// null entry as "no receiver available".
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
// Creates the command stream receiver for the root device at rootDeviceIndex.
//
// The creation function is looked up in commandStreamReceiverFactory by the device's
// eRenderCoreFamily; if the entry is null, nullptr is returned. The receiver type is
// taken from DebugManager.flags.SetCommandStreamReceiver, with any negative value
// treated as CSR_HW:
//   CSR_HW           - factory function, first argument false
//   CSR_HW_WITH_AUB  - factory function, first argument true
//   CSR_AUB          - AUBCommandStreamReceiver::create with ApiSpecificConfig::getName()
//   CSR_TBX          - TbxCommandStreamReceiver::create with an empty name
//   CSR_TBX_WITH_AUB - TbxCommandStreamReceiver::create with ApiSpecificConfig::getName()
// Any other value yields nullptr.
CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment,
                                               uint32_t rootDeviceIndex,
                                               const DeviceBitfield deviceBitfield) {
    const auto renderCoreFamily = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily;
    auto createHwCsr = commandStreamReceiverFactory[renderCoreFamily];
    if (!createHwCsr) {
        return nullptr;
    }

    int32_t csrType = DebugManager.flags.SetCommandStreamReceiver.get();
    if (csrType < 0) {
        // A negative flag value selects the plain hardware receiver.
        csrType = CommandStreamReceiverType::CSR_HW;
    }

    switch (csrType) {
    case CSR_HW:
        return createHwCsr(false, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_AUB:
        return AUBCommandStreamReceiver::create(ApiSpecificConfig::getName(), true, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_TBX:
        return TbxCommandStreamReceiver::create("", false, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_HW_WITH_AUB:
        return createHwCsr(true, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_TBX_WITH_AUB:
        return TbxCommandStreamReceiver::create(ApiSpecificConfig::getName(), true, executionEnvironment, rootDeviceIndex, deviceBitfield);
    default:
        return nullptr;
    }
}
// Prepares device environments through DeviceFactory.
// Uses DeviceFactory::prepareDeviceEnvironments when DeviceFactory::isHwModeSelected()
// is true, and DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride
// otherwise. Returns whatever the selected DeviceFactory call returns.
bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment) {
    return DeviceFactory::isHwModeSelected()
               ? DeviceFactory::prepareDeviceEnvironments(executionEnvironment)
               : DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment);
}
} // namespace NEO

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/command_stream_receiver.h"
namespace NEO {
class ExecutionEnvironment;
// Creates the command stream receiver for the root device at rootDeviceIndex.
// The kind of receiver is chosen from the SetCommandStreamReceiver debug flag
// (negative values mean the hardware receiver). Returns nullptr when no creation
// function is registered for the device's render core family or when the flag
// holds an unrecognized value.
extern CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex,
const DeviceBitfield deviceBitfield);
// Prepares device environments via DeviceFactory: the regular path when HW mode is
// selected, otherwise the product-family-override path. Returns the DeviceFactory result.
extern bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment);
} // namespace NEO

View File

@@ -6,13 +6,12 @@
*/
#pragma once
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
#include "shared/source/command_stream/tbx_command_stream_receiver.h"
#include "shared/source/memory_manager/address_mapper.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/memory_manager/page_table.h"
#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h"
#include "aub_mapper.h"
#include <set>

View File

@@ -11,13 +11,16 @@
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/hardware_context_controller.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/helpers/ptr_math.h"
@@ -27,10 +30,6 @@
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "opencl/source/helpers/hardware_context_controller.h"
#include "opencl/source/os_interface/ocl_reg_path.h"
#include <cstring>
namespace NEO {
@@ -180,7 +179,7 @@ CommandStreamReceiver *TbxCommandStreamReceiverHw<GfxFamily>::create(const std::
UNRECOVERABLE_IF(nullptr == subCaptureCommon);
if (subCaptureCommon->subCaptureMode > AubSubCaptureManager::SubCaptureMode::Off) {
csr->subCaptureManager = std::make_unique<AubSubCaptureManager>(fullName, *subCaptureCommon, oclRegPath);
csr->subCaptureManager = std::make_unique<AubSubCaptureManager>(fullName, *subCaptureCommon, ApiSpecificConfig::getRegistryPath());
}
if (csr->aubManager) {

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Explicit specialization for Family: always returns the constant 0x80.
// NOTE(review): judging by the name, the same number serves as both the mask and the
// expected value when polling for completion - confirm against the caller.
template <>
uint32_t TbxCommandStreamReceiverHw<Family>::getMaskAndValueForPollForCompletion() const {
    constexpr uint32_t maskAndValue = 0x80;
    return maskAndValue;
}
// Explicit specialization for Family: always returns true.
// NOTE(review): judging by the name, this selects a "poll until not equal" comparison
// for completion polling - confirm against the caller.
template <>
bool TbxCommandStreamReceiverHw<Family>::getpollNotEqualValueForPollForCompletion() const {
    constexpr bool pollNotEqual = true;
    return pollNotEqual;
}