mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Move generic command stream receiver files to shared
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
479035c839
commit
f8867e0b97
@@ -8,6 +8,9 @@ set(NEO_CORE_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_status.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.h
|
||||
@@ -15,6 +18,14 @@ set(NEO_CORE_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tgllp_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_definitions.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.h
|
||||
@@ -50,10 +61,13 @@ set(NEO_CORE_COMMAND_STREAM
|
||||
|
||||
# Append the *_xehp_and_later sources to NEO_CORE_COMMAND_STREAM only when SUPPORT_XEHP_AND_LATER is set.
if(SUPPORT_XEHP_AND_LATER)
|
||||
list(APPEND NEO_CORE_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_xehp_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_xehp_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_xehp_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/preemption_xehp_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_and_later.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_xehp_and_later.inl
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
123
shared/source/command_stream/aub_command_stream_receiver_hw.h
Normal file
123
shared/source/command_stream/aub_command_stream_receiver_hw.h
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/aub/aub_center.h"
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "shared/source/memory_manager/page_table.h"
|
||||
#include "shared/source/memory_manager/physical_address_allocator.h"
|
||||
#include "shared/source/utilities/spinlock.h"
|
||||
|
||||
#include "aub_mapper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class AubSubCaptureManager;
|
||||
|
||||
template <typename GfxFamily>
|
||||
class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFamily> {
|
||||
protected:
|
||||
typedef CommandStreamReceiverSimulatedHw<GfxFamily> BaseClass;
|
||||
using AUB = typename AUBFamilyMapper<GfxFamily>::AUB;
|
||||
using ExternalAllocationsContainer = std::vector<AllocationView>;
|
||||
using BaseClass::getParametersForWriteMemory;
|
||||
using BaseClass::osContext;
|
||||
|
||||
public:
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::aubManager;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::hardwareContextController;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::engineInfo;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::stream;
|
||||
|
||||
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
|
||||
|
||||
void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
|
||||
|
||||
void makeResidentExternal(AllocationView &allocationView);
|
||||
void makeNonResidentExternal(uint64_t gpuAddress);
|
||||
|
||||
AubMemDump::AubFileStream *getAubStream() const {
|
||||
return static_cast<AubMemDump::AubFileStream *>(this->stream);
|
||||
}
|
||||
|
||||
void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override;
|
||||
bool writeMemory(GraphicsAllocation &gfxAllocation) override;
|
||||
MOCKABLE_VIRTUAL bool writeMemory(AllocationView &allocationView);
|
||||
void writeMMIO(uint32_t offset, uint32_t value) override;
|
||||
void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue);
|
||||
bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override;
|
||||
|
||||
AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override;
|
||||
void addAubComment(const char *message) override;
|
||||
|
||||
// Family specific version
|
||||
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
|
||||
void pollForCompletion() override;
|
||||
void pollForCompletionImpl() override;
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
|
||||
|
||||
uint32_t getDumpHandle();
|
||||
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
|
||||
void dumpAllocation(GraphicsAllocation &gfxAllocation) override;
|
||||
|
||||
static CommandStreamReceiver *create(const std::string &fileName,
|
||||
bool standalone,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
|
||||
AUBCommandStreamReceiverHw(const std::string &fileName,
|
||||
bool standalone,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
~AUBCommandStreamReceiverHw() override;
|
||||
|
||||
AUBCommandStreamReceiverHw(const AUBCommandStreamReceiverHw &) = delete;
|
||||
AUBCommandStreamReceiverHw &operator=(const AUBCommandStreamReceiverHw &) = delete;
|
||||
|
||||
MOCKABLE_VIRTUAL void openFile(const std::string &fileName);
|
||||
MOCKABLE_VIRTUAL bool reopenFile(const std::string &fileName);
|
||||
MOCKABLE_VIRTUAL void initFile(const std::string &fileName);
|
||||
MOCKABLE_VIRTUAL void closeFile();
|
||||
MOCKABLE_VIRTUAL bool isFileOpen() const;
|
||||
MOCKABLE_VIRTUAL const std::string getFileName();
|
||||
|
||||
MOCKABLE_VIRTUAL void initializeEngine();
|
||||
std::unique_ptr<AubSubCaptureManager> subCaptureManager;
|
||||
uint32_t aubDeviceId;
|
||||
bool standalone;
|
||||
|
||||
std::unique_ptr<std::conditional<is64bit, PML4, PDPE>::type> ppgtt;
|
||||
std::unique_ptr<PDPE> ggtt;
|
||||
// remap CPU VA -> GGTT VA
|
||||
AddressMapper *gttRemap;
|
||||
|
||||
MOCKABLE_VIRTUAL bool addPatchInfoComments();
|
||||
void addGUCStartMessage(uint64_t batchBufferAddress);
|
||||
uint32_t getGUCWorkQueueItemHeader();
|
||||
|
||||
CommandStreamReceiverType getType() override {
|
||||
return CommandStreamReceiverType::CSR_AUB;
|
||||
}
|
||||
|
||||
int getAddressSpaceFromPTEBits(uint64_t entryBits) const;
|
||||
|
||||
protected:
|
||||
constexpr static uint32_t getMaskAndValueForPollForCompletion();
|
||||
|
||||
bool dumpAubNonWritable = false;
|
||||
ExternalAllocationsContainer externalAllocations;
|
||||
|
||||
uint32_t pollForCompletionTaskCount = 0u;
|
||||
SpinLock pollForCompletionLock;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,852 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub/aub_helper.h"
|
||||
#include "shared/source/aub/aub_stream_provider.h"
|
||||
#include "shared/source/aub/aub_subcapture.h"
|
||||
#include "shared/source/aub_mem_dump/aub_alloc_dump.h"
|
||||
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
|
||||
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/hardware_context_controller.h"
|
||||
#include "shared/source/helpers/hash.h"
|
||||
#include "shared/source/helpers/neo_driver_version.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/memory_banks.h"
|
||||
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
||||
#include "third_party/aub_stream/headers/aub_manager.h"
|
||||
#include "third_party/aub_stream/headers/aubstream.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const std::string &fileName,
|
||||
bool standalone,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield)
|
||||
: BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield),
|
||||
standalone(standalone) {
|
||||
|
||||
executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(this->isLocalMemoryEnabled(), fileName, this->getType());
|
||||
auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get();
|
||||
UNRECOVERABLE_IF(nullptr == aubCenter);
|
||||
|
||||
auto subCaptureCommon = aubCenter->getSubCaptureCommon();
|
||||
UNRECOVERABLE_IF(nullptr == subCaptureCommon);
|
||||
subCaptureManager = std::make_unique<AubSubCaptureManager>(fileName, *subCaptureCommon, ApiSpecificConfig::getRegistryPath());
|
||||
|
||||
aubManager = aubCenter->getAubManager();
|
||||
|
||||
if (!aubCenter->getPhysicalAddressAllocator()) {
|
||||
aubCenter->initPhysicalAddressAllocator(this->createPhysicalAddressAllocator(&this->peekHwInfo()));
|
||||
}
|
||||
auto physicalAddressAllocator = aubCenter->getPhysicalAddressAllocator();
|
||||
UNRECOVERABLE_IF(nullptr == physicalAddressAllocator);
|
||||
|
||||
ppgtt = std::make_unique<std::conditional<is64bit, PML4, PDPE>::type>(physicalAddressAllocator);
|
||||
ggtt = std::make_unique<PDPE>(physicalAddressAllocator);
|
||||
|
||||
gttRemap = aubCenter->getAddressMapper();
|
||||
UNRECOVERABLE_IF(nullptr == gttRemap);
|
||||
|
||||
auto streamProvider = aubCenter->getStreamProvider();
|
||||
UNRECOVERABLE_IF(nullptr == streamProvider);
|
||||
|
||||
stream = streamProvider->getStream();
|
||||
UNRECOVERABLE_IF(nullptr == stream);
|
||||
|
||||
this->dispatchMode = DispatchMode::BatchedDispatch;
|
||||
if (DebugManager.flags.CsrDispatchMode.get()) {
|
||||
this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get();
|
||||
}
|
||||
|
||||
auto debugDeviceId = DebugManager.flags.OverrideAubDeviceId.get();
|
||||
this->aubDeviceId = debugDeviceId == -1
|
||||
? this->peekHwInfo().capabilityTable.aubDeviceId
|
||||
: static_cast<uint32_t>(debugDeviceId);
|
||||
this->defaultSshSize = 64 * KB;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
AUBCommandStreamReceiverHw<GfxFamily>::~AUBCommandStreamReceiverHw() {
|
||||
if (osContext) {
|
||||
pollForCompletion();
|
||||
}
|
||||
this->freeEngineInfo(*gttRemap);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::openFile(const std::string &fileName) {
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
initFile(fileName);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::reopenFile(const std::string &fileName) {
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (isFileOpen()) {
|
||||
if (fileName != getFileName()) {
|
||||
closeFile();
|
||||
this->freeEngineInfo(*gttRemap);
|
||||
}
|
||||
}
|
||||
if (!isFileOpen()) {
|
||||
initFile(fileName);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::initFile(const std::string &fileName) {
|
||||
if (aubManager) {
|
||||
if (!aubManager->isOpen()) {
|
||||
aubManager->open(fileName);
|
||||
UNRECOVERABLE_IF(!aubManager->isOpen());
|
||||
|
||||
std::ostringstream str;
|
||||
str << "driver version: " << driverVersion;
|
||||
aubManager->addComment(str.str().c_str());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!getAubStream()->isOpen()) {
|
||||
// Open our file
|
||||
stream->open(fileName.c_str());
|
||||
|
||||
if (!getAubStream()->isOpen()) {
|
||||
// This UNRECOVERABLE_IF most probably means you are not executing aub tests with correct current directory (containing aub_out folder)
|
||||
// try adding <familycodename>_aub
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
// Add the file header
|
||||
auto &hwInfo = this->peekHwInfo();
|
||||
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
stream->init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), aubDeviceId);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::closeFile() {
|
||||
aubManager ? aubManager->close() : stream->close();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::isFileOpen() const {
|
||||
return aubManager ? aubManager->isOpen() : getAubStream()->isOpen();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const std::string AUBCommandStreamReceiverHw<GfxFamily>::getFileName() {
|
||||
return aubManager ? aubManager->getFileName() : getAubStream()->getFileName();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine() {
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (hardwareContextController) {
|
||||
hardwareContextController->initialize();
|
||||
return;
|
||||
}
|
||||
|
||||
auto csTraits = this->getCsTraits(osContext->getEngineType());
|
||||
|
||||
if (engineInfo.pLRCA) {
|
||||
return;
|
||||
}
|
||||
|
||||
this->initGlobalMMIO();
|
||||
this->initEngineMMIO();
|
||||
this->initAdditionalMMIO();
|
||||
|
||||
// Write driver version
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "driver version: " << driverVersion;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
// Global HW Status Page
|
||||
{
|
||||
const size_t sizeHWSP = 0x1000;
|
||||
const size_t alignHWSP = 0x1000;
|
||||
engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP);
|
||||
engineInfo.ggttHWSP = gttRemap->map(engineInfo.pGlobalHWStatusPage, sizeHWSP);
|
||||
|
||||
auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt());
|
||||
|
||||
// Write our GHWSP
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttHWSP;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AubGTTData data = {0};
|
||||
this->getGTTData(reinterpret_cast<void *>(physHWSP), data);
|
||||
AUB::reserveAddressGGTT(*stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data);
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP);
|
||||
}
|
||||
|
||||
// Allocate the LRCA
|
||||
const size_t sizeLRCA = csTraits.sizeLRCA;
|
||||
const size_t alignLRCA = csTraits.alignLRCA;
|
||||
auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA);
|
||||
engineInfo.pLRCA = pLRCABase;
|
||||
|
||||
// Initialize the LRCA to a known state
|
||||
csTraits.initialize(pLRCABase);
|
||||
|
||||
// Reserve the ring buffer
|
||||
engineInfo.sizeRingBuffer = 0x4 * 0x1000;
|
||||
{
|
||||
const size_t alignRingBuffer = 0x1000;
|
||||
engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRingBuffer);
|
||||
engineInfo.ggttRingBuffer = gttRemap->map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer);
|
||||
auto physRingBuffer = ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, this->getGTTBits(), this->getMemoryBankForGtt());
|
||||
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttRingBuffer;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AubGTTData data = {0};
|
||||
this->getGTTData(reinterpret_cast<void *>(physRingBuffer), data);
|
||||
AUB::reserveAddressGGTT(*stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer, data);
|
||||
}
|
||||
|
||||
// Initialize the ring MMIO registers
|
||||
{
|
||||
uint32_t ringHead = 0x000;
|
||||
uint32_t ringTail = 0x000;
|
||||
auto ringBase = engineInfo.ggttRingBuffer;
|
||||
auto ringCtrl = (uint32_t)((engineInfo.sizeRingBuffer - 0x1000) | 1);
|
||||
csTraits.setRingHead(pLRCABase, ringHead);
|
||||
csTraits.setRingTail(pLRCABase, ringTail);
|
||||
csTraits.setRingBase(pLRCABase, ringBase);
|
||||
csTraits.setRingCtrl(pLRCABase, ringCtrl);
|
||||
}
|
||||
|
||||
// Write our LRCA
|
||||
{
|
||||
engineInfo.ggttLRCA = gttRemap->map(engineInfo.pLRCA, sizeLRCA);
|
||||
auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt());
|
||||
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AubGTTData data = {0};
|
||||
this->getGTTData(reinterpret_cast<void *>(lrcAddressPhys), data);
|
||||
AUB::reserveAddressGGTT(*stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data);
|
||||
AUB::addMemoryWrite(
|
||||
*stream,
|
||||
lrcAddressPhys,
|
||||
pLRCABase,
|
||||
sizeLRCA,
|
||||
this->getAddressSpace(csTraits.aubHintLRCA),
|
||||
csTraits.aubHintLRCA);
|
||||
}
|
||||
|
||||
// Create a context to facilitate AUB dumping of memory using PPGTT
|
||||
addContextToken(getDumpHandle());
|
||||
|
||||
DEBUG_BREAK_IF(!engineInfo.pLRCA);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CommandStreamReceiver *AUBCommandStreamReceiverHw<GfxFamily>::create(const std::string &fileName,
|
||||
bool standalone,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield) {
|
||||
auto csr = std::make_unique<AUBCommandStreamReceiverHw<GfxFamily>>(fileName, standalone, executionEnvironment, rootDeviceIndex, deviceBitfield);
|
||||
|
||||
if (!csr->subCaptureManager->isSubCaptureMode()) {
|
||||
csr->openFile(fileName);
|
||||
}
|
||||
|
||||
return csr.release();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
if (!subCaptureManager->isSubCaptureEnabled()) {
|
||||
if (this->standalone) {
|
||||
*this->tagAddress = this->peekLatestSentTaskCount();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
initializeEngine();
|
||||
|
||||
// Write our batch buffer
|
||||
auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
|
||||
auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
|
||||
auto currentOffset = batchBuffer.usedSize;
|
||||
DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset);
|
||||
auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset;
|
||||
|
||||
std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>> flatBatchBuffer(
|
||||
nullptr, [&](GraphicsAllocation *ptr) { this->getMemoryManager()->freeGraphicsMemory(ptr); });
|
||||
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
|
||||
flatBatchBuffer.reset(this->flatBatchBufferHelper->flattenBatchBuffer(this->rootDeviceIndex, batchBuffer, sizeBatchBuffer, this->dispatchMode, this->getOsContext().getDeviceBitfield()));
|
||||
if (flatBatchBuffer.get() != nullptr) {
|
||||
pBatchBuffer = flatBatchBuffer->getUnderlyingBuffer();
|
||||
batchBufferGpuAddress = flatBatchBuffer->getGpuAddress();
|
||||
batchBuffer.commandBufferAllocation = flatBatchBuffer.get();
|
||||
}
|
||||
}
|
||||
|
||||
allocationsForResidency.push_back(batchBuffer.commandBufferAllocation);
|
||||
|
||||
processResidency(allocationsForResidency, 0u);
|
||||
|
||||
if (!this->standalone || DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
|
||||
allocationsForResidency.pop_back();
|
||||
}
|
||||
|
||||
submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
|
||||
|
||||
if (this->standalone) {
|
||||
*this->tagAddress = this->peekLatestSentTaskCount();
|
||||
}
|
||||
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
pollForCompletion();
|
||||
subCaptureManager->disableSubCapture();
|
||||
}
|
||||
|
||||
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
|
||||
pollForCompletion();
|
||||
}
|
||||
|
||||
getAubStream()->flush();
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::addPatchInfoComments() {
|
||||
std::map<uint64_t, uint64_t> allocationsMap;
|
||||
|
||||
std::ostringstream str;
|
||||
str << "PatchInfoData" << std::endl;
|
||||
for (auto &patchInfoData : this->flatBatchBufferHelper->getPatchInfoCollection()) {
|
||||
str << std::hex << patchInfoData.sourceAllocation << ";";
|
||||
str << std::hex << patchInfoData.sourceAllocationOffset << ";";
|
||||
str << std::hex << patchInfoData.sourceType << ";";
|
||||
str << std::hex << patchInfoData.targetAllocation << ";";
|
||||
str << std::hex << patchInfoData.targetAllocationOffset << ";";
|
||||
str << std::hex << patchInfoData.targetType << ";";
|
||||
str << std::endl;
|
||||
|
||||
if (patchInfoData.sourceAllocation) {
|
||||
allocationsMap.insert(std::pair<uint64_t, uint64_t>(patchInfoData.sourceAllocation,
|
||||
ppgtt->map(static_cast<uintptr_t>(patchInfoData.sourceAllocation), 1, 0, MemoryBanks::MainBank)));
|
||||
}
|
||||
|
||||
if (patchInfoData.targetAllocation) {
|
||||
allocationsMap.insert(std::pair<uint64_t, uintptr_t>(patchInfoData.targetAllocation,
|
||||
ppgtt->map(static_cast<uintptr_t>(patchInfoData.targetAllocation), 1, 0, MemoryBanks::MainBank)));
|
||||
}
|
||||
}
|
||||
bool result = getAubStream()->addComment(str.str().c_str());
|
||||
this->flatBatchBufferHelper->getPatchInfoCollection().clear();
|
||||
if (!result) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ostringstream allocationStr;
|
||||
allocationStr << "AllocationsList" << std::endl;
|
||||
for (auto &element : allocationsMap) {
|
||||
allocationStr << std::hex << element.first << ";" << element.second << std::endl;
|
||||
}
|
||||
result = getAubStream()->addComment(allocationStr.str().c_str());
|
||||
if (!result) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits) {
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (hardwareContextController) {
|
||||
if (batchBufferSize) {
|
||||
hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, false);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto csTraits = this->getCsTraits(osContext->getEngineType());
|
||||
|
||||
{
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ppgtt: " << std::hex << std::showbase << batchBuffer;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
auto physBatchBuffer = ppgtt->map(static_cast<uintptr_t>(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank);
|
||||
AubHelperHw<GfxFamily> aubHelperHw(this->isLocalMemoryEnabled());
|
||||
AUB::reserveAddressPPGTT(*stream, static_cast<uintptr_t>(batchBufferGpuAddress), batchBufferSize, physBatchBuffer,
|
||||
entryBits, aubHelperHw);
|
||||
|
||||
AUB::addMemoryWrite(
|
||||
*stream,
|
||||
physBatchBuffer,
|
||||
batchBuffer,
|
||||
batchBufferSize,
|
||||
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary),
|
||||
AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
addGUCStartMessage(static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(batchBuffer)));
|
||||
addPatchInfoComments();
|
||||
}
|
||||
|
||||
// Add a batch buffer start to the ring buffer
|
||||
auto previousTail = engineInfo.tailRingBuffer;
|
||||
{
|
||||
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
|
||||
typedef typename GfxFamily::MI_NOOP MI_NOOP;
|
||||
|
||||
auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer);
|
||||
auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer);
|
||||
|
||||
auto sizeNeeded =
|
||||
sizeof(MI_BATCH_BUFFER_START) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM);
|
||||
|
||||
auto tailAlignment = sizeof(uint64_t);
|
||||
sizeNeeded = alignUp(sizeNeeded, tailAlignment);
|
||||
|
||||
if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) {
|
||||
// Pad the remaining ring with NOOPs
|
||||
auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer;
|
||||
memset(pTail, 0, sizeToWrap);
|
||||
// write remaining ring
|
||||
|
||||
auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt());
|
||||
AUB::addMemoryWrite(
|
||||
*stream,
|
||||
physDumpStart,
|
||||
pTail,
|
||||
sizeToWrap,
|
||||
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
|
||||
AubMemDump::DataTypeHintValues::TraceCommandBuffer);
|
||||
previousTail = 0;
|
||||
engineInfo.tailRingBuffer = 0;
|
||||
pTail = engineInfo.pRingBuffer;
|
||||
} else if (engineInfo.tailRingBuffer == 0) {
|
||||
// Add a LRI if this is our first submission
|
||||
auto lri = GfxFamily::cmdInitLoadRegisterImm;
|
||||
lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244));
|
||||
lri.setDataDword(0x00010000);
|
||||
*(MI_LOAD_REGISTER_IMM *)pTail = lri;
|
||||
pTail = ((MI_LOAD_REGISTER_IMM *)pTail) + 1;
|
||||
}
|
||||
|
||||
// Add our BBS
|
||||
auto bbs = GfxFamily::cmdInitBatchBufferStart;
|
||||
bbs.setBatchBufferStartAddressGraphicsaddress472(static_cast<uint64_t>(batchBufferGpuAddress));
|
||||
bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
|
||||
*(MI_BATCH_BUFFER_START *)pTail = bbs;
|
||||
pTail = ((MI_BATCH_BUFFER_START *)pTail) + 1;
|
||||
|
||||
// Compute our new ring tail.
|
||||
engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer);
|
||||
|
||||
// Add NOOPs as needed as our tail needs to be aligned
|
||||
while (engineInfo.tailRingBuffer % tailAlignment) {
|
||||
*(MI_NOOP *)pTail = GfxFamily::cmdInitNoop;
|
||||
pTail = ((MI_NOOP *)pTail) + 1;
|
||||
engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer);
|
||||
}
|
||||
UNRECOVERABLE_IF((engineInfo.tailRingBuffer % tailAlignment) != 0);
|
||||
|
||||
// Only dump the new commands
|
||||
auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail);
|
||||
auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail);
|
||||
auto dumpLength = engineInfo.tailRingBuffer - previousTail;
|
||||
|
||||
// write ring
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << ggttDumpStart;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt());
|
||||
AUB::addMemoryWrite(
|
||||
*stream,
|
||||
physDumpStart,
|
||||
dumpStart,
|
||||
dumpLength,
|
||||
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
|
||||
AubMemDump::DataTypeHintValues::TraceCommandBuffer);
|
||||
|
||||
// update the ring mmio tail in the LRCA
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA + 0x101c;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
auto physLRCA = ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt());
|
||||
AUB::addMemoryWrite(
|
||||
*stream,
|
||||
physLRCA + 0x101c,
|
||||
&engineInfo.tailRingBuffer,
|
||||
sizeof(engineInfo.tailRingBuffer),
|
||||
this->getAddressSpace(csTraits.aubHintLRCA));
|
||||
|
||||
DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer);
|
||||
}
|
||||
|
||||
// Submit our execlist by submitting to the execlist submit ports
|
||||
{
|
||||
typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
|
||||
|
||||
contextDescriptor.sData.Valid = true;
|
||||
contextDescriptor.sData.ForcePageDirRestore = false;
|
||||
contextDescriptor.sData.ForceRestore = false;
|
||||
contextDescriptor.sData.Legacy = true;
|
||||
contextDescriptor.sData.FaultSupport = 0;
|
||||
contextDescriptor.sData.PrivilegeAccessOrPPGTT = true;
|
||||
contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32;
|
||||
|
||||
auto ggttLRCA = engineInfo.ggttLRCA;
|
||||
contextDescriptor.sData.LogicalRingCtxAddress = ggttLRCA / 4096;
|
||||
contextDescriptor.sData.ContextID = 0;
|
||||
|
||||
this->submitLRCA(contextDescriptor);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion() {
|
||||
const auto lock = std::unique_lock<decltype(pollForCompletionLock)>{pollForCompletionLock};
|
||||
if (this->pollForCompletionTaskCount == this->latestSentTaskCount) {
|
||||
return;
|
||||
}
|
||||
pollForCompletionImpl();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
|
||||
this->pollForCompletionTaskCount = this->latestSentTaskCount;
|
||||
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
if (!subCaptureManager->isSubCaptureEnabled()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (hardwareContextController) {
|
||||
hardwareContextController->pollForCompletion();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto mmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase;
|
||||
const bool pollNotEqual = false;
|
||||
const uint32_t mask = getMaskAndValueForPollForCompletion();
|
||||
const uint32_t value = mask;
|
||||
stream->registerPoll(
|
||||
AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS
|
||||
mask,
|
||||
value,
|
||||
pollNotEqual,
|
||||
AubMemDump::CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
|
||||
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
|
||||
pollForCompletion();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::makeResidentExternal(AllocationView &allocationView) {
|
||||
externalAllocations.push_back(allocationView);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::makeNonResidentExternal(uint64_t gpuAddress) {
|
||||
for (auto it = externalAllocations.begin(); it != externalAllocations.end(); it++) {
|
||||
if (it->first == gpuAddress) {
|
||||
externalAllocations.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) {
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ppgtt: " << std::hex << std::showbase << gpuAddress << " end address: " << gpuAddress + size << " cpu address: " << cpuAddress << " size: " << std::dec << size;
|
||||
getAubStream()->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AubHelperHw<GfxFamily> aubHelperHw(this->isLocalMemoryEnabled());
|
||||
|
||||
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) {
|
||||
AUB::reserveAddressGGTTAndWriteMmeory(*stream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, size, offset, entryBits,
|
||||
aubHelperHw);
|
||||
};
|
||||
|
||||
ppgtt->pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, entryBits, walker, memoryBank);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) {
|
||||
if (!this->isAubWritable(gfxAllocation)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ownsLock = !gfxAllocation.isLocked();
|
||||
uint64_t gpuAddress;
|
||||
void *cpuAddress;
|
||||
size_t size;
|
||||
if (!this->getParametersForWriteMemory(gfxAllocation, gpuAddress, cpuAddress, size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (aubManager) {
|
||||
this->writeMemoryWithAubManager(gfxAllocation);
|
||||
} else {
|
||||
writeMemory(gpuAddress, cpuAddress, size, this->getMemoryBank(&gfxAllocation), this->getPPGTTAdditionalBits(&gfxAllocation));
|
||||
}
|
||||
|
||||
streamLocked.unlock();
|
||||
|
||||
if (gfxAllocation.isLocked() && ownsLock) {
|
||||
this->getMemoryManager()->unlockResource(&gfxAllocation);
|
||||
}
|
||||
|
||||
if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) {
|
||||
this->setAubWritable(false, gfxAllocation);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(AllocationView &allocationView) {
|
||||
GraphicsAllocation gfxAllocation(this->rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, reinterpret_cast<void *>(allocationView.first), allocationView.first, 0llu, allocationView.second, MemoryPool::MemoryNull, 0u);
|
||||
return writeMemory(gfxAllocation);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::writeMMIO(uint32_t offset, uint32_t value) {
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (hardwareContextController) {
|
||||
hardwareContextController->writeMMIO(offset, value);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) {
|
||||
if (hardwareContextController) {
|
||||
//Add support for expectMMIO to AubStream
|
||||
return;
|
||||
}
|
||||
this->getAubStream()->expectMMIO(mmioRegister, expectedValue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress, const void *srcAddress,
|
||||
size_t length, uint32_t compareOperation) {
|
||||
pollForCompletion();
|
||||
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (hardwareContextController) {
|
||||
hardwareContextController->expectMemory(reinterpret_cast<uint64_t>(gfxAddress), srcAddress, length, compareOperation);
|
||||
}
|
||||
|
||||
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) {
|
||||
UNRECOVERABLE_IF(offset > length);
|
||||
|
||||
this->getAubStream()->expectMemory(physAddress,
|
||||
ptrOffset(srcAddress, offset),
|
||||
size,
|
||||
this->getAddressSpaceFromPTEBits(entryBits),
|
||||
compareOperation);
|
||||
};
|
||||
|
||||
this->ppgtt->pageWalk(reinterpret_cast<uintptr_t>(gfxAddress), length, 0, PageTableEntry::nonValidBits, walker, MemoryBanks::BankNotSpecified);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) {
|
||||
if (subCaptureManager->isSubCaptureMode()) {
|
||||
if (!subCaptureManager->isSubCaptureEnabled()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &externalAllocation : externalAllocations) {
|
||||
if (!writeMemory(externalAllocation)) {
|
||||
DEBUG_BREAK_IF(externalAllocation.second != 0);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &gfxAllocation : allocationsForResidency) {
|
||||
if (dumpAubNonWritable) {
|
||||
this->setAubWritable(true, *gfxAllocation);
|
||||
}
|
||||
if (!writeMemory(*gfxAllocation)) {
|
||||
DEBUG_BREAK_IF(!((gfxAllocation->getUnderlyingBufferSize() == 0) ||
|
||||
!this->isAubWritable(*gfxAllocation)));
|
||||
}
|
||||
gfxAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||
}
|
||||
|
||||
dumpAubNonWritable = false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::dumpAllocation(GraphicsAllocation &gfxAllocation) {
|
||||
bool isBcsCsr = EngineHelpers::isBcs(this->osContext->getEngineType());
|
||||
|
||||
if (isBcsCsr != gfxAllocation.getAubInfo().bcsDumpOnly) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() || DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) {
|
||||
if (!gfxAllocation.isAllocDumpable()) {
|
||||
return;
|
||||
}
|
||||
gfxAllocation.setAllocDumpable(false, isBcsCsr);
|
||||
}
|
||||
|
||||
auto dumpFormat = AubAllocDump::getDumpFormat(gfxAllocation);
|
||||
if (dumpFormat > AubAllocDump::DumpFormat::NONE) {
|
||||
pollForCompletion();
|
||||
}
|
||||
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
|
||||
if (hardwareContextController) {
|
||||
auto surfaceInfo = std::unique_ptr<aub_stream::SurfaceInfo>(AubAllocDump::getDumpSurfaceInfo<GfxFamily>(gfxAllocation, dumpFormat));
|
||||
if (nullptr != surfaceInfo) {
|
||||
hardwareContextController->dumpSurface(*surfaceInfo.get());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
AubAllocDump::dumpAllocation<GfxFamily>(dumpFormat, gfxAllocation, getAubStream(), getDumpHandle());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
AubSubCaptureStatus AUBCommandStreamReceiverHw<GfxFamily>::checkAndActivateAubSubCapture(const std::string &kernelName) {
|
||||
auto status = subCaptureManager->checkAndActivateSubCapture(kernelName);
|
||||
if (status.isActive) {
|
||||
auto &subCaptureFile = subCaptureManager->getSubCaptureFileName(kernelName);
|
||||
auto isReopened = reopenFile(subCaptureFile);
|
||||
if (isReopened) {
|
||||
dumpAubNonWritable = true;
|
||||
}
|
||||
}
|
||||
if (this->standalone) {
|
||||
this->programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::addAubComment(const char *message) {
|
||||
auto streamLocked = getAubStream()->lockStream();
|
||||
if (aubManager) {
|
||||
aubManager->addComment(message);
|
||||
return;
|
||||
}
|
||||
getAubStream()->addComment(message);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getDumpHandle() {
|
||||
return hashPtrToU32(this);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::addGUCStartMessage(uint64_t batchBufferAddress) {
|
||||
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
|
||||
|
||||
auto bufferSize = sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START);
|
||||
AubHelperHw<GfxFamily> aubHelperHw(this->isLocalMemoryEnabled());
|
||||
|
||||
std::unique_ptr<void, std::function<void(void *)>> buffer(this->getMemoryManager()->alignedMallocWrapper(bufferSize, MemoryConstants::pageSize), [&](void *ptr) { this->getMemoryManager()->alignedFreeWrapper(ptr); });
|
||||
LinearStream linearStream(buffer.get(), bufferSize);
|
||||
|
||||
uint32_t *header = static_cast<uint32_t *>(linearStream.getSpace(sizeof(uint32_t)));
|
||||
*header = getGUCWorkQueueItemHeader();
|
||||
|
||||
MI_BATCH_BUFFER_START *miBatchBufferStartSpace = linearStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
|
||||
DEBUG_BREAK_IF(bufferSize != linearStream.getUsed());
|
||||
auto miBatchBufferStart = GfxFamily::cmdInitBatchBufferStart;
|
||||
miBatchBufferStart.setBatchBufferStartAddressGraphicsaddress472(AUB::ptrToPPGTT(buffer.get()));
|
||||
miBatchBufferStart.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
|
||||
*miBatchBufferStartSpace = miBatchBufferStart;
|
||||
|
||||
auto physBufferAddres = ppgtt->map(reinterpret_cast<uintptr_t>(buffer.get()), bufferSize,
|
||||
this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()),
|
||||
MemoryBanks::MainBank);
|
||||
|
||||
AUB::reserveAddressPPGTT(*stream, reinterpret_cast<uintptr_t>(buffer.get()), bufferSize, physBufferAddres,
|
||||
this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()),
|
||||
aubHelperHw);
|
||||
|
||||
AUB::addMemoryWrite(
|
||||
*stream,
|
||||
physBufferAddres,
|
||||
buffer.get(),
|
||||
bufferSize,
|
||||
this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype));
|
||||
|
||||
PatchInfoData patchInfoData(batchBufferAddress, 0u, PatchInfoAllocationType::Default, reinterpret_cast<uintptr_t>(buffer.get()), sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START) - sizeof(uint64_t), PatchInfoAllocationType::GUCStartMessage);
|
||||
this->flatBatchBufferHelper->setPatchInfoData(patchInfoData);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
constexpr uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() {
|
||||
return 0x100;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken(uint32_t dumpHandle) {
|
||||
// Some simulator versions don't support adding the context token.
|
||||
// This hook allows specialization for those that do.
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getGUCWorkQueueItemHeader() {
|
||||
uint32_t GUCWorkQueueItemHeader = 0x00030001;
|
||||
return GUCWorkQueueItemHeader;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
int AUBCommandStreamReceiverHw<GfxFamily>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
|
||||
return AubMemDump::AddressSpaceValues::TraceNonlocal;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
constexpr uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() {
|
||||
return 0x00008000;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken(uint32_t dumpHandle) {
|
||||
AUB::createContext(*stream, dumpHandle);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getGUCWorkQueueItemHeader() {
|
||||
if (EngineHelpers::isCcs(osContext->getEngineType())) {
|
||||
return 0x00030401;
|
||||
}
|
||||
return 0x00030001;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
int AUBCommandStreamReceiverHw<GfxFamily>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
|
||||
if (entryBits & BIT(PageTableEntry::localMemoryBit)) {
|
||||
return AubMemDump::AddressSpaceValues::TraceLocal;
|
||||
}
|
||||
return AubMemDump::AddressSpaceValues::TraceNonlocal;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
#include "shared/source/memory_manager/memory_banks.h"
|
||||
|
||||
#include "aub_mapper.h"
|
||||
#include "third_party/aub_stream/headers/hardware_context.h"
|
||||
|
||||
namespace aub_stream {
|
||||
class AubManager;
|
||||
struct AubStream;
|
||||
} // namespace aub_stream
|
||||
|
||||
namespace NEO {
|
||||
class AddressMapper;
|
||||
class GraphicsAllocation;
|
||||
class HardwareContextController;
|
||||
template <typename GfxFamily>
|
||||
class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw<GfxFamily> {
|
||||
protected:
|
||||
using CommandStreamReceiverHw<GfxFamily>::osContext;
|
||||
using AUB = typename AUBFamilyMapper<GfxFamily>::AUB;
|
||||
using MiContextDescriptorReg = typename AUB::MiContextDescriptorReg;
|
||||
|
||||
bool getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const;
|
||||
void freeEngineInfo(AddressMapper >tRemap);
|
||||
MOCKABLE_VIRTUAL uint32_t getDeviceIndex() const;
|
||||
|
||||
public:
|
||||
CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
~CommandStreamReceiverSimulatedCommonHw() override;
|
||||
uint64_t getGTTBits() const {
|
||||
return 0u;
|
||||
}
|
||||
void initGlobalMMIO();
|
||||
void initAdditionalMMIO();
|
||||
uint64_t getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation);
|
||||
void getGTTData(void *memory, AubGTTData &data);
|
||||
uint32_t getMemoryBankForGtt() const;
|
||||
static const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType);
|
||||
void initEngineMMIO();
|
||||
void submitLRCA(const MiContextDescriptorReg &contextDescriptor);
|
||||
void setupContext(OsContext &osContext) override;
|
||||
virtual bool expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length);
|
||||
virtual bool expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length);
|
||||
virtual bool expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length);
|
||||
virtual void pollForCompletion() = 0;
|
||||
virtual void pollForCompletionImpl(){};
|
||||
virtual bool writeMemory(GraphicsAllocation &gfxAllocation) = 0;
|
||||
virtual void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) = 0;
|
||||
virtual void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) = 0;
|
||||
virtual void writeMMIO(uint32_t offset, uint32_t value) = 0;
|
||||
|
||||
virtual void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0;
|
||||
virtual bool isAubWritable(GraphicsAllocation &graphicsAllocation) const = 0;
|
||||
virtual void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0;
|
||||
virtual bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const = 0;
|
||||
|
||||
virtual void dumpAllocation(GraphicsAllocation &gfxAllocation) = 0;
|
||||
|
||||
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
|
||||
|
||||
size_t getPreferredTagPoolSize() const override { return 1; }
|
||||
|
||||
aub_stream::AubManager *aubManager = nullptr;
|
||||
std::unique_ptr<HardwareContextController> hardwareContextController;
|
||||
|
||||
struct EngineInfo {
|
||||
void *pLRCA;
|
||||
uint32_t ggttLRCA;
|
||||
void *pGlobalHWStatusPage;
|
||||
uint32_t ggttHWSP;
|
||||
void *pRingBuffer;
|
||||
uint32_t ggttRingBuffer;
|
||||
size_t sizeRingBuffer;
|
||||
uint32_t tailRingBuffer;
|
||||
} engineInfo = {};
|
||||
|
||||
AubMemDump::AubStream *stream;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub/aub_helper.h"
|
||||
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/gmm_helper/resource_info.h"
|
||||
#include "shared/source/helpers/hardware_context_controller.h"
|
||||
#include "shared/source/memory_manager/address_mapper.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
||||
#include "third_party/aub_stream/headers/aub_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO() {
|
||||
if (DebugManager.flags.AubDumpAddMmioRegistersList.get() != "unk") {
|
||||
auto mmioList = AubHelper::getAdditionalMmioList();
|
||||
for (auto &mmioPair : mmioList) {
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::setupContext(OsContext &osContext) {
|
||||
CommandStreamReceiverHw<GfxFamily>::setupContext(osContext);
|
||||
|
||||
auto engineType = osContext.getEngineType();
|
||||
uint32_t flags = 0;
|
||||
getCsTraits(engineType).setContextSaveRestoreFlags(flags);
|
||||
|
||||
if (aubManager && !osContext.isLowPriority()) {
|
||||
hardwareContextController = std::make_unique<HardwareContextController>(*aubManager, osContext, flags);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const {
|
||||
cpuAddress = graphicsAllocation.getUnderlyingBuffer();
|
||||
gpuAddress = GmmHelper::decanonize(graphicsAllocation.getGpuAddress());
|
||||
size = graphicsAllocation.getUnderlyingBufferSize();
|
||||
auto gmm = graphicsAllocation.getDefaultGmm();
|
||||
if (gmm && gmm->isCompressionEnabled) {
|
||||
size = gmm->gmmResourceInfo->getSizeAllocation();
|
||||
}
|
||||
|
||||
if (size == 0)
|
||||
return false;
|
||||
|
||||
if (cpuAddress == nullptr && graphicsAllocation.isAllocationLockable()) {
|
||||
cpuAddress = this->getMemoryManager()->lockResource(&graphicsAllocation);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
return this->expectMemory(gfxAddress, srcAddress, length,
|
||||
AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual);
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
return this->expectMemory(gfxAddress, srcAddress, length,
|
||||
AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual);
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
bool CommandStreamReceiverSimulatedCommonHw<GfxFamily>::expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
return this->expectMemory(gfxAddress, srcAddress, length,
|
||||
AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::freeEngineInfo(AddressMapper >tRemap) {
|
||||
alignedFree(engineInfo.pLRCA);
|
||||
gttRemap.unmap(engineInfo.pLRCA);
|
||||
engineInfo.pLRCA = nullptr;
|
||||
|
||||
alignedFree(engineInfo.pGlobalHWStatusPage);
|
||||
gttRemap.unmap(engineInfo.pGlobalHWStatusPage);
|
||||
engineInfo.pGlobalHWStatusPage = nullptr;
|
||||
|
||||
alignedFree(engineInfo.pRingBuffer);
|
||||
gttRemap.unmap(engineInfo.pRingBuffer);
|
||||
engineInfo.pRingBuffer = nullptr;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {
|
||||
if (gfxAllocation.isResident(osContext->getContextId())) {
|
||||
dumpAllocation(gfxAllocation);
|
||||
this->getEvictionAllocations().push_back(&gfxAllocation);
|
||||
gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getDeviceIndex() const {
|
||||
return osContext->getDeviceBitfield().any() ? static_cast<uint32_t>(Math::log2(static_cast<uint32_t>(osContext->getDeviceBitfield().to_ulong()))) : 0u;
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
CommandStreamReceiverSimulatedCommonHw<GfxFamily>::CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield)
|
||||
: CommandStreamReceiverHw<GfxFamily>(executionEnvironment, rootDeviceIndex, deviceBitfield) {
|
||||
this->useNewResourceImplicitFlush = false;
|
||||
this->useGpuIdleImplicitFlush = false;
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
CommandStreamReceiverSimulatedCommonHw<GfxFamily>::~CommandStreamReceiverSimulatedCommonHw() = default;
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initGlobalMMIO() {
|
||||
for (auto &mmioPair : AUBFamilyMapper<GfxFamily>::globalMMIO) {
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) {
|
||||
return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::userSupervisorBit);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getGTTData(void *memory, AubGTTData &data) {
|
||||
data.present = true;
|
||||
data.localMemory = false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getMemoryBankForGtt() const {
|
||||
return MemoryBanks::getBank(getDeviceIndex());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getCsTraits(aub_stream::EngineType engineType) {
|
||||
return *AUBFamilyMapper<GfxFamily>::csTraits[engineType];
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initEngineMMIO() {
|
||||
auto mmioList = AUBFamilyMapper<GfxFamily>::perEngineMMIO[osContext->getEngineType()];
|
||||
|
||||
DEBUG_BREAK_IF(!mmioList);
|
||||
for (auto &mmioPair : *mmioList) {
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::submitLRCA(const MiContextDescriptorReg &contextDescriptor) {
|
||||
auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase;
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0);
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0);
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[1]);
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[0]);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/memory_manager/memory_banks.h"
|
||||
#include "shared/source/memory_manager/memory_pool.h"
|
||||
#include "shared/source/memory_manager/physical_address_allocator.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initGlobalMMIO() {
|
||||
for (auto &mmioPair : AUBFamilyMapper<GfxFamily>::globalMMIO) {
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
|
||||
if (this->localMemoryEnabled) {
|
||||
MMIOPair guCntl = {0x00101010, 0x00000080}; //GU_CNTL
|
||||
stream->writeMMIO(guCntl.first, guCntl.second);
|
||||
|
||||
MMIOPair lmemCfg = {0x0000cf58, 0x80000000}; //LMEM_CFG
|
||||
stream->writeMMIO(lmemCfg.first, lmemCfg.second);
|
||||
|
||||
MMIOPair tileAddrRange[] = {{0x00004900, 0x0001},
|
||||
{0x00004904, 0x0001},
|
||||
{0x00004908, 0x0001},
|
||||
{0x0000490c, 0x0001}}; //XEHP_TILE_ADDR_RANGE
|
||||
|
||||
const uint32_t numberOfTiles = 4;
|
||||
const uint32_t localMemorySizeGB = static_cast<uint32_t>(AubHelper::getMemBankSize(&this->peekHwInfo()) / MemoryConstants::gigaByte);
|
||||
|
||||
uint32_t localMemoryBaseAddressInGB = 0x0;
|
||||
|
||||
for (uint32_t i = 0; i < numberOfTiles; i++) {
|
||||
tileAddrRange[i].second |= localMemoryBaseAddressInGB << 1;
|
||||
tileAddrRange[i].second |= localMemorySizeGB << 8;
|
||||
stream->writeMMIO(tileAddrRange[i].first, tileAddrRange[i].second);
|
||||
|
||||
localMemoryBaseAddressInGB += localMemorySizeGB;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) {
|
||||
if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get() ||
|
||||
(gfxAllocation && gfxAllocation->getMemoryPool() == MemoryPool::LocalMemory)) {
|
||||
return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::localMemoryBit);
|
||||
}
|
||||
return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getGTTData(void *memory, AubGTTData &data) {
|
||||
data.present = true;
|
||||
data.localMemory = this->localMemoryEnabled;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getMemoryBankForGtt() const {
|
||||
auto deviceIndex = getDeviceIndex();
|
||||
if (this->localMemoryEnabled) {
|
||||
return MemoryBanks::getBankForLocalMemory(deviceIndex);
|
||||
}
|
||||
return MemoryBanks::getBank(deviceIndex);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getCsTraits(aub_stream::EngineType engineType) {
|
||||
return *AUBFamilyMapper<GfxFamily>::csTraits[engineType];
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initEngineMMIO() {
|
||||
auto mmioList = AUBFamilyMapper<GfxFamily>::perEngineMMIO[osContext->getEngineType()];
|
||||
DEBUG_BREAK_IF(!mmioList);
|
||||
for (auto &mmioPair : *mmioList) {
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverSimulatedCommonHw<GfxFamily>::submitLRCA(const MiContextDescriptorReg &contextDescriptor) {
|
||||
auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase;
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]);
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]);
|
||||
|
||||
// Load our new exec list
|
||||
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/aub/aub_helper.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/hardware_context_controller.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/memory_banks.h"
|
||||
#include "shared/source/memory_manager/memory_pool.h"
|
||||
#include "shared/source/memory_manager/physical_address_allocator.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
||||
#include "aub_mem_dump.h"
|
||||
#include "third_party/aub_stream/headers/allocation_params.h"
|
||||
#include "third_party/aub_stream/headers/aub_manager.h"
|
||||
#include "third_party/aub_stream/headers/hardware_context.h"
|
||||
|
||||
namespace NEO {
|
||||
class GraphicsAllocation;
|
||||
template <typename GfxFamily>
|
||||
class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCommonHw<GfxFamily> {
|
||||
protected:
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::CommandStreamReceiverSimulatedCommonHw;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::osContext;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::getDeviceIndex;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::aubManager;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::hardwareContextController;
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::writeMemory;
|
||||
|
||||
public:
|
||||
uint32_t getMemoryBank(GraphicsAllocation *allocation) const {
|
||||
if (aubManager) {
|
||||
return static_cast<uint32_t>(getMemoryBanksBitfield(allocation).to_ulong());
|
||||
}
|
||||
|
||||
uint32_t deviceIndexChosen = allocation->storageInfo.memoryBanks.any()
|
||||
? getDeviceIndexFromStorageInfo(allocation->storageInfo)
|
||||
: getDeviceIndex();
|
||||
|
||||
if (allocation->getMemoryPool() == MemoryPool::LocalMemory) {
|
||||
return MemoryBanks::getBankForLocalMemory(deviceIndexChosen);
|
||||
}
|
||||
return MemoryBanks::getBank(deviceIndexChosen);
|
||||
}
|
||||
|
||||
static uint32_t getDeviceIndexFromStorageInfo(StorageInfo storageInfo) {
|
||||
uint32_t deviceIndex = 0;
|
||||
while (!storageInfo.memoryBanks.test(0)) {
|
||||
storageInfo.memoryBanks >>= 1;
|
||||
deviceIndex++;
|
||||
}
|
||||
return deviceIndex;
|
||||
}
|
||||
|
||||
DeviceBitfield getMemoryBanksBitfield(GraphicsAllocation *allocation) const {
|
||||
if (allocation->getMemoryPool() == MemoryPool::LocalMemory) {
|
||||
if (allocation->storageInfo.memoryBanks.any()) {
|
||||
if (allocation->storageInfo.cloningOfPageTables || this->isMultiOsContextCapable()) {
|
||||
return allocation->storageInfo.memoryBanks;
|
||||
}
|
||||
}
|
||||
return this->osContext->getDeviceBitfield();
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
int getAddressSpace(int hint) {
|
||||
bool traceLocalAllowed = false;
|
||||
switch (hint) {
|
||||
case AubMemDump::DataTypeHintValues::TraceLogicalRingContextRcs:
|
||||
case AubMemDump::DataTypeHintValues::TraceLogicalRingContextCcs:
|
||||
case AubMemDump::DataTypeHintValues::TraceLogicalRingContextBcs:
|
||||
case AubMemDump::DataTypeHintValues::TraceLogicalRingContextVcs:
|
||||
case AubMemDump::DataTypeHintValues::TraceLogicalRingContextVecs:
|
||||
case AubMemDump::DataTypeHintValues::TraceCommandBuffer:
|
||||
traceLocalAllowed = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if ((traceLocalAllowed && this->localMemoryEnabled) || DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
|
||||
return AubMemDump::AddressSpaceValues::TraceLocal;
|
||||
}
|
||||
return AubMemDump::AddressSpaceValues::TraceNonlocal;
|
||||
}
|
||||
PhysicalAddressAllocator *createPhysicalAddressAllocator(const HardwareInfo *hwInfo) {
|
||||
const auto bankSize = AubHelper::getMemBankSize(hwInfo);
|
||||
const auto devicesCount = HwHelper::getSubDevicesCount(hwInfo);
|
||||
return new PhysicalAddressAllocatorHw<GfxFamily>(bankSize, devicesCount);
|
||||
}
|
||||
void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override {
|
||||
uint64_t gpuAddress;
|
||||
void *cpuAddress;
|
||||
size_t size;
|
||||
this->getParametersForWriteMemory(graphicsAllocation, gpuAddress, cpuAddress, size);
|
||||
int hint = graphicsAllocation.getAllocationType() == GraphicsAllocation::AllocationType::COMMAND_BUFFER
|
||||
? AubMemDump::DataTypeHintValues::TraceBatchBuffer
|
||||
: AubMemDump::DataTypeHintValues::TraceNotype;
|
||||
|
||||
aub_stream::AllocationParams allocationParams(gpuAddress, cpuAddress, size, this->getMemoryBank(&graphicsAllocation),
|
||||
hint, graphicsAllocation.getUsedPageSize());
|
||||
|
||||
auto gmm = graphicsAllocation.getDefaultGmm();
|
||||
|
||||
allocationParams.additionalParams.compressionEnabled = gmm ? gmm->isCompressionEnabled : false;
|
||||
|
||||
if (graphicsAllocation.storageInfo.cloningOfPageTables || !graphicsAllocation.isAllocatedInLocalMemoryPool()) {
|
||||
aubManager->writeMemory2(allocationParams);
|
||||
} else {
|
||||
hardwareContextController->writeMemory(allocationParams);
|
||||
}
|
||||
}
|
||||
|
||||
void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) override {
|
||||
auto bank = getMemoryBank(&graphicsAllocation);
|
||||
if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
|
||||
bank = GraphicsAllocation::defaultBank;
|
||||
}
|
||||
|
||||
graphicsAllocation.setAubWritable(writable, bank);
|
||||
}
|
||||
|
||||
bool isAubWritable(GraphicsAllocation &graphicsAllocation) const override {
|
||||
auto bank = getMemoryBank(&graphicsAllocation);
|
||||
if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
|
||||
bank = GraphicsAllocation::defaultBank;
|
||||
}
|
||||
return graphicsAllocation.isAubWritable(bank);
|
||||
}
|
||||
|
||||
void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) override {
|
||||
auto bank = getMemoryBank(&graphicsAllocation);
|
||||
if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
|
||||
bank = GraphicsAllocation::defaultBank;
|
||||
}
|
||||
graphicsAllocation.setTbxWritable(writable, bank);
|
||||
}
|
||||
|
||||
bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const override {
|
||||
auto bank = getMemoryBank(&graphicsAllocation);
|
||||
if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) {
|
||||
bank = GraphicsAllocation::defaultBank;
|
||||
}
|
||||
return graphicsAllocation.isTbxWritable(bank);
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename BaseCSR>
|
||||
class CommandStreamReceiverWithAUBDump : public BaseCSR {
|
||||
protected:
|
||||
using BaseCSR::osContext;
|
||||
|
||||
public:
|
||||
CommandStreamReceiverWithAUBDump(const std::string &baseName,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
|
||||
CommandStreamReceiverWithAUBDump(const CommandStreamReceiverWithAUBDump &) = delete;
|
||||
CommandStreamReceiverWithAUBDump &operator=(const CommandStreamReceiverWithAUBDump &) = delete;
|
||||
|
||||
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
|
||||
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
|
||||
|
||||
AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override;
|
||||
void setupContext(OsContext &osContext) override;
|
||||
|
||||
CommandStreamReceiverType getType() override {
|
||||
if (BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX) {
|
||||
return CommandStreamReceiverType::CSR_TBX_WITH_AUB;
|
||||
}
|
||||
return CommandStreamReceiverType::CSR_HW_WITH_AUB;
|
||||
}
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode,
|
||||
uint32_t partitionCount, uint32_t offsetSize) override;
|
||||
|
||||
size_t getPreferredTagPoolSize() const override { return 1; }
|
||||
|
||||
void addAubComment(const char *comment) override;
|
||||
|
||||
std::unique_ptr<CommandStreamReceiver> aubCSR;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub/aub_center.h"
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
|
||||
|
||||
template <typename BaseCSR>
|
||||
CommandStreamReceiverWithAUBDump<BaseCSR>::CommandStreamReceiverWithAUBDump(const std::string &baseName,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield)
|
||||
: BaseCSR(executionEnvironment, rootDeviceIndex, deviceBitfield) {
|
||||
bool isAubManager = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter && executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter->getAubManager();
|
||||
bool isTbxMode = CommandStreamReceiverType::CSR_TBX == BaseCSR::getType();
|
||||
bool createAubCsr = (isAubManager && isTbxMode) ? false : true;
|
||||
if (createAubCsr) {
|
||||
aubCSR.reset(AUBCommandStreamReceiver::create(baseName, false, executionEnvironment, rootDeviceIndex, deviceBitfield));
|
||||
UNRECOVERABLE_IF(!aubCSR->initializeTagAllocation());
|
||||
*aubCSR->getTagAddress() = std::numeric_limits<uint32_t>::max();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
bool CommandStreamReceiverWithAUBDump<BaseCSR>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
|
||||
if (aubCSR) {
|
||||
aubCSR->flush(batchBuffer, allocationsForResidency);
|
||||
aubCSR->setLatestSentTaskCount(BaseCSR::peekLatestSentTaskCount());
|
||||
}
|
||||
return BaseCSR::flush(batchBuffer, allocationsForResidency);
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
void CommandStreamReceiverWithAUBDump<BaseCSR>::makeNonResident(GraphicsAllocation &gfxAllocation) {
|
||||
auto residencyTaskCount = gfxAllocation.getResidencyTaskCount(this->osContext->getContextId());
|
||||
BaseCSR::makeNonResident(gfxAllocation);
|
||||
if (aubCSR) {
|
||||
gfxAllocation.updateResidencyTaskCount(residencyTaskCount, this->osContext->getContextId());
|
||||
aubCSR->makeNonResident(gfxAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
AubSubCaptureStatus CommandStreamReceiverWithAUBDump<BaseCSR>::checkAndActivateAubSubCapture(const std::string &kernelName) {
|
||||
auto status = BaseCSR::checkAndActivateAubSubCapture(kernelName);
|
||||
if (aubCSR) {
|
||||
status = aubCSR->checkAndActivateAubSubCapture(kernelName);
|
||||
}
|
||||
BaseCSR::programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive);
|
||||
return status;
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContext) {
|
||||
BaseCSR::setupContext(osContext);
|
||||
if (aubCSR) {
|
||||
aubCSR->setupContext(osContext);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode,
|
||||
uint32_t partitionCount, uint32_t offsetSize) {
|
||||
if (aubCSR) {
|
||||
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
|
||||
}
|
||||
|
||||
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
void CommandStreamReceiverWithAUBDump<BaseCSR>::addAubComment(const char *comment) {
|
||||
if (aubCSR) {
|
||||
aubCSR->addAubComment(comment);
|
||||
}
|
||||
BaseCSR::addAubComment(comment);
|
||||
}
|
||||
} // namespace NEO
|
||||
62
shared/source/command_stream/create_command_stream_impl.cpp
Normal file
62
shared/source/command_stream/create_command_stream_impl.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
|
||||
|
||||
/**
 * Creates a command stream receiver for the given root device.
 *
 * The receiver kind comes from the SetCommandStreamReceiver debug flag; a
 * negative flag value selects CSR_HW.
 *
 * @return the created receiver, or nullptr when no factory function is
 *         registered for the device's render core family or the requested
 *         kind is not one of the handled values.
 */
CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment,
                                               uint32_t rootDeviceIndex,
                                               const DeviceBitfield deviceBitfield) {
    const auto *hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    auto funcCreate = commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily];
    if (nullptr == funcCreate) {
        return nullptr;
    }

    int32_t requestedType = DebugManager.flags.SetCommandStreamReceiver.get();
    if (requestedType < 0) {
        requestedType = CommandStreamReceiverType::CSR_HW;
    }

    switch (requestedType) {
    case CSR_HW:
        return funcCreate(false, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_AUB:
        return AUBCommandStreamReceiver::create(ApiSpecificConfig::getName(), true, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_TBX:
        return TbxCommandStreamReceiver::create("", false, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_HW_WITH_AUB:
        return funcCreate(true, executionEnvironment, rootDeviceIndex, deviceBitfield);
    case CSR_TBX_WITH_AUB:
        return TbxCommandStreamReceiver::create(ApiSpecificConfig::getName(), true, executionEnvironment, rootDeviceIndex, deviceBitfield);
    default:
        // Unhandled receiver kinds yield no receiver.
        return nullptr;
    }
}
|
||||
|
||||
/**
 * Prepares device environments through DeviceFactory: the regular path when
 * HW mode is selected, the product-family-override path otherwise.
 *
 * @return the result reported by the chosen DeviceFactory routine.
 */
bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment) {
    return DeviceFactory::isHwModeSelected()
               ? DeviceFactory::prepareDeviceEnvironments(executionEnvironment)
               : DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment);
}
|
||||
|
||||
} // namespace NEO
|
||||
17
shared/source/command_stream/create_command_stream_impl.h
Normal file
17
shared/source/command_stream/create_command_stream_impl.h
Normal file
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
namespace NEO {
|
||||
class ExecutionEnvironment;
|
||||
extern CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
extern bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment);
|
||||
} // namespace NEO
|
||||
@@ -6,13 +6,12 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver.h"
|
||||
#include "shared/source/memory_manager/address_mapper.h"
|
||||
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "shared/source/memory_manager/page_table.h"
|
||||
|
||||
#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h"
|
||||
|
||||
#include "aub_mapper.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
@@ -11,13 +11,16 @@
|
||||
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
|
||||
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/hardware_context_controller.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
@@ -27,10 +30,6 @@
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
||||
#include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h"
|
||||
#include "opencl/source/helpers/hardware_context_controller.h"
|
||||
#include "opencl/source/os_interface/ocl_reg_path.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace NEO {
|
||||
@@ -180,7 +179,7 @@ CommandStreamReceiver *TbxCommandStreamReceiverHw<GfxFamily>::create(const std::
|
||||
UNRECOVERABLE_IF(nullptr == subCaptureCommon);
|
||||
|
||||
if (subCaptureCommon->subCaptureMode > AubSubCaptureManager::SubCaptureMode::Off) {
|
||||
csr->subCaptureManager = std::make_unique<AubSubCaptureManager>(fullName, *subCaptureCommon, oclRegPath);
|
||||
csr->subCaptureManager = std::make_unique<AubSubCaptureManager>(fullName, *subCaptureCommon, ApiSpecificConfig::getRegistryPath());
|
||||
}
|
||||
|
||||
if (csr->aubManager) {
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
template <>
|
||||
uint32_t TbxCommandStreamReceiverHw<Family>::getMaskAndValueForPollForCompletion() const {
|
||||
return 0x80;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TbxCommandStreamReceiverHw<Family>::getpollNotEqualValueForPollForCompletion() const {
|
||||
return true;
|
||||
}
|
||||
Reference in New Issue
Block a user