mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Add patch info comments to AUB dump
Collect patching information and add as comments to AUB dump. Change-Id: Ib7c903a2589d68b6e3e614c1774c7cd5a000c29f
This commit is contained in:
@@ -121,6 +121,7 @@ struct AubStream {
|
||||
virtual void writeGTT(uint32_t offset, uint64_t entry) = 0;
|
||||
virtual void writeMMIO(uint32_t offset, uint32_t value) = 0;
|
||||
virtual void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) = 0;
|
||||
virtual ~AubStream() = default;
|
||||
};
|
||||
|
||||
struct AubFileStream : public AubStream {
|
||||
@@ -134,8 +135,8 @@ struct AubFileStream : public AubStream {
|
||||
void writeGTT(uint32_t offset, uint64_t entry) override;
|
||||
void writeMMIO(uint32_t offset, uint32_t value) override;
|
||||
void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override;
|
||||
void expectMemory(uint64_t physAddress, const void *memory, size_t size);
|
||||
void addComment(const char *message);
|
||||
MOCKABLE_VIRTUAL void expectMemory(uint64_t physAddress, const void *memory, size_t size);
|
||||
MOCKABLE_VIRTUAL bool addComment(const char *message);
|
||||
|
||||
std::ofstream fileHandle;
|
||||
};
|
||||
|
||||
@@ -249,6 +249,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
blockQueue,
|
||||
commandType);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
|
||||
commandStreamReceiver.setPatchInfoData(patchInfoData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
commandStreamReceiver.setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
||||
|
||||
slmUsed = multiDispatchInfo.usesSlm();
|
||||
|
||||
@@ -231,7 +231,7 @@ void AubFileStream::createContext(const AubPpgttContextCreate &cmd) {
|
||||
fileHandle.write(reinterpret_cast<const char *>(&cmd), sizeof(cmd));
|
||||
}
|
||||
|
||||
void AubFileStream::addComment(const char *message) {
|
||||
bool AubFileStream::addComment(const char *message) {
|
||||
using CmdServicesMemTraceComment = AubMemDump::CmdServicesMemTraceComment;
|
||||
CmdServicesMemTraceComment cmd;
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
@@ -251,6 +251,7 @@ void AubFileStream::addComment(const char *message) {
|
||||
uint32_t zero = 0;
|
||||
fileHandle.write(reinterpret_cast<char *>(&zero), sizeof(uint32_t) - remainder);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace AubMemDump
|
||||
|
||||
@@ -66,6 +66,10 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
|
||||
return this->memoryManager;
|
||||
}
|
||||
|
||||
bool setPatchInfoData(PatchInfoData &data) override;
|
||||
|
||||
std::vector<PatchInfoData> patchInfoCollection;
|
||||
|
||||
static const AubMemDump::LrcaHelper &getCsTraits(EngineType engineType);
|
||||
|
||||
struct EngineInfo {
|
||||
@@ -79,7 +83,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
|
||||
uint32_t tailRingBuffer;
|
||||
} engineInfoTable[EngineType::NUM_ENGINES];
|
||||
|
||||
AUBCommandStreamReceiver::AubFileStream stream;
|
||||
std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> stream;
|
||||
bool standalone;
|
||||
|
||||
TypeSelector<PML4, PDPE, sizeof(void *) == 8>::type ppgtt;
|
||||
@@ -88,5 +92,6 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
|
||||
AddressMapper gttRemap;
|
||||
|
||||
MOCKABLE_VIRTUAL void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer);
|
||||
MOCKABLE_VIRTUAL bool addPatchInfoComments();
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -35,6 +35,7 @@ namespace OCLRT {
|
||||
template <typename GfxFamily>
|
||||
AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const HardwareInfo &hwInfoIn, bool standalone)
|
||||
: BaseClass(hwInfoIn),
|
||||
stream(std::unique_ptr<AUBCommandStreamReceiver::AubFileStream>(new AUBCommandStreamReceiver::AubFileStream())),
|
||||
standalone(standalone) {
|
||||
this->dispatchMode = CommandStreamReceiver::DispatchMode::BatchedDispatch;
|
||||
if (DebugManager.flags.CsrDispatchMode.get()) {
|
||||
@@ -54,7 +55,7 @@ AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const Hardware
|
||||
|
||||
template <typename GfxFamily>
|
||||
AUBCommandStreamReceiverHw<GfxFamily>::~AUBCommandStreamReceiverHw() {
|
||||
stream.close();
|
||||
stream->close();
|
||||
|
||||
for (auto &engineInfo : engineInfoTable) {
|
||||
alignedFree(engineInfo.pLRCA);
|
||||
@@ -79,7 +80,7 @@ const AubMemDump::LrcaHelper &AUBCommandStreamReceiverHw<GfxFamily>::getCsTraits
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::initGlobalMMIO() {
|
||||
for (auto &mmioPair : AUBFamilyMapper<GfxFamily>::globalMMIO) {
|
||||
stream.writeMMIO(mmioPair.first, mmioPair.second);
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,7 +90,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initEngineMMIO(EngineType engineType
|
||||
|
||||
DEBUG_BREAK_IF(!mmioList);
|
||||
for (auto &mmioPair : *mmioList) {
|
||||
stream.writeMMIO(mmioPair.first, mmioPair.second);
|
||||
stream->writeMMIO(mmioPair.first, mmioPair.second);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,11 +114,11 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine(EngineType engineTy
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttHWSP;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AUB::reserveAddressGGTT(stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP);
|
||||
stream.writeMMIO(mmioBase + 0x2080, engineInfo.ggttHWSP);
|
||||
AUB::reserveAddressGGTT(*stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP);
|
||||
stream->writeMMIO(mmioBase + 0x2080, engineInfo.ggttHWSP);
|
||||
}
|
||||
|
||||
// Allocate the LRCA
|
||||
@@ -141,10 +142,10 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine(EngineType engineTy
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttRingBuffer;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AUB::reserveAddressGGTT(stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer);
|
||||
AUB::reserveAddressGGTT(*stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer);
|
||||
}
|
||||
|
||||
// Initialize the ring MMIO registers
|
||||
@@ -167,12 +168,12 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine(EngineType engineTy
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
AUB::reserveAddressGGTT(stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys);
|
||||
AUB::reserveAddressGGTT(*stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys);
|
||||
AUB::addMemoryWrite(
|
||||
stream,
|
||||
*stream,
|
||||
lrcAddressPhys,
|
||||
pLRCABase,
|
||||
sizeLRCA,
|
||||
@@ -189,15 +190,15 @@ CommandStreamReceiver *AUBCommandStreamReceiverHw<GfxFamily>::create(const Hardw
|
||||
auto csr = new AUBCommandStreamReceiverHw<GfxFamily>(hwInfoIn, standalone);
|
||||
|
||||
// Open our file
|
||||
csr->stream.open(fileName.c_str());
|
||||
csr->stream->open(fileName.c_str());
|
||||
|
||||
if (!csr->stream.fileHandle.is_open()) {
|
||||
if (!csr->stream->fileHandle.is_open()) {
|
||||
// This DEBUG_BREAK_IF most probably means you are not executing aub tests with correct current directory (containing aub_out folder)
|
||||
// try adding <familycodename>_aub
|
||||
DEBUG_BREAK_IF(true);
|
||||
}
|
||||
// Add the file header.
|
||||
csr->stream.init(AubMemDump::SteppingValues::A, AUB::Traits::device);
|
||||
csr->stream->init(AubMemDump::SteppingValues::A, AUB::Traits::device);
|
||||
|
||||
return csr;
|
||||
}
|
||||
@@ -231,14 +232,14 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ppgtt: " << std::hex << std::showbase << pBatchBuffer;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
auto physBatchBuffer = ppgtt.map(reinterpret_cast<uintptr_t>(pBatchBuffer), sizeBatchBuffer);
|
||||
AUB::reserveAddressPPGTT(stream, reinterpret_cast<uintptr_t>(pBatchBuffer), sizeBatchBuffer, physBatchBuffer);
|
||||
AUB::reserveAddressPPGTT(*stream, reinterpret_cast<uintptr_t>(pBatchBuffer), sizeBatchBuffer, physBatchBuffer);
|
||||
|
||||
AUB::addMemoryWrite(
|
||||
stream,
|
||||
*stream,
|
||||
physBatchBuffer,
|
||||
pBatchBuffer,
|
||||
sizeBatchBuffer,
|
||||
@@ -257,6 +258,9 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
}
|
||||
processResidency(allocationsForResidency);
|
||||
}
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
addPatchInfoComments();
|
||||
}
|
||||
|
||||
// Add a batch buffer start to the ring buffer
|
||||
auto previousTail = engineInfo.tailRingBuffer;
|
||||
@@ -282,7 +286,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
// write remaining ring
|
||||
auto physDumpStart = ggtt.map(ggttTail, sizeToWrap);
|
||||
AUB::addMemoryWrite(
|
||||
stream,
|
||||
*stream,
|
||||
physDumpStart,
|
||||
pTail,
|
||||
sizeToWrap,
|
||||
@@ -327,12 +331,12 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << ggttDumpStart;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
auto physDumpStart = ggtt.map(ggttDumpStart, dumpLength);
|
||||
AUB::addMemoryWrite(
|
||||
stream,
|
||||
*stream,
|
||||
physDumpStart,
|
||||
dumpStart,
|
||||
dumpLength,
|
||||
@@ -343,12 +347,12 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA + 0x101c;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
auto physLRCA = ggtt.map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer));
|
||||
AUB::addMemoryWrite(
|
||||
stream,
|
||||
*stream,
|
||||
physLRCA + 0x101c,
|
||||
&engineInfo.tailRingBuffer,
|
||||
sizeof(engineInfo.tailRingBuffer),
|
||||
@@ -402,13 +406,56 @@ void *AUBCommandStreamReceiverHw<GfxFamily>::flattenBatchBuffer(BatchBuffer &bat
|
||||
return flatBatchBuffer;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::addPatchInfoComments() {
|
||||
std::map<uint64_t, uint64_t> allocationsMap;
|
||||
|
||||
std::ostringstream str;
|
||||
str << "PatchInfoData" << std::endl;
|
||||
for (auto &patchInfoData : this->patchInfoCollection) {
|
||||
str << std::hex << patchInfoData.sourceAllocation << ";";
|
||||
str << std::hex << patchInfoData.sourceAllocationOffset << ";";
|
||||
str << std::hex << patchInfoData.sourceType << ";";
|
||||
str << std::hex << patchInfoData.targetAllocation << ";";
|
||||
str << std::hex << patchInfoData.targetAllocationOffset << ";";
|
||||
str << std::hex << patchInfoData.targetType << ";";
|
||||
str << std::endl;
|
||||
|
||||
if (patchInfoData.sourceAllocation) {
|
||||
allocationsMap.insert(std::pair<uint64_t, uint64_t>(patchInfoData.sourceAllocation,
|
||||
ppgtt.map(static_cast<uintptr_t>(patchInfoData.sourceAllocation), 1)));
|
||||
}
|
||||
|
||||
if (patchInfoData.targetAllocation) {
|
||||
allocationsMap.insert(std::pair<uint64_t, uintptr_t>(patchInfoData.targetAllocation,
|
||||
ppgtt.map(static_cast<uintptr_t>(patchInfoData.targetAllocation), 1)));
|
||||
}
|
||||
}
|
||||
bool result = stream->addComment(str.str().c_str());
|
||||
this->patchInfoCollection.clear();
|
||||
if (!result) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ostringstream allocationStr;
|
||||
allocationStr << "AllocationsList" << std::endl;
|
||||
for (auto &element : allocationsMap) {
|
||||
allocationStr << std::hex << element.first << ";" << element.second << std::endl;
|
||||
}
|
||||
result = stream->addComment(allocationStr.str().c_str());
|
||||
if (!result) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void AUBCommandStreamReceiverHw<GfxFamily>::submitLRCA(EngineType engineType, const typename AUBCommandStreamReceiverHw<GfxFamily>::MiContextDescriptorReg &contextDescriptor) {
|
||||
auto mmioBase = getCsTraits(engineType).mmioBase;
|
||||
stream.writeMMIO(mmioBase + 0x2230, 0);
|
||||
stream.writeMMIO(mmioBase + 0x2230, 0);
|
||||
stream.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]);
|
||||
stream.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]);
|
||||
stream->writeMMIO(mmioBase + 0x2230, 0);
|
||||
stream->writeMMIO(mmioBase + 0x2230, 0);
|
||||
stream->writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]);
|
||||
stream->writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -417,7 +464,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion(EngineType engineT
|
||||
|
||||
auto mmioBase = getCsTraits(engineType).mmioBase;
|
||||
bool pollNotEqual = false;
|
||||
this->stream.registerPoll(
|
||||
this->stream->registerPoll(
|
||||
mmioBase + 0x2234, //EXECLIST_STATUS
|
||||
0x100,
|
||||
0x100,
|
||||
@@ -447,7 +494,7 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxA
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
|
||||
stream.addComment(str.str().c_str());
|
||||
stream->addComment(str.str().c_str());
|
||||
}
|
||||
|
||||
if (cpuAddress == nullptr) {
|
||||
@@ -457,7 +504,7 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxA
|
||||
}
|
||||
|
||||
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset) {
|
||||
AUB::reserveAddressGGTTAndWriteMmeory(stream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, size, offset);
|
||||
AUB::reserveAddressGGTTAndWriteMmeory(*stream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, size, offset);
|
||||
};
|
||||
ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
|
||||
|
||||
@@ -499,4 +546,10 @@ void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken() {
|
||||
// Some simulator versions don't support adding the context token.
|
||||
// This hook allows specialization for those that do.
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool AUBCommandStreamReceiverHw<GfxFamily>::setPatchInfoData(PatchInfoData &data) {
|
||||
patchInfoCollection.push_back(data);
|
||||
return true;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "runtime/command_stream/submissions_aggregator.h"
|
||||
#include "runtime/helpers/completion_stamp.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/address_patch.h"
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
@@ -122,6 +123,9 @@ class CommandStreamReceiver {
|
||||
|
||||
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
|
||||
|
||||
// Collect patch info data
|
||||
virtual bool setPatchInfoData(PatchInfoData &data) { return false; }
|
||||
|
||||
protected:
|
||||
void setDisableL3Cache(bool val) {
|
||||
disableL3Cache = val;
|
||||
|
||||
@@ -68,6 +68,15 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override;
|
||||
const HardwareInfo &peekHwInfo() const { return hwInfo; }
|
||||
|
||||
void collectStateBaseAddresPatchInfo(
|
||||
uint64_t baseAddress,
|
||||
uint64_t commandOffset,
|
||||
const LinearStream &dsh,
|
||||
const LinearStream &ih,
|
||||
const LinearStream &ioh,
|
||||
const LinearStream &ssh,
|
||||
uint64_t generalStateBase);
|
||||
|
||||
protected:
|
||||
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags, const LinearStream &ih);
|
||||
void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
|
||||
|
||||
@@ -260,6 +260,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
GSBAFor32BitProgrammed = true;
|
||||
}
|
||||
|
||||
auto stateBaseAddressCmdOffset = commandStreamCSR.getUsed();
|
||||
|
||||
StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
commandStreamCSR,
|
||||
dsh,
|
||||
@@ -268,7 +270,12 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
ssh,
|
||||
newGSHbase,
|
||||
requiredL3Index);
|
||||
|
||||
latestSentStatelessMocsConfig = requiredL3Index;
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
collectStateBaseAddresPatchInfo(commandStream.getGpuBase(), stateBaseAddressCmdOffset, dsh, ih, ioh, ssh, newGSHbase);
|
||||
}
|
||||
}
|
||||
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel);
|
||||
@@ -638,4 +645,30 @@ template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::updateLastWaitForCompletionTimestamp() {
|
||||
lastWaitForCompletionTimestamp = std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::collectStateBaseAddresPatchInfo(
|
||||
uint64_t baseAddress,
|
||||
uint64_t commandOffset,
|
||||
const LinearStream &dsh,
|
||||
const LinearStream &ih,
|
||||
const LinearStream &ioh,
|
||||
const LinearStream &ssh,
|
||||
uint64_t generalStateBase) {
|
||||
|
||||
typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
|
||||
|
||||
PatchInfoData dynamicStatePatchInfo = {dsh.getGpuBase(), 0u, PatchInfoAllocationType::DynamicStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
|
||||
PatchInfoData generalStatePatchInfo = {generalStateBase, 0u, PatchInfoAllocationType::GeneralStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
|
||||
PatchInfoData surfaceStatePatchInfo = {ssh.getGpuBase(), 0u, PatchInfoAllocationType::SurfaceStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
|
||||
PatchInfoData indirectObjectPatchInfo = {ioh.getGpuBase(), 0u, PatchInfoAllocationType::IndirectObjectHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
|
||||
PatchInfoData instructionPatchInfo = {ih.getGpuBase(), 0u, PatchInfoAllocationType::InstructionHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INSTRUCTIONBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
|
||||
|
||||
setPatchInfoData(dynamicStatePatchInfo);
|
||||
setPatchInfoData(generalStatePatchInfo);
|
||||
setPatchInfoData(surfaceStatePatchInfo);
|
||||
setPatchInfoData(indirectObjectPatchInfo);
|
||||
setPatchInfoData(instructionPatchInfo);
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -40,7 +40,7 @@ class TbxStream : public AubMemDump::AubStream {
|
||||
|
||||
public:
|
||||
TbxStream();
|
||||
virtual ~TbxStream();
|
||||
~TbxStream() override;
|
||||
|
||||
TbxStream(const TbxStream &) = delete;
|
||||
TbxStream &operator=(const TbxStream &) = delete;
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
set(RUNTIME_SRCS_HELPERS_BASE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/abort.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/base_object.cpp
|
||||
|
||||
46
runtime/helpers/address_patch.h
Normal file
46
runtime/helpers/address_patch.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
namespace OCLRT {

// Kind of allocation on either side of a patch record.
// The numeric values are what gets printed when a record is streamed.
enum PatchInfoAllocationType {
    Default = 0,
    KernelArg = 1,
    GeneralStateHeap = 2,
    DynamicStateHeap = 3,
    IndirectObjectHeap = 4,
    SurfaceStateHeap = 5,
    InstructionHeap = 6
};

// One patch record: the address of a "source" allocation is written into a
// "target" allocation at a given offset.
struct PatchInfoData {
    // Address of the allocation being referenced.
    uint64_t sourceAllocation;
    // Offset within the source allocation.
    uint64_t sourceAllocationOffset;
    // Kind of the source allocation.
    PatchInfoAllocationType sourceType;
    // Address of the allocation that receives the patched value.
    uint64_t targetAllocation;
    // Offset within the target allocation where the value is written.
    uint64_t targetAllocationOffset;
    // Kind of the target allocation.
    PatchInfoAllocationType targetType;
};

} // namespace OCLRT
|
||||
@@ -74,7 +74,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
|
||||
static size_t sendCrossThreadData(
|
||||
IndirectHeap &indirectHeap,
|
||||
const Kernel &kernel);
|
||||
Kernel &kernel);
|
||||
|
||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
@@ -100,7 +100,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
size_t ihReservedBlockSize,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
const Kernel &kernel,
|
||||
Kernel &kernel,
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/basic_math.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "runtime/helpers/address_patch.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
@@ -253,7 +254,7 @@ void KernelCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
|
||||
template <typename GfxFamily>
|
||||
size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
IndirectHeap &indirectHeap,
|
||||
const Kernel &kernel) {
|
||||
Kernel &kernel) {
|
||||
typedef typename GfxFamily::GPGPU_WALKER GPGPU_WALKER;
|
||||
|
||||
indirectHeap.align(GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
@@ -263,6 +264,13 @@ size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
char *pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
|
||||
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
for (auto &patchInfoData : kernel.getPatchInfoDataList()) {
|
||||
patchInfoData.targetAllocation = indirectHeap.getGpuBase();
|
||||
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
|
||||
}
|
||||
}
|
||||
|
||||
return offsetCrossThreadData;
|
||||
}
|
||||
|
||||
@@ -327,7 +335,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
size_t ihReservedBlockSize,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
const Kernel &kernel,
|
||||
Kernel &kernel,
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include "runtime/helpers/cache_policy.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/memory_manager/memory_constants.h"
|
||||
#include <cstddef>
|
||||
|
||||
namespace OCLRT {
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -1093,7 +1093,12 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
|
||||
auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size;
|
||||
|
||||
buffer->setArgStateless(patchLocation, patchSize, !this->isBuiltIn);
|
||||
uint64_t addressToPatch = buffer->setArgStateless(patchLocation, patchSize, !this->isBuiltIn);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
PatchInfoData patchInfoData = {addressToPatch - buffer->getOffset(), buffer->getOffset(), PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(getCrossThreadData()), kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset, PatchInfoAllocationType::IndirectObjectHeap};
|
||||
this->patchInfoDataList.push_back(patchInfoData);
|
||||
}
|
||||
|
||||
if (requiresSshForBuffers()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "runtime/device_queue/device_queue.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/helpers/address_patch.h"
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/program/kernel_info.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
@@ -369,6 +370,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::vector<PatchInfoData> &getPatchInfoDataList() { return patchInfoDataList; };
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
@@ -476,5 +479,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
bool usingSharedObjArgs;
|
||||
uint32_t patchedArgumentsNum = 0;
|
||||
|
||||
std::vector<PatchInfoData> patchInfoDataList;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -266,12 +266,14 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
|
||||
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
|
||||
// Subbuffers have offset that graphicsAllocation is not aware of
|
||||
uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
|
||||
DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (this->getCpuAddress() == reinterpret_cast<void *>(addressToPatch)) || (graphicsAllocation->gpuBaseAddress != 0) || (this->getCpuAddress() == nullptr && this->getGraphicsAllocation()->peekSharedHandle())));
|
||||
|
||||
patchWithRequiredSize(memory, patchSize, addressToPatch);
|
||||
|
||||
return addressToPatch;
|
||||
}
|
||||
|
||||
bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin,
|
||||
|
||||
@@ -89,7 +89,8 @@ class Buffer : public MemObj {
|
||||
BufferCreatFunc createFunction = nullptr;
|
||||
bool isSubBuffer();
|
||||
bool isValidSubBufferOffset(size_t offset);
|
||||
void setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing = false);
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
|
||||
virtual void setArgStateful(void *memory) = 0;
|
||||
bool bufferRectPitchSet(const size_t *bufferOrigin,
|
||||
const size_t *region,
|
||||
|
||||
@@ -122,6 +122,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
virtual size_t calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; }
|
||||
size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
|
||||
cl_mem_object_type peekClMemObjType() const { return memObjectType; }
|
||||
size_t getOffset() const { return offset; }
|
||||
|
||||
protected:
|
||||
void getOsSpecificMemObjectInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam);
|
||||
|
||||
@@ -62,6 +62,7 @@ DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, std::string("unk"), "
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableAUBBufferDump, false, "Avoid dumping buffers in AUB files")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableAUBImageDump, false, "Avoid dumping images in AUB files")
|
||||
DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
|
||||
/*FEATURE FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
|
||||
|
||||
Reference in New Issue
Block a user