Add patch info comments to AUB dump

Collect patching information and add it as comments to the AUB dump.

Change-Id: Ib7c903a2589d68b6e3e614c1774c7cd5a000c29f
This commit is contained in:
Pawel Wilma
2018-03-14 11:07:51 +01:00
committed by sys_ocldev
parent d7fe01454b
commit ff1d2361f3
28 changed files with 853 additions and 47 deletions

View File

@@ -121,6 +121,7 @@ struct AubStream {
virtual void writeGTT(uint32_t offset, uint64_t entry) = 0;
virtual void writeMMIO(uint32_t offset, uint32_t value) = 0;
virtual void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) = 0;
virtual ~AubStream() = default;
};
struct AubFileStream : public AubStream {
@@ -134,8 +135,8 @@ struct AubFileStream : public AubStream {
void writeGTT(uint32_t offset, uint64_t entry) override;
void writeMMIO(uint32_t offset, uint32_t value) override;
void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override;
void expectMemory(uint64_t physAddress, const void *memory, size_t size);
void addComment(const char *message);
MOCKABLE_VIRTUAL void expectMemory(uint64_t physAddress, const void *memory, size_t size);
MOCKABLE_VIRTUAL bool addComment(const char *message);
std::ofstream fileHandle;
};

View File

@@ -249,6 +249,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
blockQueue,
commandType);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
for (auto &dispatchInfo : multiDispatchInfo) {
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
commandStreamReceiver.setPatchInfoData(patchInfoData);
}
}
}
commandStreamReceiver.setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
slmUsed = multiDispatchInfo.usesSlm();

View File

@@ -231,7 +231,7 @@ void AubFileStream::createContext(const AubPpgttContextCreate &cmd) {
fileHandle.write(reinterpret_cast<const char *>(&cmd), sizeof(cmd));
}
void AubFileStream::addComment(const char *message) {
bool AubFileStream::addComment(const char *message) {
using CmdServicesMemTraceComment = AubMemDump::CmdServicesMemTraceComment;
CmdServicesMemTraceComment cmd;
memset(&cmd, 0, sizeof(cmd));
@@ -251,6 +251,7 @@ void AubFileStream::addComment(const char *message) {
uint32_t zero = 0;
fileHandle.write(reinterpret_cast<char *>(&zero), sizeof(uint32_t) - remainder);
}
return true;
}
} // namespace AubMemDump

View File

@@ -66,6 +66,10 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
return this->memoryManager;
}
bool setPatchInfoData(PatchInfoData &data) override;
std::vector<PatchInfoData> patchInfoCollection;
static const AubMemDump::LrcaHelper &getCsTraits(EngineType engineType);
struct EngineInfo {
@@ -79,7 +83,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
uint32_t tailRingBuffer;
} engineInfoTable[EngineType::NUM_ENGINES];
AUBCommandStreamReceiver::AubFileStream stream;
std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> stream;
bool standalone;
TypeSelector<PML4, PDPE, sizeof(void *) == 8>::type ppgtt;
@@ -88,5 +92,6 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
AddressMapper gttRemap;
MOCKABLE_VIRTUAL void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer);
MOCKABLE_VIRTUAL bool addPatchInfoComments();
};
} // namespace OCLRT

View File

@@ -35,6 +35,7 @@ namespace OCLRT {
template <typename GfxFamily>
AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const HardwareInfo &hwInfoIn, bool standalone)
: BaseClass(hwInfoIn),
stream(std::unique_ptr<AUBCommandStreamReceiver::AubFileStream>(new AUBCommandStreamReceiver::AubFileStream())),
standalone(standalone) {
this->dispatchMode = CommandStreamReceiver::DispatchMode::BatchedDispatch;
if (DebugManager.flags.CsrDispatchMode.get()) {
@@ -54,7 +55,7 @@ AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const Hardware
template <typename GfxFamily>
AUBCommandStreamReceiverHw<GfxFamily>::~AUBCommandStreamReceiverHw() {
stream.close();
stream->close();
for (auto &engineInfo : engineInfoTable) {
alignedFree(engineInfo.pLRCA);
@@ -79,7 +80,7 @@ const AubMemDump::LrcaHelper &AUBCommandStreamReceiverHw<GfxFamily>::getCsTraits
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::initGlobalMMIO() {
for (auto &mmioPair : AUBFamilyMapper<GfxFamily>::globalMMIO) {
stream.writeMMIO(mmioPair.first, mmioPair.second);
stream->writeMMIO(mmioPair.first, mmioPair.second);
}
}
@@ -89,7 +90,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initEngineMMIO(EngineType engineType
DEBUG_BREAK_IF(!mmioList);
for (auto &mmioPair : *mmioList) {
stream.writeMMIO(mmioPair.first, mmioPair.second);
stream->writeMMIO(mmioPair.first, mmioPair.second);
}
}
@@ -113,11 +114,11 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine(EngineType engineTy
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttHWSP;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
AUB::reserveAddressGGTT(stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP);
stream.writeMMIO(mmioBase + 0x2080, engineInfo.ggttHWSP);
AUB::reserveAddressGGTT(*stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP);
stream->writeMMIO(mmioBase + 0x2080, engineInfo.ggttHWSP);
}
// Allocate the LRCA
@@ -141,10 +142,10 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine(EngineType engineTy
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttRingBuffer;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
AUB::reserveAddressGGTT(stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer);
AUB::reserveAddressGGTT(*stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer);
}
// Initialize the ring MMIO registers
@@ -167,12 +168,12 @@ void AUBCommandStreamReceiverHw<GfxFamily>::initializeEngine(EngineType engineTy
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
AUB::reserveAddressGGTT(stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys);
AUB::reserveAddressGGTT(*stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys);
AUB::addMemoryWrite(
stream,
*stream,
lrcAddressPhys,
pLRCABase,
sizeLRCA,
@@ -189,15 +190,15 @@ CommandStreamReceiver *AUBCommandStreamReceiverHw<GfxFamily>::create(const Hardw
auto csr = new AUBCommandStreamReceiverHw<GfxFamily>(hwInfoIn, standalone);
// Open our file
csr->stream.open(fileName.c_str());
csr->stream->open(fileName.c_str());
if (!csr->stream.fileHandle.is_open()) {
if (!csr->stream->fileHandle.is_open()) {
// This DEBUG_BREAK_IF most probably means you are not executing aub tests with correct current directory (containing aub_out folder)
// try adding <familycodename>_aub
DEBUG_BREAK_IF(true);
}
// Add the file header.
csr->stream.init(AubMemDump::SteppingValues::A, AUB::Traits::device);
csr->stream->init(AubMemDump::SteppingValues::A, AUB::Traits::device);
return csr;
}
@@ -231,14 +232,14 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
{
std::ostringstream str;
str << "ppgtt: " << std::hex << std::showbase << pBatchBuffer;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
auto physBatchBuffer = ppgtt.map(reinterpret_cast<uintptr_t>(pBatchBuffer), sizeBatchBuffer);
AUB::reserveAddressPPGTT(stream, reinterpret_cast<uintptr_t>(pBatchBuffer), sizeBatchBuffer, physBatchBuffer);
AUB::reserveAddressPPGTT(*stream, reinterpret_cast<uintptr_t>(pBatchBuffer), sizeBatchBuffer, physBatchBuffer);
AUB::addMemoryWrite(
stream,
*stream,
physBatchBuffer,
pBatchBuffer,
sizeBatchBuffer,
@@ -257,6 +258,9 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
}
processResidency(allocationsForResidency);
}
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
addPatchInfoComments();
}
// Add a batch buffer start to the ring buffer
auto previousTail = engineInfo.tailRingBuffer;
@@ -282,7 +286,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
// write remaining ring
auto physDumpStart = ggtt.map(ggttTail, sizeToWrap);
AUB::addMemoryWrite(
stream,
*stream,
physDumpStart,
pTail,
sizeToWrap,
@@ -327,12 +331,12 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << ggttDumpStart;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
auto physDumpStart = ggtt.map(ggttDumpStart, dumpLength);
AUB::addMemoryWrite(
stream,
*stream,
physDumpStart,
dumpStart,
dumpLength,
@@ -343,12 +347,12 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
{
std::ostringstream str;
str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA + 0x101c;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
auto physLRCA = ggtt.map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer));
AUB::addMemoryWrite(
stream,
*stream,
physLRCA + 0x101c,
&engineInfo.tailRingBuffer,
sizeof(engineInfo.tailRingBuffer),
@@ -402,13 +406,56 @@ void *AUBCommandStreamReceiverHw<GfxFamily>::flattenBatchBuffer(BatchBuffer &bat
return flatBatchBuffer;
}
// Writes the collected patch info to the AUB stream as two comments and clears the collection.
//
// Comment 1: a "PatchInfoData" header line, then one line per collected entry holding its six
//            fields in hex, each field followed by ';'.
// Comment 2: an "AllocationsList" header line, then one "address;mapped" line (hex) for every
//            distinct non-zero source/target allocation, where `mapped` is the value returned by
//            ppgtt.map(address, 1).
//
// Returns false as soon as an addComment() call fails (the second comment is then not written),
// true otherwise. patchInfoCollection is cleared in either case.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::addPatchInfoComments() {
    std::map<uint64_t, uint64_t> allocationsMap;

    // Records a non-zero allocation together with its ppgtt mapping. The mapped value is kept as
    // uint64_t for sources and targets alike; previously the target pair was built with a
    // uintptr_t second member, which narrows the value wherever uintptr_t is 32 bits wide.
    // Like insert(), emplace() leaves an already present key untouched.
    auto trackAllocation = [&](uint64_t allocation) {
        if (allocation) {
            allocationsMap.emplace(allocation, this->ppgtt.map(static_cast<uintptr_t>(allocation), 1));
        }
    };

    std::ostringstream str;
    // std::hex is sticky, so setting it once covers every numeric field below.
    str << "PatchInfoData" << '\n'
        << std::hex;
    for (auto &patchInfoData : this->patchInfoCollection) {
        str << patchInfoData.sourceAllocation << ";";
        str << patchInfoData.sourceAllocationOffset << ";";
        str << patchInfoData.sourceType << ";";
        str << patchInfoData.targetAllocation << ";";
        str << patchInfoData.targetAllocationOffset << ";";
        str << patchInfoData.targetType << ";";
        str << '\n';

        trackAllocation(patchInfoData.sourceAllocation);
        trackAllocation(patchInfoData.targetAllocation);
    }

    bool result = stream->addComment(str.str().c_str());
    // The collection is emptied before the result is inspected, so entries are dropped even when
    // writing the comment failed.
    this->patchInfoCollection.clear();
    if (!result) {
        return false;
    }

    std::ostringstream allocationStr;
    allocationStr << "AllocationsList" << '\n'
                  << std::hex;
    for (auto &element : allocationsMap) {
        allocationStr << element.first << ";" << element.second << '\n';
    }
    return stream->addComment(allocationStr.str().c_str());
}
template <typename GfxFamily>
void AUBCommandStreamReceiverHw<GfxFamily>::submitLRCA(EngineType engineType, const typename AUBCommandStreamReceiverHw<GfxFamily>::MiContextDescriptorReg &contextDescriptor) {
auto mmioBase = getCsTraits(engineType).mmioBase;
stream.writeMMIO(mmioBase + 0x2230, 0);
stream.writeMMIO(mmioBase + 0x2230, 0);
stream.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]);
stream.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]);
stream->writeMMIO(mmioBase + 0x2230, 0);
stream->writeMMIO(mmioBase + 0x2230, 0);
stream->writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]);
stream->writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]);
}
template <typename GfxFamily>
@@ -417,7 +464,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion(EngineType engineT
auto mmioBase = getCsTraits(engineType).mmioBase;
bool pollNotEqual = false;
this->stream.registerPoll(
this->stream->registerPoll(
mmioBase + 0x2234, //EXECLIST_STATUS
0x100,
0x100,
@@ -447,7 +494,7 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxA
{
std::ostringstream str;
str << "ppgtt: " << std::hex << std::showbase << gpuAddress;
stream.addComment(str.str().c_str());
stream->addComment(str.str().c_str());
}
if (cpuAddress == nullptr) {
@@ -457,7 +504,7 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxA
}
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset) {
AUB::reserveAddressGGTTAndWriteMmeory(stream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, size, offset);
AUB::reserveAddressGGTTAndWriteMmeory(*stream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, size, offset);
};
ppgtt.pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, walker);
@@ -499,4 +546,10 @@ void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken() {
// Some simulator versions don't support adding the context token.
// This hook allows specialization for those that do.
}
// Appends a copy of `data` to patchInfoCollection, which addPatchInfoComments() later dumps and
// clears. Always returns true.
template <typename GfxFamily>
bool AUBCommandStreamReceiverHw<GfxFamily>::setPatchInfoData(PatchInfoData &data) {
    this->patchInfoCollection.emplace_back(data);
    return true;
}
} // namespace OCLRT

View File

@@ -26,6 +26,7 @@
#include "runtime/command_stream/submissions_aggregator.h"
#include "runtime/helpers/completion_stamp.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/address_patch.h"
#include "runtime/command_stream/csr_definitions.h"
#include <cstddef>
#include <cstdint>
@@ -122,6 +123,9 @@ class CommandStreamReceiver {
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
// Collect patch info data.
// This default implementation ignores `data` and returns false; AUBCommandStreamReceiverHw
// overrides it to store the entry.
virtual bool setPatchInfoData(PatchInfoData &data) { return false; }
protected:
void setDisableL3Cache(bool val) {
disableL3Cache = val;

View File

@@ -68,6 +68,15 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override;
const HardwareInfo &peekHwInfo() const { return hwInfo; }
void collectStateBaseAddresPatchInfo(
uint64_t baseAddress,
uint64_t commandOffset,
const LinearStream &dsh,
const LinearStream &ih,
const LinearStream &ioh,
const LinearStream &ssh,
uint64_t generalStateBase);
protected:
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags, const LinearStream &ih);
void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config);

View File

@@ -260,6 +260,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
GSBAFor32BitProgrammed = true;
}
auto stateBaseAddressCmdOffset = commandStreamCSR.getUsed();
StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
commandStreamCSR,
dsh,
@@ -268,7 +270,12 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
ssh,
newGSHbase,
requiredL3Index);
latestSentStatelessMocsConfig = requiredL3Index;
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
collectStateBaseAddresPatchInfo(commandStream.getGpuBase(), stateBaseAddressCmdOffset, dsh, ih, ioh, ssh, newGSHbase);
}
}
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel);
@@ -638,4 +645,30 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::updateLastWaitForCompletionTimestamp() {
lastWaitForCompletionTimestamp = std::chrono::high_resolution_clock::now();
}
// Builds one PatchInfoData entry per base address of a STATE_BASE_ADDRESS command and hands each
// to setPatchInfoData().
//
// baseAddress      - recorded as the target allocation of every entry
// commandOffset    - offset added to each field's PATCH_CONSTANTS byte offset to form the target
//                    offset
// dsh/ih/ioh/ssh   - streams whose getGpuBase() value becomes the source allocation of the
//                    matching entry
// generalStateBase - source allocation of the general state entry
//
// Entries are submitted in the order: dynamic state, general state, surface state, indirect
// object, instruction.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::collectStateBaseAddresPatchInfo(
    uint64_t baseAddress,
    uint64_t commandOffset,
    const LinearStream &dsh,
    const LinearStream &ih,
    const LinearStream &ioh,
    const LinearStream &ssh,
    uint64_t generalStateBase) {

    using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

    // All entries are constructed before the first setPatchInfoData() call.
    PatchInfoData heapPatches[] = {
        {dsh.getGpuBase(), 0u, PatchInfoAllocationType::DynamicStateHeap, baseAddress,
         commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default},
        {generalStateBase, 0u, PatchInfoAllocationType::GeneralStateHeap, baseAddress,
         commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default},
        {ssh.getGpuBase(), 0u, PatchInfoAllocationType::SurfaceStateHeap, baseAddress,
         commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default},
        {ioh.getGpuBase(), 0u, PatchInfoAllocationType::IndirectObjectHeap, baseAddress,
         commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default},
        {ih.getGpuBase(), 0u, PatchInfoAllocationType::InstructionHeap, baseAddress,
         commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INSTRUCTIONBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default},
    };

    for (auto &heapPatch : heapPatches) {
        setPatchInfoData(heapPatch);
    }
}
} // namespace OCLRT

View File

@@ -40,7 +40,7 @@ class TbxStream : public AubMemDump::AubStream {
public:
TbxStream();
virtual ~TbxStream();
~TbxStream() override;
TbxStream(const TbxStream &) = delete;
TbxStream &operator=(const TbxStream &) = delete;

View File

@@ -21,6 +21,7 @@
set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/abort.h
${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
${CMAKE_CURRENT_SOURCE_DIR}/base_object.cpp

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <stdint.h>
namespace OCLRT {

// Category tag attached to each side (source / target) of a PatchInfoData entry.
// Kept as an unscoped enum: the values are streamed as plain integers into AUB comments.
enum PatchInfoAllocationType {
    Default = 0,
    KernelArg = 1,
    GeneralStateHeap = 2,
    DynamicStateHeap = 3,
    IndirectObjectHeap = 4,
    SurfaceStateHeap = 5,
    InstructionHeap = 6
};

// One patching record: a (source allocation, offset, type) triple paired with a
// (target allocation, offset, type) triple.
// NOTE(review): the call sites in this change put the allocation whose address is being written
// on the source side and the location receiving it on the target side - confirm this is the
// intended contract.
// Plain aggregate; callers brace-initialize it in field order.
struct PatchInfoData {
    // Source side
    uint64_t sourceAllocation;
    uint64_t sourceAllocationOffset;
    PatchInfoAllocationType sourceType;

    // Target side
    uint64_t targetAllocation;
    uint64_t targetAllocationOffset;
    PatchInfoAllocationType targetType;
};

} // namespace OCLRT

View File

@@ -74,7 +74,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
static size_t sendCrossThreadData(
IndirectHeap &indirectHeap,
const Kernel &kernel);
Kernel &kernel);
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
const void *srcKernelSsh, size_t srcKernelSshSize,
@@ -100,7 +100,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,
Kernel &kernel,
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,

View File

@@ -26,6 +26,7 @@
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/basic_math.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/helpers/address_patch.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/string.h"
#include "runtime/indirect_heap/indirect_heap.h"
@@ -253,7 +254,7 @@ void KernelCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
template <typename GfxFamily>
size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
IndirectHeap &indirectHeap,
const Kernel &kernel) {
Kernel &kernel) {
typedef typename GfxFamily::GPGPU_WALKER GPGPU_WALKER;
indirectHeap.align(GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
@@ -263,6 +264,13 @@ size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
char *pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
for (auto &patchInfoData : kernel.getPatchInfoDataList()) {
patchInfoData.targetAllocation = indirectHeap.getGpuBase();
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
}
}
return offsetCrossThreadData;
}
@@ -327,7 +335,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,
Kernel &kernel,
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,

View File

@@ -25,7 +25,6 @@
#include "runtime/helpers/cache_policy.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/memory_manager/memory_constants.h"
#include <cstddef>
namespace OCLRT {
template <typename GfxFamily>

View File

@@ -1093,7 +1093,12 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size;
buffer->setArgStateless(patchLocation, patchSize, !this->isBuiltIn);
uint64_t addressToPatch = buffer->setArgStateless(patchLocation, patchSize, !this->isBuiltIn);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
PatchInfoData patchInfoData = {addressToPatch - buffer->getOffset(), buffer->getOffset(), PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(getCrossThreadData()), kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset, PatchInfoAllocationType::IndirectObjectHeap};
this->patchInfoDataList.push_back(patchInfoData);
}
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);

View File

@@ -26,6 +26,7 @@
#include "runtime/device_queue/device_queue.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/address_patch.h"
#include "runtime/program/program.h"
#include "runtime/program/kernel_info.h"
#include "runtime/os_interface/debug_settings_manager.h"
@@ -369,6 +370,8 @@ class Kernel : public BaseObject<_cl_kernel> {
return 0;
}
// Returns a mutable reference to this kernel's patch info list; callers append to it and edit
// entries in place.
std::vector<PatchInfoData> &getPatchInfoDataList() { return patchInfoDataList; }
protected:
struct ObjectCounts {
uint32_t imageCount;
@@ -476,5 +479,7 @@ class Kernel : public BaseObject<_cl_kernel> {
bool usingSharedObjArgs;
uint32_t patchedArgumentsNum = 0;
std::vector<PatchInfoData> patchInfoDataList;
};
} // namespace OCLRT

View File

@@ -266,12 +266,14 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
return buffer;
}
void Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
// Subbuffers have offset that graphicsAllocation is not aware of
uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (this->getCpuAddress() == reinterpret_cast<void *>(addressToPatch)) || (graphicsAllocation->gpuBaseAddress != 0) || (this->getCpuAddress() == nullptr && this->getGraphicsAllocation()->peekSharedHandle())));
patchWithRequiredSize(memory, patchSize, addressToPatch);
return addressToPatch;
}
bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin,

View File

@@ -89,7 +89,8 @@ class Buffer : public MemObj {
BufferCreatFunc createFunction = nullptr;
bool isSubBuffer();
bool isValidSubBufferOffset(size_t offset);
void setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing = false);
// Two-argument convenience overload: forwards to the three-argument version with
// set32BitAddressing = false and returns its result.
uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
virtual void setArgStateful(void *memory) = 0;
bool bufferRectPitchSet(const size_t *bufferOrigin,
const size_t *region,

View File

@@ -122,6 +122,7 @@ class MemObj : public BaseObject<_cl_mem> {
virtual size_t calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; }
size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
cl_mem_object_type peekClMemObjType() const { return memObjectType; }
// Returns the value of the `offset` member.
size_t getOffset() const { return offset; }
protected:
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

View File

@@ -62,6 +62,7 @@ DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, std::string("unk"), "
DECLARE_DEBUG_VARIABLE(bool, DisableAUBBufferDump, false, "Avoid dumping buffers in AUB files")
DECLARE_DEBUG_VARIABLE(bool, DisableAUBImageDump, false, "Avoid dumping images in AUB files")
DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
/*FEATURE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")

View File

@@ -52,7 +52,7 @@ class AUBCommandStreamFixture : public CommandStreamFixture {
// Write our pseudo-op to the AUB file
auto aubCsr = reinterpret_cast<AUBCommandStreamReceiverHw<FamilyType> *>(pCommandStreamReceiver);
aubCsr->stream.fileHandle.write(reinterpret_cast<char *>(&header), sizeof(header));
aubCsr->stream->fileHandle.write(reinterpret_cast<char *>(&header), sizeof(header));
}
template <typename FamilyType>
@@ -62,7 +62,7 @@ class AUBCommandStreamFixture : public CommandStreamFixture {
if (offset > length)
abort();
aubCsr->stream.expectMemory(physAddress,
aubCsr->stream->expectMemory(physAddress,
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(srcAddress) + offset),
size);
};

View File

@@ -28,6 +28,7 @@
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_mdi.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "test.h"
@@ -244,3 +245,40 @@ HWTEST_F(EnqueueHandlerTest, enqueueWithOutputEventRegistersEvent) {
event->release();
mockCmdQ->release();
}
// With AddPatchInfoCommentsForAUBDump left untouched by this test, enqueueing a kernel that
// carries one PatchInfoData entry must not result in any setPatchInfoData() call on the CSR.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenPatchInfoDataIsNotTransferredToCSR) {
// NOTE(review): tag is left uninitialized - confirm MockCsrBase never reads it before writing.
int32_t tag;
// csr is never deleted in this test; presumably resetCommandStreamReceiver() takes ownership - TODO confirm.
auto csr = new MockCsrBase<FamilyType>(tag);
pDevice->resetCommandStreamReceiver(csr);
MockKernelWithInternals mockKernel(*pDevice);
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(context, pDevice, 0));
size_t gws[] = {1, 1, 1};
// Seed the kernel with one entry so there is something that could be transferred.
PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap};
mockKernel.mockKernel->getPatchInfoDataList().push_back(patchInfoData);
// The expectation has to be registered before the enqueue that would trigger the call.
EXPECT_CALL(*csr, setPatchInfoData(::testing::_)).Times(0);
mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
}
// With AddPatchInfoCommentsForAUBDump enabled, enqueueing a kernel that carries one PatchInfoData
// entry must result in exactly six setPatchInfoData() calls on the CSR.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDumpIsSetThenPatchInfoDataIsTransferredToCSR) {
// Presumably restores the debug flags when it goes out of scope - TODO confirm.
DebugManagerStateRestore dbgRestore;
DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);
// NOTE(review): tag is left uninitialized - confirm MockCsrBase never reads it before writing.
int32_t tag;
// csr is never deleted in this test; presumably resetCommandStreamReceiver() takes ownership - TODO confirm.
auto csr = new MockCsrBase<FamilyType>(tag);
pDevice->resetCommandStreamReceiver(csr);
MockKernelWithInternals mockKernel(*pDevice);
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(context, pDevice, 0));
size_t gws[] = {1, 1, 1};
PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap};
mockKernel.mockKernel->getPatchInfoDataList().push_back(patchInfoData);
// Presumably 1 call for the kernel's own entry (enqueueHandler loop) plus 5 from
// collectStateBaseAddresPatchInfo() - verify if either count changes.
EXPECT_CALL(*csr, setPatchInfoData(::testing::_)).Times(6);
mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
}

View File

@@ -31,6 +31,10 @@
using namespace OCLRT;
using ::testing::Invoke;
using ::testing::_;
using ::testing::Return;
typedef Test<DeviceFixture> AubCommandStreamReceiverTests;
template <typename GfxFamily>
@@ -50,6 +54,11 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw<GfxFamily> {
}
MOCK_METHOD2(flattenBatchBuffer, void *(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer));
MOCK_METHOD0(addPatchInfoComments, bool(void));
};
// AubFileStream test double whose addComment() is a gMock mock method, letting tests capture the
// comment text and control the returned bool.
// Relies on AubFileStream::addComment being declared MOCKABLE_VIRTUAL (presumably `virtual` in
// test builds - TODO confirm).
struct MockAubFileStream : public AUBCommandStreamReceiver::AubFileStream {
MOCK_METHOD1(addComment, bool(const char *message));
};
TEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreatedWithWrongGfxCoreFamilyThenNullPointerShouldBeReturned) {
@@ -528,6 +537,332 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenDispatc
memoryManager->freeGraphicsMemory(chainedBatchBuffer);
}
// With AddPatchInfoCommentsForAUBDump enabled, a single flush() must call addPatchInfoComments()
// exactly once.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoCommentsIsCalled) {
// Presumably restores the debug flags when it goes out of scope - TODO confirm.
DebugManagerStateRestore dbgRestore;
DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);
// memoryManager is declared before aubCsr, so it is destroyed after the CSR.
std::unique_ptr<MemoryManager> memoryManager(nullptr);
std::unique_ptr<MockAubCsr<FamilyType>> aubCsr(new MockAubCsr<FamilyType>(*platformDevices[0], true));
memoryManager.reset(aubCsr->createMemoryManager(false));
GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemory(4096, 4096);
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto engineType = OCLRT::ENGINE_RCS;
ResidencyContainer allocationsForResidency;
aubCsr->setTagAllocation(pDevice->getTagAllocation());
// addPatchInfoComments() is mocked on MockAubCsr, so only the call count is checked here.
EXPECT_CALL(*aubCsr, addPatchInfoComments()).Times(1);
aubCsr->flush(batchBuffer, engineType, &allocationsForResidency);
memoryManager->freeGraphicsMemory(commandBuffer);
}
// With AddPatchInfoCommentsForAUBDump left untouched by this test, flush() must not call
// addPatchInfoComments().
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoCommentsIsNotCalled) {
// memoryManager is declared before aubCsr, so it is destroyed after the CSR.
std::unique_ptr<MemoryManager> memoryManager(nullptr);
std::unique_ptr<MockAubCsr<FamilyType>> aubCsr(new MockAubCsr<FamilyType>(*platformDevices[0], true));
memoryManager.reset(aubCsr->createMemoryManager(false));
GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemory(4096, 4096);
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto engineType = OCLRT::ENGINE_RCS;
ResidencyContainer allocationsForResidency;
aubCsr->setTagAllocation(pDevice->getTagAllocation());
// The expectation has to be registered before the flush that would trigger the call.
EXPECT_CALL(*aubCsr, addPatchInfoComments()).Times(0);
aubCsr->flush(batchBuffer, engineType, &allocationsForResidency);
memoryManager->freeGraphicsMemory(commandBuffer);
}
// With no PatchInfoData registered, addPatchInfoComments() is expected to pass
// two comments to the stream, each consisting only of its header line.
HWTEST_F(AubCommandStreamReceiverTests, givenAddPatchInfoCommentsCalledWhenNoPatchInfoDataObjectsThenCommentsAreEmpty) {
    // Declared ahead of the CSR so that it is destroyed after the CSR.
    std::unique_ptr<MemoryManager> memManager;
    auto csr = std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>>(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0], true));
    memManager.reset(csr->createMemoryManager(false));

    GraphicsAllocation *cmdBufferAllocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, cmdBufferAllocation);
    LinearStream linearStream(cmdBufferAllocation);
    BatchBuffer batch{linearStream.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, linearStream.getUsed(), &linearStream};

    // streamHolder owns the mock until the swap; afterwards it holds the CSR's own stream.
    std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> streamHolder(new MockAubFileStream());
    auto *mockStream = static_cast<MockAubFileStream *>(streamHolder.get());
    ASSERT_NE(nullptr, mockStream);
    csr->stream.swap(streamHolder);

    std::vector<std::string> captured;
    auto recordComment = [&captured](const char *text) -> bool {
        captured.push_back(std::string(text));
        return true;
    };
    EXPECT_CALL(*mockStream, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke(recordComment));

    const bool succeeded = csr->addPatchInfoComments();
    EXPECT_TRUE(succeeded);

    ASSERT_EQ(2u, captured.size());
    EXPECT_EQ("PatchInfoData\n", captured[0]);
    EXPECT_EQ("AllocationsList\n", captured[1]);

    // Give the CSR its own stream back before teardown.
    csr->stream.swap(streamHolder);
    memManager->freeGraphicsMemory(cmdBufferAllocation);
}
// When the first addComment() call reports failure, addPatchInfoComments() is
// expected to return false without issuing a second addComment().
HWTEST_F(AubCommandStreamReceiverTests, givenAddPatchInfoCommentsCalledWhenFirstAddCommentsFailsThenFunctionReturnsFalse) {
    // Declared ahead of the CSR so that it is destroyed after the CSR.
    std::unique_ptr<MemoryManager> memManager;
    auto csr = std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>>(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0], true));
    memManager.reset(csr->createMemoryManager(false));

    GraphicsAllocation *cmdBufferAllocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, cmdBufferAllocation);
    LinearStream linearStream(cmdBufferAllocation);
    BatchBuffer batch{linearStream.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, linearStream.getUsed(), &linearStream};

    // streamHolder owns the mock until the swap; afterwards it holds the CSR's own stream.
    std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> streamHolder(new MockAubFileStream());
    auto *mockStream = static_cast<MockAubFileStream *>(streamHolder.get());
    ASSERT_NE(nullptr, mockStream);
    csr->stream.swap(streamHolder);

    EXPECT_CALL(*mockStream, addComment(_)).Times(1).WillOnce(Return(false));

    const bool succeeded = csr->addPatchInfoComments();
    EXPECT_FALSE(succeeded);

    // Give the CSR its own stream back before teardown.
    csr->stream.swap(streamHolder);
    memManager->freeGraphicsMemory(cmdBufferAllocation);
}
// When the first addComment() succeeds and the second one reports failure,
// addPatchInfoComments() is expected to return false.
HWTEST_F(AubCommandStreamReceiverTests, givenAddPatchInfoCommentsCalledWhenSecondAddCommentsFailsThenFunctionReturnsFalse) {
    // Declared ahead of the CSR so that it is destroyed after the CSR.
    std::unique_ptr<MemoryManager> memManager;
    auto csr = std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>>(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0], true));
    memManager.reset(csr->createMemoryManager(false));

    GraphicsAllocation *cmdBufferAllocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, cmdBufferAllocation);
    LinearStream linearStream(cmdBufferAllocation);
    BatchBuffer batch{linearStream.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, linearStream.getUsed(), &linearStream};

    // streamHolder owns the mock until the swap; afterwards it holds the CSR's own stream.
    std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> streamHolder(new MockAubFileStream());
    auto *mockStream = static_cast<MockAubFileStream *>(streamHolder.get());
    ASSERT_NE(nullptr, mockStream);
    csr->stream.swap(streamHolder);

    EXPECT_CALL(*mockStream, addComment(_))
        .Times(2)
        .WillOnce(Return(true))
        .WillOnce(Return(false));

    const bool succeeded = csr->addPatchInfoComments();
    EXPECT_FALSE(succeeded);

    // Give the CSR its own stream back before teardown.
    csr->stream.swap(streamHolder);
    memManager->freeGraphicsMemory(cmdBufferAllocation);
}
// Registers two PatchInfoData entries and validates both comments emitted by
// addPatchInfoComments():
//  - "PatchInfoData": one ';'-separated hex line per registered entry, in order.
//  - "AllocationsList": every line starts with one of the registered non-null addresses.
HWTEST_F(AubCommandStreamReceiverTests, givenAddPatchInfoCommentsCalledWhenPatchInfoDataObjectsAddedThenCommentsAreNotEmpty) {
    // memoryManager is declared before aubCsr so that it is destroyed after it.
    std::unique_ptr<MemoryManager> memoryManager(nullptr);
    std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>> aubCsr(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0], true));
    memoryManager.reset(aubCsr->createMemoryManager(false));

    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};

    std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> mockAubFileStream(new MockAubFileStream());
    MockAubFileStream *mockAubFileStreamPtr = static_cast<MockAubFileStream *>(mockAubFileStream.get());
    ASSERT_NE(nullptr, mockAubFileStreamPtr);

    // Install the mock; mockAubFileStream holds the CSR's own stream until swapped back.
    mockAubFileStream.swap(aubCsr->stream);

    const uint32_t patchInfoCount = 2u;
    PatchInfoData patchInfoData[patchInfoCount] = {{0xAAAAAAAA, 128u, PatchInfoAllocationType::Default, 0xBBBBBBBB, 256u, PatchInfoAllocationType::Default},
                                                   {0xBBBBBBBB, 128u, PatchInfoAllocationType::Default, 0xDDDDDDDD, 256u, PatchInfoAllocationType::Default}};

    EXPECT_TRUE(aubCsr->setPatchInfoData(patchInfoData[0]));
    EXPECT_TRUE(aubCsr->setPatchInfoData(patchInfoData[1]));

    std::vector<std::string> comments;

    EXPECT_CALL(*mockAubFileStreamPtr, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool {
        comments.push_back(std::string(str));
        return true;
    }));
    bool result = aubCsr->addPatchInfoComments();
    EXPECT_TRUE(result);

    ASSERT_EQ(2u, comments.size());

    EXPECT_EQ("PatchInfoData", comments[0].substr(0, 13));
    EXPECT_EQ("AllocationsList", comments[1].substr(0, 15));

    std::string line;
    std::istringstream input1;
    input1.str(comments[0]);

    uint32_t patchLines = 0;
    while (std::getline(input1, line)) {
        if (line.substr(0, 13) == "PatchInfoData") {
            continue;
        }
        // An unexpected extra line must fail the test rather than index past
        // the end of patchInfoData. break (not ASSERT) keeps the cleanup below running.
        if (patchLines >= patchInfoCount) {
            ADD_FAILURE() << "unexpected extra PatchInfoData line: " << line;
            break;
        }
        std::ostringstream ss;
        ss << std::hex << patchInfoData[patchLines].sourceAllocation << ";" << patchInfoData[patchLines].sourceAllocationOffset << ";" << patchInfoData[patchLines].sourceType << ";";
        ss << patchInfoData[patchLines].targetAllocation << ";" << patchInfoData[patchLines].targetAllocationOffset << ";" << patchInfoData[patchLines].targetType << ";";

        EXPECT_EQ(ss.str(), line);
        patchLines++;
    }
    // Every registered entry must have produced a line.
    EXPECT_EQ(patchInfoCount, patchLines);

    // Only the addresses registered above may appear in the allocations list.
    std::vector<std::string> expectedAddresses = {"aaaaaaaa", "bbbbbbbb", "dddddddd"};
    uint32_t allocationLines = 0;

    std::istringstream input2;
    input2.str(comments[1]);
    while (std::getline(input2, line)) {
        if (line.substr(0, 15) == "AllocationsList") {
            continue;
        }

        bool foundAddr = false;
        for (auto &addr : expectedAddresses) {
            if (line.substr(0, 8) == addr) {
                foundAddr = true;
                break;
            }
        }
        EXPECT_TRUE(foundAddr);
        EXPECT_TRUE(line.size() > 9);
        allocationLines++;
    }
    // Without this the loop above would pass vacuously on an empty list.
    EXPECT_NE(0u, allocationLines);

    // Give the CSR its own stream back before teardown.
    mockAubFileStream.swap(aubCsr->stream);
    memoryManager->freeGraphicsMemory(commandBuffer);
}
// Registers one PatchInfoData whose source allocation is 0 and checks that the
// "AllocationsList" comment lists only the non-null target address.
HWTEST_F(AubCommandStreamReceiverTests, givenAddPatchInfoCommentsCalledWhenSourceAllocationIsNullThenDoNotAddToAllocationsList) {
    // memoryManager is declared before aubCsr so that it is destroyed after it.
    std::unique_ptr<MemoryManager> memoryManager(nullptr);
    std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>> aubCsr(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0], true));
    memoryManager.reset(aubCsr->createMemoryManager(false));

    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};

    std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> mockAubFileStream(new MockAubFileStream());
    MockAubFileStream *mockAubFileStreamPtr = static_cast<MockAubFileStream *>(mockAubFileStream.get());
    ASSERT_NE(nullptr, mockAubFileStreamPtr);

    // Install the mock; mockAubFileStream holds the CSR's own stream until swapped back.
    mockAubFileStream.swap(aubCsr->stream);

    PatchInfoData patchInfoData = {0x0, 0u, PatchInfoAllocationType::Default, 0xBBBBBBBB, 0u, PatchInfoAllocationType::Default};
    EXPECT_TRUE(aubCsr->setPatchInfoData(patchInfoData));

    std::vector<std::string> comments;

    EXPECT_CALL(*mockAubFileStreamPtr, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool {
        comments.push_back(std::string(str));
        return true;
    }));
    bool result = aubCsr->addPatchInfoComments();
    EXPECT_TRUE(result);

    ASSERT_EQ(2u, comments.size());

    ASSERT_EQ("PatchInfoData", comments[0].substr(0, 13));
    ASSERT_EQ("AllocationsList", comments[1].substr(0, 15));

    std::string line;
    std::istringstream input;
    input.str(comments[1]);

    uint32_t allocationLines = 0;

    std::vector<std::string> expectedAddresses = {"bbbbbbbb"};
    while (std::getline(input, line)) {
        if (line.substr(0, 15) == "AllocationsList") {
            continue;
        }

        bool foundAddr = false;
        for (auto &addr : expectedAddresses) {
            if (line.substr(0, 8) == addr) {
                foundAddr = true;
                break;
            }
        }
        EXPECT_TRUE(foundAddr);
        EXPECT_TRUE(line.size() > 9);
        allocationLines++;
    }
    // Exactly the non-null target must be listed; without this check an empty
    // list would make the loop above pass vacuously.
    EXPECT_EQ(1u, allocationLines);

    // Give the CSR its own stream back before teardown.
    mockAubFileStream.swap(aubCsr->stream);
    memoryManager->freeGraphicsMemory(commandBuffer);
}
// Registers one PatchInfoData whose target allocation is 0 and checks that the
// "AllocationsList" comment lists only the non-null source address.
HWTEST_F(AubCommandStreamReceiverTests, givenAddPatchInfoCommentsCalledWhenTargetAllocationIsNullThenDoNotAddToAllocationsList) {
    // memoryManager is declared before aubCsr so that it is destroyed after it.
    std::unique_ptr<MemoryManager> memoryManager(nullptr);
    std::unique_ptr<AUBCommandStreamReceiverHw<FamilyType>> aubCsr(new AUBCommandStreamReceiverHw<FamilyType>(*platformDevices[0], true));
    memoryManager.reset(aubCsr->createMemoryManager(false));

    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};

    std::unique_ptr<AUBCommandStreamReceiver::AubFileStream> mockAubFileStream(new MockAubFileStream());
    MockAubFileStream *mockAubFileStreamPtr = static_cast<MockAubFileStream *>(mockAubFileStream.get());
    ASSERT_NE(nullptr, mockAubFileStreamPtr);

    // Install the mock; mockAubFileStream holds the CSR's own stream until swapped back.
    mockAubFileStream.swap(aubCsr->stream);

    PatchInfoData patchInfoData = {0xAAAAAAAA, 0u, PatchInfoAllocationType::Default, 0x0, 0u, PatchInfoAllocationType::Default};
    EXPECT_TRUE(aubCsr->setPatchInfoData(patchInfoData));

    std::vector<std::string> comments;

    EXPECT_CALL(*mockAubFileStreamPtr, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool {
        comments.push_back(std::string(str));
        return true;
    }));
    bool result = aubCsr->addPatchInfoComments();
    EXPECT_TRUE(result);

    ASSERT_EQ(2u, comments.size());

    ASSERT_EQ("PatchInfoData", comments[0].substr(0, 13));
    ASSERT_EQ("AllocationsList", comments[1].substr(0, 15));

    std::string line;
    std::istringstream input;
    input.str(comments[1]);

    uint32_t allocationLines = 0;

    std::vector<std::string> expectedAddresses = {"aaaaaaaa"};
    while (std::getline(input, line)) {
        if (line.substr(0, 15) == "AllocationsList") {
            continue;
        }

        bool foundAddr = false;
        for (auto &addr : expectedAddresses) {
            if (line.substr(0, 8) == addr) {
                foundAddr = true;
                break;
            }
        }
        EXPECT_TRUE(foundAddr);
        EXPECT_TRUE(line.size() > 9);
        allocationLines++;
    }
    // Exactly the non-null source must be listed; without this check an empty
    // list would make the loop above pass vacuously.
    EXPECT_EQ(1u, allocationLines);

    // Give the CSR its own stream back before teardown.
    mockAubFileStream.swap(aubCsr->stream);
    memoryManager->freeGraphicsMemory(commandBuffer);
}
class OsAgnosticMemoryManagerForImagesWithNoHostPtr : public OsAgnosticMemoryManager {
public:
GraphicsAllocation *allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) override {

View File

@@ -57,6 +57,9 @@
using namespace OCLRT;
using ::testing::Invoke;
using ::testing::_;
struct UltCommandStreamReceiverTest
: public DeviceFixture,
public BuiltInFixture,
@@ -3425,3 +3428,141 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetT
EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::HIGH);
}
// With AddPatchInfoCommentsForAUBDump disabled, flushTask() must not hand any
// PatchInfoData to the command stream receiver.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataIsNotCollected) {
    // Pin the flag explicitly instead of relying on its ambient value, so the
    // outcome does not depend on configuration or on other tests.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(false);

    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    // Initialized so the mock is never given an indeterminate value.
    int32_t tag = 0;
    auto mockCsr = new MockCsrBase<FamilyType>(tag);
    // The device is handed the raw pointer here; the test does not delete mockCsr itself.
    pDevice->resetCommandStreamReceiver(mockCsr);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::MEDIUM;

    EXPECT_CALL(*mockCsr, setPatchInfoData(_)).Times(0);

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);
}
// With AddPatchInfoCommentsForAUBDump enabled, flushTask() is expected to report
// five PatchInfoData entries, each targeting the command stream and sourced
// from the heap matching its sourceType.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataIsCollected) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);

    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    // Initialized so the mock is never given an indeterminate value.
    int32_t tag = 0;
    auto mockCsr = new MockCsrBase<FamilyType>(tag);
    // The device is handed the raw pointer here; the test does not delete mockCsr itself.
    pDevice->resetCommandStreamReceiver(mockCsr);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::MEDIUM;

    // Capture every entry passed to setPatchInfoData() for inspection below.
    std::vector<PatchInfoData> patchInfoDataVector;
    EXPECT_CALL(*mockCsr, setPatchInfoData(_)).Times(5).WillRepeatedly(Invoke([&](PatchInfoData &data) {
        patchInfoDataVector.push_back(data);
        return true;
    }));

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    EXPECT_EQ(5u, patchInfoDataVector.size());

    for (auto &patchInfoData : patchInfoDataVector) {
        uint64_t expectedAddress = 0u;
        switch (patchInfoData.sourceType) {
        case PatchInfoAllocationType::DynamicStateHeap:
            expectedAddress = dsh.getGpuBase();
            break;
        case PatchInfoAllocationType::SurfaceStateHeap:
            expectedAddress = ssh.getGpuBase();
            break;
        case PatchInfoAllocationType::IndirectObjectHeap:
            expectedAddress = ioh.getGpuBase();
            break;
        case PatchInfoAllocationType::InstructionHeap:
            expectedAddress = ih.getGpuBase();
            break;
        default:
            // Any other source type is expected to carry a zero source address.
            expectedAddress = 0u;
            break;
        }
        EXPECT_EQ(expectedAddress, patchInfoData.sourceAllocation);
        EXPECT_EQ(0u, patchInfoData.sourceAllocationOffset);
        EXPECT_EQ(commandStream.getGpuBase(), patchInfoData.targetAllocation);
        EXPECT_EQ(PatchInfoAllocationType::Default, patchInfoData.targetType);
        // NOTE(review): this compares a GPU base address with an offset; confirm the intended check.
        EXPECT_NE(commandStream.getGpuBase(), patchInfoData.targetAllocationOffset);
    }
}
// setPatchInfoData() on a MockCsrHw2 (a CommandStreamReceiverHw subclass) is
// expected to return false.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonAubCsrWhenSetPatchInfoDataIsCalledThenNoDataIsCollected) {
    auto csr = std::unique_ptr<MockCsrHw2<FamilyType>>(new MockCsrHw2<FamilyType>(*platformDevices[0]));

    PatchInfoData emptyPatch = {0u, 0u, PatchInfoAllocationType::Default, 0u, 0u, PatchInfoAllocationType::Default};
    const bool collected = csr->setPatchInfoData(emptyPatch);

    EXPECT_FALSE(collected);
}
// collectStateBaseAddresPatchInfo() is expected to report five PatchInfoData
// entries (DSH, GSH, SSH, IOH, IH, in that order), all targeting baseAddress,
// each at commandOffset plus the matching STATE_BASE_ADDRESS field offset.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMockCsrWhenCollectStateBaseAddresPatchInfoIsCalledThenAppropriateAddressesAreTaken) {
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;

    // Initialized so the mock is never given an indeterminate value.
    int32_t tag = 0;
    std::unique_ptr<MockCsrBase<FamilyType>> mockCsr(new MockCsrBase<FamilyType>(tag));

    // Capture every entry passed to setPatchInfoData() for inspection below.
    std::vector<PatchInfoData> patchInfoDataVector;
    EXPECT_CALL(*mockCsr, setPatchInfoData(_)).Times(5).WillRepeatedly(Invoke([&](PatchInfoData &data) {
        patchInfoDataVector.push_back(data);
        return true;
    }));

    uint64_t baseAddress = 0xabcdef;
    uint64_t commandOffset = 0xa;
    uint64_t generalStateBase = 0xff;

    mockCsr->collectStateBaseAddresPatchInfo(baseAddress, commandOffset, dsh, ih, ioh, ssh, generalStateBase);

    // Assertions use (expected, actual) order, matching the other tests in this file.
    ASSERT_EQ(5u, patchInfoDataVector.size());
    const PatchInfoData &dshPatch = patchInfoDataVector[0];
    const PatchInfoData &gshPatch = patchInfoDataVector[1];
    const PatchInfoData &sshPatch = patchInfoDataVector[2];
    const PatchInfoData &iohPatch = patchInfoDataVector[3];
    const PatchInfoData &ihPatch = patchInfoDataVector[4];

    for (auto &patch : patchInfoDataVector) {
        EXPECT_EQ(baseAddress, patch.targetAllocation);
        EXPECT_EQ(0u, patch.sourceAllocationOffset);
    }

    //DSH
    EXPECT_EQ(dsh.getGpuBase(), dshPatch.sourceAllocation);
    EXPECT_EQ(commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET, dshPatch.targetAllocationOffset);

    //IH
    EXPECT_EQ(ih.getGpuBase(), ihPatch.sourceAllocation);
    EXPECT_EQ(commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INSTRUCTIONBASEADDRESS_BYTEOFFSET, ihPatch.targetAllocationOffset);

    //IOH
    EXPECT_EQ(ioh.getGpuBase(), iohPatch.sourceAllocation);
    EXPECT_EQ(commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET, iohPatch.targetAllocationOffset);

    //SSH
    EXPECT_EQ(ssh.getGpuBase(), sshPatch.sourceAllocation);
    EXPECT_EQ(commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET, sshPatch.targetAllocationOffset);

    //GSH
    EXPECT_EQ(generalStateBase, gshPatch.sourceAllocation);
    EXPECT_EQ(commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET, gshPatch.targetAllocationOffset);
}

View File

@@ -30,6 +30,7 @@
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/image_fixture.h"
#include "unit_tests/fixtures/execution_model_kernel_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/indirect_heap/indirect_heap_fixture.h"
#include "unit_tests/fixtures/built_in_fixture.h"
#include "unit_tests/mocks/mock_kernel.h"
@@ -211,6 +212,67 @@ HWTEST_F(KernelCommandsTest, sendCrossThreadDataResourceUsage) {
EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore);
}
// With AddPatchInfoCommentsForAUBDump disabled, sendCrossThreadData() must leave
// the kernel's PatchInfoData entry exactly as it was registered.
HWTEST_F(KernelCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataOffsetsAreNotMoved) {
    // Pin the flag explicitly instead of relying on its ambient value, so the
    // outcome does not depend on configuration or on other tests.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(false);

    CommandQueueHw<FamilyType> cmdQ(pContext, pDevice, 0);

    MockContext context;

    MockProgram program(&context, false);
    std::unique_ptr<KernelInfo> kernelInfo(KernelInfo::create());
    std::unique_ptr<MockKernel> kernel(new MockKernel(&program, *kernelInfo, *pDevice));

    auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);

    PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap};
    kernel->getPatchInfoDataList().push_back(patchInfoData);

    KernelCommandsHelper<FamilyType>::sendCrossThreadData(
        indirectHeap,
        *kernel);

    // Every field must still hold the value that was pushed above.
    ASSERT_EQ(1u, kernel->getPatchInfoDataList().size());
    EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation);
    EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].sourceAllocationOffset);
    EXPECT_EQ(PatchInfoAllocationType::KernelArg, kernel->getPatchInfoDataList()[0].sourceType);
    EXPECT_EQ(0xbbbbbbbb, kernel->getPatchInfoDataList()[0].targetAllocation);
    EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].targetAllocationOffset);
    EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, kernel->getPatchInfoDataList()[0].targetType);
}
// With AddPatchInfoCommentsForAUBDump enabled, sendCrossThreadData() is expected
// to retarget the kernel's PatchInfoData entry at the indirect heap's GPU base
// and at the offset where the cross-thread data was placed.
HWTEST_F(KernelCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataOffsetsAreMoved) {
    DebugManagerStateRestore flagGuard;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);

    CommandQueueHw<FamilyType> queue(pContext, pDevice, 0);
    MockContext mockContext;
    MockProgram mockProgram(&mockContext, false);
    std::unique_ptr<KernelInfo> info(KernelInfo::create());
    std::unique_ptr<MockKernel> mockKernel(new MockKernel(&mockProgram, *info, *pDevice));

    auto &ioh = queue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
    // Consume 128 bytes first so that the cross-thread data lands at a non-zero offset.
    ioh.getSpace(128u);

    PatchInfoData registeredPatch = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap};
    mockKernel->getPatchInfoDataList().push_back(registeredPatch);

    auto crossThreadOffset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(ioh, *mockKernel);

    ASSERT_NE(0u, crossThreadOffset);
    EXPECT_EQ(128u, crossThreadOffset);

    ASSERT_EQ(1u, mockKernel->getPatchInfoDataList().size());
    auto &patch = mockKernel->getPatchInfoDataList()[0];

    // The source side is untouched.
    EXPECT_EQ(0xaaaaaaaa, patch.sourceAllocation);
    EXPECT_EQ(0u, patch.sourceAllocationOffset);
    EXPECT_EQ(PatchInfoAllocationType::KernelArg, patch.sourceType);

    // The target side now refers to the heap and the returned offset.
    EXPECT_NE(0xbbbbbbbb, patch.targetAllocation);
    EXPECT_EQ(ioh.getGpuBase(), patch.targetAllocation);
    EXPECT_NE(0u, patch.targetAllocationOffset);
    EXPECT_EQ(crossThreadOffset, patch.targetAllocationOffset);
    EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, patch.targetType);
}
HWTEST_F(KernelCommandsTest, sendIndirectStateResourceUsage) {
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;

View File

@@ -29,6 +29,7 @@
#include "unit_tests/fixtures/buffer_fixture.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "gtest/gtest.h"
using namespace OCLRT;
@@ -245,3 +246,35 @@ TEST_F(BufferSetArgTest, getKernelArgShouldReturnBuffer) {
EXPECT_EQ(memObj, pKernel->getKernelArg(0));
}
// With AddPatchInfoCommentsForAUBDump enabled, setting a buffer kernel argument
// is expected to record one PatchInfoData entry: source is the buffer's GPU
// address to patch, target is the kernel's cross-thread data.
TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsSetThenPatchInfoDataIsCollected) {
    DebugManagerStateRestore flagGuard;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);

    cl_mem argValue = buffer;
    retVal = pKernel->setArg(0, sizeof(argValue), &argValue);
    ASSERT_EQ(CL_SUCCESS, retVal);

    ASSERT_EQ(1u, pKernel->getPatchInfoDataList().size());
    auto &recorded = pKernel->getPatchInfoDataList()[0];

    EXPECT_EQ(PatchInfoAllocationType::KernelArg, recorded.sourceType);
    EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, recorded.targetType);
    EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddressToPatch(), recorded.sourceAllocation);
    EXPECT_EQ(reinterpret_cast<uint64_t>(pKernel->getCrossThreadData()), recorded.targetAllocation);
    EXPECT_EQ(0u, recorded.sourceAllocationOffset);
}
// With AddPatchInfoCommentsForAUBDump disabled, setting a buffer kernel argument
// must not record any PatchInfoData.
TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsNotSetThenPatchInfoDataIsNotCollected) {
    // Pin the flag explicitly instead of relying on its ambient value, so the
    // outcome does not depend on configuration or on other tests.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(false);

    cl_mem memObj = buffer;

    retVal = pKernel->setArg(
        0,
        sizeof(memObj),
        &memObj);

    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pKernel->getPatchInfoDataList().size());
}

View File

@@ -28,8 +28,14 @@
#include "runtime/helpers/flush_stamp.h"
#include "runtime/helpers/string.h"
#include "unit_tests/libult/ult_command_stream_receiver.h"
#include "gmock/gmock.h"
#include <vector>
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
#endif
using namespace OCLRT;
template <typename GfxFamily>
@@ -83,6 +89,8 @@ class MockCsrBase : public UltCommandStreamReceiver<GfxFamily> {
processEvictionCalled = true;
}
MOCK_METHOD1(setPatchInfoData, bool(PatchInfoData &));
ResidencyContainer madeResidentGfxAllocations;
ResidencyContainer madeNonResidentGfxAllocations;
int32_t *executionStamp;
@@ -141,7 +149,8 @@ class MockCsr : public MockCsrBase<GfxFamily> {
};
template <typename GfxFamily>
struct MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
public:
using CommandStreamReceiverHw<GfxFamily>::flushStamp;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
@@ -250,3 +259,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
instructionHeapReserveredData.data(), instructionHeapReserveredData.size());
}
};
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

View File

@@ -62,3 +62,4 @@ OverrideThreadArbitrationPolicy = -1
PrintDriverDiagnostics = -1
FlattenBatchBufferForAUBDump = false
PrintDispatchParameters = false
AddPatchInfoCommentsForAUBDump = false