Extend batch buffer flattening in AubCSR to BatchedDispatch mode

- batch buffer flattening in batched mode
    - added MI_USER_INTERRUPT command
    - added GUC Work Queue Item

Change-Id: I35142da34b30d3006bb4ffc1521db7f6ebe68ebc
This commit is contained in:
Pawel Wilma
2018-04-04 11:34:46 +02:00
committed by sys_ocldev
parent 31157573ca
commit a0c044e6d2
41 changed files with 1188 additions and 247 deletions

View File

@@ -44,6 +44,10 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/error_mappers.h
${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.h
${CMAKE_CURRENT_SOURCE_DIR}/get_info.h

View File

@@ -32,7 +32,10 @@ enum PatchInfoAllocationType {
DynamicStateHeap,
IndirectObjectHeap,
SurfaceStateHeap,
InstructionHeap
InstructionHeap,
TagAddress,
TagValue,
GUCStartMessage,
};
struct PatchInfoData {
@@ -42,5 +45,50 @@ struct PatchInfoData {
uint64_t targetAllocation;
uint64_t targetAllocationOffset;
PatchInfoAllocationType targetType;
uint32_t patchAddressSize;
// Creates a patch record with an explicit width for the patched address.
// Every argument is stored unchanged in the member of the same name.
PatchInfoData(uint64_t sourceAllocation,
              uint64_t sourceAllocationOffset,
              PatchInfoAllocationType sourceType,
              uint64_t targetAllocation,
              uint64_t targetAllocationOffset,
              PatchInfoAllocationType targetType,
              uint32_t patchAddressSize)
    : sourceAllocation{sourceAllocation},
      sourceAllocationOffset{sourceAllocationOffset},
      sourceType{sourceType},
      targetAllocation{targetAllocation},
      targetAllocationOffset{targetAllocationOffset},
      targetType{targetType},
      patchAddressSize{patchAddressSize} {
}
// Creates a patch record whose patched address has the width of a native
// pointer (sizeof(void *)); all other fields are taken from the arguments.
PatchInfoData(uint64_t sourceAllocation,
              uint64_t sourceAllocationOffset,
              PatchInfoAllocationType sourceType,
              uint64_t targetAllocation,
              uint64_t targetAllocationOffset,
              PatchInfoAllocationType targetType)
    : PatchInfoData(sourceAllocation,
                    sourceAllocationOffset,
                    sourceType,
                    targetAllocation,
                    targetAllocationOffset,
                    targetType,
                    static_cast<uint32_t>(sizeof(void *))) {
}
// Returns true unless the patch target is of type Default or GUCStartMessage;
// those two target types are the only ones reported as not needing indirect patching.
bool requiresIndirectPatching() {
    const bool isDefaultTarget = (targetType == PatchInfoAllocationType::Default);
    const bool isGucStartMessageTarget = (targetType == PatchInfoAllocationType::GUCStartMessage);
    return !(isDefaultTarget || isGucStartMessageTarget);
}
};
// Describes one contiguous range of commands registered for batch buffer flattening.
// All fields start at zero.
struct CommandChunk {
    uint64_t baseAddressCpu{0};           // CPU address of the buffer that holds the commands
    uint64_t baseAddressGpu{0};           // GPU address of the same buffer
    uint64_t startOffset{0};              // offset of the first command byte, relative to the base address
    uint64_t endOffset{0};                // offset just past the last command byte, relative to the base address
    uint64_t batchBufferStartLocation{0}; // GPU address of the batch-buffer-start command found in this chunk
    uint64_t batchBufferStartAddress{0};  // GPU address that batch-buffer-start command jumps to
};
} // namespace OCLRT

View File

@@ -0,0 +1,78 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/flat_batch_buffer_helper.h"
namespace OCLRT {
// Appends a copy of the given patch record to the collection.
// Always reports success.
bool FlatBatchBufferHelper::setPatchInfoData(const PatchInfoData &data) {
    patchInfoCollection.emplace_back(data);
    return true;
}
// Drops the first patch record whose target address (allocation + offset)
// equals targetLocation. At most one record is removed per call.
// Returns true whether or not a matching record was found.
bool FlatBatchBufferHelper::removePatchInfoData(uint64_t targetLocation) {
    auto entry = patchInfoCollection.begin();
    while (entry != patchInfoCollection.end()) {
        const uint64_t patchedLocation = entry->targetAllocation + entry->targetAllocationOffset;
        if (patchedLocation == targetLocation) {
            patchInfoCollection.erase(entry);
            return true;
        }
        ++entry;
    }
    return true;
}
bool FlatBatchBufferHelper::registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset) {
CommandChunk commandChunk;
commandChunk.baseAddressGpu = baseGpu;
commandChunk.baseAddressCpu = baseCpu;
commandChunk.startOffset = startOffset;
commandChunk.endOffset = endOffset;
return registerCommandChunk(commandChunk);
}
// Registers the command range described by a BatchBuffer.
// The chunk starts at batchBuffer.startOffset and ends right after the chained
// batch-buffer-start command, i.e. chainedBatchBufferStartOffset plus the size
// of that command as supplied by the caller.
bool FlatBatchBufferHelper::registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize) {
    auto &stream = batchBuffer.stream;

    CommandChunk chunk;
    chunk.baseAddressGpu = stream->getGraphicsAllocation()->getGpuAddress();
    chunk.baseAddressCpu = reinterpret_cast<uint64_t>(stream->getCpuBase());
    chunk.startOffset = batchBuffer.startOffset;
    chunk.endOffset = batchBuffer.chainedBatchBufferStartOffset + batchBufferStartCommandSize;

    return registerCommandChunk(chunk);
}
// Appends a copy of the chunk to the list of registered command chunks.
// Always reports success.
bool FlatBatchBufferHelper::registerCommandChunk(CommandChunk &commandChunk) {
    commandChunkList.emplace_back(commandChunk);
    return true;
}
// Records that the batch-buffer-start command located at commandAddress jumps
// to startAddress. An entry already stored for the same commandAddress is kept
// unchanged. Always reports success.
bool FlatBatchBufferHelper::registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress) {
    batchBufferStartAddressSequence.emplace(commandAddress, startAddress);
    return true;
}
// Rebases the kernel-argument patch records in `data`: for every record whose
// source type is KernelArg, the target allocation becomes gpuAddress and the
// target offset is advanced by offsetCrossThreadData. Other records are untouched.
void FlatBatchBufferHelper::fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress) {
    for (auto &entry : data) {
        if (entry.sourceType != PatchInfoAllocationType::KernelArg) {
            continue;
        }
        entry.targetAllocation = gpuAddress;
        entry.targetAllocationOffset += offsetCrossThreadData;
    }
}
};

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/address_patch.h"
#include "runtime/command_stream/submissions_aggregator.h"
#include <map>
#include <vector>
namespace OCLRT {
enum class DispatchMode;
class MemoryManager;
// Collects the bookkeeping used to flatten batch buffers: patch records,
// registered command chunks and the recorded batch-buffer-start links.
// The flattening itself is left to subclasses through the pure virtual methods
// (FlatBatchBufferHelperHw provides them per GfxFamily).
class FlatBatchBufferHelper {
public:
// Stores the memory manager pointer; this class only keeps the pointer.
FlatBatchBufferHelper(MemoryManager *memoryManager) : memoryManager(memoryManager) {}
virtual ~FlatBatchBufferHelper(){};
// NOTE(review): MOCKABLE_VIRTUAL is defined outside this file - presumably it
// expands to `virtual` in test builds; confirm against its definition.
// Appends a patch record to the collection; always returns true.
MOCKABLE_VIRTUAL bool setPatchInfoData(const PatchInfoData &data);
// Removes the first patch record whose target address (allocation + offset)
// equals targetLocation; always returns true.
MOCKABLE_VIRTUAL bool removePatchInfoData(uint64_t targetLocation);
// Registers a command range given as raw CPU/GPU base addresses and offsets.
MOCKABLE_VIRTUAL bool registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset);
// Appends a copy of the chunk to the chunk list; always returns true.
MOCKABLE_VIRTUAL bool registerCommandChunk(CommandChunk &commandChunk);
// Registers the range described by batchBuffer; the end offset is the chained
// batch-buffer-start offset plus batchBufferStartCommandSize.
MOCKABLE_VIRTUAL bool registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize);
// Records that the batch-buffer-start command at commandAddress jumps to startAddress.
MOCKABLE_VIRTUAL bool registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress);
// Produces the flattened batch buffer for the given dispatch mode and reports
// its size through sizeBatchBuffer. Implemented by subclasses.
virtual void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) = 0;
// Produces the buffer of indirect patch commands, reporting its size and the
// patch records that describe it. Implemented by subclasses.
virtual char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) = 0;
// Removes the patch records associated with the given pipe control commands.
// Implemented by subclasses.
virtual void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) = 0;
// Replaces the memory manager used for allocating the flattened buffer.
void setMemoryManager(MemoryManager *memoryManager) { this->memoryManager = memoryManager; }
// Rebases KernelArg patch records onto gpuAddress and shifts their target
// offset by offsetCrossThreadData.
static void fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress);
// Mutable access to the internal containers.
std::vector<CommandChunk> &getCommandChunkList() { return commandChunkList; }
std::vector<PatchInfoData> &getPatchInfoCollection() { return patchInfoCollection; }
std::map<uint64_t, uint64_t> &getBatchBufferStartAddressSequence() { return batchBufferStartAddressSequence; }
protected:
MemoryManager *memoryManager = nullptr;
// Patch records collected so far.
std::vector<PatchInfoData> patchInfoCollection;
// Command ranges registered for flattening.
std::vector<CommandChunk> commandChunkList;
// Maps the address of a batch-buffer-start command to the address it jumps to.
std::map<uint64_t, uint64_t> batchBufferStartAddressSequence;
};
} // namespace OCLRT

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/flat_batch_buffer_helper.h"
namespace OCLRT {
// Hardware-family specific implementation of FlatBatchBufferHelper.
// GfxFamily supplies the command types (e.g. MI_STORE_DATA_IMM) used when the
// flattened buffer and the indirect patch commands are built.
template <typename GfxFamily>
class FlatBatchBufferHelperHw : public FlatBatchBufferHelper {
public:
// Forwards the memory manager to the base class.
FlatBatchBufferHelperHw(MemoryManager *memoryManager) : FlatBatchBufferHelper(memoryManager) {}
// Builds the flattened batch buffer for the given dispatch mode.
void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) override;
// Builds one MI_STORE_DATA_IMM command per patch record that requires indirect patching.
char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) override;
// Removes the patch records tied to the given pipe control commands.
void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) override;
// Helpers called by getIndirectPatchCommands to program an MI_STORE_DATA_IMM
// command; their definitions are not part of this header.
// NOTE(review): presumably these set the destination address and select a
// dword vs. qword store - confirm against the per-family definitions.
static void sdiSetAddress(typename GfxFamily::MI_STORE_DATA_IMM *sdiCommand, uint64_t address);
static void sdiSetStoreQword(typename GfxFamily::MI_STORE_DATA_IMM *sdiCommand, bool setQword);
};
} // namespace OCLRT

View File

@@ -0,0 +1,196 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/flat_batch_buffer_helper_hw.h"
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/helpers/string.h"
namespace OCLRT {
// Builds one contiguous ("flat") copy of the commands of a submission.
//
// ImmediateDispatch: only acts when batchBuffer has a chained batch buffer. The
//   result is laid out as [main batch buffer][indirect patch commands][chained
//   batch buffer] and the chained allocation is flagged NON_AUB_WRITABLE.
// BatchedDispatch: the registered command chunks are ordered by following the
//   recorded batch-buffer-start links and laid out as [indirect patch
//   commands][chunks...][MI_USER_INTERRUPT][MI_BATCH_BUFFER_END]. The chunk list
//   and the link map are cleared afterwards.
//
// batchBuffer     - submission being flattened (read in ImmediateDispatch mode only).
// sizeBatchBuffer - receives the size of the returned buffer; left untouched
//                   when nothing is produced.
// dispatchMode    - selects one of the two layouts above.
//
// Returns a buffer obtained from memoryManager->alignedMallocWrapper(), or
// nullptr when nothing was flattened. Note that getIndirectPatchCommands() is
// invoked unconditionally, so the patch collection may be modified even when
// nullptr is returned.
template <typename GfxFamily>
void *FlatBatchBufferHelperHw<GfxFamily>::flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) {
    typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
    typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
    typedef typename GfxFamily::MI_USER_INTERRUPT MI_USER_INTERRUPT;

    void *flatBatchBuffer = nullptr;

    size_t indirectPatchCommandsSize = 0u;
    std::vector<PatchInfoData> indirectPatchInfo;
    // The buffer comes from `new char[]`, so it has to be owned through the
    // array form of unique_ptr to be released with delete[].
    std::unique_ptr<char[]> indirectPatchCommands(getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo));

    if (dispatchMode == DispatchMode::ImmediateDispatch) {
        if (batchBuffer.chainedBatchBuffer) {
            batchBuffer.chainedBatchBuffer->setAllocationType(batchBuffer.chainedBatchBuffer->getAllocationType() | GraphicsAllocation::ALLOCATION_TYPE_NON_AUB_WRITABLE);
            auto sizeMainBatchBuffer = batchBuffer.chainedBatchBufferStartOffset - batchBuffer.startOffset;
            auto flatBatchBufferSize = alignUp(sizeMainBatchBuffer + indirectPatchCommandsSize + batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), MemoryConstants::pageSize);
            flatBatchBuffer = this->memoryManager->alignedMallocWrapper(flatBatchBufferSize, MemoryConstants::pageSize);
            UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
            // Copy main batchbuffer
            memcpy_s(flatBatchBuffer, sizeMainBatchBuffer, ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset), sizeMainBatchBuffer);
            // Copy indirect patch commands
            memcpy_s(ptrOffset(flatBatchBuffer, sizeMainBatchBuffer), indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
            // Copy chained batchbuffer
            memcpy_s(ptrOffset(flatBatchBuffer, sizeMainBatchBuffer + indirectPatchCommandsSize), batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), batchBuffer.chainedBatchBuffer->getUnderlyingBuffer(), batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize());
            sizeBatchBuffer = flatBatchBufferSize;
            patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
        }
    } else if (dispatchMode == DispatchMode::BatchedDispatch) {
        // NOTE(review): the code below appears to assume that at least one chunk
        // has been registered - confirm that callers guarantee this.

        // Step 1: trim every chunk at the batch-buffer-start command it contains
        // and find the chunk that no recorded batch-buffer-start jumps to.
        CommandChunk firstChunk;
        for (auto &chunk : commandChunkList) {
            bool found = false;
            for (auto &startCommand : batchBufferStartAddressSequence) {
                if ((startCommand.first <= chunk.baseAddressGpu + chunk.endOffset) && (startCommand.first >= chunk.baseAddressGpu + chunk.startOffset)) {
                    chunk.batchBufferStartLocation = startCommand.first;
                    chunk.batchBufferStartAddress = startCommand.second;
                    chunk.endOffset = chunk.batchBufferStartLocation - chunk.baseAddressGpu;
                }
                if (startCommand.second == chunk.baseAddressGpu + chunk.startOffset) {
                    found = true;
                }
            }
            if (!found) {
                firstChunk = chunk;
            }
        }

        // Step 2: follow the batch-buffer-start links to put the chunks in
        // execution order.
        std::vector<CommandChunk> orderedChunks;
        CommandChunk &nextChunk = firstChunk;
        while (true) {
            bool hasNextChunk = false;
            for (auto &chunk : commandChunkList) {
                if (nextChunk.batchBufferStartAddress == chunk.baseAddressGpu + chunk.startOffset) {
                    hasNextChunk = true;
                    orderedChunks.push_back(nextChunk);
                    nextChunk = chunk;
                    break;
                }
            }
            if (!hasNextChunk) {
                // The last chunk is shortened by the size of one MI_BATCH_BUFFER_START.
                nextChunk.endOffset -= sizeof(MI_BATCH_BUFFER_START);
                orderedChunks.push_back(nextChunk);
                break;
            }
        }

        // Step 3: re-target the patch records that fall inside a chunk to the
        // position that chunk takes in the flat buffer, and accumulate the size.
        uint64_t flatBatchBufferSize = 0u;
        std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
        patchInfoCollection.clear();

        for (auto &chunk : orderedChunks) {
            for (auto &patch : patchInfoCopy) {
                if (patch.targetAllocation + patch.targetAllocationOffset >= chunk.baseAddressGpu + chunk.startOffset && patch.targetAllocation + patch.targetAllocationOffset <= chunk.baseAddressGpu + chunk.endOffset) {
                    patch.targetAllocationOffset = patch.targetAllocationOffset - chunk.startOffset + flatBatchBufferSize + indirectPatchCommandsSize;
                    patchInfoCollection.push_back(patch);
                }
            }
            flatBatchBufferSize += chunk.endOffset - chunk.startOffset;
        }
        patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));

        flatBatchBufferSize += sizeof(MI_USER_INTERRUPT);
        flatBatchBufferSize += sizeof(MI_BATCH_BUFFER_END);
        flatBatchBufferSize += indirectPatchCommandsSize;

        flatBatchBufferSize = alignUp(flatBatchBufferSize, MemoryConstants::pageSize);
        flatBatchBufferSize += CSRequirements::csOverfetchSize;
        flatBatchBuffer = this->memoryManager->alignedMallocWrapper(static_cast<size_t>(flatBatchBufferSize), MemoryConstants::pageSize);
        // Same failure handling as the immediate path: do not write through a null buffer.
        UNRECOVERABLE_IF(flatBatchBuffer == nullptr);

        // Step 4: emit indirect patch commands, then the chunks, then the
        // terminating MI_USER_INTERRUPT and MI_BATCH_BUFFER_END.
        char *ptr = reinterpret_cast<char *>(flatBatchBuffer);
        memcpy_s(ptr, indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
        ptr += indirectPatchCommandsSize;
        for (auto &chunk : orderedChunks) {
            size_t chunkSize = static_cast<size_t>(chunk.endOffset - chunk.startOffset);
            memcpy_s(ptr,
                     chunkSize,
                     reinterpret_cast<char *>(ptrOffset(chunk.baseAddressCpu, static_cast<size_t>(chunk.startOffset))),
                     chunkSize);
            ptr += chunkSize;
        }

        auto pCmdMui = reinterpret_cast<MI_USER_INTERRUPT *>(ptr);
        pCmdMui->init();
        ptr += sizeof(MI_USER_INTERRUPT);

        auto pCmdBBend = reinterpret_cast<MI_BATCH_BUFFER_END *>(ptr);
        *pCmdBBend = GfxFamily::cmdInitBatchBufferEnd;

        sizeBatchBuffer = static_cast<size_t>(flatBatchBufferSize);
        commandChunkList.clear();
        batchBufferStartAddressSequence.clear();
    }

    return flatBatchBuffer;
}
// Converts every patch record that requires indirect patching into one
// MI_STORE_DATA_IMM command that writes the source address
// (sourceAllocation + sourceAllocationOffset) to the target address
// (targetAllocation + targetAllocationOffset).
//
// indirectPatchCommandsSize - receives the total size in bytes of the generated commands.
// indirectPatchInfo         - receives two patch records per generated command,
//                             one for the address and one for the value stored
//                             in that command.
//
// Side effect: patchInfoCollection is rebuilt so that it keeps only the
// records that do NOT require indirect patching.
//
// Returns a buffer allocated with `new char[]`; the caller owns it and must
// release it with delete[].
template <typename GfxFamily>
char *FlatBatchBufferHelperHw<GfxFamily>::getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) {
    typedef typename GfxFamily::MI_STORE_DATA_IMM MI_STORE_DATA_IMM;

    // First pass: size the command buffer.
    indirectPatchCommandsSize = 0;
    for (auto &patchInfoData : patchInfoCollection) {
        if (patchInfoData.requiresIndirectPatching()) {
            indirectPatchCommandsSize += sizeof(MI_STORE_DATA_IMM);
        }
    }

    uint64_t stiCommandOffset = 0;
    std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
    // Array form of unique_ptr: the storage is created with new[], so it must
    // be released with delete[] if we leave this function before release().
    std::unique_ptr<char[]> buffer(new char[indirectPatchCommandsSize]);
    LinearStream indirectPatchCommandStream(buffer.get(), indirectPatchCommandsSize);
    patchInfoCollection.clear();

    // Second pass: emit the commands and split the records.
    for (auto &patchInfoData : patchInfoCopy) {
        if (patchInfoData.requiresIndirectPatching()) {
            const uint64_t sourceAddress = patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset;

            auto storeDataImmediate = indirectPatchCommandStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
            storeDataImmediate->init();
            sdiSetAddress(storeDataImmediate, patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset);
            sdiSetStoreQword(storeDataImmediate, patchInfoData.patchAddressSize != sizeof(uint32_t));
            storeDataImmediate->setDataDword0(static_cast<uint32_t>(sourceAddress & 0x0000FFFFFFFFULL));
            storeDataImmediate->setDataDword1(static_cast<uint32_t>(sourceAddress >> 32));

            // NOTE(review): the offsets below presumably point at the address and
            // data qwords, taken to be the last two qwords of the command -
            // confirm against the MI_STORE_DATA_IMM layout.
            PatchInfoData patchInfoForAddress(patchInfoData.targetAllocation, patchInfoData.targetAllocationOffset, patchInfoData.targetType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default);
            PatchInfoData patchInfoForValue(patchInfoData.sourceAllocation, patchInfoData.sourceAllocationOffset, patchInfoData.sourceType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - sizeof(uint64_t), PatchInfoAllocationType::Default);
            indirectPatchInfo.push_back(patchInfoForAddress);
            indirectPatchInfo.push_back(patchInfoForValue);

            stiCommandOffset += sizeof(MI_STORE_DATA_IMM);
        } else {
            patchInfoCollection.push_back(patchInfoData);
        }
    }

    return buffer.release();
}
// Removes the patch records attached to a run of PIPE_CONTROL commands.
// pipeControlLocationSize is the byte size of the run and pipeControlForNooping
// its start address. For each whole PIPE_CONTROL in the run, the records
// targeting its last two 64-bit slots are removed.
template <typename GfxFamily>
void FlatBatchBufferHelperHw<GfxFamily>::removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) {
    typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;

    const uint64_t pipeControlBase = reinterpret_cast<uint64_t>(pipeControlForNooping);
    const size_t pipeControlCount = pipeControlLocationSize / sizeof(PIPE_CONTROL);

    for (size_t ordinal = 1; ordinal <= pipeControlCount; ++ordinal) {
        // Address just past the end of the current PIPE_CONTROL.
        const uint64_t pipeControlEnd = pipeControlBase + ordinal * sizeof(PIPE_CONTROL);
        removePatchInfoData(pipeControlEnd - 2 * sizeof(uint64_t));
        removePatchInfoData(pipeControlEnd - sizeof(uint64_t));
    }
}
}; // namespace OCLRT

View File

@@ -155,6 +155,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
// Program the kernel start pointer
pInterfaceDescriptor->setKernelStartPointerHigh(kernelStartOffset >> 32);
pInterfaceDescriptor->setKernelStartPointer((uint32_t)kernelStartOffset);
// # of threads in thread group should be based on LWS.
pInterfaceDescriptor->setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
@@ -234,10 +235,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
for (auto &patchInfoData : kernel.getPatchInfoDataList()) {
patchInfoData.targetAllocation = indirectHeap.getGpuBase();
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
}
FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress());
}
return offsetCrossThreadData + static_cast<size_t>(indirectHeap.getHeapGpuStartOffset());
@@ -399,6 +397,11 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
!!patchInfo.executionEnvironment->HasBarriers,
preemptionMode);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap);
kernel.getPatchInfoDataList().push_back(patchInfoData);
}
// Program media state flush to set interface descriptor offset
KernelCommandsHelper<GfxFamily>::sendMediaStateFlush(
commandStream,