mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
Extend batch buffer flattening in AubCSR to BatchedDispatch mode
- batch buffer flattening in batched mode
- added MI_USER_INTERRUPT command
- added GUC Work Queue Item
Change-Id: I35142da34b30d3006bb4ffc1521db7f6ebe68ebc
This commit is contained in:
@@ -44,6 +44,10 @@ set(RUNTIME_SRCS_HELPERS_BASE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/error_mappers.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_info.h
|
||||
|
||||
@@ -32,7 +32,10 @@ enum PatchInfoAllocationType {
|
||||
DynamicStateHeap,
|
||||
IndirectObjectHeap,
|
||||
SurfaceStateHeap,
|
||||
InstructionHeap
|
||||
InstructionHeap,
|
||||
TagAddress,
|
||||
TagValue,
|
||||
GUCStartMessage,
|
||||
};
|
||||
|
||||
struct PatchInfoData {
|
||||
@@ -42,5 +45,50 @@ struct PatchInfoData {
|
||||
uint64_t targetAllocation;
|
||||
uint64_t targetAllocationOffset;
|
||||
PatchInfoAllocationType targetType;
|
||||
uint32_t patchAddressSize;
|
||||
|
||||
PatchInfoData(uint64_t sourceAllocation,
|
||||
uint64_t sourceAllocationOffset,
|
||||
PatchInfoAllocationType sourceType,
|
||||
uint64_t targetAllocation,
|
||||
uint64_t targetAllocationOffset,
|
||||
PatchInfoAllocationType targetType,
|
||||
uint32_t patchAddressSize)
|
||||
: sourceAllocation(sourceAllocation),
|
||||
sourceAllocationOffset(sourceAllocationOffset),
|
||||
sourceType(sourceType),
|
||||
targetAllocation(targetAllocation),
|
||||
targetAllocationOffset(targetAllocationOffset),
|
||||
targetType(targetType),
|
||||
patchAddressSize(patchAddressSize) {
|
||||
}
|
||||
|
||||
PatchInfoData(uint64_t sourceAllocation,
|
||||
uint64_t sourceAllocationOffset,
|
||||
PatchInfoAllocationType sourceType,
|
||||
uint64_t targetAllocation,
|
||||
uint64_t targetAllocationOffset,
|
||||
PatchInfoAllocationType targetType)
|
||||
: sourceAllocation(sourceAllocation),
|
||||
sourceAllocationOffset(sourceAllocationOffset),
|
||||
sourceType(sourceType),
|
||||
targetAllocation(targetAllocation),
|
||||
targetAllocationOffset(targetAllocationOffset),
|
||||
targetType(targetType),
|
||||
patchAddressSize(sizeof(void *)) {
|
||||
}
|
||||
|
||||
bool requiresIndirectPatching() {
|
||||
return (targetType != PatchInfoAllocationType::Default && targetType != PatchInfoAllocationType::GUCStartMessage);
|
||||
}
|
||||
};
|
||||
|
||||
// Describes one span of commands inside a command buffer.
// Every field starts out as zero.
struct CommandChunk {
    uint64_t baseAddressCpu{0};           // CPU address of the buffer that holds the span
    uint64_t baseAddressGpu{0};           // GPU address of the buffer that holds the span
    uint64_t startOffset{0};              // offset of the span start, relative to the base address
    uint64_t endOffset{0};                // offset of the span end, relative to the base address
    uint64_t batchBufferStartLocation{0}; // address of the batch-buffer-start command recorded for this span
    uint64_t batchBufferStartAddress{0};  // address that batch-buffer-start command jumps to
};
|
||||
} // namespace OCLRT
|
||||
78
runtime/helpers/flat_batch_buffer_helper.cpp
Normal file
78
runtime/helpers/flat_batch_buffer_helper.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/flat_batch_buffer_helper.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Stores a copy of `data` at the end of the recorded patch entries.
// Always returns true.
bool FlatBatchBufferHelper::setPatchInfoData(const PatchInfoData &data) {
    patchInfoCollection.emplace_back(data);
    return true;
}
|
||||
// Drops the first recorded patch entry whose target address
// (targetAllocation + targetAllocationOffset) equals `targetLocation`.
// At most one entry is removed. Returns true whether or not a match existed.
bool FlatBatchBufferHelper::removePatchInfoData(uint64_t targetLocation) {
    auto entry = patchInfoCollection.begin();
    while (entry != patchInfoCollection.end()) {
        const uint64_t patchedLocation = entry->targetAllocation + entry->targetAllocationOffset;
        if (patchedLocation == targetLocation) {
            patchInfoCollection.erase(entry);
            break;
        }
        ++entry;
    }
    return true;
}
|
||||
|
||||
bool FlatBatchBufferHelper::registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset) {
|
||||
|
||||
CommandChunk commandChunk;
|
||||
commandChunk.baseAddressGpu = baseGpu;
|
||||
commandChunk.baseAddressCpu = baseCpu;
|
||||
commandChunk.startOffset = startOffset;
|
||||
commandChunk.endOffset = endOffset;
|
||||
return registerCommandChunk(commandChunk);
|
||||
}
|
||||
|
||||
// Registers the command span described by `batchBuffer`.
// The span begins at batchBuffer.startOffset and ends `batchBufferStartCommandSize`
// bytes past batchBuffer.chainedBatchBufferStartOffset; base addresses come from
// the batch buffer's stream.
bool FlatBatchBufferHelper::registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize) {
    auto &stream = batchBuffer.stream;

    CommandChunk chunk;
    chunk.baseAddressGpu = stream->getGraphicsAllocation()->getGpuAddress();
    chunk.baseAddressCpu = reinterpret_cast<uint64_t>(stream->getCpuBase());
    chunk.startOffset = batchBuffer.startOffset;
    chunk.endOffset = batchBuffer.chainedBatchBufferStartOffset + batchBufferStartCommandSize;
    return registerCommandChunk(chunk);
}
|
||||
|
||||
// Appends a copy of `commandChunk` to the list of registered chunks.
// Always returns true.
bool FlatBatchBufferHelper::registerCommandChunk(CommandChunk &commandChunk) {
    commandChunkList.emplace_back(commandChunk);
    return true;
}
|
||||
|
||||
// Records that the batch-buffer-start command located at `commandAddress`
// jumps to `startAddress`. An already recorded `commandAddress` keeps its
// existing value. Always returns true.
bool FlatBatchBufferHelper::registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress) {
    batchBufferStartAddressSequence.emplace(commandAddress, startAddress);
    return true;
}
|
||||
|
||||
void FlatBatchBufferHelper::fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress) {
|
||||
for (auto &patchInfoData : data) {
|
||||
if (patchInfoData.sourceType == PatchInfoAllocationType::KernelArg) {
|
||||
patchInfoData.targetAllocation = gpuAddress;
|
||||
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
63
runtime/helpers/flat_batch_buffer_helper.h
Normal file
63
runtime/helpers/flat_batch_buffer_helper.h
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/helpers/address_patch.h"
|
||||
#include "runtime/command_stream/submissions_aggregator.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
enum class DispatchMode;
|
||||
class MemoryManager;
|
||||
|
||||
class FlatBatchBufferHelper {
|
||||
public:
|
||||
FlatBatchBufferHelper(MemoryManager *memoryManager) : memoryManager(memoryManager) {}
|
||||
virtual ~FlatBatchBufferHelper(){};
|
||||
MOCKABLE_VIRTUAL bool setPatchInfoData(const PatchInfoData &data);
|
||||
MOCKABLE_VIRTUAL bool removePatchInfoData(uint64_t targetLocation);
|
||||
MOCKABLE_VIRTUAL bool registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset);
|
||||
MOCKABLE_VIRTUAL bool registerCommandChunk(CommandChunk &commandChunk);
|
||||
MOCKABLE_VIRTUAL bool registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize);
|
||||
MOCKABLE_VIRTUAL bool registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress);
|
||||
virtual void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) = 0;
|
||||
virtual char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) = 0;
|
||||
virtual void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) = 0;
|
||||
void setMemoryManager(MemoryManager *memoryManager) { this->memoryManager = memoryManager; }
|
||||
static void fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress);
|
||||
|
||||
std::vector<CommandChunk> &getCommandChunkList() { return commandChunkList; }
|
||||
std::vector<PatchInfoData> &getPatchInfoCollection() { return patchInfoCollection; }
|
||||
std::map<uint64_t, uint64_t> &getBatchBufferStartAddressSequence() { return batchBufferStartAddressSequence; }
|
||||
|
||||
protected:
|
||||
MemoryManager *memoryManager = nullptr;
|
||||
|
||||
std::vector<PatchInfoData> patchInfoCollection;
|
||||
std::vector<CommandChunk> commandChunkList;
|
||||
std::map<uint64_t, uint64_t> batchBufferStartAddressSequence;
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
||||
40
runtime/helpers/flat_batch_buffer_helper_hw.h
Normal file
40
runtime/helpers/flat_batch_buffer_helper_hw.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/helpers/flat_batch_buffer_helper.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
class FlatBatchBufferHelperHw : public FlatBatchBufferHelper {
|
||||
public:
|
||||
FlatBatchBufferHelperHw(MemoryManager *memoryManager) : FlatBatchBufferHelper(memoryManager) {}
|
||||
void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) override;
|
||||
char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) override;
|
||||
void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) override;
|
||||
static void sdiSetAddress(typename GfxFamily::MI_STORE_DATA_IMM *sdiCommand, uint64_t address);
|
||||
static void sdiSetStoreQword(typename GfxFamily::MI_STORE_DATA_IMM *sdiCommand, bool setQword);
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
||||
196
runtime/helpers/flat_batch_buffer_helper_hw.inl
Normal file
196
runtime/helpers/flat_batch_buffer_helper_hw.inl
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/flat_batch_buffer_helper_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void *FlatBatchBufferHelperHw<GfxFamily>::flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) {
|
||||
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
|
||||
typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
|
||||
typedef typename GfxFamily::MI_USER_INTERRUPT MI_USER_INTERRUPT;
|
||||
|
||||
void *flatBatchBuffer = nullptr;
|
||||
|
||||
size_t indirectPatchCommandsSize = 0u;
|
||||
std::vector<PatchInfoData> indirectPatchInfo;
|
||||
std::unique_ptr<char> indirectPatchCommands(getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo));
|
||||
|
||||
if (dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
if (batchBuffer.chainedBatchBuffer) {
|
||||
batchBuffer.chainedBatchBuffer->setAllocationType(batchBuffer.chainedBatchBuffer->getAllocationType() | GraphicsAllocation::ALLOCATION_TYPE_NON_AUB_WRITABLE);
|
||||
auto sizeMainBatchBuffer = batchBuffer.chainedBatchBufferStartOffset - batchBuffer.startOffset;
|
||||
|
||||
auto flatBatchBufferSize = alignUp(sizeMainBatchBuffer + indirectPatchCommandsSize + batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), MemoryConstants::pageSize);
|
||||
flatBatchBuffer = this->memoryManager->alignedMallocWrapper(flatBatchBufferSize, MemoryConstants::pageSize);
|
||||
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
|
||||
// Copy main batchbuffer
|
||||
memcpy_s(flatBatchBuffer, sizeMainBatchBuffer, ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset), sizeMainBatchBuffer);
|
||||
// Copy indirect patch commands
|
||||
memcpy_s(ptrOffset(flatBatchBuffer, sizeMainBatchBuffer), indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
|
||||
// Copy chained batchbuffer
|
||||
memcpy_s(ptrOffset(flatBatchBuffer, sizeMainBatchBuffer + indirectPatchCommandsSize), batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), batchBuffer.chainedBatchBuffer->getUnderlyingBuffer(), batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize());
|
||||
sizeBatchBuffer = flatBatchBufferSize;
|
||||
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
|
||||
}
|
||||
} else if (dispatchMode == DispatchMode::BatchedDispatch) {
|
||||
CommandChunk firstChunk;
|
||||
for (auto &chunk : commandChunkList) {
|
||||
bool found = false;
|
||||
for (auto &batchBuffer : batchBufferStartAddressSequence) {
|
||||
if ((batchBuffer.first <= chunk.baseAddressGpu + chunk.endOffset) && (batchBuffer.first >= chunk.baseAddressGpu + chunk.startOffset)) {
|
||||
chunk.batchBufferStartLocation = batchBuffer.first;
|
||||
chunk.batchBufferStartAddress = batchBuffer.second;
|
||||
chunk.endOffset = chunk.batchBufferStartLocation - chunk.baseAddressGpu;
|
||||
}
|
||||
if (batchBuffer.second == chunk.baseAddressGpu + chunk.startOffset) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
firstChunk = chunk;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<CommandChunk> orderedChunks;
|
||||
CommandChunk &nextChunk = firstChunk;
|
||||
while (true) {
|
||||
bool hasNextChunk = false;
|
||||
for (auto &chunk : commandChunkList) {
|
||||
if (nextChunk.batchBufferStartAddress == chunk.baseAddressGpu + chunk.startOffset) {
|
||||
hasNextChunk = true;
|
||||
orderedChunks.push_back(nextChunk);
|
||||
nextChunk = chunk;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasNextChunk) {
|
||||
nextChunk.endOffset -= sizeof(MI_BATCH_BUFFER_START);
|
||||
orderedChunks.push_back(nextChunk);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t flatBatchBufferSize = 0u;
|
||||
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
|
||||
patchInfoCollection.clear();
|
||||
|
||||
for (auto &chunk : orderedChunks) {
|
||||
for (auto &patch : patchInfoCopy) {
|
||||
if (patch.targetAllocation + patch.targetAllocationOffset >= chunk.baseAddressGpu + chunk.startOffset && patch.targetAllocation + patch.targetAllocationOffset <= chunk.baseAddressGpu + chunk.endOffset) {
|
||||
patch.targetAllocationOffset = patch.targetAllocationOffset - chunk.startOffset + flatBatchBufferSize + indirectPatchCommandsSize;
|
||||
patchInfoCollection.push_back(patch);
|
||||
}
|
||||
}
|
||||
flatBatchBufferSize += chunk.endOffset - chunk.startOffset;
|
||||
}
|
||||
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
|
||||
|
||||
flatBatchBufferSize += sizeof(MI_USER_INTERRUPT);
|
||||
flatBatchBufferSize += sizeof(MI_BATCH_BUFFER_END);
|
||||
flatBatchBufferSize += indirectPatchCommandsSize;
|
||||
|
||||
flatBatchBufferSize = alignUp(flatBatchBufferSize, MemoryConstants::pageSize);
|
||||
flatBatchBufferSize += CSRequirements::csOverfetchSize;
|
||||
flatBatchBuffer = this->memoryManager->alignedMallocWrapper(static_cast<size_t>(flatBatchBufferSize), MemoryConstants::pageSize);
|
||||
|
||||
char *ptr = reinterpret_cast<char *>(flatBatchBuffer);
|
||||
memcpy_s(ptr, indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
|
||||
ptr += indirectPatchCommandsSize;
|
||||
for (auto &chunk : orderedChunks) {
|
||||
size_t chunkSize = static_cast<size_t>(chunk.endOffset - chunk.startOffset);
|
||||
memcpy_s(ptr,
|
||||
chunkSize,
|
||||
reinterpret_cast<char *>(ptrOffset(chunk.baseAddressCpu, static_cast<size_t>(chunk.startOffset))),
|
||||
chunkSize);
|
||||
ptr += chunkSize;
|
||||
}
|
||||
|
||||
auto pCmdMui = reinterpret_cast<MI_USER_INTERRUPT *>(ptr);
|
||||
pCmdMui->init();
|
||||
ptr += sizeof(MI_USER_INTERRUPT);
|
||||
|
||||
auto pCmdBBend = reinterpret_cast<MI_BATCH_BUFFER_END *>(ptr);
|
||||
*pCmdBBend = GfxFamily::cmdInitBatchBufferEnd;
|
||||
ptr += sizeof(MI_BATCH_BUFFER_END);
|
||||
|
||||
sizeBatchBuffer = static_cast<size_t>(flatBatchBufferSize);
|
||||
commandChunkList.clear();
|
||||
batchBufferStartAddressSequence.clear();
|
||||
}
|
||||
|
||||
return flatBatchBuffer;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
char *FlatBatchBufferHelperHw<GfxFamily>::getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) {
|
||||
typedef typename GfxFamily::MI_STORE_DATA_IMM MI_STORE_DATA_IMM;
|
||||
|
||||
indirectPatchCommandsSize = 0;
|
||||
for (auto &patchInfoData : patchInfoCollection) {
|
||||
if (patchInfoData.requiresIndirectPatching()) {
|
||||
indirectPatchCommandsSize += sizeof(MI_STORE_DATA_IMM);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t stiCommandOffset = 0;
|
||||
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
|
||||
std::unique_ptr<char> buffer(new char[indirectPatchCommandsSize]);
|
||||
LinearStream indirectPatchCommandStream(buffer.get(), indirectPatchCommandsSize);
|
||||
patchInfoCollection.clear();
|
||||
|
||||
for (auto &patchInfoData : patchInfoCopy) {
|
||||
if (patchInfoData.requiresIndirectPatching()) {
|
||||
auto storeDataImmediate = indirectPatchCommandStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
|
||||
storeDataImmediate->init();
|
||||
sdiSetAddress(storeDataImmediate, patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset);
|
||||
sdiSetStoreQword(storeDataImmediate, patchInfoData.patchAddressSize != sizeof(uint32_t));
|
||||
storeDataImmediate->setDataDword0(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) & 0x0000FFFFFFFFULL));
|
||||
storeDataImmediate->setDataDword1(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) >> 32));
|
||||
|
||||
PatchInfoData patchInfoForAddress(patchInfoData.targetAllocation, patchInfoData.targetAllocationOffset, patchInfoData.targetType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default);
|
||||
PatchInfoData patchInfoForValue(patchInfoData.sourceAllocation, patchInfoData.sourceAllocationOffset, patchInfoData.sourceType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - sizeof(uint64_t), PatchInfoAllocationType::Default);
|
||||
indirectPatchInfo.push_back(patchInfoForAddress);
|
||||
indirectPatchInfo.push_back(patchInfoForValue);
|
||||
stiCommandOffset += sizeof(MI_STORE_DATA_IMM);
|
||||
} else {
|
||||
patchInfoCollection.push_back(patchInfoData);
|
||||
}
|
||||
}
|
||||
return buffer.release();
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void FlatBatchBufferHelperHw<GfxFamily>::removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping) {
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
size_t numPipeControls = pipeControlLocationSize / sizeof(PIPE_CONTROL);
|
||||
for (size_t i = 0; i < numPipeControls; i++) {
|
||||
PIPE_CONTROL *erasedPipeControl = reinterpret_cast<PIPE_CONTROL *>(pipeControlForNooping);
|
||||
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - 2 * sizeof(uint64_t));
|
||||
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - sizeof(uint64_t));
|
||||
}
|
||||
}
|
||||
|
||||
}; // namespace OCLRT
|
||||
@@ -155,6 +155,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
// Program the kernel start pointer
|
||||
pInterfaceDescriptor->setKernelStartPointerHigh(kernelStartOffset >> 32);
|
||||
pInterfaceDescriptor->setKernelStartPointer((uint32_t)kernelStartOffset);
|
||||
|
||||
// # of threads in thread group should be based on LWS.
|
||||
pInterfaceDescriptor->setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
|
||||
|
||||
@@ -234,10 +235,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
for (auto &patchInfoData : kernel.getPatchInfoDataList()) {
|
||||
patchInfoData.targetAllocation = indirectHeap.getGpuBase();
|
||||
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
|
||||
}
|
||||
FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
return offsetCrossThreadData + static_cast<size_t>(indirectHeap.getHeapGpuStartOffset());
|
||||
@@ -399,6 +397,11 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
!!patchInfo.executionEnvironment->HasBarriers,
|
||||
preemptionMode);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap);
|
||||
kernel.getPatchInfoDataList().push_back(patchInfoData);
|
||||
}
|
||||
|
||||
// Program media state flush to set interface descriptor offset
|
||||
KernelCommandsHelper<GfxFamily>::sendMediaStateFlush(
|
||||
commandStream,
|
||||
|
||||
Reference in New Issue
Block a user