mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Revert "Optimize copying buffers by blitter"
This reverts commit b09872f595.
Change-Id: I571de1024416b2e1cbf47aadbfe6e15566dd8dd9
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
@@ -931,7 +931,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||
EncodeStoreMMIO<GfxFamily>::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress);
|
||||
}
|
||||
|
||||
BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]);
|
||||
BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]);
|
||||
|
||||
if (blitProperties.outputTimestampPacket) {
|
||||
if (profilingEnabled) {
|
||||
|
||||
@@ -111,18 +111,12 @@ struct BlitCommandsHelper {
|
||||
static uint64_t getMaxBlitHeightOverride(const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void dispatchPostBlitCommand(LinearStream &linearStream);
|
||||
static size_t estimatePostBlitCommandSize();
|
||||
static size_t estimateBlitCommandsSize(const Vec3<size_t> ©Size, const CsrDependencies &csrDependencies, bool updateTimestampPacket,
|
||||
static size_t estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket,
|
||||
bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled,
|
||||
bool debugPauseEnabled, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static size_t getNumberOfBlitsForCopyRegion(const Vec3<size_t> ©Size, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static size_t getNumberOfBlitsForCopyPerRow(const Vec3<size_t> ©Size, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice);
|
||||
static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice);
|
||||
static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice);
|
||||
static uint64_t calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice);
|
||||
static void dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void dispatchBlitCommandsRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void dispatchBlitMemoryColorFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, size_t patternSize, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
@@ -141,6 +135,5 @@ struct BlitCommandsHelper {
|
||||
static size_t getSizeForDebugPauseCommands();
|
||||
static bool useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel);
|
||||
static uint32_t getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, uint32_t srcSize, uint32_t dstSize);
|
||||
static bool isCopyRegionPreferred(const Vec3<size_t> ©Size, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -68,22 +68,46 @@ size_t BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const Vec3<size_t> ©Size, const CsrDependencies &csrDependencies,
|
||||
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies,
|
||||
bool updateTimestampPacket, bool profilingEnabled,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
size_t timestampCmdSize = 0;
|
||||
if (updateTimestampPacket) {
|
||||
timestampCmdSize = (profilingEnabled) ? 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM)
|
||||
: EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
|
||||
size_t numberOfBlits = 0;
|
||||
uint64_t width = 1;
|
||||
uint64_t height = 1;
|
||||
|
||||
for (uint64_t slice = 0; slice < copySize.z; slice++) {
|
||||
for (uint64_t row = 0; row < copySize.y; row++) {
|
||||
uint64_t sizeToBlit = copySize.x;
|
||||
while (sizeToBlit != 0) {
|
||||
if (sizeToBlit > getMaxBlitWidth(rootDeviceEnvironment)) {
|
||||
// dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
|
||||
width = getMaxBlitWidth(rootDeviceEnvironment);
|
||||
height = std::min((sizeToBlit / width), getMaxBlitHeight(rootDeviceEnvironment));
|
||||
|
||||
} else {
|
||||
// dispatch 1D blt: (1 .. maxBlitWidth) x 1
|
||||
width = sizeToBlit;
|
||||
height = 1;
|
||||
}
|
||||
sizeToBlit -= (width * height);
|
||||
numberOfBlits++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool preferRegionCopy = isCopyRegionPreferred(copySize, rootDeviceEnvironment);
|
||||
auto nBlits = preferRegionCopy ? getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment)
|
||||
: getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
|
||||
const size_t cmdsSizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());
|
||||
|
||||
auto sizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());
|
||||
size_t timestampCmdSize = 0;
|
||||
if (updateTimestampPacket) {
|
||||
if (profilingEnabled) {
|
||||
timestampCmdSize = 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
} else {
|
||||
timestampCmdSize = EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
|
||||
}
|
||||
}
|
||||
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize;
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
|
||||
(cmdsSizePerBlit * numberOfBlits) + timestampCmdSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -97,8 +121,7 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropert
|
||||
rootDeviceEnvironment);
|
||||
}
|
||||
size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo());
|
||||
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
|
||||
size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
|
||||
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
|
||||
|
||||
if (debugPauseEnabled) {
|
||||
size += BlitCommandsHelper<GfxFamily>::getSizeForDebugPauseCommands();
|
||||
@@ -174,7 +197,6 @@ void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(const Bl
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <size_t patternSize>
|
||||
void BlitCommandsHelper<GfxFamily>::dispatchBlitMemoryFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) {
|
||||
@@ -279,102 +301,4 @@ uint32_t BlitCommandsHelper<GfxFamily>::getAvailableBytesPerPixel(size_t copySiz
|
||||
return bytesPerPixel;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
bool preferCopyRegion = isCopyRegionPreferred(blitProperties.copySize, rootDeviceEnvironment);
|
||||
|
||||
preferCopyRegion ? dispatchBlitCommandsForBufferRegion(blitProperties, linearStream, rootDeviceEnvironment)
|
||||
: dispatchBlitCommandsForBufferPerRow(blitProperties, linearStream, rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
|
||||
return blitProperties.srcGpuAddress + blitProperties.srcOffset.x +
|
||||
(blitProperties.srcOffset.y * blitProperties.srcRowPitch) +
|
||||
(blitProperties.srcSlicePitch * (slice + blitProperties.srcOffset.z));
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
|
||||
return blitProperties.dstGpuAddress + blitProperties.dstOffset.x +
|
||||
(blitProperties.dstOffset.y * blitProperties.dstRowPitch) +
|
||||
(blitProperties.dstSlicePitch * (slice + blitProperties.dstOffset.z));
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
const auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
|
||||
const auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
|
||||
|
||||
for (size_t slice = 0u; slice < blitProperties.copySize.z; ++slice) {
|
||||
auto srcAddress = calculateBlitCommandSourceBaseAddressCopyRegion(blitProperties, slice);
|
||||
auto dstAddress = calculateBlitCommandDestinationBaseAddressCopyRegion(blitProperties, slice);
|
||||
auto heightToCopy = blitProperties.copySize.y;
|
||||
|
||||
while (heightToCopy > 0) {
|
||||
auto height = static_cast<uint32_t>(std::min(heightToCopy, static_cast<size_t>(maxHeightToCopy)));
|
||||
auto widthToCopy = blitProperties.copySize.x;
|
||||
|
||||
while (widthToCopy > 0) {
|
||||
auto width = static_cast<uint32_t>(std::min(widthToCopy, static_cast<size_t>(maxWidthToCopy)));
|
||||
auto bltCmd = GfxFamily::cmdInitXyCopyBlt;
|
||||
|
||||
bltCmd.setSourceBaseAddress(srcAddress);
|
||||
bltCmd.setDestinationBaseAddress(dstAddress);
|
||||
bltCmd.setTransferWidth(width);
|
||||
bltCmd.setTransferHeight(height);
|
||||
bltCmd.setSourcePitch(static_cast<uint32_t>(blitProperties.srcRowPitch));
|
||||
bltCmd.setDestinationPitch(static_cast<uint32_t>(blitProperties.dstRowPitch));
|
||||
|
||||
appendBlitCommandsForBuffer(blitProperties, bltCmd, rootDeviceEnvironment);
|
||||
|
||||
auto cmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
|
||||
*cmd = bltCmd;
|
||||
dispatchPostBlitCommand(linearStream);
|
||||
|
||||
srcAddress += width;
|
||||
dstAddress += width;
|
||||
widthToCopy -= width;
|
||||
}
|
||||
|
||||
heightToCopy -= height;
|
||||
srcAddress += (blitProperties.srcRowPitch - blitProperties.copySize.x);
|
||||
srcAddress += (blitProperties.srcRowPitch * (height - 1));
|
||||
dstAddress += (blitProperties.dstRowPitch - blitProperties.copySize.x);
|
||||
dstAddress += (blitProperties.dstRowPitch * (height - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(const Vec3<size_t> ©Size, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
bool preferCopyRegion = getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment) < getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
|
||||
return preferCopyRegion;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyRegion(const Vec3<size_t> ©Size, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
|
||||
auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
|
||||
auto xBlits = static_cast<size_t>(std::ceil(copySize.x / static_cast<double>(maxWidthToCopy)));
|
||||
auto yBlits = static_cast<size_t>(std::ceil(copySize.y / static_cast<double>(maxHeightToCopy)));
|
||||
auto zBlits = static_cast<size_t>(copySize.z);
|
||||
auto nBlits = xBlits * yBlits * zBlits;
|
||||
|
||||
return nBlits;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(const Vec3<size_t> ©Size, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
|
||||
auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
|
||||
auto maxSizeRectangleToCopy = maxWidthToCopy * maxHeightToCopy;
|
||||
auto xBlits = static_cast<size_t>(std::ceil(copySize.x / static_cast<double>(maxSizeRectangleToCopy)));
|
||||
auto yBlits = copySize.y;
|
||||
auto zBlits = copySize.z;
|
||||
auto nBlits = xBlits * yBlits * zBlits;
|
||||
|
||||
return nBlits;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user