/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/command_encoder.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/timestamp_packet.h"

#include <algorithm>
#include <cmath>

namespace NEO {

// Returns the maximum width of a single blit. Precedence: the LimitBlitterMaxWidth
// debug flag (when it is not -1), then a positive value reported by
// getMaxBlitWidthOverride(), then BlitterConstants::maxBlitWidth.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::getMaxBlitWidth(const RootDeviceEnvironment &rootDeviceEnvironment) {
    if (DebugManager.flags.LimitBlitterMaxWidth.get() != -1) {
        return static_cast<uint64_t>(DebugManager.flags.LimitBlitterMaxWidth.get());
    }
    auto maxBlitWidthOverride = getMaxBlitWidthOverride(rootDeviceEnvironment);
    if (maxBlitWidthOverride > 0) {
        return maxBlitWidthOverride;
    }
    return BlitterConstants::maxBlitWidth;
}

// Returns the maximum height of a single blit. Precedence: the LimitBlitterMaxHeight
// debug flag (when it is not -1), then a positive value reported by
// getMaxBlitHeightOverride(), then BlitterConstants::maxBlitHeight.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::getMaxBlitHeight(const RootDeviceEnvironment &rootDeviceEnvironment) {
    if (DebugManager.flags.LimitBlitterMaxHeight.get() != -1) {
        return static_cast<uint64_t>(DebugManager.flags.LimitBlitterMaxHeight.get());
    }
    auto maxBlitHeightOverride = getMaxBlitHeightOverride(rootDeviceEnvironment);
    if (maxBlitHeightOverride > 0) {
        return maxBlitHeightOverride;
    }
    return BlitterConstants::maxBlitHeight;
}

// Emits the command that follows every blit, selected by the PostBlitCommand debug flag:
//   1       -> MI_FLUSH_DW
//   2       -> nothing
//   default -> MI_ARB_CHECK
// Must stay in sync with estimatePostBlitCommandSize().
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchPostBlitCommand(LinearStream &linearStream) {
    switch (DebugManager.flags.PostBlitCommand.get()) {
    case 1:
        EncodeMiFlushDW<GfxFamily>::programMiFlushDw(linearStream, 0, 0, false, false);
        break;
    case 2:
        break;
    default: {
        auto miArbCheckStream = linearStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>();
        *miArbCheckStream = GfxFamily::cmdInitArbCheck;
        break;
    }
    }
}

// Returns the number of bytes dispatchPostBlitCommand() writes for the current
// PostBlitCommand debug flag value.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize() {
    switch (DebugManager.flags.PostBlitCommand.get()) {
    case 1:
        return sizeof(typename GfxFamily::MI_FLUSH_DW);
    case 2:
        return 0;
    default:
        return sizeof(typename GfxFamily::MI_ARB_CHECK);
    }
}

// Estimates the command stream size needed for one buffer copy of copySize:
// dependency commands + (XY_COPY_BLT + post-blit command) per blit + optional
// timestamp commands. The blit count uses the same region/per-row choice as
// dispatchBlitCommands().
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const Vec3<size_t> &copySize, const CsrDependencies &csrDependencies,
                                                               bool updateTimestampPacket, bool profilingEnabled,
                                                               const RootDeviceEnvironment &rootDeviceEnvironment) {
    size_t timestampCmdSize = 0;
    if (updateTimestampPacket) {
        timestampCmdSize += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
        if (profilingEnabled) {
            timestampCmdSize += 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
        }
    }

    bool preferRegionCopy = isCopyRegionPreferred(copySize, rootDeviceEnvironment);
    auto nBlits = preferRegionCopy ? getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment)
                                   : getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);

    auto sizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());

    return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
           (sizePerBlit * nBlits) +
           timestampCmdSize;
}

// Estimates the command stream size for a whole container of blits, including the
// trailing synchronization, MI_FLUSH_DW, batch buffer start/end, optional debug
// pause commands and the global sequencer flush. The result is aligned up to a
// cache line.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer,
                                                               bool profilingEnabled, bool debugPauseEnabled,
                                                               bool blitterDirectSubmission,
                                                               const RootDeviceEnvironment &rootDeviceEnvironment) {
    size_t size = 0;
    for (auto &blitProperties : blitPropertiesContainer) {
        size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies,
                                                                        blitProperties.outputTimestampPacket != nullptr,
                                                                        profilingEnabled, rootDeviceEnvironment);
    }
    size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo());
    size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
    if (blitterDirectSubmission) {
        size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START);
    } else {
        size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
    }

    if (debugPauseEnabled) {
        size += BlitCommandsHelper<GfxFamily>::getSizeForDebugPauseCommands();
    }

    size += BlitCommandsHelper<GfxFamily>::getSizeForGlobalSequencerFlush();

    return alignUp(size, MemoryConstants::cacheLineSize);
}

// Destination address for the per-row path: base GPU address plus the 3D dstOffset
// (x in bytes, y in rows, z in slices), plus the current row/slice and the byte
// offset already copied within the row.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice) {
    return blitProperties.dstGpuAddress + blitProperties.dstOffset.x + offset +
           blitProperties.dstOffset.y * blitProperties.dstRowPitch +
           blitProperties.dstOffset.z * blitProperties.dstSlicePitch +
           row * blitProperties.dstRowPitch +
           slice * blitProperties.dstSlicePitch;
}

// Source-side counterpart of calculateBlitCommandDestinationBaseAddress().
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice) {
    return blitProperties.srcGpuAddress + blitProperties.srcOffset.x + offset +
           blitProperties.srcOffset.y * blitProperties.srcRowPitch +
           blitProperties.srcOffset.z * blitProperties.srcSlicePitch +
           row * blitProperties.srcRowPitch +
           slice * blitProperties.srcSlicePitch;
}

// Copies a buffer row by row. Each row of copySize.x bytes is treated as a linear
// range and split into blits of at most maxWidth x maxHeight bytes, each followed
// by the post-blit command.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
    // The limits do not change while this function runs, so query them once.
    const auto maxWidth = getMaxBlitWidth(rootDeviceEnvironment);
    const auto maxHeight = getMaxBlitHeight(rootDeviceEnvironment);

    PRINT_DEBUG_STRING(DebugManager.flags.PrintBlitDispatchDetails.get(), stdout,
                       "\nBlit dispatch with AuxTranslationDirection %u ", static_cast<uint32_t>(blitProperties.auxTranslationDirection));

    for (uint64_t slice = 0; slice < blitProperties.copySize.z; slice++) {
        for (uint64_t row = 0; row < blitProperties.copySize.y; row++) {
            uint64_t offset = 0;
            uint64_t sizeToBlit = blitProperties.copySize.x;
            while (sizeToBlit != 0) {
                uint64_t width = 1;
                uint64_t height = 1;
                if (sizeToBlit > maxWidth) {
                    // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
                    width = maxWidth;
                    height = std::min<uint64_t>((sizeToBlit / width), maxHeight);
                } else {
                    // dispatch 1D blt: (1 .. maxBlitWidth) x 1
                    width = sizeToBlit;
                    height = 1;
                }

                auto bltCmd = GfxFamily::cmdInitXyCopyBlt;
                bltCmd.setTransferWidth(static_cast<uint32_t>(width));
                bltCmd.setTransferHeight(static_cast<uint32_t>(height));
                // The range is linear, so consecutive blit rows are exactly `width` bytes apart.
                bltCmd.setDestinationPitch(static_cast<uint32_t>(width));
                bltCmd.setSourcePitch(static_cast<uint32_t>(width));

                auto dstAddr = calculateBlitCommandDestinationBaseAddress(blitProperties, offset, row, slice);
                auto srcAddr = calculateBlitCommandSourceBaseAddress(blitProperties, offset, row, slice);

                // Cast the arguments so they match the %u / %llx conversions in the format string.
                PRINT_DEBUG_STRING(DebugManager.flags.PrintBlitDispatchDetails.get(), stdout,
                                   "\nBlit command. width: %u, height: %u, srcAddr: %#llx, dstAddr: %#llx ",
                                   static_cast<uint32_t>(width), static_cast<uint32_t>(height),
                                   static_cast<unsigned long long>(srcAddr), static_cast<unsigned long long>(dstAddr));

                bltCmd.setDestinationBaseAddress(dstAddr);
                bltCmd.setSourceBaseAddress(srcAddr);

                appendBlitCommandsForBuffer(blitProperties, bltCmd, rootDeviceEnvironment);

                auto bltStream = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
                *bltStream = bltCmd;

                dispatchPostBlitCommand(linearStream);

                auto blitSize = width * height;
                sizeToBlit -= blitSize;
                offset += blitSize;
            }
        }
    }
}

// Fills dstAlloc with `pattern` using XY_COLOR_BLT commands. `size` is in bytes and
// is converted to a count of patternSize-byte elements (integer division, so any
// remainder smaller than patternSize is not filled). No post-blit command is
// emitted here.
template <typename GfxFamily>
template <size_t patternSize>
void BlitCommandsHelper<GfxFamily>::dispatchBlitMemoryFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) {
    using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT;
    auto blitCmd = GfxFamily::cmdInitXyColorBlt;

    blitCmd.setFillColor(pattern);
    blitCmd.setColorDepth(depth);

    // The limits do not change while this function runs, so query them once.
    const auto maxWidth = getMaxBlitWidth(rootDeviceEnvironment);
    const auto maxHeight = getMaxBlitHeight(rootDeviceEnvironment);

    uint64_t offset = 0;
    uint64_t sizeToFill = size / patternSize;
    while (sizeToFill != 0) {
        auto tmpCmd = blitCmd;
        tmpCmd.setDestinationBaseAddress(ptrOffset(dstAlloc->getGpuAddress(), static_cast<size_t>(offset)));
        uint64_t height = 0;
        uint64_t width = 0;
        if (sizeToFill <= maxWidth) {
            width = sizeToFill;
            height = 1;
        } else {
            width = maxWidth;
            height = std::min<uint64_t>((sizeToFill / width), maxHeight);
            if (height > 1) {
                appendTilingEnable(tmpCmd);
            }
        }
        tmpCmd.setTransferWidth(static_cast<uint32_t>(width));
        tmpCmd.setTransferHeight(static_cast<uint32_t>(height));
        // Width is counted in elements; pitch is programmed in bytes.
        tmpCmd.setDestinationPitch(static_cast<uint32_t>(width * patternSize));

        appendBlitCommandsForFillBuffer(dstAlloc, tmpCmd, rootDeviceEnvironment);

        auto cmd = linearStream.getSpaceForCmd<XY_COLOR_BLT>();
        *cmd = tmpCmd;

        auto blitSize = width * height;
        offset += (blitSize * patternSize);
        sizeToFill -= blitSize;
    }
}

// Dispatches one XY_COPY_BLT per slice for a region (image) copy. The region must
// fit in a single blit: copySize.x/y are checked against the BlitterConstants
// limits and the function aborts otherwise.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
    auto srcSlicePitch = static_cast<uint32_t>(blitProperties.srcSlicePitch);
    auto dstSlicePitch = static_cast<uint32_t>(blitProperties.dstSlicePitch);

    UNRECOVERABLE_IF(blitProperties.copySize.x > BlitterConstants::maxBlitWidth || blitProperties.copySize.y > BlitterConstants::maxBlitHeight);
    auto bltCmd = GfxFamily::cmdInitXyCopyBlt;

    bltCmd.setSourceBaseAddress(blitProperties.srcAllocation->getGpuAddress());
    bltCmd.setDestinationBaseAddress(blitProperties.dstAllocation->getGpuAddress());

    bltCmd.setDestinationX1CoordinateLeft(static_cast<uint32_t>(blitProperties.dstOffset.x));
    bltCmd.setDestinationY1CoordinateTop(static_cast<uint32_t>(blitProperties.dstOffset.y));
    // Width/height are programmed as destination offset + copy size (end coordinates).
    bltCmd.setTransferWidth(static_cast<uint32_t>(blitProperties.dstOffset.x + blitProperties.copySize.x));
    bltCmd.setTransferHeight(static_cast<uint32_t>(blitProperties.dstOffset.y + blitProperties.copySize.y));

    bltCmd.setSourceX1CoordinateLeft(static_cast<uint32_t>(blitProperties.srcOffset.x));
    bltCmd.setSourceY1CoordinateTop(static_cast<uint32_t>(blitProperties.srcOffset.y));

    appendBlitCommandsForBuffer(blitProperties, bltCmd, rootDeviceEnvironment);
    appendBlitCommandsForImages(blitProperties, bltCmd, rootDeviceEnvironment, srcSlicePitch, dstSlicePitch);
    appendColorDepth(blitProperties, bltCmd);
    appendSurfaceType(blitProperties, bltCmd);

    for (uint32_t i = 0; i < blitProperties.copySize.z; i++) {
        appendSliceOffsets(blitProperties, bltCmd, i, rootDeviceEnvironment, srcSlicePitch, dstSlicePitch);
        auto cmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
        *cmd = bltCmd;
        dispatchPostBlitCommand(linearStream);
    }
}

// Programs an MI_FLUSH_DW carrying confirmationTrigger for debugPauseStateGPUAddress,
// followed by an MI_SEMAPHORE_WAIT on the same address that compares (SAD_EQUAL_SDD)
// against waitCondition.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(LinearStream &commandStream, uint64_t debugPauseStateGPUAddress, DebugPauseState confirmationTrigger, DebugPauseState waitCondition) {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;

    EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, debugPauseStateGPUAddress,
                                                 static_cast<uint32_t>(confirmationTrigger), false, true);

    EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(commandStream, debugPauseStateGPUAddress,
                                                          static_cast<uint32_t>(waitCondition),
                                                          MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD);
}

// Size of two flush + semaphore-wait pairs.
// NOTE(review): presumably dispatchDebugPauseCommands() is emitted twice per
// submission - confirm against the caller.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::getSizeForDebugPauseCommands() {
    return (EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait()) * 2;
}

// Returns true when a copy of copySize (x in bytes, scaled down by bytesPerPixel)
// fits within the BlitterConstants limits of a single blit.
// bytesPerPixel must be non-zero.
template <typename GfxFamily>
bool BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel) {
    return (copySize.x / bytesPerPixel <= BlitterConstants::maxBlitWidth && copySize.y <= BlitterConstants::maxBlitHeight);
}

// Returns the largest pixel size, starting at BlitterConstants::maxBytesPerPixel
// and halving down to 1, that evenly divides the copy size, both surface sizes and
// both origins.
template <typename GfxFamily>
uint32_t BlitCommandsHelper<GfxFamily>::getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, uint32_t srcSize, uint32_t dstSize) {
    uint32_t bytesPerPixel = BlitterConstants::maxBytesPerPixel;
    while (bytesPerPixel > 1) {
        const bool sizesAligned = copySize % bytesPerPixel == 0 &&
                                  srcSize % bytesPerPixel == 0 &&
                                  dstSize % bytesPerPixel == 0;
        // A zero origin is trivially aligned (0 % n == 0), so no special case is needed.
        const bool originsAligned = srcOrigin % bytesPerPixel == 0 &&
                                    dstOrigin % bytesPerPixel == 0;
        if (sizesAligned && originsAligned) {
            break;
        }
        bytesPerPixel >>= 1;
    }
    return bytesPerPixel;
}

// Entry point for dispatching a blit. Host-pointer <-> image copies use the region
// path; buffer copies use whichever of the region / per-row strategies needs fewer
// blits.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
    if (blitProperties.blitDirection == BlitterConstants::BlitDirection::HostPtrToImage ||
        blitProperties.blitDirection == BlitterConstants::BlitDirection::ImageToHostPtr) {
        dispatchBlitCommandsRegion(blitProperties, linearStream, rootDeviceEnvironment);
        return;
    }

    if (isCopyRegionPreferred(blitProperties.copySize, rootDeviceEnvironment)) {
        dispatchBlitCommandsForBufferRegion(blitProperties, linearStream, rootDeviceEnvironment);
    } else {
        dispatchBlitCommandsForBufferPerRow(blitProperties, linearStream, rootDeviceEnvironment);
    }
}

// Source address of the first byte of `slice` for the buffer-region path.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
    return blitProperties.srcGpuAddress + blitProperties.srcOffset.x +
           (blitProperties.srcOffset.y * blitProperties.srcRowPitch) +
           (blitProperties.srcSlicePitch * (slice + blitProperties.srcOffset.z));
}

// Destination address of the first byte of `slice` for the buffer-region path.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
    return blitProperties.dstGpuAddress + blitProperties.dstOffset.x +
           (blitProperties.dstOffset.y * blitProperties.dstRowPitch) +
           (blitProperties.dstSlicePitch * (slice + blitProperties.dstOffset.z));
}

// Copies a buffer as 2D regions. Each slice is tiled into blits of at most
// maxWidthToCopy x maxHeightToCopy that use the real source/destination row
// pitches, each followed by the post-blit command.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
    const auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
    const auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);

    for (size_t slice = 0u; slice < blitProperties.copySize.z; ++slice) {
        auto srcAddress = calculateBlitCommandSourceBaseAddressCopyRegion(blitProperties, slice);
        auto dstAddress = calculateBlitCommandDestinationBaseAddressCopyRegion(blitProperties, slice);
        auto heightToCopy = blitProperties.copySize.y;

        while (heightToCopy > 0) {
            auto height = static_cast<uint32_t>(std::min(heightToCopy, static_cast<size_t>(maxHeightToCopy)));
            auto widthToCopy = blitProperties.copySize.x;

            while (widthToCopy > 0) {
                auto width = static_cast<uint32_t>(std::min(widthToCopy, static_cast<size_t>(maxWidthToCopy)));
                auto bltCmd = GfxFamily::cmdInitXyCopyBlt;

                bltCmd.setSourceBaseAddress(srcAddress);
                bltCmd.setDestinationBaseAddress(dstAddress);
                bltCmd.setTransferWidth(width);
                bltCmd.setTransferHeight(height);
                bltCmd.setSourcePitch(static_cast<uint32_t>(blitProperties.srcRowPitch));
                bltCmd.setDestinationPitch(static_cast<uint32_t>(blitProperties.dstRowPitch));

                appendBlitCommandsForBuffer(blitProperties, bltCmd, rootDeviceEnvironment);

                auto cmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
                *cmd = bltCmd;
                dispatchPostBlitCommand(linearStream);

                srcAddress += width;
                dstAddress += width;
                widthToCopy -= width;
            }

            heightToCopy -= height;
            // The inner loop advanced both addresses by copySize.x in total. Finish the
            // current row, then skip the remaining (height - 1) rows of this band, for a
            // net advance of rowPitch * height.
            srcAddress += (blitProperties.srcRowPitch - blitProperties.copySize.x);
            srcAddress += (blitProperties.srcRowPitch * (height - 1));
            dstAddress += (blitProperties.dstRowPitch - blitProperties.copySize.x);
            dstAddress += (blitProperties.dstRowPitch * (height - 1));
        }
    }
}

// Returns true when the region strategy needs strictly fewer blits than the
// per-row strategy for copySize.
template <typename GfxFamily>
bool BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
    return getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment) <
           getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
}

// Number of blits dispatchBlitCommandsForBufferRegion() emits for copySize:
// ceil(x / maxWidth) * ceil(y / maxHeight) * z.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyRegion(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
    auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
    auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
    auto xBlits = static_cast<size_t>(std::ceil(copySize.x / static_cast<double>(maxWidthToCopy)));
    auto yBlits = static_cast<size_t>(std::ceil(copySize.y / static_cast<double>(maxHeightToCopy)));
    auto zBlits = static_cast<size_t>(copySize.z);
    auto nBlits = xBlits * yBlits * zBlits;

    return nBlits;
}

// Number of blits dispatchBlitCommandsForBufferPerRow() emits for copySize. It
// replays that function's splitting of one row of copySize.x bytes and multiplies
// the result by the row and slice counts.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
    // The limits do not change while this function runs, so query them once.
    const auto maxWidth = getMaxBlitWidth(rootDeviceEnvironment);
    const auto maxHeight = getMaxBlitHeight(rootDeviceEnvironment);

    size_t xBlits = 0u;
    uint64_t sizeToBlit = copySize.x;
    while (sizeToBlit != 0) {
        uint64_t width = 1;
        uint64_t height = 1;
        if (sizeToBlit > maxWidth) {
            // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
            width = maxWidth;
            height = std::min<uint64_t>((sizeToBlit / width), maxHeight);
        } else {
            // dispatch 1D blt: (1 .. maxBlitWidth) x 1
            width = sizeToBlit;
            height = 1;
        }
        sizeToBlit -= (width * height);
        xBlits++;
    }

    auto yBlits = copySize.y;
    auto zBlits = copySize.z;
    auto nBlits = xBlits * yBlits * zBlits;

    return nBlits;
}

} // namespace NEO