Code cleanup - avoid copy 5/n

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2021-09-07 21:21:19 +00:00 committed by Compute-Runtime-Automation
parent ae88789bce
commit a924b6a304
22 changed files with 81 additions and 80 deletions

View File

@ -179,10 +179,10 @@ struct CommandListCoreFamily : CommandListImp {
size_t srcOffset,
size_t dstOffset,
ze_copy_region_t srcRegion,
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
@ -208,11 +208,11 @@ struct CommandListCoreFamily : CommandListImp {
MOCKABLE_VIRTUAL ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
NEO::GraphicsAllocation *dst,
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent);
size_t bytesPerPixel, const Vec3<size_t> &copySize,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent);
MOCKABLE_VIRTUAL ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions,

View File

@ -883,10 +883,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
size_t srcOffset,
size_t dstOffset,
ze_copy_region_t srcRegion,
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@ -896,12 +896,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
bool copyOneCommand = NEO::BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(copySize, bytesPerPixel);
Vec3<size_t> srcPtrOffset = {(copyOneCommand ? (srcRegion.originX / bytesPerPixel) : srcRegion.originX), srcRegion.originY, srcRegion.originZ};
Vec3<size_t> dstPtrOffset = {(copyOneCommand ? (dstRegion.originX / bytesPerPixel) : dstRegion.originX), dstRegion.originY, dstRegion.originZ};
copySize.x = copyOneCommand ? copySize.x / bytesPerPixel : copySize.x;
auto copySizeModified = copySize;
copySizeModified.x = copyOneCommand ? copySizeModified.x / bytesPerPixel : copySizeModified.x;
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc, srcAlloc,
dstPtrOffset, srcPtrOffset, copySize, srcRowPitch, srcSlicePitch,
dstPtrOffset, srcPtrOffset, copySizeModified, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, clearColorAllocation);
commandContainer.addToResidencyContainer(dstAlloc);
commandContainer.addToResidencyContainer(srcAlloc);
@ -928,11 +929,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::GraphicsAllocation *src,
NEO::GraphicsAllocation *dst,
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent) {
size_t bytesPerPixel, const Vec3<size_t> &copySize,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();

View File

@ -57,10 +57,10 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
size_t srcOffset,
size_t dstOffset,
ze_copy_region_t srcRegion,
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
appendMemoryCopyBlitRegionCalledTimes++;
return ZE_RESULT_SUCCESS;
@ -95,11 +95,11 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
}
ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
NEO::GraphicsAllocation *dst,
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent) override {
size_t bytesPerPixel, const Vec3<size_t> &copySize,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent) override {
appendCopyImageBlitCalledTimes++;
appendImageRegionCopySize = copySize;
appendImageRegionSrcOrigin = srcOffsets;
@ -901,10 +901,10 @@ class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily
size_t srcOffset,
size_t dstOffset,
ze_copy_region_t srcRegion,
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
this->srcSize = srcSize;
this->dstSize = dstSize;

View File

@ -56,10 +56,10 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
size_t srcOffset,
size_t dstOffset,
ze_copy_region_t srcRegion,
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
appendMemoryCopyBlitRegionCalledTimes++;
return ZE_RESULT_SUCCESS;
@ -94,11 +94,11 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
}
ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
NEO::GraphicsAllocation *dst,
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent) override {
size_t bytesPerPixel, const Vec3<size_t> &copySize,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent) override {
appendCopyImageBlitCalledTimes++;
appendImageRegionCopySize = copySize;
appendImageRegionSrcOrigin = srcOffsets;
@ -156,10 +156,10 @@ class MockAppendMemoryCopy : public MockCommandListHw<gfxCoreFamily> {
size_t srcOffset,
size_t dstOffset,
ze_copy_region_t srcRegion,
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
srcBlitCopyRegionOffset = srcOffset;
dstBlitCopyRegionOffset = dstOffset;

View File

@ -174,7 +174,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
return *(RetType *)(vmeKernel->getCrossThreadData() + element.offset);
}
cl_int validateImages(Vec3<size_t> inputRegion, Vec3<size_t> offset) const {
cl_int validateImages(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset) const {
Image *srcImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(srcImgArgNum));
Image *refImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(refImgArgNum));
@ -208,7 +208,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
return CL_SUCCESS;
}
virtual cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const {
virtual cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const {
{
cl_int imageValidationStatus = validateImages(inputRegion, offset);
if (imageValidationStatus != CL_SUCCESS) {
@ -383,7 +383,7 @@ class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuild
return predictorsBufferExpSize;
}
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const override {
cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const override {
cl_int basicVmeValidationStatus = VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, blkNum, blkMul);
if (basicVmeValidationStatus != CL_SUCCESS) {
return basicVmeValidationStatus;
@ -452,7 +452,7 @@ class BuiltInOp<EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel> : public
"block_advanced_motion_estimate_check_intel") {
}
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset,
cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset,
size_t gwWidthInBlk, size_t gwHeightInBlk) const override {
cl_int basicAdvVmeValidationStatus = AdvancedVmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, gwWidthInBlk, gwHeightInBlk);
if (basicAdvVmeValidationStatus != CL_SUCCESS) {

View File

@ -68,22 +68,22 @@ Vec3<size_t> generateWorkgroupSize(
const DispatchInfo &dispatchInfo);
Vec3<size_t> computeWorkgroupsNumber(
const Vec3<size_t> gws,
const Vec3<size_t> lws);
const Vec3<size_t> &gws,
const Vec3<size_t> &lws);
Vec3<size_t> generateWorkgroupsNumber(
const Vec3<size_t> gws,
const Vec3<size_t> lws);
const Vec3<size_t> &gws,
const Vec3<size_t> &lws);
Vec3<size_t> generateWorkgroupsNumber(
const DispatchInfo &dispatchInfo);
inline uint32_t calculateDispatchDim(Vec3<size_t> dispatchSize, Vec3<size_t> dispatchOffset) {
// Returns the larger of the simplified dimensions reported by the dispatch size
// and the dispatch offset, clamped so the result is never below 1.
inline uint32_t calculateDispatchDim(const Vec3<size_t> &dispatchSize, const Vec3<size_t> &dispatchOffset) {
    const auto sizeDim = dispatchSize.getSimplifiedDim();
    const auto offsetDim = dispatchOffset.getSimplifiedDim();
    const auto widestDim = std::max(sizeDim, offsetDim);
    return std::max(1U, widestDim);
}
Vec3<size_t> canonizeWorkgroup(
Vec3<size_t> workgroup);
const Vec3<size_t> &workgroup);
void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo);

View File

@ -94,8 +94,8 @@ class HardwareInterface {
uint32_t &interfaceDescriptorIndex,
const DispatchInfo &dispatchInfo,
size_t offsetInterfaceDescriptorTable,
Vec3<size_t> &numberOfWorkgroups,
Vec3<size_t> &startOfWorkgroups);
const Vec3<size_t> &numberOfWorkgroups,
const Vec3<size_t> &startOfWorkgroups);
static WALKER_TYPE<GfxFamily> *allocateWalkerSpace(LinearStream &commandStream,
const Kernel &kernel);

View File

@ -193,18 +193,18 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
}
//Get dispatch geometry
uint32_t dim = dispatchInfo.getDim();
Vec3<size_t> gws = dispatchInfo.getGWS();
Vec3<size_t> offset = dispatchInfo.getOffset();
Vec3<size_t> startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
auto dim = dispatchInfo.getDim();
const auto &gws = dispatchInfo.getGWS();
const auto &offset = dispatchInfo.getOffset();
const auto &startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
// Compute local workgroup sizes
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
Vec3<size_t> elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
const auto &lws = dispatchInfo.getLocalWorkgroupSize();
const auto &elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
// Compute number of work groups
Vec3<size_t> totalNumberOfWorkgroups = dispatchInfo.getTotalNumberOfWorkgroups();
Vec3<size_t> numberOfWorkgroups = dispatchInfo.getNumberOfWorkgroups();
const auto &totalNumberOfWorkgroups = dispatchInfo.getTotalNumberOfWorkgroups();
const auto &numberOfWorkgroups = dispatchInfo.getNumberOfWorkgroups();
UNRECOVERABLE_IF(totalNumberOfWorkgroups.x == 0);
UNRECOVERABLE_IF(numberOfWorkgroups.x == 0);

View File

@ -67,8 +67,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
uint32_t &interfaceDescriptorIndex,
const DispatchInfo &dispatchInfo,
size_t offsetInterfaceDescriptorTable,
Vec3<size_t> &numberOfWorkgroups,
Vec3<size_t> &startOfWorkgroups) {
const Vec3<size_t> &numberOfWorkgroups,
const Vec3<size_t> &startOfWorkgroups) {
auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel);
WALKER_TYPE<GfxFamily> walkerCmd = GfxFamily::cmdInitGpgpuWalker;

View File

@ -57,8 +57,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
uint32_t &interfaceDescriptorIndex,
const DispatchInfo &dispatchInfo,
size_t offsetInterfaceDescriptorTable,
Vec3<size_t> &numberOfWorkgroups,
Vec3<size_t> &startOfWorkgroups) {
const Vec3<size_t> &numberOfWorkgroups,
const Vec3<size_t> &startOfWorkgroups) {
using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;

View File

@ -452,13 +452,13 @@ Vec3<size_t> generateWorkgroupSize(const DispatchInfo &dispatchInfo) {
return (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) ? computeWorkgroupSize(dispatchInfo) : dispatchInfo.getEnqueuedWorkgroupSize();
}
Vec3<size_t> computeWorkgroupsNumber(const Vec3<size_t> gws, const Vec3<size_t> lws) {
// Computes, per axis, how many work-groups of size lws are needed to cover gws.
// The division is rounded up: a partial trailing group counts as one group.
// No check is made here for a zero lws component.
Vec3<size_t> computeWorkgroupsNumber(const Vec3<size_t> &gws, const Vec3<size_t> &lws) {
    const auto groupsAlongAxis = [](size_t globalSize, size_t localSize) -> size_t {
        const size_t fullGroups = globalSize / localSize;
        const bool hasRemainder = (globalSize % localSize) != 0;
        return hasRemainder ? fullGroups + 1 : fullGroups;
    };

    const size_t groupsX = groupsAlongAxis(gws.x, lws.x);
    const size_t groupsY = groupsAlongAxis(gws.y, lws.y);
    const size_t groupsZ = groupsAlongAxis(gws.z, lws.z);
    return Vec3<size_t>(groupsX, groupsY, groupsZ);
}
Vec3<size_t> generateWorkgroupsNumber(const Vec3<size_t> gws, const Vec3<size_t> lws) {
// Returns the work-group count for gws/lws, or a zero vector when lws.x is 0
// (in which case computeWorkgroupsNumber is not called at all).
Vec3<size_t> generateWorkgroupsNumber(const Vec3<size_t> &gws, const Vec3<size_t> &lws) {
    if (lws.x == 0) {
        return Vec3<size_t>(0, 0, 0);
    }
    return computeWorkgroupsNumber(gws, lws);
}
@ -466,7 +466,7 @@ Vec3<size_t> generateWorkgroupsNumber(const DispatchInfo &dispatchInfo) {
return generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getLocalWorkgroupSize());
}
Vec3<size_t> canonizeWorkgroup(Vec3<size_t> workgroup) {
// Normalizes a work-group size: when x is 0 the result is a zero vector;
// otherwise x is kept and y and z are each raised to at least 1.
Vec3<size_t> canonizeWorkgroup(const Vec3<size_t> &workgroup) {
    if (workgroup.x == 0) {
        return Vec3<size_t>(0, 0, 0);
    }

    const size_t minimalExtent = static_cast<size_t>(1);
    const size_t canonicalY = std::max(workgroup.y, minimalExtent);
    const size_t canonicalZ = std::max(workgroup.z, minimalExtent);
    return Vec3<size_t>({workgroup.x, canonicalY, canonicalZ});
}

View File

@ -32,9 +32,9 @@ class DispatchInfo {
using EstimateCommandsMethodT = size_t(size_t, const HardwareInfo &, bool);
DispatchInfo() = default;
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
// Stores the device, kernel, dimension count and the gws / elws / offset vectors
// in the members of the same name. The vectors are taken by const reference and
// copied into the members; no other member is set by this constructor.
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim,
             const Vec3<size_t> &gws,
             const Vec3<size_t> &elws,
             const Vec3<size_t> &offset)
    : pClDevice(device),
      kernel(kernel),
      dim(dim),
      gws(gws),
      elws(elws),
      offset(offset) {
}
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
// Extended constructor: in addition to device, kernel, dim, gws, elws and offset,
// it copies agws, lws, twgs, nwgs and swgs into the members of the same name.
// All vectors are taken by const reference and copied into the members.
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim,
             const Vec3<size_t> &gws,
             const Vec3<size_t> &elws,
             const Vec3<size_t> &offset,
             const Vec3<size_t> &agws,
             const Vec3<size_t> &lws,
             const Vec3<size_t> &twgs,
             const Vec3<size_t> &nwgs,
             const Vec3<size_t> &swgs)
    : pClDevice(device),
      kernel(kernel),
      dim(dim),
      gws(gws),
      elws(elws),
      offset(offset),
      agws(agws),
      lws(lws),
      twgs(twgs),
      nwgs(nwgs),
      swgs(swgs) {
}
ClDevice &getClDevice() const { return *pClDevice; }

View File

@ -1828,7 +1828,7 @@ TEST_F(VmeBuiltInTests, WhenValidatingDispatchThenCorrectReturns) {
struct MockVmeBuilder : BuiltInOp<EBuiltInOps::VmeBlockMotionEstimateIntel> {
using BuiltInOp<EBuiltInOps::VmeBlockMotionEstimateIntel>::BuiltInOp;
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const override {
cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const override {
receivedInputRegion = inputRegion;
receivedOffset = offset;
receivedBlkNum = blkNum;

View File

@ -53,7 +53,7 @@ struct EncodeDispatchKernel {
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart, Vec3<size_t> groupCount,
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount,
bool isInternal, bool isCooperative);
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,

View File

@ -320,8 +320,8 @@ template <typename Family>
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
template <typename Family>
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart,
Vec3<size_t> groupCount, bool isInternal,
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
const Vec3<size_t> &groupCount, bool isInternal,
bool isCooperative) {
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

View File

@ -422,8 +422,8 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
}
template <typename Family>
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart,
Vec3<size_t> groupCount, bool isInternal,
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
const Vec3<size_t> &groupCount, bool isInternal,
bool isCooperative) {
size_t totalSize = sizeof(WALKER_TYPE);
totalSize += PreemptionHelper::getPreemptionWaCsSize<Family>(*device);

View File

@ -30,8 +30,8 @@ struct ImplicitScalingDispatch {
static size_t getSize(bool nativeCrossTileAtomicSync,
bool preferStaticPartitioning,
const DeviceBitfield &devices,
Vec3<size_t> groupStart,
Vec3<size_t> groupCount);
const Vec3<size_t> &groupStart,
const Vec3<size_t> &groupCount);
static void dispatchCommands(LinearStream &commandStream,
WALKER_TYPE &walkerCmd,
const DeviceBitfield &devices,

View File

@ -15,8 +15,8 @@ template <typename GfxFamily>
size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSync,
bool preferStaticPartitioning,
const DeviceBitfield &devices,
Vec3<size_t> groupStart,
Vec3<size_t> groupCount) {
const Vec3<size_t> &groupStart,
const Vec3<size_t> &groupCount) {
typename GfxFamily::COMPUTE_WALKER::PARTITION_TYPE partitionType{};
bool staticPartitioning = false;
const uint32_t tileCount = static_cast<uint32_t>(devices.count());

View File

@ -92,8 +92,8 @@ bool inline isCrossTileAtomicRequired() {
template <typename GfxFamily>
uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitionCount,
bool preferStaticPartitioning,
Vec3<size_t> groupStart,
Vec3<size_t> groupCount,
const Vec3<size_t> &groupStart,
const Vec3<size_t> &groupCount,
std::optional<typename COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE> requestedPartitionType,
typename COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE *outSelectedPartitionType,
bool *outSelectStaticPartitioning) {

View File

@ -25,8 +25,8 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
GraphicsAllocation *memObjAllocation,
GraphicsAllocation *preallocatedHostAllocation,
const void *hostPtr, uint64_t memObjGpuVa,
uint64_t hostAllocGpuVa, Vec3<size_t> hostPtrOffset,
Vec3<size_t> copyOffset, Vec3<size_t> copySize,
uint64_t hostAllocGpuVa, const Vec3<size_t> &hostPtrOffset,
const Vec3<size_t> &copyOffset, Vec3<size_t> copySize,
size_t hostRowPitch, size_t hostSlicePitch,
size_t gpuRowPitch, size_t gpuSlicePitch) {
GraphicsAllocation *hostAllocation = nullptr;
@ -94,7 +94,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
}
BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
const Vec3<size_t> &dstOffset, const Vec3<size_t> &srcOffset, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation) {
copySize.y = copySize.y ? copySize.y : 1;
@ -166,13 +166,13 @@ void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer
}
BlitOperationResult BlitHelper::blitMemoryToAllocation(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
Vec3<size_t> size) {
const Vec3<size_t> &size) {
auto memoryBanks = memory->storageInfo.getMemoryBanks();
return blitMemoryToAllocationBanks(device, memory, offset, hostPtr, size, memoryBanks);
}
BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
Vec3<size_t> size, DeviceBitfield memoryBanks) {
const Vec3<size_t> &size, DeviceBitfield memoryBanks) {
const auto &hwInfo = device.getHardwareInfo();
auto isBlitterRequired = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isBlitCopyRequiredForLocalMemory(hwInfo, *memory);
if (!hwInfo.capabilityTable.blitterOperationsSupported && !isBlitterRequired) {

View File

@ -44,13 +44,13 @@ struct BlitProperties {
GraphicsAllocation *memObjAllocation,
GraphicsAllocation *preallocatedHostAllocation,
const void *hostPtr, uint64_t memObjGpuVa,
uint64_t hostAllocGpuVa, Vec3<size_t> hostPtrOffset,
Vec3<size_t> copyOffset, Vec3<size_t> copySize,
uint64_t hostAllocGpuVa, const Vec3<size_t> &hostPtrOffset,
const Vec3<size_t> &copyOffset, Vec3<size_t> copySize,
size_t hostRowPitch, size_t hostSlicePitch,
size_t gpuRowPitch, size_t gpuSlicePitch);
static BlitProperties constructPropertiesForCopy(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
const Vec3<size_t> &dstOffset, const Vec3<size_t> &srcOffset, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation);
@ -96,15 +96,15 @@ using BlitMemoryToAllocationFunc = std::function<BlitOperationResult(const Devic
GraphicsAllocation *memory,
size_t offset,
const void *hostPtr,
Vec3<size_t> size)>;
const Vec3<size_t> &size)>;
extern BlitMemoryToAllocationFunc blitMemoryToAllocation;
} // namespace BlitHelperFunctions
struct BlitHelper {
static BlitOperationResult blitMemoryToAllocation(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
Vec3<size_t> size);
const Vec3<size_t> &size);
static BlitOperationResult blitMemoryToAllocationBanks(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
Vec3<size_t> size, DeviceBitfield memoryBanks);
const Vec3<size_t> &size, DeviceBitfield memoryBanks);
};
template <typename GfxFamily>
@ -148,7 +148,7 @@ struct BlitCommandsHelper {
static void getBlitAllocationProperties(const GraphicsAllocation &allocation, uint32_t &pitch, uint32_t &qPitch, GMM_TILE_TYPE &tileType, uint32_t &mipTailLod, uint32_t &compressionDetails, const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchDebugPauseCommands(LinearStream &commandStream, uint64_t debugPauseStateGPUAddress, DebugPauseState confirmationTrigger, DebugPauseState waitCondition);
static size_t getSizeForDebugPauseCommands();
static bool useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel);
static bool useOneBlitCopyCommand(const Vec3<size_t> &copySize, uint32_t bytesPerPixel);
static uint32_t getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, size_t srcSize, size_t dstSize);
static bool isCopyRegionPreferred(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment);
static void programGlobalSequencerFlush(LinearStream &commandStream);

View File

@ -316,7 +316,7 @@ size_t BlitCommandsHelper<GfxFamily>::getSizeForDebugPauseCommands() {
}
template <typename GfxFamily>
bool BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel) {
// Reports whether the copy fits in a single blit command: the width expressed in
// pixels (copySize.x / bytesPerPixel) must not exceed BlitterConstants::maxBlitWidth
// and copySize.y must not exceed BlitterConstants::maxBlitHeight.
// bytesPerPixel is used as a divisor and is not checked for zero here.
bool BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(const Vec3<size_t> &copySize, uint32_t bytesPerPixel) {
    const auto widthInPixels = copySize.x / bytesPerPixel;
    const bool widthFits = widthInPixels <= BlitterConstants::maxBlitWidth;
    const bool heightFits = copySize.y <= BlitterConstants::maxBlitHeight;
    return widthFits && heightFits;
}