Code cleanup - avoid copy 5/n
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
ae88789bce
commit
a924b6a304
|
@ -179,10 +179,10 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
|
@ -208,11 +208,11 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
|
||||
NEO::GraphicsAllocation *dst,
|
||||
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
|
||||
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent);
|
||||
size_t bytesPerPixel, const Vec3<size_t> &copySize,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
|
|
|
@ -883,10 +883,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
|||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
|
@ -896,12 +896,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
|||
bool copyOneCommand = NEO::BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(copySize, bytesPerPixel);
|
||||
Vec3<size_t> srcPtrOffset = {(copyOneCommand ? (srcRegion.originX / bytesPerPixel) : srcRegion.originX), srcRegion.originY, srcRegion.originZ};
|
||||
Vec3<size_t> dstPtrOffset = {(copyOneCommand ? (dstRegion.originX / bytesPerPixel) : dstRegion.originX), dstRegion.originY, dstRegion.originZ};
|
||||
copySize.x = copyOneCommand ? copySize.x / bytesPerPixel : copySize.x;
|
||||
auto copySizeModified = copySize;
|
||||
copySizeModified.x = copyOneCommand ? copySizeModified.x / bytesPerPixel : copySizeModified.x;
|
||||
|
||||
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
|
||||
|
||||
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc, srcAlloc,
|
||||
dstPtrOffset, srcPtrOffset, copySize, srcRowPitch, srcSlicePitch,
|
||||
dstPtrOffset, srcPtrOffset, copySizeModified, srcRowPitch, srcSlicePitch,
|
||||
dstRowPitch, dstSlicePitch, clearColorAllocation);
|
||||
commandContainer.addToResidencyContainer(dstAlloc);
|
||||
commandContainer.addToResidencyContainer(srcAlloc);
|
||||
|
@ -928,11 +929,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
|||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::GraphicsAllocation *src,
|
||||
NEO::GraphicsAllocation *dst,
|
||||
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
|
||||
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent) {
|
||||
size_t bytesPerPixel, const Vec3<size_t> &copySize,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
|
||||
|
|
|
@ -57,10 +57,10 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
|||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
appendMemoryCopyBlitRegionCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
@ -95,11 +95,11 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
|||
}
|
||||
ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
|
||||
NEO::GraphicsAllocation *dst,
|
||||
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
|
||||
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent) override {
|
||||
size_t bytesPerPixel, const Vec3<size_t> &copySize,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent) override {
|
||||
appendCopyImageBlitCalledTimes++;
|
||||
appendImageRegionCopySize = copySize;
|
||||
appendImageRegionSrcOrigin = srcOffsets;
|
||||
|
@ -901,10 +901,10 @@ class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily
|
|||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
this->srcSize = srcSize;
|
||||
this->dstSize = dstSize;
|
||||
|
|
|
@ -56,10 +56,10 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
|||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
appendMemoryCopyBlitRegionCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
@ -94,11 +94,11 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
|||
}
|
||||
ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
|
||||
NEO::GraphicsAllocation *dst,
|
||||
Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
|
||||
const Vec3<size_t> &srcOffsets, const Vec3<size_t> &dstOffsets,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent) override {
|
||||
size_t bytesPerPixel, const Vec3<size_t> &copySize,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent) override {
|
||||
appendCopyImageBlitCalledTimes++;
|
||||
appendImageRegionCopySize = copySize;
|
||||
appendImageRegionSrcOrigin = srcOffsets;
|
||||
|
@ -156,10 +156,10 @@ class MockAppendMemoryCopy : public MockCommandListHw<gfxCoreFamily> {
|
|||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
srcBlitCopyRegionOffset = srcOffset;
|
||||
dstBlitCopyRegionOffset = dstOffset;
|
||||
|
|
|
@ -174,7 +174,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
|||
return *(RetType *)(vmeKernel->getCrossThreadData() + element.offset);
|
||||
}
|
||||
|
||||
cl_int validateImages(Vec3<size_t> inputRegion, Vec3<size_t> offset) const {
|
||||
cl_int validateImages(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset) const {
|
||||
Image *srcImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(srcImgArgNum));
|
||||
Image *refImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(refImgArgNum));
|
||||
|
||||
|
@ -208,7 +208,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
|||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const {
|
||||
virtual cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const {
|
||||
{
|
||||
cl_int imageValidationStatus = validateImages(inputRegion, offset);
|
||||
if (imageValidationStatus != CL_SUCCESS) {
|
||||
|
@ -383,7 +383,7 @@ class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuild
|
|||
return predictorsBufferExpSize;
|
||||
}
|
||||
|
||||
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const override {
|
||||
cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const override {
|
||||
cl_int basicVmeValidationStatus = VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, blkNum, blkMul);
|
||||
if (basicVmeValidationStatus != CL_SUCCESS) {
|
||||
return basicVmeValidationStatus;
|
||||
|
@ -452,7 +452,7 @@ class BuiltInOp<EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel> : public
|
|||
"block_advanced_motion_estimate_check_intel") {
|
||||
}
|
||||
|
||||
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset,
|
||||
cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset,
|
||||
size_t gwWidthInBlk, size_t gwHeightInBlk) const override {
|
||||
cl_int basicAdvVmeValidationStatus = AdvancedVmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, gwWidthInBlk, gwHeightInBlk);
|
||||
if (basicAdvVmeValidationStatus != CL_SUCCESS) {
|
||||
|
|
|
@ -68,22 +68,22 @@ Vec3<size_t> generateWorkgroupSize(
|
|||
const DispatchInfo &dispatchInfo);
|
||||
|
||||
Vec3<size_t> computeWorkgroupsNumber(
|
||||
const Vec3<size_t> gws,
|
||||
const Vec3<size_t> lws);
|
||||
const Vec3<size_t> &gws,
|
||||
const Vec3<size_t> &lws);
|
||||
|
||||
Vec3<size_t> generateWorkgroupsNumber(
|
||||
const Vec3<size_t> gws,
|
||||
const Vec3<size_t> lws);
|
||||
const Vec3<size_t> &gws,
|
||||
const Vec3<size_t> &lws);
|
||||
|
||||
Vec3<size_t> generateWorkgroupsNumber(
|
||||
const DispatchInfo &dispatchInfo);
|
||||
|
||||
inline uint32_t calculateDispatchDim(Vec3<size_t> dispatchSize, Vec3<size_t> dispatchOffset) {
|
||||
inline uint32_t calculateDispatchDim(const Vec3<size_t> &dispatchSize, const Vec3<size_t> &dispatchOffset) {
|
||||
return std::max(1U, std::max(dispatchSize.getSimplifiedDim(), dispatchOffset.getSimplifiedDim()));
|
||||
}
|
||||
|
||||
Vec3<size_t> canonizeWorkgroup(
|
||||
Vec3<size_t> workgroup);
|
||||
const Vec3<size_t> &workgroup);
|
||||
|
||||
void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo);
|
||||
|
||||
|
|
|
@ -94,8 +94,8 @@ class HardwareInterface {
|
|||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
Vec3<size_t> &numberOfWorkgroups,
|
||||
Vec3<size_t> &startOfWorkgroups);
|
||||
const Vec3<size_t> &numberOfWorkgroups,
|
||||
const Vec3<size_t> &startOfWorkgroups);
|
||||
|
||||
static WALKER_TYPE<GfxFamily> *allocateWalkerSpace(LinearStream &commandStream,
|
||||
const Kernel &kernel);
|
||||
|
|
|
@ -193,18 +193,18 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
|
|||
}
|
||||
|
||||
//Get dispatch geometry
|
||||
uint32_t dim = dispatchInfo.getDim();
|
||||
Vec3<size_t> gws = dispatchInfo.getGWS();
|
||||
Vec3<size_t> offset = dispatchInfo.getOffset();
|
||||
Vec3<size_t> startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
|
||||
auto dim = dispatchInfo.getDim();
|
||||
const auto &gws = dispatchInfo.getGWS();
|
||||
const auto &offset = dispatchInfo.getOffset();
|
||||
const auto &startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
|
||||
|
||||
// Compute local workgroup sizes
|
||||
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
|
||||
Vec3<size_t> elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
|
||||
const auto &lws = dispatchInfo.getLocalWorkgroupSize();
|
||||
const auto &elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
|
||||
|
||||
// Compute number of work groups
|
||||
Vec3<size_t> totalNumberOfWorkgroups = dispatchInfo.getTotalNumberOfWorkgroups();
|
||||
Vec3<size_t> numberOfWorkgroups = dispatchInfo.getNumberOfWorkgroups();
|
||||
const auto &totalNumberOfWorkgroups = dispatchInfo.getTotalNumberOfWorkgroups();
|
||||
const auto &numberOfWorkgroups = dispatchInfo.getNumberOfWorkgroups();
|
||||
UNRECOVERABLE_IF(totalNumberOfWorkgroups.x == 0);
|
||||
UNRECOVERABLE_IF(numberOfWorkgroups.x == 0);
|
||||
|
||||
|
|
|
@ -67,8 +67,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
Vec3<size_t> &numberOfWorkgroups,
|
||||
Vec3<size_t> &startOfWorkgroups) {
|
||||
const Vec3<size_t> &numberOfWorkgroups,
|
||||
const Vec3<size_t> &startOfWorkgroups) {
|
||||
|
||||
auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel);
|
||||
WALKER_TYPE<GfxFamily> walkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
|
|
|
@ -57,8 +57,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
Vec3<size_t> &numberOfWorkgroups,
|
||||
Vec3<size_t> &startOfWorkgroups) {
|
||||
const Vec3<size_t> &numberOfWorkgroups,
|
||||
const Vec3<size_t> &startOfWorkgroups) {
|
||||
|
||||
using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
|
||||
|
||||
|
|
|
@ -452,13 +452,13 @@ Vec3<size_t> generateWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
|||
return (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) ? computeWorkgroupSize(dispatchInfo) : dispatchInfo.getEnqueuedWorkgroupSize();
|
||||
}
|
||||
|
||||
Vec3<size_t> computeWorkgroupsNumber(const Vec3<size_t> gws, const Vec3<size_t> lws) {
|
||||
Vec3<size_t> computeWorkgroupsNumber(const Vec3<size_t> &gws, const Vec3<size_t> &lws) {
|
||||
return (Vec3<size_t>(gws.x / lws.x + ((gws.x % lws.x) ? 1 : 0),
|
||||
gws.y / lws.y + ((gws.y % lws.y) ? 1 : 0),
|
||||
gws.z / lws.z + ((gws.z % lws.z) ? 1 : 0)));
|
||||
}
|
||||
|
||||
Vec3<size_t> generateWorkgroupsNumber(const Vec3<size_t> gws, const Vec3<size_t> lws) {
|
||||
Vec3<size_t> generateWorkgroupsNumber(const Vec3<size_t> &gws, const Vec3<size_t> &lws) {
|
||||
return (lws.x > 0) ? computeWorkgroupsNumber(gws, lws) : Vec3<size_t>(0, 0, 0);
|
||||
}
|
||||
|
||||
|
@ -466,7 +466,7 @@ Vec3<size_t> generateWorkgroupsNumber(const DispatchInfo &dispatchInfo) {
|
|||
return generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getLocalWorkgroupSize());
|
||||
}
|
||||
|
||||
Vec3<size_t> canonizeWorkgroup(Vec3<size_t> workgroup) {
|
||||
Vec3<size_t> canonizeWorkgroup(const Vec3<size_t> &workgroup) {
|
||||
return ((workgroup.x > 0) ? Vec3<size_t>({workgroup.x, std::max(workgroup.y, static_cast<size_t>(1)), std::max(workgroup.z, static_cast<size_t>(1))})
|
||||
: Vec3<size_t>(0, 0, 0));
|
||||
}
|
||||
|
|
|
@ -32,9 +32,9 @@ class DispatchInfo {
|
|||
using EstimateCommandsMethodT = size_t(size_t, const HardwareInfo &, bool);
|
||||
|
||||
DispatchInfo() = default;
|
||||
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
|
||||
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, const Vec3<size_t> &gws, const Vec3<size_t> &elws, const Vec3<size_t> &offset)
|
||||
: pClDevice(device), kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {}
|
||||
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
|
||||
DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, const Vec3<size_t> &gws, const Vec3<size_t> &elws, const Vec3<size_t> &offset, const Vec3<size_t> &agws, const Vec3<size_t> &lws, const Vec3<size_t> &twgs, const Vec3<size_t> &nwgs, const Vec3<size_t> &swgs)
|
||||
: pClDevice(device), kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
|
||||
|
||||
ClDevice &getClDevice() const { return *pClDevice; }
|
||||
|
|
|
@ -1828,7 +1828,7 @@ TEST_F(VmeBuiltInTests, WhenValidatingDispatchThenCorrectReturns) {
|
|||
struct MockVmeBuilder : BuiltInOp<EBuiltInOps::VmeBlockMotionEstimateIntel> {
|
||||
using BuiltInOp<EBuiltInOps::VmeBlockMotionEstimateIntel>::BuiltInOp;
|
||||
|
||||
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const override {
|
||||
cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const override {
|
||||
receivedInputRegion = inputRegion;
|
||||
receivedOffset = offset;
|
||||
receivedBlkNum = blkNum;
|
||||
|
|
|
@ -53,7 +53,7 @@ struct EncodeDispatchKernel {
|
|||
|
||||
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
|
||||
|
||||
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart, Vec3<size_t> groupCount,
|
||||
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount,
|
||||
bool isInternal, bool isCooperative);
|
||||
|
||||
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
|
|
|
@ -320,8 +320,8 @@ template <typename Family>
|
|||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart,
|
||||
Vec3<size_t> groupCount, bool isInternal,
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount, bool isInternal,
|
||||
bool isCooperative) {
|
||||
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
|
|
|
@ -422,8 +422,8 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
|
|||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart,
|
||||
Vec3<size_t> groupCount, bool isInternal,
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount, bool isInternal,
|
||||
bool isCooperative) {
|
||||
size_t totalSize = sizeof(WALKER_TYPE);
|
||||
totalSize += PreemptionHelper::getPreemptionWaCsSize<Family>(*device);
|
||||
|
|
|
@ -30,8 +30,8 @@ struct ImplicitScalingDispatch {
|
|||
static size_t getSize(bool nativeCrossTileAtomicSync,
|
||||
bool preferStaticPartitioning,
|
||||
const DeviceBitfield &devices,
|
||||
Vec3<size_t> groupStart,
|
||||
Vec3<size_t> groupCount);
|
||||
const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount);
|
||||
static void dispatchCommands(LinearStream &commandStream,
|
||||
WALKER_TYPE &walkerCmd,
|
||||
const DeviceBitfield &devices,
|
||||
|
|
|
@ -15,8 +15,8 @@ template <typename GfxFamily>
|
|||
size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSync,
|
||||
bool preferStaticPartitioning,
|
||||
const DeviceBitfield &devices,
|
||||
Vec3<size_t> groupStart,
|
||||
Vec3<size_t> groupCount) {
|
||||
const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount) {
|
||||
typename GfxFamily::COMPUTE_WALKER::PARTITION_TYPE partitionType{};
|
||||
bool staticPartitioning = false;
|
||||
const uint32_t tileCount = static_cast<uint32_t>(devices.count());
|
||||
|
|
|
@ -92,8 +92,8 @@ bool inline isCrossTileAtomicRequired() {
|
|||
template <typename GfxFamily>
|
||||
uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitionCount,
|
||||
bool preferStaticPartitioning,
|
||||
Vec3<size_t> groupStart,
|
||||
Vec3<size_t> groupCount,
|
||||
const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount,
|
||||
std::optional<typename COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE> requestedPartitionType,
|
||||
typename COMPUTE_WALKER<GfxFamily>::PARTITION_TYPE *outSelectedPartitionType,
|
||||
bool *outSelectStaticPartitioning) {
|
||||
|
|
|
@ -25,8 +25,8 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
|
|||
GraphicsAllocation *memObjAllocation,
|
||||
GraphicsAllocation *preallocatedHostAllocation,
|
||||
const void *hostPtr, uint64_t memObjGpuVa,
|
||||
uint64_t hostAllocGpuVa, Vec3<size_t> hostPtrOffset,
|
||||
Vec3<size_t> copyOffset, Vec3<size_t> copySize,
|
||||
uint64_t hostAllocGpuVa, const Vec3<size_t> &hostPtrOffset,
|
||||
const Vec3<size_t> &copyOffset, Vec3<size_t> copySize,
|
||||
size_t hostRowPitch, size_t hostSlicePitch,
|
||||
size_t gpuRowPitch, size_t gpuSlicePitch) {
|
||||
GraphicsAllocation *hostAllocation = nullptr;
|
||||
|
@ -94,7 +94,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
|
|||
}
|
||||
|
||||
BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
|
||||
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
|
||||
const Vec3<size_t> &dstOffset, const Vec3<size_t> &srcOffset, Vec3<size_t> copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation) {
|
||||
copySize.y = copySize.y ? copySize.y : 1;
|
||||
|
@ -166,13 +166,13 @@ void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer
|
|||
}
|
||||
|
||||
BlitOperationResult BlitHelper::blitMemoryToAllocation(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size) {
|
||||
const Vec3<size_t> &size) {
|
||||
auto memoryBanks = memory->storageInfo.getMemoryBanks();
|
||||
return blitMemoryToAllocationBanks(device, memory, offset, hostPtr, size, memoryBanks);
|
||||
}
|
||||
|
||||
BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size, DeviceBitfield memoryBanks) {
|
||||
const Vec3<size_t> &size, DeviceBitfield memoryBanks) {
|
||||
const auto &hwInfo = device.getHardwareInfo();
|
||||
auto isBlitterRequired = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isBlitCopyRequiredForLocalMemory(hwInfo, *memory);
|
||||
if (!hwInfo.capabilityTable.blitterOperationsSupported && !isBlitterRequired) {
|
||||
|
|
|
@ -44,13 +44,13 @@ struct BlitProperties {
|
|||
GraphicsAllocation *memObjAllocation,
|
||||
GraphicsAllocation *preallocatedHostAllocation,
|
||||
const void *hostPtr, uint64_t memObjGpuVa,
|
||||
uint64_t hostAllocGpuVa, Vec3<size_t> hostPtrOffset,
|
||||
Vec3<size_t> copyOffset, Vec3<size_t> copySize,
|
||||
uint64_t hostAllocGpuVa, const Vec3<size_t> &hostPtrOffset,
|
||||
const Vec3<size_t> &copyOffset, Vec3<size_t> copySize,
|
||||
size_t hostRowPitch, size_t hostSlicePitch,
|
||||
size_t gpuRowPitch, size_t gpuSlicePitch);
|
||||
|
||||
static BlitProperties constructPropertiesForCopy(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
|
||||
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
|
||||
const Vec3<size_t> &dstOffset, const Vec3<size_t> &srcOffset, Vec3<size_t> copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation);
|
||||
|
||||
|
@ -96,15 +96,15 @@ using BlitMemoryToAllocationFunc = std::function<BlitOperationResult(const Devic
|
|||
GraphicsAllocation *memory,
|
||||
size_t offset,
|
||||
const void *hostPtr,
|
||||
Vec3<size_t> size)>;
|
||||
const Vec3<size_t> &size)>;
|
||||
extern BlitMemoryToAllocationFunc blitMemoryToAllocation;
|
||||
} // namespace BlitHelperFunctions
|
||||
|
||||
struct BlitHelper {
|
||||
static BlitOperationResult blitMemoryToAllocation(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size);
|
||||
const Vec3<size_t> &size);
|
||||
static BlitOperationResult blitMemoryToAllocationBanks(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size, DeviceBitfield memoryBanks);
|
||||
const Vec3<size_t> &size, DeviceBitfield memoryBanks);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -148,7 +148,7 @@ struct BlitCommandsHelper {
|
|||
static void getBlitAllocationProperties(const GraphicsAllocation &allocation, uint32_t &pitch, uint32_t &qPitch, GMM_TILE_TYPE &tileType, uint32_t &mipTailLod, uint32_t &compressionDetails, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void dispatchDebugPauseCommands(LinearStream &commandStream, uint64_t debugPauseStateGPUAddress, DebugPauseState confirmationTrigger, DebugPauseState waitCondition);
|
||||
static size_t getSizeForDebugPauseCommands();
|
||||
static bool useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel);
|
||||
static bool useOneBlitCopyCommand(const Vec3<size_t> &copySize, uint32_t bytesPerPixel);
|
||||
static uint32_t getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, size_t srcSize, size_t dstSize);
|
||||
static bool isCopyRegionPreferred(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void programGlobalSequencerFlush(LinearStream &commandStream);
|
||||
|
|
|
@ -316,7 +316,7 @@ size_t BlitCommandsHelper<GfxFamily>::getSizeForDebugPauseCommands() {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel) {
|
||||
bool BlitCommandsHelper<GfxFamily>::useOneBlitCopyCommand(const Vec3<size_t> &copySize, uint32_t bytesPerPixel) {
|
||||
return (copySize.x / bytesPerPixel <= BlitterConstants::maxBlitWidth && copySize.y <= BlitterConstants::maxBlitHeight);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue