feature: use heapless builtins for images

Related-To: NEO-12744
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-10-25 15:10:17 +00:00
committed by Compute-Runtime-Automation
parent bbdf1ac7b6
commit 3891e887c1
18 changed files with 659 additions and 106 deletions

View File

@@ -689,6 +689,25 @@ class BuiltInOp<EBuiltInOps::copyBufferToImage3dStateless> : public BuiltInOp<EB
}
};
// Dispatch-info builder for the heapless buffer-to-image copy built-in.
// Derives from the copyBufferToImage3d builder and differs only in which
// kernels are populated and in the type used to build the dispatch infos.
template <>
class BuiltInOp<EBuiltInOps::copyBufferToImage3dHeapless> : public BuiltInOp<EBuiltInOps::copyBufferToImage3d> {
public:
// Constructs the base builder with `false` as its third argument
// (presumably the populateKernels flag, as in the sibling image builders -
// confirm against the base class) and then populates the five kernelBytes
// slots for the heapless built-in type.
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
: BuiltInOp<EBuiltInOps::copyBufferToImage3d>(kernelsLib, device, false) {
// NOTE(review): the kernel names are the "*Stateless" ones and the
// greaterThan4gbBuffersRequired option is passed; presumably the heapless
// built-in source exposes kernels under these names - confirm.
// Slots 0..4 receive the Bytes, 2Bytes, 4Bytes, 8Bytes and 16Bytes kernels.
populate(EBuiltInOps::copyBufferToImage3dHeapless,
CompilerOptions::greaterThan4gbBuffersRequired,
"CopyBufferToImage3dBytesStateless", kernelBytes[0],
"CopyBufferToImage3d2BytesStateless", kernelBytes[1],
"CopyBufferToImage3d4BytesStateless", kernelBytes[2],
"CopyBufferToImage3d8BytesStateless", kernelBytes[3],
"CopyBufferToImage3d16BytesStateless", kernelBytes[4]);
}
// Builds the dispatch infos by forwarding to buildDispatchInfosTyped
// instantiated with uint64_t; returns whatever that call returns.
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
}
};
template <>
class BuiltInOp<EBuiltInOps::copyImage3dToBuffer> : public BuiltinDispatchInfoBuilder {
public:
@@ -811,6 +830,25 @@ class BuiltInOp<EBuiltInOps::copyImage3dToBufferStateless> : public BuiltInOp<EB
}
};
// Dispatch-info builder for the heapless image-to-buffer copy built-in.
// Derives from the copyImage3dToBuffer builder and differs only in which
// kernels are populated and in the type used to build the dispatch infos.
template <>
class BuiltInOp<EBuiltInOps::copyImage3dToBufferHeapless> : public BuiltInOp<EBuiltInOps::copyImage3dToBuffer> {
public:
// Constructs the base builder with `false` as its third argument
// (presumably the populateKernels flag, as in the sibling image builders -
// confirm against the base class) and then populates the five kernelBytes
// slots for the heapless built-in type.
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
: BuiltInOp<EBuiltInOps::copyImage3dToBuffer>(kernelsLib, device, false) {
// NOTE(review): the kernel names are the "*Stateless" ones and the
// greaterThan4gbBuffersRequired option is passed; presumably the heapless
// built-in source exposes kernels under these names - confirm.
// Slots 0..4 receive the Bytes, 2Bytes, 4Bytes, 8Bytes and 16Bytes kernels.
populate(EBuiltInOps::copyImage3dToBufferHeapless,
CompilerOptions::greaterThan4gbBuffersRequired,
"CopyImage3dToBufferBytesStateless", kernelBytes[0],
"CopyImage3dToBuffer2BytesStateless", kernelBytes[1],
"CopyImage3dToBuffer4BytesStateless", kernelBytes[2],
"CopyImage3dToBuffer8BytesStateless", kernelBytes[3],
"CopyImage3dToBuffer16BytesStateless", kernelBytes[4]);
}
// Builds the dispatch infos by forwarding to buildDispatchInfosTyped
// instantiated with uint64_t; returns whatever that call returns.
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
}
};
template <>
class BuiltInOp<EBuiltInOps::copyImageToImage3d> : public BuiltinDispatchInfoBuilder {
public:
@@ -873,9 +911,29 @@ class BuiltInOp<EBuiltInOps::copyImageToImage3d> : public BuiltinDispatchInfoBui
}
protected:
// Protected constructor that lets a derived builder decide which kernel is
// loaded. When populateKernels is true, `kernel` is populated with
// "CopyImageToImage3d" for EBuiltInOps::copyImageToImage3d, passing an empty
// string where other builders pass compiler options. When it is false,
// nothing is populated here and `kernel` keeps its nullptr initializer until
// the caller populates it.
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
if (populateKernels) {
populate(EBuiltInOps::copyImageToImage3d,
"",
"CopyImageToImage3d", kernel);
}
}
MultiDeviceKernel *kernel = nullptr;
};
// Dispatch-info builder for the heapless image-to-image copy built-in.
// Derives from the copyImageToImage3d builder and adds only a constructor;
// no other member is overridden here.
template <>
class BuiltInOp<EBuiltInOps::copyImageToImage3dHeapless> : public BuiltInOp<EBuiltInOps::copyImageToImage3d> {
public:
// Calls the base's protected constructor with populateKernels == false so
// the base skips its own populate call, then populates the inherited
// `kernel` for the heapless built-in type. The kernel name and the empty
// options string are the same as in the base builder; only the
// EBuiltInOps value differs.
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
: BuiltInOp<EBuiltInOps::copyImageToImage3d>(kernelsLib, device, false) {
populate(EBuiltInOps::copyImageToImage3dHeapless,
"",
"CopyImageToImage3d", kernel);
}
};
template <>
class BuiltInOp<EBuiltInOps::fillImage3d> : public BuiltinDispatchInfoBuilder {
public:
@@ -930,9 +988,30 @@ class BuiltInOp<EBuiltInOps::fillImage3d> : public BuiltinDispatchInfoBuilder {
}
protected:
// Protected constructor that lets a derived builder decide which kernel is
// loaded. When populateKernels is true, `kernel` is populated with
// "FillImage3d" for EBuiltInOps::fillImage3d, passing an empty string where
// other builders pass compiler options. When it is false, nothing is
// populated here and `kernel` keeps its nullptr initializer until the caller
// populates it.
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
if (populateKernels) {
populate(EBuiltInOps::fillImage3d,
"",
"FillImage3d", kernel);
}
}
MultiDeviceKernel *kernel = nullptr;
};
// Dispatch-info builder for the heapless image fill built-in.
// Derives from the fillImage3d builder and adds only a constructor;
// no other member is overridden here.
template <>
class BuiltInOp<EBuiltInOps::fillImage3dHeapless> : public BuiltInOp<EBuiltInOps::fillImage3d> {
public:
// Calls the base's protected constructor with populateKernels == false so
// the base skips its own populate call, then populates the inherited
// `kernel` for the heapless built-in type. The kernel name and the empty
// options string are the same as in the base builder; only the
// EBuiltInOps value differs.
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
: BuiltInOp<EBuiltInOps::fillImage3d>(kernelsLib, device, false) {
populate(EBuiltInOps::fillImage3dHeapless,
"",
"FillImage3d", kernel);
}
};
BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
uint32_t operationId = static_cast<uint32_t>(operation);
auto &builtins = *device.getDevice().getBuiltIns();
@@ -972,18 +1051,30 @@ BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuil
case EBuiltInOps::copyBufferToImage3dStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToImage3dStateless>>(builtins, device); });
break;
case EBuiltInOps::copyBufferToImage3dHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToImage3dHeapless>>(builtins, device); });
break;
case EBuiltInOps::copyImage3dToBuffer:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyImage3dToBuffer>>(builtins, device); });
break;
case EBuiltInOps::copyImage3dToBufferStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyImage3dToBufferStateless>>(builtins, device); });
break;
case EBuiltInOps::copyImage3dToBufferHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyImage3dToBufferHeapless>>(builtins, device); });
break;
case EBuiltInOps::copyImageToImage3d:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyImageToImage3d>>(builtins, device); });
break;
case EBuiltInOps::copyImageToImage3dHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyImageToImage3dHeapless>>(builtins, device); });
break;
case EBuiltInOps::fillImage3d:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillImage3d>>(builtins, device); });
break;
case EBuiltInOps::fillImage3dHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillImage3dHeapless>>(builtins, device); });
break;
case EBuiltInOps::auxTranslation:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::auxTranslation>>(builtins, device); });
break;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -57,7 +57,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
MultiDispatchInfo dispatchInfo(dc);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::copyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
auto builtIn = EBuiltInOps::adjustImageBuiltinType<EBuiltInOps::copyImageToImage3d>(this->heaplessModeEnabled);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, builtIn, numEventsInWaitList, eventWaitList, event, false, csr);
}
} // namespace NEO

View File

@@ -29,9 +29,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
if (dstBuffer->getSize() >= 4ull * MemoryConstants::gigaByte) {
isStateless = true;
}
const bool useHeapless = false;
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, useHeapless);
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, this->heaplessModeEnabled);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -29,8 +29,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillImage(
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillImage3d,
this->getClDevice());
auto builtInType = EBuiltInOps::adjustImageBuiltinType<EBuiltInOps::fillImage3d>(this->heaplessModeEnabled);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, this->getClDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);
MemObjSurface dstImgSurf(image);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -100,7 +100,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
dc.bcsSplit = bcsSplit;
dc.direction = csrSelectionArgs.direction;
auto eBuiltInOps = EBuiltInOps::copyBufferToImage3d;
auto eBuiltInOps = EBuiltInOps::adjustImageBuiltinType<EBuiltInOps::copyBufferToImage3d>(this->heaplessModeEnabled);
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);