feature: use heapless builtins in OCL if supported
Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
6f69fa997a
commit
59f661356c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -156,6 +156,24 @@ class BuiltInOp<EBuiltInOps::copyBufferToBufferStateless> : public BuiltInOp<EBu
|
|||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferToBufferStatelessHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferToBufferLeftLeftover", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddle", kernMiddle,
|
||||
"CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
|
||||
"CopyBufferToBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferRect> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
|
@ -285,6 +303,22 @@ class BuiltInOp<EBuiltInOps::copyBufferRectStateless> : public BuiltInOp<EBuiltI
|
|||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferRect> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferRect>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferRectStatelessHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferRectBytes2d", kernelBytes[0],
|
||||
"CopyBufferRectBytes2d", kernelBytes[1],
|
||||
"CopyBufferRectBytes3d", kernelBytes[2]);
|
||||
}
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::fillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
|
@ -393,6 +427,21 @@ class BuiltInOp<EBuiltInOps::fillBufferStateless> : public BuiltInOp<EBuiltInOps
|
|||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::fillBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp<EBuiltInOps::fillBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::fillBufferStatelessHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"FillBufferLeftLeftover", kernLeftLeftover,
|
||||
"FillBufferMiddle", kernMiddle,
|
||||
"FillBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfos);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
|
@ -764,18 +813,27 @@ BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuil
|
|||
case EBuiltInOps::copyBufferToBufferStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStateless>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::copyBufferRect:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRect>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::copyBufferRectStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStateless>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::copyBufferRectStatelessHeapless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::fillBuffer:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBuffer>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::fillBufferStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStateless>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::fillBufferStatelessHeapless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless>>(builtins, device); });
|
||||
break;
|
||||
case EBuiltInOps::copyBufferToImage3d:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToImage3d>>(builtins, device); });
|
||||
break;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -23,15 +23,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
|||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto eBuiltInOpsType = EBuiltInOps::copyBufferToBuffer;
|
||||
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
||||
eBuiltInOpsType = EBuiltInOps::copyBufferToBufferStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()));
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
|
@ -48,7 +48,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
|||
MemObjSurface s2(dstBuffer);
|
||||
Surface *surfaces[] = {&s1, &s2};
|
||||
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -28,15 +28,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
|||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
|
||||
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
||||
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()));
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
|
||||
|
||||
MemObjSurface srcBufferSurf(srcBuffer);
|
||||
MemObjSurface dstBufferSurf(dstBuffer);
|
||||
|
@ -56,7 +56,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
|||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -24,13 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
|
|||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto eBuiltInOpsType = EBuiltInOps::copyBufferToImage3d;
|
||||
|
||||
if (forceStateless(srcBuffer->getSize())) {
|
||||
eBuiltInOpsType = EBuiltInOps::copyBufferToImage3dStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(srcBuffer->getSize());
|
||||
const bool useHeapless = false;
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, useHeapless);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||
this->getClDevice());
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -24,12 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
|
|||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto eBuiltInOpsType = EBuiltInOps::copyImage3dToBuffer;
|
||||
|
||||
if (forceStateless(dstBuffer->getSize())) {
|
||||
eBuiltInOpsType = EBuiltInOps::copyImage3dToBufferStateless;
|
||||
}
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
||||
const bool useStateless = forceStateless(dstBuffer->getSize());
|
||||
const bool useHeapless = false;
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(useStateless, useHeapless);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||
this->getClDevice());
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
|
|
|
@ -50,12 +50,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
|
|||
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
|
||||
}
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::fillBuffer;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::fillBufferStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(buffer->getSize());
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps,
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||
this->getClDevice());
|
||||
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -70,10 +70,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
|||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::copyBufferToBuffer;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::copyBufferToBufferStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(buffer->getSize());
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
|
||||
|
||||
void *dstPtr = ptr;
|
||||
|
||||
|
@ -121,7 +120,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
|||
}
|
||||
}
|
||||
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -54,10 +54,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
|||
bool isCpuCopyAllowed = false;
|
||||
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(buffer->getSize());
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
|
||||
|
||||
void *dstPtr = ptr;
|
||||
|
||||
|
@ -103,7 +102,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
|||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
if (dispatchResult != CL_SUCCESS) {
|
||||
return dispatchResult;
|
||||
}
|
||||
|
|
|
@ -354,10 +354,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
|||
isStatelessRequired |= forceStateless(dstSvmData->size);
|
||||
}
|
||||
|
||||
auto builtInType = EBuiltInOps::copyBufferToBuffer;
|
||||
if (isStatelessRequired) {
|
||||
builtInType = EBuiltInOps::copyBufferToBufferStateless;
|
||||
}
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(isStatelessRequired, useHeapless);
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
BuiltinOpParams operationParams;
|
||||
|
@ -510,10 +508,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
|
|||
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
|
||||
}
|
||||
|
||||
auto builtInType = EBuiltInOps::fillBuffer;
|
||||
if (forceStateless(svmData->size)) {
|
||||
builtInType = EBuiltInOps::fillBufferStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(svmData->size);
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||
this->getClDevice());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -58,10 +58,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
|||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::copyBufferToBuffer;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::copyBufferToBufferStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(buffer->getSize());
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
|
||||
|
||||
void *srcPtr = const_cast<void *>(ptr);
|
||||
|
||||
|
@ -102,7 +101,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
|||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
if (dispatchResult != CL_SUCCESS) {
|
||||
return dispatchResult;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -58,10 +58,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
|||
bool isCpuCopyAllowed = false;
|
||||
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
|
||||
}
|
||||
const bool useStateless = forceStateless(buffer->getSize());
|
||||
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
|
||||
|
||||
void *srcPtr = const_cast<void *>(ptr);
|
||||
|
||||
|
@ -108,7 +107,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
|||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
if (dispatchResult != CL_SUCCESS) {
|
||||
return dispatchResult;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -146,6 +146,13 @@ struct AuxBuiltinsMatcher {
|
|||
}
|
||||
};
|
||||
|
||||
struct HeaplessSupportedMatcher {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::heaplessAllowed;
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST2_F(BuiltInTests, GivenBuiltinTypeBinaryWhenGettingAuxTranslationBuiltinThenResourceSizeIsNonZero, MatchAny) {
|
||||
auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());
|
||||
|
||||
|
@ -2413,3 +2420,184 @@ HWTEST_F(BuiltInOwnershipWrapperTests, givenBuiltInOwnershipWrapperWhenAskedForT
|
|||
EXPECT_FALSE(std::is_copy_constructible<BuiltInOwnershipWrapper>::value);
|
||||
EXPECT_FALSE(std::is_copy_assignable<BuiltInOwnershipWrapper>::value);
|
||||
}
|
||||
|
||||
// Builds dispatch infos with the stateless-heapless buffer-to-buffer copy builder for a
// copy whose size and source offset are 4 GB, and checks that exactly one dispatch info
// is produced and that the stored built-in op params equal the ones passed in.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // The 64-bit sizes below are narrowed to size_t, so the scenario is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t srcOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t copyBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    BuiltinOpParams expectedParams;
    expectedParams.srcMemObj = &srcBuffer;
    expectedParams.dstMemObj = &dstBuffer;
    expectedParams.srcOffset = {static_cast<size_t>(srcOffsetBytes), 0, 0};
    expectedParams.dstOffset = {0, 0, 0};
    expectedParams.size = {static_cast<size_t>(copyBytes), 0, 0};

    MultiDispatchInfo multiDispatchInfo(expectedParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(multiDispatchInfo));

    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), expectedParams));
}
|
||||
|
||||
// Builds dispatch infos with the stateless-heapless rect-copy builder for a copy from a
// `buffer` allocation into a `bufferHostMemory` allocation, and checks that one dispatch
// info is produced, the params are preserved, and every kernel reports its destination
// allocation as being in system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToSystemBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // The 64-bit sizes below are narrowed to size_t, so the scenario is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t srcOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t copyBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Destination is host memory in this scenario.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    const auto rowBytes = static_cast<size_t>(copyBytes);

    BuiltinOpParams expectedParams;
    expectedParams.srcMemObj = &srcBuffer;
    expectedParams.dstMemObj = &dstBuffer;
    expectedParams.srcOffset = {static_cast<size_t>(srcOffsetBytes), 0, 0};
    expectedParams.dstOffset = {0, 0, 0};
    expectedParams.size = {rowBytes, 1, 1};
    expectedParams.srcRowPitch = rowBytes;
    expectedParams.srcSlicePitch = 0;
    expectedParams.dstRowPitch = rowBytes;
    expectedParams.dstSlicePitch = 0;

    MultiDispatchInfo multiDispatchInfo(expectedParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(multiDispatchInfo));

    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), expectedParams));

    for (auto &info : multiDispatchInfo) {
        auto *kernel = info.getKernel();
        EXPECT_TRUE(kernel->getDestinationAllocationInSystemMemory());
    }
}
|
||||
|
||||
// Builds dispatch infos with the stateless-heapless rect-copy builder for a copy from a
// `bufferHostMemory` allocation into a `buffer` allocation, and checks that one dispatch
// info is produced, the params are preserved, and no kernel reports its destination
// allocation as being in system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToLocalBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // The 64-bit sizes below are narrowed to size_t, so the scenario is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t srcOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t copyBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Source is host memory, destination is a regular buffer allocation in this scenario.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    const auto rowBytes = static_cast<size_t>(copyBytes);

    BuiltinOpParams expectedParams;
    expectedParams.srcMemObj = &srcBuffer;
    expectedParams.dstMemObj = &dstBuffer;
    expectedParams.srcOffset = {static_cast<size_t>(srcOffsetBytes), 0, 0};
    expectedParams.dstOffset = {0, 0, 0};
    expectedParams.size = {rowBytes, 1, 1};
    expectedParams.srcRowPitch = rowBytes;
    expectedParams.srcSlicePitch = 0;
    expectedParams.dstRowPitch = rowBytes;
    expectedParams.dstSlicePitch = 0;

    MultiDispatchInfo multiDispatchInfo(expectedParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(multiDispatchInfo));

    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), expectedParams));

    for (auto &info : multiDispatchInfo) {
        auto *kernel = info.getKernel();
        EXPECT_FALSE(kernel->getDestinationAllocationInSystemMemory());
    }
}
|
||||
|
||||
// Builds dispatch infos with the stateless-heapless fill builder for a destination of
// type `bufferHostMemory`, and checks that one dispatch info is produced, the params are
// preserved, and every kernel reports its destination allocation as being in system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillSystemBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // The 64-bit sizes below are narrowed to size_t, so the scenario is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t dstOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Destination is host memory in this scenario.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams expectedParams;
    expectedParams.srcMemObj = &srcBuffer;
    expectedParams.dstMemObj = &dstBuffer;
    expectedParams.dstOffset = {static_cast<size_t>(dstOffsetBytes), 0, 0};
    expectedParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo multiDispatchInfo(expectedParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(multiDispatchInfo));

    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), expectedParams));

    for (auto &info : multiDispatchInfo) {
        auto *kernel = info.getKernel();
        EXPECT_TRUE(kernel->getDestinationAllocationInSystemMemory());
    }
}
|
||||
|
||||
// Builds dispatch infos with the stateless-heapless fill builder for a destination of
// type `buffer`, and checks that one dispatch info is produced, the params are preserved,
// and no kernel reports its destination allocation as being in system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillLocalBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // The 64-bit sizes below are narrowed to size_t, so the scenario is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t dstOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Source is host memory, destination is a regular buffer allocation in this scenario.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams expectedParams;
    expectedParams.srcMemObj = &srcBuffer;
    expectedParams.dstMemObj = &dstBuffer;
    expectedParams.dstOffset = {static_cast<size_t>(dstOffsetBytes), 0, 0};
    expectedParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo multiDispatchInfo(expectedParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(multiDispatchInfo));

    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), expectedParams));

    for (auto &info : multiDispatchInfo) {
        auto *kernel = info.getKernel();
        EXPECT_FALSE(kernel->getDestinationAllocationInSystemMemory());
    }
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -19,6 +19,8 @@
|
|||
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
|
||||
#include "test_traits_common.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct CommandDeviceFixture : public ClDeviceFixture,
|
||||
|
@ -147,4 +149,11 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
|
|||
bool expectedKernelSystemMemory = false;
|
||||
};
|
||||
|
||||
struct HeaplessSupportedMatcher {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::heaplessAllowed;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -252,6 +252,47 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
|
|||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
}
|
||||
|
||||
// Builds dispatch infos with the stateless-heapless rect-copy builder and inspects the
// first kernel: it must support buffers bigger than 4 GB, its first argument must not be
// pure stateful, and the indirect-data / scratch pointer implicit args must be 8-byte
// pointers located at offsets 0 and 8 respectively.
HWTEST2_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {

    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless,
                                                                            pCmdQ->getClDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo(dc);
    builder.buildDispatchInfos(multiDispatchInfo);
    // Fatal on purpose: begin() is dereferenced right below, which is only valid when at
    // least one dispatch info was produced.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);
    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);

    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
|
||||
enqueueCopyBufferRect2D<FamilyType>();
|
||||
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
||||
|
|
|
@ -263,6 +263,45 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
|
|||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
}
|
||||
|
||||
// Builds dispatch infos with the copyBufferToBufferStatelessHeapless builtin and checks that the
// selected kernel supports buffers bigger than 4GB, is not pure stateful for argument 0, and
// exposes the implicit indirect-data and scratch pointer arguments at the expected offsets/sizes.
// Skipped on 32-bit builds.
HWTEST2_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {

    if (is32bit) {
        GTEST_SKIP();
    }

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    auto dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless,
                                                                            pCmdQ->getClDevice());

    ASSERT_NE(nullptr, &builder);
    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer.get();
    dc.dstMemObj = dstBuffer.get();
    dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0};
    dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0};
    dc.size = {EnqueueCopyBufferTraits::size, 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    builder.buildDispatchInfos(multiDispatchInfo);
    // Fatal on purpose: begin() is dereferenced right below, which is invalid for an empty container.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    // The kernel pointer is dereferenced below, so a null kernel must stop the test.
    ASSERT_NE(nullptr, kernel);
    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);

    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
|
||||
enqueueCopyBufferAndParse<FamilyType>();
|
||||
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
||||
|
|
|
@ -344,6 +344,49 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchP
|
|||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
}
|
||||
|
||||
// Builds dispatch infos with the fillBufferStatelessHeapless builtin and checks that the selected
// kernel supports buffers bigger than 4GB, is not pure stateful for argument 0, and exposes the
// implicit indirect-data and scratch pointer arguments at the expected offsets/sizes.
// Skipped on 32-bit builds.
HWTEST2_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});

    // All fatal assertions live in this helper: a failing ASSERT_* only returns from the lambda,
    // so the pattern allocation is still released at the end of the test body.
    auto verifyBuiltinKernel = [&]() {
        // Extract the kernel used
        auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless,
                                                                                pCmdQ->getClDevice());
        ASSERT_NE(nullptr, &builder);

        BuiltinOpParams dc;
        MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(),
                             patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true);
        dc.srcMemObj = &patternMemObj;
        dc.dstMemObj = buffer;
        dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0};
        dc.size = {EnqueueFillBufferTraits::size, 0, 0};

        MultiDispatchInfo multiDispatchInfo(dc);
        builder.buildDispatchInfos(multiDispatchInfo);
        // Fatal on purpose: begin() is dereferenced right below, which is invalid for an empty container.
        ASSERT_NE(0u, multiDispatchInfo.size());

        auto kernel = multiDispatchInfo.begin()->getKernel();
        ASSERT_NE(nullptr, kernel);

        auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
        EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
        EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

        auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
        auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

        EXPECT_EQ(0u, indirectDataPointerAddress.offset);
        EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);

        EXPECT_EQ(8u, scratchPointerAddress.offset);
        EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
    };
    verifyBuiltinKernel();

    context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
}
|
||||
|
||||
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) {
|
||||
auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -17,25 +17,28 @@ using Type = uint32_t;
|
|||
inline constexpr Type auxTranslation{0};
|
||||
inline constexpr Type copyBufferToBuffer{1};
|
||||
inline constexpr Type copyBufferToBufferStateless{2};
|
||||
inline constexpr Type copyBufferRect{3};
|
||||
inline constexpr Type copyBufferRectStateless{4};
|
||||
inline constexpr Type fillBuffer{5};
|
||||
inline constexpr Type fillBufferStateless{6};
|
||||
inline constexpr Type copyBufferToImage3d{7};
|
||||
inline constexpr Type copyBufferToImage3dStateless{8};
|
||||
inline constexpr Type copyImage3dToBuffer{9};
|
||||
inline constexpr Type copyImage3dToBufferStateless{10};
|
||||
inline constexpr Type copyImageToImage1d{11};
|
||||
inline constexpr Type copyImageToImage2d{12};
|
||||
inline constexpr Type copyImageToImage3d{13};
|
||||
inline constexpr Type fillImage1d{14};
|
||||
inline constexpr Type fillImage2d{15};
|
||||
inline constexpr Type fillImage3d{16};
|
||||
inline constexpr Type queryKernelTimestamps{17};
|
||||
inline constexpr Type copyBufferToBufferStatelessHeapless{3};
|
||||
inline constexpr Type copyBufferRect{4};
|
||||
inline constexpr Type copyBufferRectStateless{5};
|
||||
inline constexpr Type copyBufferRectStatelessHeapless{6};
|
||||
inline constexpr Type fillBuffer{7};
|
||||
inline constexpr Type fillBufferStateless{8};
|
||||
inline constexpr Type fillBufferStatelessHeapless{9};
|
||||
inline constexpr Type copyBufferToImage3d{10};
|
||||
inline constexpr Type copyBufferToImage3dStateless{11};
|
||||
inline constexpr Type copyImage3dToBuffer{12};
|
||||
inline constexpr Type copyImage3dToBufferStateless{13};
|
||||
inline constexpr Type copyImageToImage1d{14};
|
||||
inline constexpr Type copyImageToImage2d{15};
|
||||
inline constexpr Type copyImageToImage3d{16};
|
||||
inline constexpr Type fillImage1d{17};
|
||||
inline constexpr Type fillImage2d{18};
|
||||
inline constexpr Type fillImage3d{19};
|
||||
inline constexpr Type queryKernelTimestamps{20};
|
||||
|
||||
constexpr bool isStateless(Type type) {
|
||||
constexpr std::array<Type, 5> statelessBuiltins{{copyBufferToBufferStateless, copyBufferRectStateless, fillBufferStateless, copyBufferToImage3dStateless, copyImage3dToBufferStateless}};
|
||||
for (auto &builtinType : statelessBuiltins) {
|
||||
constexpr std::array<Type, 8> statelessBuiltins{{copyBufferToBufferStateless, copyBufferRectStateless, fillBufferStateless, copyBufferToImage3dStateless, copyImage3dToBufferStateless, copyBufferToBufferStatelessHeapless, copyBufferRectStatelessHeapless, fillBufferStatelessHeapless}};
|
||||
for (auto builtinType : statelessBuiltins) {
|
||||
if (type == builtinType) {
|
||||
return true;
|
||||
}
|
||||
|
@ -43,7 +46,68 @@ constexpr bool isStateless(Type type) {
|
|||
return false;
|
||||
}
|
||||
|
||||
inline constexpr Type maxBaseValue{17};
|
||||
// Returns true when `type` is one of the three stateless heapless builtin variants
// (copy buffer to buffer, copy buffer rect, fill buffer); false for every other value.
constexpr bool isHeapless(Type type) {
    return type == copyBufferToBufferStatelessHeapless ||
           type == copyBufferRectStatelessHeapless ||
           type == fillBufferStatelessHeapless;
}
|
||||
|
||||
template <Type builtinType>
|
||||
constexpr uint32_t adjustBuiltinType(const bool useStateless, const bool useHeapless) {
|
||||
return builtinType;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr uint32_t adjustBuiltinType<copyBufferToBuffer>(const bool useStateless, const bool useHeapless) {
|
||||
if (useHeapless) {
|
||||
return copyBufferToBufferStatelessHeapless;
|
||||
} else if (useStateless) {
|
||||
return copyBufferToBufferStateless;
|
||||
}
|
||||
return copyBufferToBuffer;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr uint32_t adjustBuiltinType<copyBufferRect>(const bool useStateless, const bool useHeapless) {
|
||||
if (useHeapless) {
|
||||
return copyBufferRectStatelessHeapless;
|
||||
} else if (useStateless) {
|
||||
return copyBufferRectStateless;
|
||||
}
|
||||
return copyBufferRect;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr uint32_t adjustBuiltinType<fillBuffer>(const bool useStateless, const bool useHeapless) {
|
||||
if (useHeapless) {
|
||||
return fillBufferStatelessHeapless;
|
||||
} else if (useStateless) {
|
||||
return fillBufferStateless;
|
||||
}
|
||||
return fillBuffer;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr uint32_t adjustBuiltinType<copyBufferToImage3d>(const bool useStateless, const bool useHeapless) {
|
||||
if (useStateless) {
|
||||
return copyBufferToImage3dStateless;
|
||||
}
|
||||
return copyBufferToImage3d;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr uint32_t adjustBuiltinType<copyImage3dToBuffer>(const bool useStateless, const bool useHeapless) {
|
||||
if (useStateless) {
|
||||
return copyImage3dToBufferStateless;
|
||||
}
|
||||
return copyImage3dToBuffer;
|
||||
}
|
||||
|
||||
inline constexpr Type maxBaseValue{20};
|
||||
inline constexpr Type count{64};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -35,14 +35,17 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
|||
case EBuiltInOps::copyBufferToBuffer:
|
||||
return "copy_buffer_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferToBufferStateless:
|
||||
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
|
||||
return "copy_buffer_to_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferRect:
|
||||
return "copy_buffer_rect.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferRectStateless:
|
||||
case EBuiltInOps::copyBufferRectStatelessHeapless:
|
||||
return "copy_buffer_rect_stateless.builtin_kernel";
|
||||
case EBuiltInOps::fillBuffer:
|
||||
return "fill_buffer.builtin_kernel";
|
||||
case EBuiltInOps::fillBufferStateless:
|
||||
case EBuiltInOps::fillBufferStatelessHeapless:
|
||||
return "fill_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferToImage3d:
|
||||
return "copy_buffer_to_image3d.builtin_kernel";
|
||||
|
@ -92,14 +95,15 @@ StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, Buil
|
|||
return deviceId.str();
|
||||
};
|
||||
const auto deviceIp = createDeviceIdFilenameComponent(hwInfo.ipVersion);
|
||||
const auto builtinName = getBuiltinAsString(builtin);
|
||||
const auto builtinFilename = getBuiltinAsString(builtin);
|
||||
const auto extension = BuiltinCode::getExtension(type);
|
||||
auto getAddressingMode = [type, &productHelper, releaseHelper, builtin]() {
|
||||
auto getAddressingModePrefix = [type, &productHelper, releaseHelper, builtin]() {
|
||||
if (type == BuiltinCode::ECodeType::binary) {
|
||||
const bool requiresStatelessAddressing = (false == productHelper.isStatefulAddressingModeSupported());
|
||||
const bool builtInUsesStatelessAddressing = EBuiltInOps::isStateless(builtin);
|
||||
const bool heaplessEnabled = EBuiltInOps::isHeapless(builtin);
|
||||
if (builtInUsesStatelessAddressing || requiresStatelessAddressing) {
|
||||
return "stateless_";
|
||||
return heaplessEnabled ? "stateless_heapless_" : "stateless_";
|
||||
} else if (ApiSpecificConfig::getBindlessMode(releaseHelper)) {
|
||||
return "bindless_";
|
||||
} else {
|
||||
|
@ -108,21 +112,21 @@ StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, Buil
|
|||
}
|
||||
return "";
|
||||
};
|
||||
const auto addressingMode = getAddressingMode();
|
||||
const auto addressingModePrefix = getAddressingModePrefix();
|
||||
|
||||
auto createBuiltinResourceName = [](ConstStringRef deviceIpPath, ConstStringRef addressingMode, ConstStringRef builtinName, ConstStringRef extension) {
|
||||
auto createBuiltinResourceName = [](ConstStringRef deviceIpPath, ConstStringRef addressingModePrefix, ConstStringRef builtinFilename, ConstStringRef extension) {
|
||||
std::ostringstream outResourceName;
|
||||
if (false == deviceIpPath.empty()) {
|
||||
outResourceName << deviceIpPath.str() << "_";
|
||||
}
|
||||
outResourceName << addressingMode.str() << builtinName.str() << extension.str();
|
||||
outResourceName << addressingModePrefix.str() << builtinFilename.str() << extension.str();
|
||||
return outResourceName.str();
|
||||
};
|
||||
StackVec<std::string, 3> resourcesToLookup = {};
|
||||
resourcesToLookup.push_back(createBuiltinResourceName(deviceIp, addressingMode, builtinName, extension));
|
||||
resourcesToLookup.push_back(createBuiltinResourceName(deviceIp, addressingModePrefix, builtinFilename, extension));
|
||||
|
||||
if (BuiltinCode::ECodeType::binary != type) {
|
||||
resourcesToLookup.push_back(createBuiltinResourceName("", addressingMode, builtinName, extension));
|
||||
resourcesToLookup.push_back(createBuiltinResourceName("", addressingModePrefix, builtinFilename, extension));
|
||||
}
|
||||
return resourcesToLookup;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -30,6 +30,15 @@ static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
|
|||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
// Embedded source registration for copyBufferToBufferStatelessHeapless.
// The kernel text comes from the same file as the stateless variant.
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessHeaplessSrc(
    createBuiltinResourceName(EBuiltInOps::copyBufferToBufferStatelessHeapless,
                              BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
        .c_str(),
    std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
        ));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::copyBufferRect,
|
||||
|
@ -48,6 +57,15 @@ static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
|
|||
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
// Embedded source registration for copyBufferRectStatelessHeapless.
// The kernel text comes from the same file as the stateless variant.
static RegisterEmbeddedResource registerCopyBufferRectStatelessHeaplessSrc(
    createBuiltinResourceName(EBuiltInOps::copyBufferRectStatelessHeapless,
                              BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
        .c_str(),
    std::string(
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
        ));
|
||||
|
||||
static RegisterEmbeddedResource registerFillBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::fillBuffer,
|
||||
|
@ -66,6 +84,15 @@ static RegisterEmbeddedResource registerFillBufferStatelessSrc(
|
|||
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
// Embedded source registration for fillBufferStatelessHeapless.
// The kernel text comes from the same file as the stateless variant.
static RegisterEmbeddedResource registerFillBufferStatelessHeaplessSrc(
    createBuiltinResourceName(EBuiltInOps::fillBufferStatelessHeapless,
                              BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
        .c_str(),
    std::string(
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
        ));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::copyBufferToImage3d,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -21,4 +21,5 @@ struct TestTraits<IGFX_GEN11_CORE> {
|
|||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
|
||||
static constexpr bool programComputeModeCommandProgramsNonCoherent = true;
|
||||
static constexpr bool heaplessAllowed = false;
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN12LP_CORE> {
|
|||
static constexpr bool forceGpuNonCoherent = true;
|
||||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||
static constexpr bool heaplessAllowed = false;
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN8_CORE> {
|
|||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = false;
|
||||
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
|
||||
static constexpr bool heaplessAllowed = false;
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN9_CORE> {
|
|||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
|
||||
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
|
||||
static constexpr bool heaplessAllowed = false;
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -30,4 +30,5 @@ struct TestTraits<IGFX_XE_HPC_CORE> {
|
|||
static constexpr bool imagesSupported = false;
|
||||
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = true;
|
||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||
static constexpr bool heaplessAllowed = false;
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -30,6 +30,7 @@ struct TestTraits<IGFX_XE_HPG_CORE> {
|
|||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = false;
|
||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||
static constexpr bool heaplessAllowed = false;
|
||||
};
|
||||
#ifdef TESTS_MTL
|
||||
#include "shared/test/common/xe_hpg_core/mtl/test_traits_mtl.h"
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
|
@ -134,3 +135,40 @@ HWTEST_F(BuiltInSharedTest, GivenValidBuiltinTypeAndAnyTypeWhenGettingBuiltinCod
|
|||
EXPECT_EQ(BuiltinCode::ECodeType::binary, builtinCode.type);
|
||||
EXPECT_NE(0U, builtinCode.resource.size());
|
||||
}
|
||||
|
||||
// For each stateless heapless builtin type, requests the binary resource names and expects exactly
// one entry of the form "<arch>_<release>_<revision>_stateless_heapless_<kernel>.builtin_kernel.bin".
// A compiler product helper mock reporting heapless mode as enabled is installed on the device first.
HWTEST2_F(BuiltInSharedTest, GivenHeaplessModeEnabledWhenGetBuiltinResourceNamesIsCalledThenResourceNameIsCorrect, MatchAny) {

    class MockCompilerProductHelper : public CompilerProductHelperHw<productFamily> {
      public:
        bool isHeaplessModeEnabled() const override {
            return true;
        }
    };

    pDevice->executionEnvironment->rootDeviceEnvironments[0]->compilerProductHelper.reset(new MockCompilerProductHelper());

    auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();

    std::string deviceIpString = std::to_string(hwInfo.ipVersion.architecture) + "_" + std::to_string(hwInfo.ipVersion.release) + "_" + std::to_string(hwInfo.ipVersion.revision);

    struct TestParam {
        std::string builtInTypeAsString;
        EBuiltInOps::Type builtinType;
    };

    TestParam params[] = {
        {"copy_buffer_to_buffer_stateless", EBuiltInOps::copyBufferToBufferStatelessHeapless},
        {"copy_buffer_rect_stateless", EBuiltInOps::copyBufferRectStatelessHeapless},
        {"fill_buffer_stateless", EBuiltInOps::fillBufferStatelessHeapless}};

    for (auto &[builtInTypeAsString, builtInType] : params) {

        auto resourceNames = getBuiltinResourceNames(builtInType, BuiltinCode::ECodeType::binary, *pDevice);

        std::string expectedResourceNameGeneric = "stateless_heapless_" + builtInTypeAsString + ".builtin_kernel.bin";
        std::string expectedResourceNameForRelease = deviceIpString + "_" + expectedResourceNameGeneric;

        // Fatal on purpose: element 0 is read right below, so an empty result must stop the test.
        ASSERT_EQ(1u, resourceNames.size());
        EXPECT_EQ(resourceNames[0], expectedResourceNameForRelease);
    }
}
|
||||
|
|
Loading…
Reference in New Issue