feature: use heapless builtins in OCL if supported
Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
6f69fa997a
commit
59f661356c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -156,6 +156,24 @@ class BuiltInOp<EBuiltInOps::copyBufferToBufferStateless> : public BuiltInOp<EBu
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
class BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferToBuffer> {
|
||||||
|
public:
|
||||||
|
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||||
|
: BuiltInOp<EBuiltInOps::copyBufferToBuffer>(kernelsLib, device, false) {
|
||||||
|
populate(EBuiltInOps::copyBufferToBufferStatelessHeapless,
|
||||||
|
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||||
|
"CopyBufferToBufferLeftLeftover", kernLeftLeftover,
|
||||||
|
"CopyBufferToBufferMiddle", kernMiddle,
|
||||||
|
"CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
|
||||||
|
"CopyBufferToBufferRightLeftover", kernRightLeftover);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||||
|
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
class BuiltInOp<EBuiltInOps::copyBufferRect> : public BuiltinDispatchInfoBuilder {
|
class BuiltInOp<EBuiltInOps::copyBufferRect> : public BuiltinDispatchInfoBuilder {
|
||||||
public:
|
public:
|
||||||
|
@ -285,6 +303,22 @@ class BuiltInOp<EBuiltInOps::copyBufferRectStateless> : public BuiltInOp<EBuiltI
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
class BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferRect> {
|
||||||
|
public:
|
||||||
|
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||||
|
: BuiltInOp<EBuiltInOps::copyBufferRect>(kernelsLib, device, false) {
|
||||||
|
populate(EBuiltInOps::copyBufferRectStatelessHeapless,
|
||||||
|
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||||
|
"CopyBufferRectBytes2d", kernelBytes[0],
|
||||||
|
"CopyBufferRectBytes2d", kernelBytes[1],
|
||||||
|
"CopyBufferRectBytes3d", kernelBytes[2]);
|
||||||
|
}
|
||||||
|
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||||
|
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
class BuiltInOp<EBuiltInOps::fillBuffer> : public BuiltinDispatchInfoBuilder {
|
class BuiltInOp<EBuiltInOps::fillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||||
public:
|
public:
|
||||||
|
@ -393,6 +427,21 @@ class BuiltInOp<EBuiltInOps::fillBufferStateless> : public BuiltInOp<EBuiltInOps
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
class BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::fillBuffer> {
|
||||||
|
public:
|
||||||
|
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp<EBuiltInOps::fillBuffer>(kernelsLib, device, false) {
|
||||||
|
populate(EBuiltInOps::fillBufferStatelessHeapless,
|
||||||
|
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||||
|
"FillBufferLeftLeftover", kernLeftLeftover,
|
||||||
|
"FillBufferMiddle", kernMiddle,
|
||||||
|
"FillBufferRightLeftover", kernRightLeftover);
|
||||||
|
}
|
||||||
|
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override {
|
||||||
|
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfos);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
class BuiltInOp<EBuiltInOps::copyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
|
class BuiltInOp<EBuiltInOps::copyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
|
||||||
public:
|
public:
|
||||||
|
@ -764,18 +813,27 @@ BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuil
|
||||||
case EBuiltInOps::copyBufferToBufferStateless:
|
case EBuiltInOps::copyBufferToBufferStateless:
|
||||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStateless>>(builtins, device); });
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStateless>>(builtins, device); });
|
||||||
break;
|
break;
|
||||||
|
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
|
||||||
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless>>(builtins, device); });
|
||||||
|
break;
|
||||||
case EBuiltInOps::copyBufferRect:
|
case EBuiltInOps::copyBufferRect:
|
||||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRect>>(builtins, device); });
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRect>>(builtins, device); });
|
||||||
break;
|
break;
|
||||||
case EBuiltInOps::copyBufferRectStateless:
|
case EBuiltInOps::copyBufferRectStateless:
|
||||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStateless>>(builtins, device); });
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStateless>>(builtins, device); });
|
||||||
break;
|
break;
|
||||||
|
case EBuiltInOps::copyBufferRectStatelessHeapless:
|
||||||
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless>>(builtins, device); });
|
||||||
|
break;
|
||||||
case EBuiltInOps::fillBuffer:
|
case EBuiltInOps::fillBuffer:
|
||||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBuffer>>(builtins, device); });
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBuffer>>(builtins, device); });
|
||||||
break;
|
break;
|
||||||
case EBuiltInOps::fillBufferStateless:
|
case EBuiltInOps::fillBufferStateless:
|
||||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStateless>>(builtins, device); });
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStateless>>(builtins, device); });
|
||||||
break;
|
break;
|
||||||
|
case EBuiltInOps::fillBufferStatelessHeapless:
|
||||||
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless>>(builtins, device); });
|
||||||
|
break;
|
||||||
case EBuiltInOps::copyBufferToImage3d:
|
case EBuiltInOps::copyBufferToImage3d:
|
||||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToImage3d>>(builtins, device); });
|
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToImage3d>>(builtins, device); });
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -23,15 +23,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||||
cl_uint numEventsInWaitList,
|
cl_uint numEventsInWaitList,
|
||||||
const cl_event *eventWaitList,
|
const cl_event *eventWaitList,
|
||||||
cl_event *event) {
|
cl_event *event) {
|
||||||
auto eBuiltInOpsType = EBuiltInOps::copyBufferToBuffer;
|
|
||||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER;
|
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER;
|
||||||
|
|
||||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size};
|
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size};
|
||||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||||
|
|
||||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
const bool useStateless = forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()));
|
||||||
eBuiltInOpsType = EBuiltInOps::copyBufferToBufferStateless;
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
}
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
|
||||||
|
|
||||||
BuiltinOpParams dc;
|
BuiltinOpParams dc;
|
||||||
dc.srcMemObj = srcBuffer;
|
dc.srcMemObj = srcBuffer;
|
||||||
|
@ -48,7 +48,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||||
MemObjSurface s2(dstBuffer);
|
MemObjSurface s2(dstBuffer);
|
||||||
Surface *surfaces[] = {&s1, &s2};
|
Surface *surfaces[] = {&s1, &s2};
|
||||||
|
|
||||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
|
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -28,15 +28,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||||
cl_uint numEventsInWaitList,
|
cl_uint numEventsInWaitList,
|
||||||
const cl_event *eventWaitList,
|
const cl_event *eventWaitList,
|
||||||
cl_event *event) {
|
cl_event *event) {
|
||||||
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
|
|
||||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT;
|
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT;
|
||||||
|
|
||||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region};
|
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region};
|
||||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||||
|
|
||||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
const bool useStateless = forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()));
|
||||||
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
}
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
|
||||||
|
|
||||||
MemObjSurface srcBufferSurf(srcBuffer);
|
MemObjSurface srcBufferSurf(srcBuffer);
|
||||||
MemObjSurface dstBufferSurf(dstBuffer);
|
MemObjSurface dstBufferSurf(dstBuffer);
|
||||||
|
@ -56,7 +56,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||||
dc.direction = csrSelectionArgs.direction;
|
dc.direction = csrSelectionArgs.direction;
|
||||||
|
|
||||||
MultiDispatchInfo dispatchInfo(dc);
|
MultiDispatchInfo dispatchInfo(dc);
|
||||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -24,13 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
|
||||||
cl_uint numEventsInWaitList,
|
cl_uint numEventsInWaitList,
|
||||||
const cl_event *eventWaitList,
|
const cl_event *eventWaitList,
|
||||||
cl_event *event) {
|
cl_event *event) {
|
||||||
auto eBuiltInOpsType = EBuiltInOps::copyBufferToImage3d;
|
|
||||||
|
|
||||||
if (forceStateless(srcBuffer->getSize())) {
|
const bool useStateless = forceStateless(srcBuffer->getSize());
|
||||||
eBuiltInOpsType = EBuiltInOps::copyBufferToImage3dStateless;
|
const bool useHeapless = false;
|
||||||
}
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, useHeapless);
|
||||||
|
|
||||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||||
this->getClDevice());
|
this->getClDevice());
|
||||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -24,12 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
|
||||||
cl_uint numEventsInWaitList,
|
cl_uint numEventsInWaitList,
|
||||||
const cl_event *eventWaitList,
|
const cl_event *eventWaitList,
|
||||||
cl_event *event) {
|
cl_event *event) {
|
||||||
auto eBuiltInOpsType = EBuiltInOps::copyImage3dToBuffer;
|
|
||||||
|
|
||||||
if (forceStateless(dstBuffer->getSize())) {
|
const bool useStateless = forceStateless(dstBuffer->getSize());
|
||||||
eBuiltInOpsType = EBuiltInOps::copyImage3dToBufferStateless;
|
const bool useHeapless = false;
|
||||||
}
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(useStateless, useHeapless);
|
||||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
|
||||||
|
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||||
this->getClDevice());
|
this->getClDevice());
|
||||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||||
|
|
||||||
|
|
|
@ -50,12 +50,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
|
||||||
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
|
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto eBuiltInOps = EBuiltInOps::fillBuffer;
|
const bool useStateless = forceStateless(buffer->getSize());
|
||||||
if (forceStateless(buffer->getSize())) {
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
eBuiltInOps = EBuiltInOps::fillBufferStateless;
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
|
||||||
}
|
|
||||||
|
|
||||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps,
|
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||||
this->getClDevice());
|
this->getClDevice());
|
||||||
|
|
||||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -70,10 +70,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||||
numEventsInWaitList, eventWaitList, event);
|
numEventsInWaitList, eventWaitList, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto eBuiltInOps = EBuiltInOps::copyBufferToBuffer;
|
const bool useStateless = forceStateless(buffer->getSize());
|
||||||
if (forceStateless(buffer->getSize())) {
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
eBuiltInOps = EBuiltInOps::copyBufferToBufferStateless;
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
|
||||||
}
|
|
||||||
|
|
||||||
void *dstPtr = ptr;
|
void *dstPtr = ptr;
|
||||||
|
|
||||||
|
@ -121,7 +120,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -54,10 +54,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||||
bool isCpuCopyAllowed = false;
|
bool isCpuCopyAllowed = false;
|
||||||
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
||||||
|
|
||||||
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
|
const bool useStateless = forceStateless(buffer->getSize());
|
||||||
if (forceStateless(buffer->getSize())) {
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
|
||||||
}
|
|
||||||
|
|
||||||
void *dstPtr = ptr;
|
void *dstPtr = ptr;
|
||||||
|
|
||||||
|
@ -103,7 +102,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||||
dc.direction = csrSelectionArgs.direction;
|
dc.direction = csrSelectionArgs.direction;
|
||||||
|
|
||||||
MultiDispatchInfo dispatchInfo(dc);
|
MultiDispatchInfo dispatchInfo(dc);
|
||||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||||
if (dispatchResult != CL_SUCCESS) {
|
if (dispatchResult != CL_SUCCESS) {
|
||||||
return dispatchResult;
|
return dispatchResult;
|
||||||
}
|
}
|
||||||
|
|
|
@ -354,10 +354,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||||
isStatelessRequired |= forceStateless(dstSvmData->size);
|
isStatelessRequired |= forceStateless(dstSvmData->size);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto builtInType = EBuiltInOps::copyBufferToBuffer;
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
if (isStatelessRequired) {
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(isStatelessRequired, useHeapless);
|
||||||
builtInType = EBuiltInOps::copyBufferToBufferStateless;
|
|
||||||
}
|
|
||||||
|
|
||||||
MultiDispatchInfo dispatchInfo;
|
MultiDispatchInfo dispatchInfo;
|
||||||
BuiltinOpParams operationParams;
|
BuiltinOpParams operationParams;
|
||||||
|
@ -510,10 +508,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
|
||||||
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
|
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto builtInType = EBuiltInOps::fillBuffer;
|
const bool useStateless = forceStateless(svmData->size);
|
||||||
if (forceStateless(svmData->size)) {
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
builtInType = EBuiltInOps::fillBufferStateless;
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
|
||||||
}
|
|
||||||
|
|
||||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
|
||||||
this->getClDevice());
|
this->getClDevice());
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -58,10 +58,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||||
numEventsInWaitList, eventWaitList, event);
|
numEventsInWaitList, eventWaitList, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto eBuiltInOps = EBuiltInOps::copyBufferToBuffer;
|
const bool useStateless = forceStateless(buffer->getSize());
|
||||||
if (forceStateless(buffer->getSize())) {
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
eBuiltInOps = EBuiltInOps::copyBufferToBufferStateless;
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
|
||||||
}
|
|
||||||
|
|
||||||
void *srcPtr = const_cast<void *>(ptr);
|
void *srcPtr = const_cast<void *>(ptr);
|
||||||
|
|
||||||
|
@ -102,7 +101,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||||
dc.direction = csrSelectionArgs.direction;
|
dc.direction = csrSelectionArgs.direction;
|
||||||
|
|
||||||
MultiDispatchInfo dispatchInfo(dc);
|
MultiDispatchInfo dispatchInfo(dc);
|
||||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||||
if (dispatchResult != CL_SUCCESS) {
|
if (dispatchResult != CL_SUCCESS) {
|
||||||
return dispatchResult;
|
return dispatchResult;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -58,10 +58,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||||
bool isCpuCopyAllowed = false;
|
bool isCpuCopyAllowed = false;
|
||||||
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
||||||
|
|
||||||
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
|
const bool useStateless = forceStateless(buffer->getSize());
|
||||||
if (forceStateless(buffer->getSize())) {
|
const bool useHeapless = this->getHeaplessModeEnabled();
|
||||||
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
|
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
|
||||||
}
|
|
||||||
|
|
||||||
void *srcPtr = const_cast<void *>(ptr);
|
void *srcPtr = const_cast<void *>(ptr);
|
||||||
|
|
||||||
|
@ -108,7 +107,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||||
dc.direction = csrSelectionArgs.direction;
|
dc.direction = csrSelectionArgs.direction;
|
||||||
|
|
||||||
MultiDispatchInfo dispatchInfo(dc);
|
MultiDispatchInfo dispatchInfo(dc);
|
||||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||||
if (dispatchResult != CL_SUCCESS) {
|
if (dispatchResult != CL_SUCCESS) {
|
||||||
return dispatchResult;
|
return dispatchResult;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -146,6 +146,13 @@ struct AuxBuiltinsMatcher {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct HeaplessSupportedMatcher {
|
||||||
|
template <PRODUCT_FAMILY productFamily>
|
||||||
|
static constexpr bool isMatched() {
|
||||||
|
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::heaplessAllowed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
HWTEST2_F(BuiltInTests, GivenBuiltinTypeBinaryWhenGettingAuxTranslationBuiltinThenResourceSizeIsNonZero, MatchAny) {
|
HWTEST2_F(BuiltInTests, GivenBuiltinTypeBinaryWhenGettingAuxTranslationBuiltinThenResourceSizeIsNonZero, MatchAny) {
|
||||||
auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());
|
auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());
|
||||||
|
|
||||||
|
@ -2413,3 +2420,184 @@ HWTEST_F(BuiltInOwnershipWrapperTests, givenBuiltInOwnershipWrapperWhenAskedForT
|
||||||
EXPECT_FALSE(std::is_copy_constructible<BuiltInOwnershipWrapper>::value);
|
EXPECT_FALSE(std::is_copy_constructible<BuiltInOwnershipWrapper>::value);
|
||||||
EXPECT_FALSE(std::is_copy_assignable<BuiltInOwnershipWrapper>::value);
|
EXPECT_FALSE(std::is_copy_assignable<BuiltInOwnershipWrapper>::value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds dispatch infos with the copyBufferToBufferStatelessHeapless builder for a
// 4 * gigaByte copy that starts at a 4 * gigaByte source offset inside 10 * gigaByte
// mock buffers, and expects a single dispatch info carrying the unchanged params.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes above 4 GB are used below, so the test is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t offsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t copyBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    BuiltinOpParams copyParams;
    copyParams.srcMemObj = &srcBuffer;
    copyParams.dstMemObj = &dstBuffer;
    copyParams.srcOffset = {static_cast<size_t>(offsetBytes), 0, 0};
    copyParams.dstOffset = {0, 0, 0};
    copyParams.size = {static_cast<size_t>(copyBytes), 0, 0};

    MultiDispatchInfo dispatchInfos(copyParams);
    ASSERT_TRUE(builder.buildDispatchInfos(dispatchInfos));

    EXPECT_EQ(1u, dispatchInfos.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatchInfos.peekBuiltinOpParams(), copyParams));
}
|
||||||
|
|
||||||
|
// Rect copy through the copyBufferRectStatelessHeapless builder where the destination
// allocation is typed bufferHostMemory: expects one dispatch info with unchanged params
// and every dispatched kernel reporting its destination as system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToSystemBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes above 4 GB are used below, so the test is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t offsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t rowBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Source is a plain buffer allocation; destination is host memory.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams rectParams;
    rectParams.srcMemObj = &srcBuffer;
    rectParams.dstMemObj = &dstBuffer;
    rectParams.srcOffset = {static_cast<size_t>(offsetBytes), 0, 0};
    rectParams.dstOffset = {0, 0, 0};
    rectParams.size = {static_cast<size_t>(rowBytes), 1, 1};
    rectParams.srcRowPitch = static_cast<size_t>(rowBytes);
    rectParams.srcSlicePitch = 0;
    rectParams.dstRowPitch = static_cast<size_t>(rowBytes);
    rectParams.dstSlicePitch = 0;

    MultiDispatchInfo dispatchInfos(rectParams);
    ASSERT_TRUE(builder.buildDispatchInfos(dispatchInfos));

    EXPECT_EQ(1u, dispatchInfos.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatchInfos.peekBuiltinOpParams(), rectParams));

    for (auto &info : dispatchInfos) {
        EXPECT_TRUE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
|
||||||
|
|
||||||
|
// Rect copy through the copyBufferRectStatelessHeapless builder where the destination
// allocation is typed buffer (source is bufferHostMemory): expects one dispatch info with
// unchanged params and no dispatched kernel reporting its destination as system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToLocalBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes above 4 GB are used below, so the test is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t offsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t rowBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Source is host memory; destination is a plain buffer allocation.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams rectParams;
    rectParams.srcMemObj = &srcBuffer;
    rectParams.dstMemObj = &dstBuffer;
    rectParams.srcOffset = {static_cast<size_t>(offsetBytes), 0, 0};
    rectParams.dstOffset = {0, 0, 0};
    rectParams.size = {static_cast<size_t>(rowBytes), 1, 1};
    rectParams.srcRowPitch = static_cast<size_t>(rowBytes);
    rectParams.srcSlicePitch = 0;
    rectParams.dstRowPitch = static_cast<size_t>(rowBytes);
    rectParams.dstSlicePitch = 0;

    MultiDispatchInfo dispatchInfos(rectParams);
    ASSERT_TRUE(builder.buildDispatchInfos(dispatchInfos));

    EXPECT_EQ(1u, dispatchInfos.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatchInfos.peekBuiltinOpParams(), rectParams));

    for (auto &info : dispatchInfos) {
        EXPECT_FALSE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
|
||||||
|
|
||||||
|
// Fill through the fillBufferStatelessHeapless builder where the destination allocation
// is typed bufferHostMemory: expects one dispatch info with unchanged params and every
// dispatched kernel reporting its destination as system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillSystemBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes above 4 GB are used below, so the test is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t offsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Source is a plain buffer allocation; destination is host memory.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams fillParams;
    fillParams.srcMemObj = &srcBuffer;
    fillParams.dstMemObj = &dstBuffer;
    fillParams.dstOffset = {static_cast<size_t>(offsetBytes), 0, 0};
    fillParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo dispatchInfos(fillParams);
    ASSERT_TRUE(builder.buildDispatchInfos(dispatchInfos));

    EXPECT_EQ(1u, dispatchInfos.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatchInfos.peekBuiltinOpParams(), fillParams));

    for (auto &info : dispatchInfos) {
        EXPECT_TRUE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
|
||||||
|
|
||||||
|
// Fill through the fillBufferStatelessHeapless builder where the destination allocation
// is typed buffer (source is bufferHostMemory): expects one dispatch info with unchanged
// params and no dispatched kernel reporting its destination as system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillLocalBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes above 4 GB are used below, so the test is skipped on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t offsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    srcBuffer.size = static_cast<size_t>(bufferBytes);
    dstBuffer.size = static_cast<size_t>(bufferBytes);

    // Source is host memory; destination is a plain buffer allocation.
    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams fillParams;
    fillParams.srcMemObj = &srcBuffer;
    fillParams.dstMemObj = &dstBuffer;
    fillParams.dstOffset = {static_cast<size_t>(offsetBytes), 0, 0};
    fillParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo dispatchInfos(fillParams);
    ASSERT_TRUE(builder.buildDispatchInfos(dispatchInfos));

    EXPECT_EQ(1u, dispatchInfos.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatchInfos.peekBuiltinOpParams(), fillParams));

    for (auto &info : dispatchInfos) {
        EXPECT_FALSE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -19,6 +19,8 @@
|
||||||
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"
|
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"
|
||||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||||
|
|
||||||
|
#include "test_traits_common.h"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
struct CommandDeviceFixture : public ClDeviceFixture,
|
struct CommandDeviceFixture : public ClDeviceFixture,
|
||||||
|
@ -147,4 +149,11 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
|
||||||
bool expectedKernelSystemMemory = false;
|
bool expectedKernelSystemMemory = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct HeaplessSupportedMatcher {
|
||||||
|
template <PRODUCT_FAMILY productFamily>
|
||||||
|
static constexpr bool isMatched() {
|
||||||
|
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::heaplessAllowed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -252,6 +252,47 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
|
||||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds dispatch infos with the stateless-heapless copy-buffer-rect builder
// and checks that the selected kernel supports buffers bigger than 4 GB, that
// its first argument is not pure stateful, and that the implicit
// indirect-data / scratch pointer arguments sit at offsets 0 and 8 with
// 8-byte pointer sizes.
HWTEST2_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {

    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless,
                                                                            pCmdQ->getClDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo(dc);
    builder.buildDispatchInfos(multiDispatchInfo);
    // Fatal on purpose: begin() is dereferenced right below, which would be
    // invalid if no dispatch info had been produced.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);
    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);

    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
|
||||||
|
|
||||||
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
|
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
|
||||||
enqueueCopyBufferRect2D<FamilyType>();
|
enqueueCopyBufferRect2D<FamilyType>();
|
||||||
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
||||||
|
|
|
@ -263,6 +263,45 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
|
||||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds dispatch infos with the stateless-heapless copy-buffer-to-buffer
// builder and checks that the selected kernel supports buffers bigger than
// 4 GB, that its first argument is not pure stateful, and that the implicit
// indirect-data / scratch pointer arguments sit at offsets 0 and 8 with
// 8-byte pointer sizes.
HWTEST2_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {

    if (is32bit) {
        GTEST_SKIP();
    }

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    auto dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless,
                                                                            pCmdQ->getClDevice());

    ASSERT_NE(nullptr, &builder);
    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer.get();
    dc.dstMemObj = dstBuffer.get();
    dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0};
    dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0};
    dc.size = {EnqueueCopyBufferTraits::size, 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    builder.buildDispatchInfos(multiDispatchInfo);
    // Fatal on purpose: begin() is dereferenced right below, which would be
    // invalid if no dispatch info had been produced.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    // Guard the kernel pointer before it is dereferenced.
    ASSERT_NE(nullptr, kernel);
    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);

    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
|
||||||
|
|
||||||
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
|
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
|
||||||
enqueueCopyBufferAndParse<FamilyType>();
|
enqueueCopyBufferAndParse<FamilyType>();
|
||||||
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
||||||
|
|
|
@ -344,6 +344,49 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchP
|
||||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds dispatch infos with the stateless-heapless fill-buffer builder and
// checks that the selected kernel supports buffers bigger than 4 GB, that its
// first argument is not pure stateful, and that the implicit indirect-data /
// scratch pointer arguments sit at offsets 0 and 8 with 8-byte pointer sizes.
HWTEST2_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
    // The allocation is dereferenced below; nothing needs releasing if it failed.
    ASSERT_NE(nullptr, patternAllocation);

    // All remaining fatal assertions live in this helper: a failing ASSERT_*
    // returns from the lambda only, so the pattern allocation is still freed
    // at the end of the test instead of leaking.
    auto verifyKernel = [&]() -> void {
        // Extract the kernel used
        auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless,
                                                                                pCmdQ->getClDevice());
        ASSERT_NE(nullptr, &builder);

        BuiltinOpParams dc;
        MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(),
                             patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true);
        dc.srcMemObj = &patternMemObj;
        dc.dstMemObj = buffer;
        dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0};
        dc.size = {EnqueueFillBufferTraits::size, 0, 0};

        MultiDispatchInfo multiDispatchInfo(dc);
        builder.buildDispatchInfos(multiDispatchInfo);
        // Fatal on purpose: begin() is dereferenced right below, which would
        // be invalid if no dispatch info had been produced.
        ASSERT_NE(0u, multiDispatchInfo.size());

        auto kernel = multiDispatchInfo.begin()->getKernel();
        ASSERT_NE(nullptr, kernel);

        auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
        EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
        EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

        auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
        auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

        EXPECT_EQ(0u, indirectDataPointerAddress.offset);
        EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);

        EXPECT_EQ(8u, scratchPointerAddress.offset);
        EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
    };
    verifyKernel();

    context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
}
|
||||||
|
|
||||||
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) {
|
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) {
|
||||||
auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
|
auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2019-2023 Intel Corporation
|
* Copyright (C) 2019-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -17,25 +17,28 @@ using Type = uint32_t;
|
||||||
inline constexpr Type auxTranslation{0};
|
inline constexpr Type auxTranslation{0};
|
||||||
inline constexpr Type copyBufferToBuffer{1};
|
inline constexpr Type copyBufferToBuffer{1};
|
||||||
inline constexpr Type copyBufferToBufferStateless{2};
|
inline constexpr Type copyBufferToBufferStateless{2};
|
||||||
inline constexpr Type copyBufferRect{3};
|
inline constexpr Type copyBufferToBufferStatelessHeapless{3};
|
||||||
inline constexpr Type copyBufferRectStateless{4};
|
inline constexpr Type copyBufferRect{4};
|
||||||
inline constexpr Type fillBuffer{5};
|
inline constexpr Type copyBufferRectStateless{5};
|
||||||
inline constexpr Type fillBufferStateless{6};
|
inline constexpr Type copyBufferRectStatelessHeapless{6};
|
||||||
inline constexpr Type copyBufferToImage3d{7};
|
inline constexpr Type fillBuffer{7};
|
||||||
inline constexpr Type copyBufferToImage3dStateless{8};
|
inline constexpr Type fillBufferStateless{8};
|
||||||
inline constexpr Type copyImage3dToBuffer{9};
|
inline constexpr Type fillBufferStatelessHeapless{9};
|
||||||
inline constexpr Type copyImage3dToBufferStateless{10};
|
inline constexpr Type copyBufferToImage3d{10};
|
||||||
inline constexpr Type copyImageToImage1d{11};
|
inline constexpr Type copyBufferToImage3dStateless{11};
|
||||||
inline constexpr Type copyImageToImage2d{12};
|
inline constexpr Type copyImage3dToBuffer{12};
|
||||||
inline constexpr Type copyImageToImage3d{13};
|
inline constexpr Type copyImage3dToBufferStateless{13};
|
||||||
inline constexpr Type fillImage1d{14};
|
inline constexpr Type copyImageToImage1d{14};
|
||||||
inline constexpr Type fillImage2d{15};
|
inline constexpr Type copyImageToImage2d{15};
|
||||||
inline constexpr Type fillImage3d{16};
|
inline constexpr Type copyImageToImage3d{16};
|
||||||
inline constexpr Type queryKernelTimestamps{17};
|
inline constexpr Type fillImage1d{17};
|
||||||
|
inline constexpr Type fillImage2d{18};
|
||||||
|
inline constexpr Type fillImage3d{19};
|
||||||
|
inline constexpr Type queryKernelTimestamps{20};
|
||||||
|
|
||||||
constexpr bool isStateless(Type type) {
|
constexpr bool isStateless(Type type) {
|
||||||
constexpr std::array<Type, 5> statelessBuiltins{{copyBufferToBufferStateless, copyBufferRectStateless, fillBufferStateless, copyBufferToImage3dStateless, copyImage3dToBufferStateless}};
|
constexpr std::array<Type, 8> statelessBuiltins{{copyBufferToBufferStateless, copyBufferRectStateless, fillBufferStateless, copyBufferToImage3dStateless, copyImage3dToBufferStateless, copyBufferToBufferStatelessHeapless, copyBufferRectStatelessHeapless, fillBufferStatelessHeapless}};
|
||||||
for (auto &builtinType : statelessBuiltins) {
|
for (auto builtinType : statelessBuiltins) {
|
||||||
if (type == builtinType) {
|
if (type == builtinType) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -43,7 +46,68 @@ constexpr bool isStateless(Type type) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline constexpr Type maxBaseValue{17};
|
// Returns true when `type` is one of the heapless builtin variants
// (copyBufferToBufferStatelessHeapless, copyBufferRectStatelessHeapless,
// fillBufferStatelessHeapless) and false for every other value.
constexpr bool isHeapless(Type type) {
    switch (type) {
    case copyBufferToBufferStatelessHeapless:
    case copyBufferRectStatelessHeapless:
    case fillBufferStatelessHeapless:
        return true;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
|
template <Type builtinType>
|
||||||
|
constexpr uint32_t adjustBuiltinType(const bool useStateless, const bool useHeapless) {
|
||||||
|
return builtinType;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr uint32_t adjustBuiltinType<copyBufferToBuffer>(const bool useStateless, const bool useHeapless) {
|
||||||
|
if (useHeapless) {
|
||||||
|
return copyBufferToBufferStatelessHeapless;
|
||||||
|
} else if (useStateless) {
|
||||||
|
return copyBufferToBufferStateless;
|
||||||
|
}
|
||||||
|
return copyBufferToBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr uint32_t adjustBuiltinType<copyBufferRect>(const bool useStateless, const bool useHeapless) {
|
||||||
|
if (useHeapless) {
|
||||||
|
return copyBufferRectStatelessHeapless;
|
||||||
|
} else if (useStateless) {
|
||||||
|
return copyBufferRectStateless;
|
||||||
|
}
|
||||||
|
return copyBufferRect;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr uint32_t adjustBuiltinType<fillBuffer>(const bool useStateless, const bool useHeapless) {
|
||||||
|
if (useHeapless) {
|
||||||
|
return fillBufferStatelessHeapless;
|
||||||
|
} else if (useStateless) {
|
||||||
|
return fillBufferStateless;
|
||||||
|
}
|
||||||
|
return fillBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr uint32_t adjustBuiltinType<copyBufferToImage3d>(const bool useStateless, const bool useHeapless) {
|
||||||
|
if (useStateless) {
|
||||||
|
return copyBufferToImage3dStateless;
|
||||||
|
}
|
||||||
|
return copyBufferToImage3d;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr uint32_t adjustBuiltinType<copyImage3dToBuffer>(const bool useStateless, const bool useHeapless) {
|
||||||
|
if (useStateless) {
|
||||||
|
return copyImage3dToBufferStateless;
|
||||||
|
}
|
||||||
|
return copyImage3dToBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline constexpr Type maxBaseValue{20};
|
||||||
inline constexpr Type count{64};
|
inline constexpr Type count{64};
|
||||||
} // namespace EBuiltInOps
|
} // namespace EBuiltInOps
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -35,14 +35,17 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||||
case EBuiltInOps::copyBufferToBuffer:
|
case EBuiltInOps::copyBufferToBuffer:
|
||||||
return "copy_buffer_to_buffer.builtin_kernel";
|
return "copy_buffer_to_buffer.builtin_kernel";
|
||||||
case EBuiltInOps::copyBufferToBufferStateless:
|
case EBuiltInOps::copyBufferToBufferStateless:
|
||||||
|
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
|
||||||
return "copy_buffer_to_buffer_stateless.builtin_kernel";
|
return "copy_buffer_to_buffer_stateless.builtin_kernel";
|
||||||
case EBuiltInOps::copyBufferRect:
|
case EBuiltInOps::copyBufferRect:
|
||||||
return "copy_buffer_rect.builtin_kernel";
|
return "copy_buffer_rect.builtin_kernel";
|
||||||
case EBuiltInOps::copyBufferRectStateless:
|
case EBuiltInOps::copyBufferRectStateless:
|
||||||
|
case EBuiltInOps::copyBufferRectStatelessHeapless:
|
||||||
return "copy_buffer_rect_stateless.builtin_kernel";
|
return "copy_buffer_rect_stateless.builtin_kernel";
|
||||||
case EBuiltInOps::fillBuffer:
|
case EBuiltInOps::fillBuffer:
|
||||||
return "fill_buffer.builtin_kernel";
|
return "fill_buffer.builtin_kernel";
|
||||||
case EBuiltInOps::fillBufferStateless:
|
case EBuiltInOps::fillBufferStateless:
|
||||||
|
case EBuiltInOps::fillBufferStatelessHeapless:
|
||||||
return "fill_buffer_stateless.builtin_kernel";
|
return "fill_buffer_stateless.builtin_kernel";
|
||||||
case EBuiltInOps::copyBufferToImage3d:
|
case EBuiltInOps::copyBufferToImage3d:
|
||||||
return "copy_buffer_to_image3d.builtin_kernel";
|
return "copy_buffer_to_image3d.builtin_kernel";
|
||||||
|
@ -92,14 +95,15 @@ StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, Buil
|
||||||
return deviceId.str();
|
return deviceId.str();
|
||||||
};
|
};
|
||||||
const auto deviceIp = createDeviceIdFilenameComponent(hwInfo.ipVersion);
|
const auto deviceIp = createDeviceIdFilenameComponent(hwInfo.ipVersion);
|
||||||
const auto builtinName = getBuiltinAsString(builtin);
|
const auto builtinFilename = getBuiltinAsString(builtin);
|
||||||
const auto extension = BuiltinCode::getExtension(type);
|
const auto extension = BuiltinCode::getExtension(type);
|
||||||
auto getAddressingMode = [type, &productHelper, releaseHelper, builtin]() {
|
auto getAddressingModePrefix = [type, &productHelper, releaseHelper, builtin]() {
|
||||||
if (type == BuiltinCode::ECodeType::binary) {
|
if (type == BuiltinCode::ECodeType::binary) {
|
||||||
const bool requiresStatelessAddressing = (false == productHelper.isStatefulAddressingModeSupported());
|
const bool requiresStatelessAddressing = (false == productHelper.isStatefulAddressingModeSupported());
|
||||||
const bool builtInUsesStatelessAddressing = EBuiltInOps::isStateless(builtin);
|
const bool builtInUsesStatelessAddressing = EBuiltInOps::isStateless(builtin);
|
||||||
|
const bool heaplessEnabled = EBuiltInOps::isHeapless(builtin);
|
||||||
if (builtInUsesStatelessAddressing || requiresStatelessAddressing) {
|
if (builtInUsesStatelessAddressing || requiresStatelessAddressing) {
|
||||||
return "stateless_";
|
return heaplessEnabled ? "stateless_heapless_" : "stateless_";
|
||||||
} else if (ApiSpecificConfig::getBindlessMode(releaseHelper)) {
|
} else if (ApiSpecificConfig::getBindlessMode(releaseHelper)) {
|
||||||
return "bindless_";
|
return "bindless_";
|
||||||
} else {
|
} else {
|
||||||
|
@ -108,21 +112,21 @@ StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, Buil
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
};
|
};
|
||||||
const auto addressingMode = getAddressingMode();
|
const auto addressingModePrefix = getAddressingModePrefix();
|
||||||
|
|
||||||
auto createBuiltinResourceName = [](ConstStringRef deviceIpPath, ConstStringRef addressingMode, ConstStringRef builtinName, ConstStringRef extension) {
|
auto createBuiltinResourceName = [](ConstStringRef deviceIpPath, ConstStringRef addressingModePrefix, ConstStringRef builtinFilename, ConstStringRef extension) {
|
||||||
std::ostringstream outResourceName;
|
std::ostringstream outResourceName;
|
||||||
if (false == deviceIpPath.empty()) {
|
if (false == deviceIpPath.empty()) {
|
||||||
outResourceName << deviceIpPath.str() << "_";
|
outResourceName << deviceIpPath.str() << "_";
|
||||||
}
|
}
|
||||||
outResourceName << addressingMode.str() << builtinName.str() << extension.str();
|
outResourceName << addressingModePrefix.str() << builtinFilename.str() << extension.str();
|
||||||
return outResourceName.str();
|
return outResourceName.str();
|
||||||
};
|
};
|
||||||
StackVec<std::string, 3> resourcesToLookup = {};
|
StackVec<std::string, 3> resourcesToLookup = {};
|
||||||
resourcesToLookup.push_back(createBuiltinResourceName(deviceIp, addressingMode, builtinName, extension));
|
resourcesToLookup.push_back(createBuiltinResourceName(deviceIp, addressingModePrefix, builtinFilename, extension));
|
||||||
|
|
||||||
if (BuiltinCode::ECodeType::binary != type) {
|
if (BuiltinCode::ECodeType::binary != type) {
|
||||||
resourcesToLookup.push_back(createBuiltinResourceName("", addressingMode, builtinName, extension));
|
resourcesToLookup.push_back(createBuiltinResourceName("", addressingModePrefix, builtinFilename, extension));
|
||||||
}
|
}
|
||||||
return resourcesToLookup;
|
return resourcesToLookup;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -30,6 +30,15 @@ static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
|
||||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// Registers the source-form resource for copyBufferToBufferStatelessHeapless.
// The resource name is derived from the heapless builtin id, while the embedded
// text comes from copy_buffer_to_buffer_stateless.builtin_kernel.
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessHeaplessSrc(
    createBuiltinResourceName(
        EBuiltInOps::copyBufferToBufferStatelessHeapless,
        BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
        .c_str(),
    std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
        ));
|
||||||
|
|
||||||
static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
||||||
createBuiltinResourceName(
|
createBuiltinResourceName(
|
||||||
EBuiltInOps::copyBufferRect,
|
EBuiltInOps::copyBufferRect,
|
||||||
|
@ -48,6 +57,15 @@ static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
|
||||||
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// Registers the source-form resource for copyBufferRectStatelessHeapless.
// The resource name is derived from the heapless builtin id, while the embedded
// text comes from copy_buffer_rect_stateless.builtin_kernel.
static RegisterEmbeddedResource registerCopyBufferRectStatelessHeaplessSrc(
    createBuiltinResourceName(
        EBuiltInOps::copyBufferRectStatelessHeapless,
        BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
        .c_str(),
    std::string(
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
        ));
|
||||||
|
|
||||||
static RegisterEmbeddedResource registerFillBufferSrc(
|
static RegisterEmbeddedResource registerFillBufferSrc(
|
||||||
createBuiltinResourceName(
|
createBuiltinResourceName(
|
||||||
EBuiltInOps::fillBuffer,
|
EBuiltInOps::fillBuffer,
|
||||||
|
@ -66,6 +84,15 @@ static RegisterEmbeddedResource registerFillBufferStatelessSrc(
|
||||||
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// Registers the source-form resource for fillBufferStatelessHeapless.
// The resource name is derived from the heapless builtin id, while the embedded
// text comes from fill_buffer_stateless.builtin_kernel.
static RegisterEmbeddedResource registerFillBufferStatelessHeaplessSrc(
    createBuiltinResourceName(
        EBuiltInOps::fillBufferStatelessHeapless,
        BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
        .c_str(),
    std::string(
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
        ));
|
||||||
|
|
||||||
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||||
createBuiltinResourceName(
|
createBuiltinResourceName(
|
||||||
EBuiltInOps::copyBufferToImage3d,
|
EBuiltInOps::copyBufferToImage3d,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2022 Intel Corporation
|
* Copyright (C) 2021-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -21,4 +21,5 @@ struct TestTraits<IGFX_GEN11_CORE> {
|
||||||
static constexpr bool imagesSupported = true;
|
static constexpr bool imagesSupported = true;
|
||||||
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
|
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
|
||||||
static constexpr bool programComputeModeCommandProgramsNonCoherent = true;
|
static constexpr bool programComputeModeCommandProgramsNonCoherent = true;
|
||||||
|
static constexpr bool heaplessAllowed = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2023 Intel Corporation
|
* Copyright (C) 2021-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN12LP_CORE> {
|
||||||
static constexpr bool forceGpuNonCoherent = true;
|
static constexpr bool forceGpuNonCoherent = true;
|
||||||
static constexpr bool imagesSupported = true;
|
static constexpr bool imagesSupported = true;
|
||||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||||
|
static constexpr bool heaplessAllowed = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2022 Intel Corporation
|
* Copyright (C) 2021-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN8_CORE> {
|
||||||
static constexpr bool imagesSupported = true;
|
static constexpr bool imagesSupported = true;
|
||||||
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = false;
|
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = false;
|
||||||
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
|
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
|
||||||
|
static constexpr bool heaplessAllowed = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2022 Intel Corporation
|
* Copyright (C) 2021-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN9_CORE> {
|
||||||
static constexpr bool imagesSupported = true;
|
static constexpr bool imagesSupported = true;
|
||||||
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
|
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
|
||||||
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
|
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
|
||||||
|
static constexpr bool heaplessAllowed = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2023 Intel Corporation
|
* Copyright (C) 2021-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -30,4 +30,5 @@ struct TestTraits<IGFX_XE_HPC_CORE> {
|
||||||
static constexpr bool imagesSupported = false;
|
static constexpr bool imagesSupported = false;
|
||||||
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = true;
|
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = true;
|
||||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||||
|
static constexpr bool heaplessAllowed = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2023 Intel Corporation
|
* Copyright (C) 2021-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -30,6 +30,7 @@ struct TestTraits<IGFX_XE_HPG_CORE> {
|
||||||
static constexpr bool imagesSupported = true;
|
static constexpr bool imagesSupported = true;
|
||||||
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = false;
|
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = false;
|
||||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||||
|
static constexpr bool heaplessAllowed = false;
|
||||||
};
|
};
|
||||||
#ifdef TESTS_MTL
|
#ifdef TESTS_MTL
|
||||||
#include "shared/test/common/xe_hpg_core/mtl/test_traits_mtl.h"
|
#include "shared/test/common/xe_hpg_core/mtl/test_traits_mtl.h"
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "shared/source/built_ins/built_ins.h"
|
#include "shared/source/built_ins/built_ins.h"
|
||||||
|
#include "shared/source/helpers/compiler_product_helper.h"
|
||||||
#include "shared/source/helpers/gfx_core_helper.h"
|
#include "shared/source/helpers/gfx_core_helper.h"
|
||||||
#include "shared/test/common/fixtures/device_fixture.h"
|
#include "shared/test/common/fixtures/device_fixture.h"
|
||||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
|
@ -134,3 +135,40 @@ HWTEST_F(BuiltInSharedTest, GivenValidBuiltinTypeAndAnyTypeWhenGettingBuiltinCod
|
||||||
EXPECT_EQ(BuiltinCode::ECodeType::binary, builtinCode.type);
|
EXPECT_EQ(BuiltinCode::ECodeType::binary, builtinCode.type);
|
||||||
EXPECT_NE(0U, builtinCode.resource.size());
|
EXPECT_NE(0U, builtinCode.resource.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For each heapless builtin, requests the binary resource names and checks
// that exactly one name is returned, of the form
// "<arch>_<release>_<revision>_stateless_heapless_<builtin>.builtin_kernel.bin".
HWTEST2_F(BuiltInSharedTest, GivenHeaplessModeEnabledWhenGetBuiltinResourceNamesIsCalledThenResourceNameIsCorrect, MatchAny) {

    // Compiler product helper that always reports heapless mode as enabled.
    class MockCompilerProductHelper : public CompilerProductHelperHw<productFamily> {
      public:
        bool isHeaplessModeEnabled() const override {
            return true;
        }
    };

    pDevice->executionEnvironment->rootDeviceEnvironments[0]->compilerProductHelper.reset(new MockCompilerProductHelper());

    auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();

    std::string deviceIpString = std::to_string(hwInfo.ipVersion.architecture) + "_" + std::to_string(hwInfo.ipVersion.release) + "_" + std::to_string(hwInfo.ipVersion.revision);

    struct TestParam {
        std::string builtInTypeAsString;
        EBuiltInOps::Type builtinType;
    };

    TestParam params[] = {
        {"copy_buffer_to_buffer_stateless", EBuiltInOps::copyBufferToBufferStatelessHeapless},
        {"copy_buffer_rect_stateless", EBuiltInOps::copyBufferRectStatelessHeapless},
        {"fill_buffer_stateless", EBuiltInOps::fillBufferStatelessHeapless}};

    for (auto &[builtInTypeAsString, builtInType] : params) {

        auto resourceNames = getBuiltinResourceNames(builtInType, BuiltinCode::ECodeType::binary, *pDevice);

        std::string expectedResourceNameGeneric = "stateless_heapless_" + builtInTypeAsString + ".builtin_kernel.bin";
        std::string expectedResourceNameForRelease = deviceIpString + "_" + expectedResourceNameGeneric;

        // Fatal on purpose: element 0 is read right below, which would be
        // invalid if no resource name had been returned.
        ASSERT_EQ(1u, resourceNames.size());
        EXPECT_EQ(resourceNames[0], expectedResourceNameForRelease);
    }
}
|
||||||
|
|
Loading…
Reference in New Issue