mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 07:00:17 +08:00
performance: introduce staging reads from image
Related-To: NEO-12968 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f2725f217e
commit
6c4eb322b1
@@ -2965,7 +2965,7 @@ cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue,
|
||||
TRACING_EXIT(ClEnqueueWriteImage, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
if (pCommandQueue->isValidForStagingWriteImage(pImage, ptr, numEventsInWaitList > 0)) {
|
||||
if (pCommandQueue->isValidForStagingTransferImage(pImage, ptr, numEventsInWaitList > 0)) {
|
||||
retVal = pCommandQueue->enqueueStagingWriteImage(pImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, event);
|
||||
} else {
|
||||
retVal = pCommandQueue->enqueueWriteImage(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# Copyright (C) 2018-2024 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -9,6 +9,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_local_work_size.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_local_work_size.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_staging.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
#include "shared/source/utilities/api_intercept.h"
|
||||
#include "shared/source/utilities/staging_buffer_manager.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
@@ -1557,146 +1556,4 @@ void CommandQueue::unregisterGpgpuAndBcsCsrClients() {
|
||||
}
|
||||
}
|
||||
|
||||
// Copies `size` bytes from srcPtr to dstPtr through staging buffers.
// Every chunk is first memcpy'd into the staging buffer passed to the callback and then
// submitted with a non-blocking enqueueSVMMemcpy on the CSR selected for a host-to-local
// SVM memcpy. Returns the non-success code reported by performCopy, otherwise the result
// of postStagingTransferSync.
cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event) {
    CsrSelectionArgs selectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
    selectionArgs.direction = TransferDirection::hostToLocal;
    auto csr = &selectCsrForBuiltinOperation(selectionArgs);

    // Local event that gathers queue/submit/start timestamps for the operation as a whole.
    Event profilingEvent{this, CL_COMMAND_SVM_MEMCPY, CompletionStamp::notReady, CompletionStamp::notReady};
    if (isProfilingEnabled()) {
        profilingEvent.setQueueTimeStamp();
    }

    // Overwritten on every chunk callback; when only one chunk was copied no OOQ barrier is needed.
    bool isSingleTransfer = false;
    ChunkCopyFunction chunkCopy = [&](void *stagingBuffer, size_t chunkSize, void *chunkDst, const void *chunkSrc) -> int32_t {
        const bool isFirstTransfer = (chunkDst == dstPtr);
        const bool isLastTransfer = (ptrOffset(chunkDst, chunkSize) == ptrOffset(dstPtr, size));
        isSingleTransfer = isFirstTransfer && isLastTransfer;

        if (isFirstTransfer && isProfilingEnabled()) {
            profilingEvent.setSubmitTimeStamp();
        }
        memcpy(stagingBuffer, chunkSrc, chunkSize);

        cl_event *outEvent = nullptr;
        if (isSingleTransfer) {
            // A lone chunk reports straight into the caller's event.
            outEvent = event;
        } else {
            if (isFirstTransfer && isProfilingEnabled()) {
                profilingEvent.setStartTimeStamp();
            }
            // In-order queue: the last chunk carries the caller's event.
            if (isLastTransfer && !isOOQEnabled()) {
                outEvent = event;
            }
        }
        return enqueueSVMMemcpy(false, chunkDst, stagingBuffer, chunkSize, 0, nullptr, outEvent, csr);
    };

    auto stagingBufferManager = context->getStagingBufferManager();
    auto copyStatus = stagingBufferManager->performCopy(dstPtr, srcPtr, size, chunkCopy, csr);
    if (copyStatus != CL_SUCCESS) {
        return copyStatus;
    }
    return postStagingTransferSync(event, profilingEvent, isSingleTransfer, blockingCopy);
}
|
||||
|
||||
// Writes host memory at `ptr` into dstImage through staging buffers.
// Each chunk is memcpy'd into the staging buffer passed to the callback and submitted with a
// non-blocking enqueueWriteImageImpl on the CSR selected for CL_COMMAND_WRITE_IMAGE.
// Returns the non-success code reported by performImageWrite, otherwise the result of
// postStagingTransferSync.
cl_int CommandQueue::enqueueStagingWriteImage(Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
                                              size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event) {
    constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE;
    CsrSelectionArgs selectionArgs{cmdType, nullptr, dstImage, getDevice().getRootDeviceIndex(), globalRegion, nullptr, globalOrigin};
    auto &csr = selectCsrForBuiltinOperation(selectionArgs);

    // Local event that gathers queue/submit/start timestamps for the operation as a whole.
    Event profilingEvent{this, CL_COMMAND_WRITE_IMAGE, CompletionStamp::notReady, CompletionStamp::notReady};
    if (isProfilingEnabled()) {
        profilingEvent.setQueueTimeStamp();
    }

    // Overwritten on every chunk callback; when only one chunk was written no OOQ barrier is needed.
    bool isSingleTransfer = false;
    ChunkWriteImageFunc chunkWrite = [&](void *stagingBuffer, size_t bufferSize, const void *chunkPtr, const size_t *origin, const size_t *region) -> int32_t {
        // First/last chunk are recognised by comparing the chunk's extent in dimension 1 with the global one.
        const bool isFirstTransfer = (globalOrigin[1] == origin[1]);
        const bool isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
        isSingleTransfer = isFirstTransfer && isLastTransfer;

        if (isFirstTransfer && isProfilingEnabled()) {
            profilingEvent.setSubmitTimeStamp();
        }
        memcpy(stagingBuffer, chunkPtr, bufferSize);

        cl_event *outEvent = nullptr;
        if (isSingleTransfer) {
            // A lone chunk reports straight into the caller's event.
            outEvent = event;
        } else {
            if (isFirstTransfer && isProfilingEnabled()) {
                profilingEvent.setStartTimeStamp();
            }
            // In-order queue: the last chunk carries the caller's event.
            if (isLastTransfer && !isOOQEnabled()) {
                outEvent = event;
            }
        }
        return enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
    };

    // A zero row pitch means "tightly packed": derive it from the region width and element size.
    auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
    auto dstRowPitch = (inputRowPitch == 0) ? globalRegion[0] * bytesPerPixel : inputRowPitch;

    auto stagingBufferManager = context->getStagingBufferManager();
    auto writeStatus = stagingBufferManager->performImageWrite(ptr, globalOrigin, globalRegion, dstRowPitch, chunkWrite, &csr);
    if (writeStatus != CL_SUCCESS) {
        return writeStatus;
    }
    return postStagingTransferSync(event, profilingEvent, isSingleTransfer, blockingCopy);
}
|
||||
|
||||
// Finalizes a staged transfer after all chunks were submitted.
// When the caller asked for an event: on an out-of-order queue with more than one chunk a
// barrier is enqueued to produce that event, the timestamps collected in profilingEvent are
// copied over (profiling queues only) and the command type is taken from profilingEvent.
// For blocking calls the queue is finished and the result of finish() is returned.
cl_int CommandQueue::postStagingTransferSync(cl_event *event, const Event &profilingEvent, bool isSingleTransfer, bool isBlocking) {
    const bool multipleTransfers = !isSingleTransfer;
    cl_int status = CL_SUCCESS;

    if (event != nullptr) {
        if (multipleTransfers && isOOQEnabled()) {
            status = enqueueBarrierWithWaitList(0, nullptr, event);
        }

        auto userEvent = castToObjectOrAbort<Event>(*event);
        if (isProfilingEnabled()) {
            userEvent->copyTimestamps(profilingEvent, multipleTransfers);
            userEvent->setCPUProfilingPath(false);
        }
        userEvent->setCmdType(profilingEvent.getCommandType());
    }

    if (isBlocking) {
        status = finish();
    }
    return status;
}
|
||||
|
||||
// Decides whether a host-to-device copy should go through staging buffers.
// Returns false when srcPtr already belongs to an existing map allocation; otherwise defers
// to the staging buffer manager, passing the OS context id of the CSR selected for a
// host-to-local SVM memcpy. Aborts if the context has no staging buffer manager.
bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies) {
    GraphicsAllocation *mappedAllocation = nullptr;
    context->tryGetExistingMapAllocation(srcPtr, size, mappedAllocation);
    const bool sourceIsMapped = (mappedAllocation != nullptr);
    if (sourceIsMapped) {
        // Direct transfer from mapped allocation is faster than staging buffer
        return false;
    }

    CsrSelectionArgs selectionArgs{CL_COMMAND_SVM_MEMCPY, nullptr};
    selectionArgs.direction = TransferDirection::hostToLocal;
    auto &selectedCsr = selectCsrForBuiltinOperation(selectionArgs);
    auto osContextId = selectedCsr.getOsContext().getContextId();

    auto manager = context->getStagingBufferManager();
    UNRECOVERABLE_IF(manager == nullptr);
    return manager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
}
|
||||
|
||||
// Decides whether an image write may use staging buffers.
// Returns false when the context has no staging buffer manager or the image is neither 1D nor
// 2D; otherwise returns the staging buffer manager's verdict for this device and pointer.
bool CommandQueue::isValidForStagingWriteImage(Image *image, const void *ptr, bool hasDependencies) {
    auto manager = context->getStagingBufferManager();
    if (!manager) {
        return false;
    }

    const auto imageType = image->getImageDesc().image_type;
    const bool isSupportedType = (imageType == CL_MEM_OBJECT_IMAGE1D) || (imageType == CL_MEM_OBJECT_IMAGE2D);
    if (!isSupportedType) {
        return false;
    }
    return manager->isValidForStagingWriteImage(this->getDevice(), ptr, hasDependencies);
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -45,6 +45,7 @@ struct BuiltinOpParams;
|
||||
struct CsrSelectionArgs;
|
||||
struct MultiDispatchInfo;
|
||||
struct TimestampPacketDependencies;
|
||||
struct StagingTransferStatus;
|
||||
|
||||
enum class QueuePriority {
|
||||
low,
|
||||
@@ -147,6 +148,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
|
||||
|
||||
virtual cl_int enqueueReadImageImpl(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
|
||||
size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
|
||||
|
||||
virtual cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb,
|
||||
const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) = 0;
|
||||
@@ -396,8 +401,11 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
|
||||
cl_int enqueueStagingWriteImage(Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
|
||||
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event);
|
||||
cl_int enqueueStagingReadImage(Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
|
||||
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event);
|
||||
|
||||
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
|
||||
bool isValidForStagingWriteImage(Image *image, const void *ptr, bool hasDependencies);
|
||||
bool isValidForStagingTransferImage(Image *image, const void *ptr, bool hasDependencies);
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
@@ -441,7 +449,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
void unregisterGpgpuAndBcsCsrClients();
|
||||
|
||||
cl_int postStagingTransferSync(cl_event *event, const Event &profilingEvent, bool isSingleTransfer, bool isBlocking);
|
||||
cl_int postStagingTransferSync(const StagingTransferStatus &status, cl_event *event, const cl_event profilingEvent, bool isSingleTransfer, bool isBlocking, cl_command_type commandType);
|
||||
cl_event *assignEventForStaging(cl_event *userEvent, cl_event *profilingEvent, bool isFirstTransfer, bool isLastTransfer) const;
|
||||
|
||||
Context *context = nullptr;
|
||||
ClDevice *device = nullptr;
|
||||
|
||||
@@ -269,6 +269,18 @@ class CommandQueueHw : public CommandQueue {
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueReadImageImpl(Image *srcImage,
|
||||
cl_bool blockingRead,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t rowPitch,
|
||||
size_t slicePitch,
|
||||
void *ptr,
|
||||
GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event, CommandStreamReceiver &csr) override;
|
||||
|
||||
cl_int enqueueWriteBuffer(Buffer *buffer,
|
||||
cl_bool blockingWrite,
|
||||
size_t offset,
|
||||
|
||||
169
opencl/source/command_queue/command_queue_staging.cpp
Normal file
169
opencl/source/command_queue/command_queue_staging.cpp
Normal file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/utilities/staging_buffer_manager.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/command_queue/csr_selection_args.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/helpers/base_object.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Copies `size` bytes from srcPtr to dstPtr in chunks supplied by the staging buffer manager.
// Each chunk is submitted with a non-blocking enqueueSVMMemcpy on the CSR selected for a
// host-to-local SVM memcpy; the output event of every chunk is chosen by
// assignEventForStaging. The status returned by performCopy is translated and the caller's
// event is finalized in postStagingTransferSync, whose result is returned.
cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event) {
    CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
    csrSelectionArgs.direction = TransferDirection::hostToLocal;
    auto csr = &selectCsrForBuiltinOperation(csrSelectionArgs);

    // Must start as nullptr: the handle is copied by value into postStagingTransferSync even on
    // paths where no chunk ever wrote it (no user event, profiling disabled, single chunk), and
    // reading an indeterminate pointer value is undefined behavior.
    cl_event profilingEvent = nullptr;

    // Overwritten on every chunk callback; true after a copy that consisted of exactly one chunk.
    bool isSingleTransfer = false;
    ChunkCopyFunction chunkCopy = [&](void *chunkSrc, void *chunkDst, size_t chunkSize) -> int32_t {
        auto isFirstTransfer = (chunkDst == dstPtr);
        auto isLastTransfer = ptrOffset(chunkDst, chunkSize) == ptrOffset(dstPtr, size);
        isSingleTransfer = isFirstTransfer && isLastTransfer;
        cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);

        return this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr);
    };

    auto stagingBufferManager = this->context->getStagingBufferManager();
    auto ret = stagingBufferManager->performCopy(dstPtr, srcPtr, size, chunkCopy, csr);
    return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, CL_COMMAND_SVM_MEMCPY);
}
|
||||
|
||||
// Writes host memory at `ptr` into dstImage in chunks supplied by the staging buffer manager.
// Each chunk is submitted with a non-blocking enqueueWriteImageImpl on the CSR selected for
// CL_COMMAND_WRITE_IMAGE; the output event of every chunk is chosen by assignEventForStaging.
// The status returned by performImageTransfer is translated and the caller's event is
// finalized in postStagingTransferSync, whose result is returned.
cl_int CommandQueue::enqueueStagingWriteImage(Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
                                              size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event) {
    CsrSelectionArgs csrSelectionArgs{CL_COMMAND_WRITE_IMAGE, nullptr, dstImage, this->getDevice().getRootDeviceIndex(), globalRegion, nullptr, globalOrigin};
    auto &csr = selectCsrForBuiltinOperation(csrSelectionArgs);

    // Must start as nullptr: the handle is copied by value into postStagingTransferSync even on
    // paths where no chunk ever wrote it (no user event, profiling disabled, single chunk), and
    // reading an indeterminate pointer value is undefined behavior.
    cl_event profilingEvent = nullptr;

    // Overwritten on every chunk callback; true after a write that consisted of exactly one chunk.
    bool isSingleTransfer = false;
    ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
        // First/last chunk are recognised by comparing the chunk's extent in dimension 1 with the global one.
        auto isFirstTransfer = (globalOrigin[1] == origin[1]);
        auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
        isSingleTransfer = isFirstTransfer && isLastTransfer;
        cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);

        return this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
    };

    // A zero row pitch means "tightly packed": derive it from the region width and element size.
    auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
    auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;

    auto stagingBufferManager = this->context->getStagingBufferManager();
    // The trailing `false` is `true` in enqueueStagingReadImage, so it presumably selects the transfer direction (write here).
    auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, chunkWrite, &csr, false);
    return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, CL_COMMAND_WRITE_IMAGE);
}
|
||||
|
||||
// Reads srcImage into host memory at `ptr` in chunks supplied by the staging buffer manager.
// Each chunk is submitted with a non-blocking enqueueReadImageImpl that targets the staging
// buffer, on the CSR selected for CL_COMMAND_READ_IMAGE; the output event of every chunk is
// chosen by assignEventForStaging. The status returned by performImageTransfer is translated
// and the caller's event is finalized in postStagingTransferSync, whose result is returned.
cl_int CommandQueue::enqueueStagingReadImage(Image *srcImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
                                             size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event) {
    // NOTE(review): the non-staged enqueueReadImage path builds its CsrSelectionArgs with
    // (region, origin, nullptr), whereas the origin is passed in the last slot here - confirm
    // this ordering is intended for a read.
    CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_IMAGE, srcImage, nullptr, this->getDevice().getRootDeviceIndex(), globalRegion, nullptr, globalOrigin};
    auto &csr = selectCsrForBuiltinOperation(csrSelectionArgs);

    // Must start as nullptr: the handle is copied by value into postStagingTransferSync even on
    // paths where no chunk ever wrote it (no user event, profiling disabled, single chunk), and
    // reading an indeterminate pointer value is undefined behavior.
    cl_event profilingEvent = nullptr;

    // Overwritten on every chunk callback; true after a read that consisted of exactly one chunk.
    bool isSingleTransfer = false;
    ChunkTransferImageFunc chunkRead = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
        // First/last chunk are recognised by comparing the chunk's extent in dimension 1 with the global one.
        auto isFirstTransfer = (globalOrigin[1] == origin[1]);
        auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
        isSingleTransfer = isFirstTransfer && isLastTransfer;
        cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);

        return this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
    };

    // A zero row pitch means "tightly packed": derive it from the region width and element size.
    auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
    auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;

    auto stagingBufferManager = this->context->getStagingBufferManager();
    // The trailing `true` is `false` in enqueueStagingWriteImage, so it presumably selects the transfer direction (read here).
    auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, chunkRead, &csr, true);
    return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, CL_COMMAND_READ_IMAGE);
}
|
||||
|
||||
/*
|
||||
* If there's single transfer, use user event.
|
||||
* Otherwise, first transfer uses profiling event to obtain queue/submit/start timestamps.
|
||||
* Last transfer uses user event in case of IOQ.
|
||||
* For OOQ user event will be passed to barrier to gather all submitted transfers.
|
||||
*/
|
||||
cl_event *CommandQueue::assignEventForStaging(cl_event *userEvent, cl_event *profilingEvent, bool isFirstTransfer, bool isLastTransfer) const {
|
||||
cl_event *outEvent = nullptr;
|
||||
if (userEvent != nullptr) {
|
||||
if (isFirstTransfer && isProfilingEnabled()) {
|
||||
outEvent = profilingEvent;
|
||||
} else if (isLastTransfer && !this->isOOQEnabled()) {
|
||||
outEvent = userEvent;
|
||||
}
|
||||
}
|
||||
if (isFirstTransfer && isLastTransfer) {
|
||||
outEvent = userEvent;
|
||||
}
|
||||
return outEvent;
|
||||
}
|
||||
|
||||
// Finalizes a staged transfer after the staging buffer manager has processed all chunks.
//
// Returns CL_OUT_OF_RESOURCES when the manager reported a GPU hang and the chunk's own
// error code when a chunk submission failed. Otherwise, when the caller asked for an event:
//  - on an out-of-order queue with more than one chunk a barrier is enqueued to produce
//    the event; a barrier failure is returned to the caller,
//  - with profiling enabled and more than one chunk the timestamps of profilingEvent are
//    moved into the caller's event and profilingEvent is released,
//  - the event's command type is set to commandType.
// For blocking calls the queue is finished and the result of finish() is returned.
cl_int CommandQueue::postStagingTransferSync(const StagingTransferStatus &status, cl_event *event, const cl_event profilingEvent, bool isSingleTransfer, bool isBlocking, cl_command_type commandType) {
    if (status.waitStatus == WaitStatus::gpuHang) {
        return CL_OUT_OF_RESOURCES;
    } else if (status.chunkCopyStatus != CL_SUCCESS) {
        return status.chunkCopyStatus;
    }

    cl_int ret = CL_SUCCESS;
    if (event != nullptr) {
        // profilingEvent is only meaningful in this case: the first of several chunks reported into it.
        const bool ownsProfilingEvent = !isSingleTransfer && isProfilingEnabled();

        if (!isSingleTransfer && this->isOOQEnabled()) {
            ret = this->enqueueBarrierWithWaitList(0, nullptr, event);
            if (ret != CL_SUCCESS) {
                // The barrier was expected to produce *event; do not dereference it after a
                // failure and do not let a later finish() mask the error.
                if (ownsProfilingEvent) {
                    castToObjectOrAbort<Event>(profilingEvent)->release();
                }
                return ret;
            }
        }

        auto pEvent = castToObjectOrAbort<Event>(*event);
        if (ownsProfilingEvent) {
            auto pProfilingEvent = castToObjectOrAbort<Event>(profilingEvent);
            pEvent->copyTimestamps(*pProfilingEvent);
            pProfilingEvent->release();
        }
        pEvent->setCmdType(commandType);
    }

    if (isBlocking) {
        ret = this->finish();
    }
    return ret;
}
|
||||
|
||||
// Decides whether a host-to-device copy should go through staging buffers.
// Returns false when srcPtr already belongs to an existing map allocation; otherwise defers
// to the staging buffer manager, passing the OS context id of the CSR selected for a
// host-to-local SVM memcpy. Aborts if the context has no staging buffer manager.
bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies) {
    GraphicsAllocation *mappedAllocation = nullptr;
    context->tryGetExistingMapAllocation(srcPtr, size, mappedAllocation);
    const bool sourceIsMapped = (mappedAllocation != nullptr);
    if (sourceIsMapped) {
        // Direct transfer from mapped allocation is faster than staging buffer
        return false;
    }

    CsrSelectionArgs selectionArgs{CL_COMMAND_SVM_MEMCPY, nullptr};
    selectionArgs.direction = TransferDirection::hostToLocal;
    auto &selectedCsr = selectCsrForBuiltinOperation(selectionArgs);
    auto osContextId = selectedCsr.getOsContext().getContextId();

    auto manager = context->getStagingBufferManager();
    UNRECOVERABLE_IF(manager == nullptr);
    return manager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
}
|
||||
|
||||
// Decides whether an image read or write may use staging buffers.
// Returns false when the context has no staging buffer manager or the image is neither 1D nor
// 2D; otherwise returns the staging buffer manager's verdict for this device and pointer.
bool CommandQueue::isValidForStagingTransferImage(Image *image, const void *ptr, bool hasDependencies) {
    auto manager = context->getStagingBufferManager();
    if (!manager) {
        return false;
    }

    const auto imageType = image->getImageDesc().image_type;
    const bool isSupportedType = (imageType == CL_MEM_OBJECT_IMAGE1D) || (imageType == CL_MEM_OBJECT_IMAGE2D);
    if (!isSupportedType) {
        return false;
    }
    return manager->isValidForStagingTransferImage(this->getDevice(), ptr, hasDependencies);
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -39,6 +39,25 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, {}, device->getRootDeviceIndex(), region, origin, nullptr};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
return enqueueReadImageImpl(srcImage, blockingRead, origin, region, inputRowPitch, inputSlicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event, csr);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueReadImageImpl(
|
||||
Image *srcImage,
|
||||
cl_bool blockingRead,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t inputRowPitch,
|
||||
size_t inputSlicePitch,
|
||||
void *ptr,
|
||||
GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event, CommandStreamReceiver &csr) {
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, {}, device->getRootDeviceIndex(), region, origin, nullptr};
|
||||
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
|
||||
@@ -397,10 +397,6 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
||||
auto &device = this->cmdQueue->getDevice();
|
||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||
auto resolution = device.getDeviceInfo().profilingTimerResolution;
|
||||
if (isAdjustmentNeeded) {
|
||||
// Adjust startTS since we calculate profiling based on other event timestamps
|
||||
contextStartTS = startTimeStamp.gpuTimeStamp;
|
||||
}
|
||||
|
||||
// Calculate startTimestamp only if it was not already set on CPU
|
||||
if (startTimeStamp.cpuTimeInNs == 0) {
|
||||
@@ -1046,4 +1042,20 @@ TaskCountType Event::peekTaskLevel() const {
|
||||
return taskLevel;
|
||||
}
|
||||
|
||||
// Takes over the profiling data of srcEvent.
// With a timestamp packet container, srcEvent's timestamp packet nodes are added to this
// event. Otherwise this event's own timestamp node (if any) is returned and srcEvent's node
// is moved over, leaving srcEvent without one. The queue/submit/start/end timestamps are
// copied in both cases.
void Event::copyTimestamps(Event &srcEvent) {
    if (!timestampPacketContainer) {
        if (timeStampNode != nullptr) {
            timeStampNode->returnTag();
        }
        // Ownership of the node moves to this event.
        timeStampNode = srcEvent.timeStampNode;
        srcEvent.timeStampNode = nullptr;
    } else {
        addTimestampPacketNodes(*srcEvent.getTimestampPacketNodes());
    }

    queueTimeStamp = srcEvent.queueTimeStamp;
    submitTimeStamp = srcEvent.submitTimeStamp;
    startTimeStamp = srcEvent.startTimeStamp;
    endTimeStamp = srcEvent.endTimeStamp;
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -312,13 +312,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
|
||||
static void getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS);
|
||||
|
||||
void copyTimestamps(const Event &srcEvent, bool isAdjustmentNeeded) {
|
||||
this->queueTimeStamp = srcEvent.queueTimeStamp;
|
||||
this->submitTimeStamp = srcEvent.submitTimeStamp;
|
||||
this->startTimeStamp = srcEvent.startTimeStamp;
|
||||
this->endTimeStamp = srcEvent.endTimeStamp;
|
||||
this->isAdjustmentNeeded = isAdjustmentNeeded;
|
||||
}
|
||||
void copyTimestamps(Event &srcEvent);
|
||||
|
||||
protected:
|
||||
Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType,
|
||||
@@ -391,7 +385,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
bool profilingEnabled = false;
|
||||
bool profilingCpuPath = false;
|
||||
bool dataCalculated = false;
|
||||
bool isAdjustmentNeeded = false;
|
||||
|
||||
ProfilingInfo queueTimeStamp{};
|
||||
ProfilingInfo submitTimeStamp{};
|
||||
|
||||
@@ -1097,4 +1097,136 @@ HWTEST_F(EnqueueReadImageTest, whenEnqueueReadImageWithUsmPtrThenDontImportAlloc
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||
svmManager->freeSVMAlloc(usmPtr);
|
||||
}
|
||||
|
||||
struct ReadImageStagingBufferTest : public EnqueueReadImageTest {
|
||||
void SetUp() override {
|
||||
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
|
||||
EnqueueReadImageTest::SetUp();
|
||||
ptr = new unsigned char[readSize];
|
||||
device.reset(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (defaultHwInfo->capabilityTable.ftrSvm == false) {
|
||||
return;
|
||||
}
|
||||
delete[] ptr;
|
||||
EnqueueReadImageTest::TearDown();
|
||||
}
|
||||
|
||||
static constexpr size_t stagingBufferSize = MemoryConstants::megaByte * 2;
|
||||
static constexpr size_t readSize = stagingBufferSize * 4;
|
||||
unsigned char *ptr;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {4, 8, 1};
|
||||
std::unique_ptr<ClDevice> device;
|
||||
cl_queue_properties props = {};
|
||||
};
|
||||
|
||||
// Reads the fixture region (8 rows) with a 1 MB row pitch and expects the read to be split
// into four image-read enqueues without creating a host-surface allocation.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturnSuccess) {
    constexpr auto pitch = MemoryConstants::megaByte;
    MockCommandQueueHw<FamilyType> queue(context, device.get(), &props);

    const auto status = queue.enqueueStagingReadImage(srcImage, false, origin, region, pitch, pitch, ptr, nullptr);

    EXPECT_EQ(status, CL_SUCCESS);
    EXPECT_EQ(4ul, queue.enqueueReadImageCounter);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
// Passes a zero row pitch with a region width chosen so that width * element size is 1 MB,
// and expects the same four image-read enqueues as with an explicit 1 MB row pitch.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> queue(context, device.get(), &props);

    const auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
    region[0] = MemoryConstants::megaByte / bytesPerPixel;

    const auto status = queue.enqueueStagingReadImage(srcImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);

    EXPECT_EQ(status, CL_SUCCESS);
    EXPECT_EQ(4ul, queue.enqueueReadImageCounter);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
// A blocking staged read must finish the queue exactly once.
HWTEST_F(ReadImageStagingBufferTest, whenBlockingEnqueueStagingReadImageCalledThenFinishCalled) {
    constexpr auto pitch = MemoryConstants::megaByte;
    MockCommandQueueHw<FamilyType> queue(context, device.get(), &props);

    const auto status = queue.enqueueStagingReadImage(srcImage, true, origin, region, pitch, pitch, ptr, nullptr);

    EXPECT_EQ(status, CL_SUCCESS);
    EXPECT_EQ(1u, queue.finishCalledCount);
}
|
||||
|
||||
// On an in-order queue the returned event comes from the last image-read enqueue and
// reports CL_COMMAND_READ_IMAGE as its command type.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithEventThenReturnValidEvent) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_IMAGE;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
    // Stop here on failure: the event handle must not be dereferenced or released unless it was produced.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// On an out-of-order queue a multi-chunk read ends with a barrier that produces the event,
// while the event itself still reports CL_COMMAND_READ_IMAGE as its command type.
HWTEST_F(ReadImageStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingReadImageCalledWithEventThenReturnValidEvent) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.setOoqEnabled();
    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
    // Stop here on failure: the event handle must not be dereferenced or released unless it was produced.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_BARRIER), mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_IMAGE), pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// A one-row region is read in a single chunk, so even on an out-of-order queue no barrier is
// enqueued: the last command stays CL_COMMAND_READ_IMAGE.
HWTEST_F(ReadImageStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingReadImageCalledWithSingleTransferThenNoBarrierEnqueued) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_IMAGE;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.setOoqEnabled();
    cl_event event = nullptr;
    region[1] = 1;
    auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
    // Stop here on failure: the event handle must not be dereferenced or released unless it was produced.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// With profiling enabled on the queue the returned event has profiling enabled and does not
// use the CPU profiling path.
HWTEST_F(ReadImageStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStagingReadImageThenTimestampsSetCorrectly) {
    cl_event event = nullptr;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.setProfilingEnabled();
    auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
    // Stop here on failure: the event handle must not be dereferenced or released unless it was produced.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_FALSE(pEvent->isCPUProfilingPath());
    EXPECT_TRUE(pEvent->isProfilingEnabled());

    clReleaseEvent(event);
}
|
||||
|
||||
// With the mock's base image-read call disabled the staged read is expected to return
// CL_INVALID_OPERATION after a single enqueue attempt.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageFailedThenPropagateErrorCode) {
    constexpr auto pitch = MemoryConstants::megaByte;
    MockCommandQueueHw<FamilyType> queue(context, device.get(), &props);
    queue.enqueueReadImageCallBase = false;

    const auto status = queue.enqueueStagingReadImage(srcImage, false, origin, region, pitch, pitch, ptr, nullptr);

    EXPECT_EQ(status, CL_INVALID_OPERATION);
    EXPECT_EQ(1ul, queue.enqueueReadImageCounter);
}
|
||||
|
||||
// Makes the CSR chosen for the read report a GPU hang when waiting for a task count and
// expects the staged read to return CL_OUT_OF_RESOURCES after two image-read enqueues.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithGpuHangThenReturnOutOfResources) {
    constexpr auto pitch = MemoryConstants::megaByte;
    MockCommandQueueHw<FamilyType> queue(context, device.get(), &props);

    // Same selection arguments as the staged read uses, so the same CSR is picked.
    CsrSelectionArgs selectionArgs{CL_COMMAND_READ_IMAGE, srcImage, nullptr, pDevice->getRootDeviceIndex(), region, nullptr, origin};
    auto &selectedCsr = queue.selectCsrForBuiltinOperation(selectionArgs);
    auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(&selectedCsr);
    ultCsr->waitForTaskCountReturnValue = WaitStatus::gpuHang;

    const auto status = queue.enqueueStagingReadImage(srcImage, false, origin, region, pitch, pitch, ptr, nullptr);

    EXPECT_EQ(status, CL_OUT_OF_RESOURCES);
    EXPECT_EQ(2ul, queue.enqueueReadImageCounter);
}
|
||||
@@ -801,7 +801,7 @@ HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageWithUsmPtrAndSizeLowerThanR
|
||||
svmManager->freeSVMAlloc(usmPtr);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueWriteImageTest, whenIsValidForStagingWriteImageCalledThenReturnCorrectValue) {
|
||||
HWTEST_F(EnqueueWriteImageTest, whenIsValidForStagingTransferImageCalledThenReturnCorrectValue) {
|
||||
bool svmSupported = pDevice->getHardwareInfo().capabilityTable.ftrSvm;
|
||||
if (!svmSupported) {
|
||||
GTEST_SKIP();
|
||||
@@ -810,13 +810,13 @@ HWTEST_F(EnqueueWriteImageTest, whenIsValidForStagingWriteImageCalledThenReturnC
|
||||
unsigned char ptr[16];
|
||||
|
||||
std::unique_ptr<Image> image(Image1dHelper<>::create(context));
|
||||
EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingWriteImage(image.get(), ptr, false));
|
||||
EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingTransferImage(image.get(), ptr, false));
|
||||
|
||||
image.reset(Image2dHelper<>::create(context));
|
||||
EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingWriteImage(image.get(), ptr, false));
|
||||
EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingTransferImage(image.get(), ptr, false));
|
||||
|
||||
image.reset(Image3dHelper<>::create(context));
|
||||
EXPECT_FALSE(pCmdQ->isValidForStagingWriteImage(image.get(), ptr, false));
|
||||
EXPECT_FALSE(pCmdQ->isValidForStagingTransferImage(image.get(), ptr, false));
|
||||
}
|
||||
|
||||
struct WriteImageStagingBufferTest : public EnqueueWriteImageTest {
|
||||
@@ -854,6 +854,17 @@ HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenRetu
|
||||
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||
}
|
||||
|
||||
// Calls the staging image write with a row pitch of 0 and checks that it succeeds,
// that the mock counted four enqueueWriteImage calls, and that the CSR created no
// host-surface allocation.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);

    // Width in elements chosen as one megabyte divided by the image element size in bytes.
    const auto elementSize = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
    region[0] = MemoryConstants::megaByte / elementSize;

    const auto retVal = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);

    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
HWTEST_F(WriteImageStagingBufferTest, whenBlockingEnqueueStagingWriteImageCalledThenFinishCalled) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, true, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
|
||||
|
||||
@@ -188,6 +188,11 @@ class MockCommandQueue : public CommandQueue {
|
||||
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; }
|
||||
|
||||
// Stubbed image read for the mock queue: ignores all arguments and reports CL_SUCCESS.
cl_int enqueueReadImageImpl(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
                            size_t rowPitch, size_t slicePitch, void *ptr,
                            GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
                            const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) override {
    return CL_SUCCESS;
}
|
||||
|
||||
cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region,
|
||||
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList,
|
||||
@@ -379,6 +384,34 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
}
|
||||
return CL_INVALID_OPERATION;
|
||||
}
|
||||
// Counting override of the image read implementation.
// Every call increments enqueueReadImageCounter. When enqueueReadImageCallBase is set,
// the call is forwarded unchanged to BaseClass; otherwise CL_INVALID_OPERATION is returned
// without touching the base implementation.
cl_int enqueueReadImageImpl(Image *srcImage,
                            cl_bool blockingRead,
                            const size_t *origin,
                            const size_t *region,
                            size_t rowPitch,
                            size_t slicePitch,
                            void *ptr,
                            GraphicsAllocation *mapAllocation,
                            cl_uint numEventsInWaitList,
                            const cl_event *eventWaitList,
                            cl_event *event, CommandStreamReceiver &csr) override {
    ++enqueueReadImageCounter;

    // Forwarding disabled: report the failure code directly.
    if (!enqueueReadImageCallBase) {
        return CL_INVALID_OPERATION;
    }

    return BaseClass::enqueueReadImageImpl(srcImage, blockingRead, origin, region,
                                           rowPitch, slicePitch, ptr, mapAllocation,
                                           numEventsInWaitList, eventWaitList, event, csr);
}
|
||||
void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) override {
|
||||
cpuDataTransferHandlerCalled = true;
|
||||
return BaseClass::cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
@@ -493,6 +526,8 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
MultiDispatchInfo storedMultiDispatchInfo;
|
||||
size_t enqueueWriteImageCounter = 0;
|
||||
bool enqueueWriteImageCallBase = true;
|
||||
size_t enqueueReadImageCounter = 0;
|
||||
bool enqueueReadImageCallBase = true;
|
||||
size_t enqueueWriteBufferCounter = 0;
|
||||
size_t requestedCmdStreamSize = 0;
|
||||
bool blockingWriteBuffer = false;
|
||||
|
||||
Reference in New Issue
Block a user