/* * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "runtime/built_ins/built_ins.h" #include "runtime/command_queue/command_queue_hw.h" #include "runtime/command_stream/command_stream_receiver.h" #include "runtime/helpers/hardware_commands_helper.h" #include "runtime/mem_obj/buffer.h" #include "runtime/memory_manager/internal_allocation_storage.h" #include "runtime/memory_manager/memory_manager.h" #include "runtime/memory_manager/surface.h" #include "hw_cmds.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueFillBuffer( Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto memoryManager = getDevice().getMemoryManager(); DEBUG_BREAK_IF(nullptr == memoryManager); auto patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN}); if (patternSize == 1) { int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else if (patternSize == 2) { int patternInt = (uint32_t)((*(uint16_t *)pattern << 16) | *(uint16_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else { memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize); } auto eBuiltInOps = EBuiltInOps::FillBuffer; if (forceStateless(size)) { eBuiltInOps = EBuiltInOps::FillBufferStateless; } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getContext(), this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); BuiltinOpParams dc; MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); MemObjSurface s1(buffer); GeneralSurface s2(patternAllocation); Surface *surfaces[] = {&s1, &s2}; enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr(patternAllocation), TEMPORARY_ALLOCATION, taskCount); return CL_SUCCESS; } } // namespace NEO