mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Add function to flush caches
Related-To: NEO-2536 Change-Id: Ifbf7e7a42514dd66eb0914f9d13407287481e123 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
8a2917dd2c
commit
4eb48e3d06
@@ -1,11 +1,13 @@
|
||||
#
|
||||
# Copyright (C) 2018 Intel Corporation
|
||||
# Copyright (C) 2018-2019 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(RUNTIME_SRCS_API
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_extensions.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/additional_extensions.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/api.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/api.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_types.h
|
||||
@@ -14,3 +16,4 @@ set(RUNTIME_SRCS_API
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_API})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_API ${RUNTIME_SRCS_API})
|
||||
add_subdirectories()
|
||||
|
||||
15
runtime/api/additional_extensions.cpp
Normal file
15
runtime/api/additional_extensions.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/api/additional_extensions.h"
|
||||
|
||||
namespace NEO {
|
||||
/// Fallback lookup for extension entry points.
///
/// This implementation knows no additional extensions, so it reports
/// "not found" for every name.
///
/// @param funcName  name of the requested extension function (not inspected here)
/// @return always nullptr
void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName) {
    return nullptr;
}
|
||||
|
||||
} // namespace NEO
|
||||
16
runtime/api/additional_extensions.h
Normal file
16
runtime/api/additional_extensions.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
#include <CL/cl.h>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName);
|
||||
}
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "api.h"
|
||||
|
||||
#include "runtime/accelerators/intel_motion_estimation.h"
|
||||
#include "runtime/api/additional_extensions.h"
|
||||
#include "runtime/aub/aub_center.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
@@ -3253,15 +3254,15 @@ cl_program CL_API_CALL clCreateProgramWithILKHR(cl_context context,
|
||||
return program;
|
||||
}
|
||||
|
||||
#define RETURN_FUNC_PTR_IF_EXIST(name) \
|
||||
{ \
|
||||
if (!strcmp(func_name, #name)) { \
|
||||
return ((void *)(name)); \
|
||||
} \
|
||||
#define RETURN_FUNC_PTR_IF_EXIST(name) \
|
||||
{ \
|
||||
if (!strcmp(funcName, #name)) { \
|
||||
return ((void *)(name)); \
|
||||
} \
|
||||
}
|
||||
void *CL_API_CALL clGetExtensionFunctionAddress(const char *func_name) {
|
||||
void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
|
||||
|
||||
DBG_LOG_INPUTS("func_name", func_name);
|
||||
DBG_LOG_INPUTS("funcName", funcName);
|
||||
// Support an internal call by the ICD
|
||||
RETURN_FUNC_PTR_IF_EXIST(clIcdGetPlatformIDsKHR);
|
||||
|
||||
@@ -3277,7 +3278,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *func_name) {
|
||||
RETURN_FUNC_PTR_IF_EXIST(clAddCommentINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clEnqueueVerifyMemory);
|
||||
|
||||
void *ret = sharingFactory.getExtensionFunctionAddress(func_name);
|
||||
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
|
||||
if (ret != nullptr)
|
||||
return ret;
|
||||
|
||||
@@ -3285,7 +3286,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *func_name) {
|
||||
RETURN_FUNC_PTR_IF_EXIST(clCreateProgramWithILKHR);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clCreateCommandQueueWithPropertiesKHR);
|
||||
|
||||
return nullptr;
|
||||
return getAdditionalExtensionFunctionAddress(funcName);
|
||||
}
|
||||
|
||||
// OpenCL 1.2
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018 Intel Corporation
|
||||
# Copyright (C) 2018-2019 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -26,6 +26,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/enqueue_resource_barrier.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect.h
|
||||
@@ -44,6 +45,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_avx2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_sse4.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/resource_barrier.h
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_QUEUE})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_QUEUE ${RUNTIME_SRCS_COMMAND_QUEUE})
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
class BarrierCommand;
|
||||
class Buffer;
|
||||
class LinearStream;
|
||||
class Context;
|
||||
@@ -47,14 +48,6 @@ inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) {
|
||||
printfHandler);
|
||||
}
|
||||
|
||||
inline bool isCommandWithoutKernel(uint32_t commandType) {
|
||||
return ((commandType == CL_COMMAND_BARRIER) || (commandType == CL_COMMAND_MARKER) ||
|
||||
(commandType == CL_COMMAND_MIGRATE_MEM_OBJECTS) ||
|
||||
(commandType == CL_COMMAND_SVM_MAP) ||
|
||||
(commandType == CL_COMMAND_SVM_UNMAP) ||
|
||||
(commandType == CL_COMMAND_SVM_FREE));
|
||||
}
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_command_queue> {
|
||||
typedef class CommandQueue DerivedType;
|
||||
@@ -306,12 +299,21 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
MOCKABLE_VIRTUAL void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal);
|
||||
|
||||
/// Default resource-barrier hook: does no work and reports success.
/// All arguments are ignored by this base implementation.
///
/// @param resourceBarrier      barrier description
/// @param numEventsInWaitList  number of entries in eventWaitList
/// @param eventWaitList        events the barrier would wait for
/// @param event                optional output event
/// @return CL_SUCCESS
virtual cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList,
                                      const cl_event *eventWaitList, cl_event *event) {
    return CL_SUCCESS;
}
|
||||
|
||||
// Base implementation: ignores dcFlush, performs no work and returns CL_SUCCESS.
virtual cl_int finish(bool dcFlush) { return CL_SUCCESS; }
|
||||
|
||||
// Base implementation: performs no work and returns CL_SUCCESS.
virtual cl_int flush() { return CL_SUCCESS; }
|
||||
|
||||
MOCKABLE_VIRTUAL void updateFromCompletionStamp(const CompletionStamp &completionStamp);
|
||||
|
||||
// Base implementation: no command type is treated as a cache-flush command (always false).
virtual bool isCacheFlushCommand(uint32_t commandType) { return false; }
|
||||
|
||||
cl_int getCommandQueueInfo(cl_command_queue_info paramName,
|
||||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
@@ -277,6 +277,11 @@ class CommandQueueHw : public CommandQueue {
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int finish(bool dcFlush) override;
|
||||
cl_int flush() override;
|
||||
|
||||
@@ -338,6 +343,21 @@ class CommandQueueHw : public CommandQueue {
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> printfHandler);
|
||||
|
||||
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel);
|
||||
void processDispatchForCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream);
|
||||
|
||||
bool isCacheFlushCommand(uint32_t commandType) override;
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
|
||||
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
|
||||
|
||||
@@ -45,4 +45,13 @@ template <typename Family>
|
||||
bool CommandQueueHw<Family>::requiresCacheFlushAfterWalkerBasedOnProperties(const cl_queue_properties *properties) {
|
||||
return false;
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream) {
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isCacheFlushCommand(uint32_t commandType) {
|
||||
return false;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -148,7 +148,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
if (isProfilingEnabled() && event) {
|
||||
this->getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
|
||||
}
|
||||
|
||||
EventBuilder eventBuilder;
|
||||
if (event) {
|
||||
eventBuilder.create<Event>(this, commandType, Event::eventNotReady, 0);
|
||||
@@ -206,13 +205,15 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired, multiDispatchInfo);
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired, multiDispatchInfo, surfacesForResidency, numSurfaceForResidency);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
|
||||
previousTimestampPacketNodes, preemption);
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream);
|
||||
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
if (CL_COMMAND_BARRIER == commandType) {
|
||||
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
|
||||
@@ -274,6 +275,17 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
devQueueHw->getDebugQueue());
|
||||
}
|
||||
}
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
enqueueCommandWithoutKernel(
|
||||
surfacesForResidency,
|
||||
numSurfaceForResidency,
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
blocking,
|
||||
&previousTimestampPacketNodes,
|
||||
eventsRequest,
|
||||
eventBuilder,
|
||||
taskLevel);
|
||||
} else {
|
||||
auto maxTaskCount = this->taskCount;
|
||||
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
|
||||
@@ -745,6 +757,39 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel) {
|
||||
|
||||
auto requiresCoherency = false;
|
||||
for (auto surface : CreateRange(surfaces, surfaceCount)) {
|
||||
surface->makeResident(getCommandStreamReceiver());
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags = {};
|
||||
|
||||
CompletionStamp completionStamp = getCommandStreamReceiver().flushTask(
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
|
||||
getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
|
||||
getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
taskLevel,
|
||||
dispatchFlags,
|
||||
*device);
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
|
||||
20
runtime/command_queue/enqueue_resource_barrier.h
Normal file
20
runtime/command_queue/enqueue_resource_barrier.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueResourceBarrier(BarrierCommand *resourceBarrier,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -224,7 +224,7 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling
|
||||
}
|
||||
|
||||
template <typename GfxFamily, uint32_t eventType>
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces) {
|
||||
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, multiDispatchInfo);
|
||||
return commandQueue.getCS(expectedSizeCS);
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/queue_helpers.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
|
||||
10
runtime/command_queue/resource_barrier.h
Normal file
10
runtime/command_queue/resource_barrier.h
Normal file
@@ -0,0 +1,10 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// Guard against multiple inclusion: this header defines a type, so including
// it twice in one translation unit would otherwise be a redefinition error.
#pragma once

namespace NEO {

/// Description of a resource barrier passed to enqueueResourceBarrier.
/// Carries no data in this implementation.
struct BarrierCommand {};

} // namespace NEO
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_queue/command_queue_hw.inl"
|
||||
#include "runtime/command_queue/enqueue_resource_barrier.h"
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_queue/command_queue_hw.inl"
|
||||
#include "runtime/command_queue/enqueue_resource_barrier.h"
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_queue/command_queue_hw.inl"
|
||||
#include "runtime/command_queue/enqueue_resource_barrier.h"
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -22,4 +22,11 @@ void CommandQueue::processProperties(const cl_queue_properties *properties) {
|
||||
void getIntelQueueInfo(CommandQueue *queue, cl_command_queue_info paramName, GetInfoHelper &getInfoHelper, cl_int &retVal) {
|
||||
retVal = CL_INVALID_VALUE;
|
||||
}
|
||||
bool isCommandWithoutKernel(uint32_t commandType) {
|
||||
return ((commandType == CL_COMMAND_BARRIER) || (commandType == CL_COMMAND_MARKER) ||
|
||||
(commandType == CL_COMMAND_MIGRATE_MEM_OBJECTS) ||
|
||||
(commandType == CL_COMMAND_SVM_MAP) ||
|
||||
(commandType == CL_COMMAND_SVM_UNMAP) ||
|
||||
(commandType == CL_COMMAND_SVM_FREE));
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -21,6 +21,8 @@ inline void releaseVirtualEvent(CommandQueue &commandQueue) {
|
||||
inline void releaseVirtualEvent(DeviceQueue &commandQueue) {
|
||||
}
|
||||
|
||||
bool isCommandWithoutKernel(uint32_t commandType);
|
||||
|
||||
template <typename QueueType>
|
||||
void retainQueue(cl_command_queue commandQueue, cl_int &retVal) {
|
||||
using BaseType = typename QueueType::BaseType;
|
||||
|
||||
Reference in New Issue
Block a user