Add function to flush caches

Related-To: NEO-2536

Change-Id: Ifbf7e7a42514dd66eb0914f9d13407287481e123
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2019-03-22 13:40:41 +01:00
committed by sys_ocldev
parent 8a2917dd2c
commit 4eb48e3d06
26 changed files with 297 additions and 30 deletions

View File

@@ -1,11 +1,13 @@
#
# Copyright (C) 2018 Intel Corporation
# Copyright (C) 2018-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(RUNTIME_SRCS_API
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_extensions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/additional_extensions.h
${CMAKE_CURRENT_SOURCE_DIR}/api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/api.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_types.h
@@ -14,3 +16,4 @@ set(RUNTIME_SRCS_API
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_API})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_API ${RUNTIME_SRCS_API})
add_subdirectories()

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/api/additional_extensions.h"
namespace NEO {
// Lookup hook for extension entry points that clGetExtensionFunctionAddress
// does not resolve itself. This implementation exposes no additional
// extensions: funcName is ignored and nullptr is always returned.
void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName) {
return nullptr;
}
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "config.h"
#include <CL/cl.h>
namespace NEO {
// Resolves funcName to the address of an additional extension entry point.
// clGetExtensionFunctionAddress returns this function's result after its own
// lookups have not matched, so nullptr here means "unknown function".
void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName);
}

View File

@@ -8,6 +8,7 @@
#include "api.h"
#include "runtime/accelerators/intel_motion_estimation.h"
#include "runtime/api/additional_extensions.h"
#include "runtime/aub/aub_center.h"
#include "runtime/built_ins/built_ins.h"
#include "runtime/command_queue/command_queue.h"
@@ -3253,15 +3254,15 @@ cl_program CL_API_CALL clCreateProgramWithILKHR(cl_context context,
return program;
}
#define RETURN_FUNC_PTR_IF_EXIST(name) \
{ \
if (!strcmp(func_name, #name)) { \
return ((void *)(name)); \
} \
#define RETURN_FUNC_PTR_IF_EXIST(name) \
{ \
if (!strcmp(funcName, #name)) { \
return ((void *)(name)); \
} \
}
void *CL_API_CALL clGetExtensionFunctionAddress(const char *func_name) {
void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
DBG_LOG_INPUTS("func_name", func_name);
DBG_LOG_INPUTS("funcName", funcName);
// Support an internal call by the ICD
RETURN_FUNC_PTR_IF_EXIST(clIcdGetPlatformIDsKHR);
@@ -3277,7 +3278,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *func_name) {
RETURN_FUNC_PTR_IF_EXIST(clAddCommentINTEL);
RETURN_FUNC_PTR_IF_EXIST(clEnqueueVerifyMemory);
void *ret = sharingFactory.getExtensionFunctionAddress(func_name);
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
if (ret != nullptr)
return ret;
@@ -3285,7 +3286,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *func_name) {
RETURN_FUNC_PTR_IF_EXIST(clCreateProgramWithILKHR);
RETURN_FUNC_PTR_IF_EXIST(clCreateCommandQueueWithPropertiesKHR);
return nullptr;
return getAdditionalExtensionFunctionAddress(funcName);
}
// OpenCL 1.2

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018 Intel Corporation
# Copyright (C) 2018-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -26,6 +26,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/enqueue_resource_barrier.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect.h
@@ -44,6 +45,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_avx2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_sse4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/resource_barrier.h
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_QUEUE})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_QUEUE ${RUNTIME_SRCS_COMMAND_QUEUE})

View File

@@ -18,6 +18,7 @@
#include <cstdint>
namespace NEO {
class BarrierCommand;
class Buffer;
class LinearStream;
class Context;
@@ -47,14 +48,6 @@ inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) {
printfHandler);
}
// Returns true when commandType is one of the command types this helper
// classifies as kernel-less: barrier, marker, mem-object migration, and the
// SVM map / unmap / free commands. Every other value yields false.
inline bool isCommandWithoutKernel(uint32_t commandType) {
    switch (commandType) {
    case CL_COMMAND_BARRIER:
    case CL_COMMAND_MARKER:
    case CL_COMMAND_MIGRATE_MEM_OBJECTS:
    case CL_COMMAND_SVM_MAP:
    case CL_COMMAND_SVM_UNMAP:
    case CL_COMMAND_SVM_FREE:
        return true;
    default:
        return false;
    }
}
template <>
struct OpenCLObjectMapper<_cl_command_queue> {
typedef class CommandQueue DerivedType;
@@ -306,12 +299,21 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal);
// Base-class default for enqueueing a resource barrier: all arguments are
// ignored, no work is done and CL_SUCCESS is returned. Virtual so that a
// derived queue can supply the real behavior.
virtual cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
return CL_SUCCESS;
}
// Base-class default: ignores dcFlush, does no work and returns CL_SUCCESS.
virtual cl_int finish(bool dcFlush) { return CL_SUCCESS; }
// Base-class default: does no work and returns CL_SUCCESS.
virtual cl_int flush() { return CL_SUCCESS; }
MOCKABLE_VIRTUAL void updateFromCompletionStamp(const CompletionStamp &completionStamp);
// Base-class default: no command type is reported as a cache-flush command.
virtual bool isCacheFlushCommand(uint32_t commandType) { return false; }
cl_int getCommandQueueInfo(cl_command_queue_info paramName,
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet);

View File

@@ -277,6 +277,11 @@ class CommandQueueHw : public CommandQueue {
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) override;
cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) override;
cl_int finish(bool dcFlush) override;
cl_int flush() override;
@@ -338,6 +343,21 @@ class CommandQueueHw : public CommandQueue {
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> printfHandler);
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
size_t surfaceCount,
LinearStream &commandStream,
size_t commandStreamStart,
bool &blocking,
TimestampPacketContainer *previousTimestampPacketNodes,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
uint32_t taskLevel);
void processDispatchForCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream);
bool isCacheFlushCommand(uint32_t commandType) override;
protected:
// Hook with an empty default body; both arguments are ignored.
// NOTE(review): presumably MOCKABLE_VIRTUAL lets test mocks override this - confirm.
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);

View File

@@ -45,4 +45,13 @@ template <typename Family>
bool CommandQueueHw<Family>::requiresCacheFlushAfterWalkerBasedOnProperties(const cl_queue_properties *properties) {
return false;
}
// Cache-flush dispatch step. This variant has an empty body: the surfaces are
// not inspected and nothing is written to commandStream.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream) {
}
// Reports whether commandType is a cache-flush command. This variant always
// answers false, so callers that branch on the result skip their cache-flush
// path.
template <typename GfxFamily>
bool CommandQueueHw<GfxFamily>::isCacheFlushCommand(uint32_t commandType) {
return false;
}
} // namespace NEO

View File

@@ -148,7 +148,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (isProfilingEnabled() && event) {
this->getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
}
EventBuilder eventBuilder;
if (event) {
eventBuilder.create<Event>(this, commandType, Event::eventNotReady, 0);
@@ -206,13 +205,15 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
}
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired, multiDispatchInfo);
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired, multiDispatchInfo, surfacesForResidency, numSurfaceForResidency);
auto commandStreamStart = commandStream.getUsed();
if (multiDispatchInfo.empty() == false) {
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
previousTimestampPacketNodes, preemption);
} else if (isCacheFlushCommand(commandType)) {
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream);
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (CL_COMMAND_BARRIER == commandType) {
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
@@ -274,6 +275,17 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
devQueueHw->getDebugQueue());
}
}
} else if (isCacheFlushCommand(commandType)) {
enqueueCommandWithoutKernel(
surfacesForResidency,
numSurfaceForResidency,
commandStream,
commandStreamStart,
blocking,
&previousTimestampPacketNodes,
eventsRequest,
eventBuilder,
taskLevel);
} else {
auto maxTaskCount = this->taskCount;
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
@@ -745,6 +757,39 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
this->virtualEvent = eventBuilder->getEvent();
}
// Flushes an already-recorded command stream that has no kernel attached.
//
// Each entry of surfaces[0..surfaceCount) is made resident on the command
// stream receiver, then the stream (starting at commandStreamStart) is handed
// to flushTask together with the queue's three indirect heaps, taskLevel and
// value-initialized dispatch flags. The CompletionStamp produced by flushTask
// is returned to the caller.
//
// blocking, previousTimestampPacketNodes, eventsRequest and eventBuilder are
// accepted but not used by this implementation.
template <typename GfxFamily>
CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
    Surface **surfaces,
    size_t surfaceCount,
    LinearStream &commandStream,
    size_t commandStreamStart,
    bool &blocking,
    TimestampPacketContainer *previousTimestampPacketNodes,
    EventsRequest &eventsRequest,
    EventBuilder &eventBuilder,
    uint32_t taskLevel) {
    // Residency is requested for every surface ahead of the flush below.
    bool anySurfaceCoherent = false;
    for (auto currentSurface : CreateRange(surfaces, surfaceCount)) {
        currentSurface->makeResident(getCommandStreamReceiver());
        anySurfaceCoherent |= currentSurface->IsCoherent;
    }
    // NOTE(review): anySurfaceCoherent is accumulated but never read - the
    // dispatch flags below stay at their defaults. Confirm whether the
    // coherency requirement was meant to be forwarded to the flush.

    DispatchFlags defaultFlags = {};

    return getCommandStreamReceiver().flushTask(
        commandStream,
        commandStreamStart,
        getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
        getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
        getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
        taskLevel,
        defaultFlags,
        *device);
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
size_t *hostOffset,

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "runtime/command_queue/command_queue_hw.h"
namespace NEO {
// CommandQueueHw implementation of enqueueResourceBarrier. This variant is a
// stub: every argument is ignored, nothing is enqueued and CL_SUCCESS is
// returned.
template <typename GfxFamily>
cl_int CommandQueueHw<GfxFamily>::enqueueResourceBarrier(BarrierCommand *resourceBarrier,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
return CL_SUCCESS;
}
} // namespace NEO

View File

@@ -224,7 +224,7 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling
}
template <typename GfxFamily, uint32_t eventType>
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces) {
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, multiDispatchInfo);
return commandQueue.getCS(expectedSizeCS);
}

View File

@@ -17,6 +17,7 @@
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/queue_helpers.h"
#include "runtime/helpers/validators.h"
#include "runtime/indirect_heap/indirect_heap.h"
#include "runtime/mem_obj/mem_obj.h"

View File

@@ -0,0 +1,10 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Include guard: BarrierCommand is a full definition, so pulling this header
// into one translation unit twice would otherwise be a redefinition error.
#pragma once

namespace NEO {
// Placeholder type passed to enqueueResourceBarrier. It carries no data in
// this variant.
// NOTE(review): command_queue.h forward-declares this as `class BarrierCommand;`
// while it is defined with `struct` here - harmless to the language, but
// compilers may warn about mismatched tags.
struct BarrierCommand {};
} // namespace NEO

View File

@@ -7,6 +7,7 @@
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_queue/command_queue_hw.inl"
#include "runtime/command_queue/enqueue_resource_barrier.h"
#include "runtime/memory_manager/svm_memory_manager.h"
namespace NEO {

View File

@@ -7,6 +7,7 @@
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_queue/command_queue_hw.inl"
#include "runtime/command_queue/enqueue_resource_barrier.h"
#include "runtime/memory_manager/svm_memory_manager.h"
namespace NEO {

View File

@@ -7,6 +7,7 @@
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_queue/command_queue_hw.inl"
#include "runtime/command_queue/enqueue_resource_barrier.h"
#include "runtime/memory_manager/svm_memory_manager.h"
namespace NEO {

View File

@@ -22,4 +22,11 @@ void CommandQueue::processProperties(const cl_queue_properties *properties) {
// Intel-specific queue-info query. This variant recognizes no parameter names:
// queue, paramName and getInfoHelper are ignored and retVal is set to
// CL_INVALID_VALUE.
void getIntelQueueInfo(CommandQueue *queue, cl_command_queue_info paramName, GetInfoHelper &getInfoHelper, cl_int &retVal) {
retVal = CL_INVALID_VALUE;
}
// Returns true when commandType is one of the command types this helper
// classifies as kernel-less: barrier, marker, mem-object migration, and the
// SVM map / unmap / free commands. Every other value yields false.
bool isCommandWithoutKernel(uint32_t commandType) {
    switch (commandType) {
    case CL_COMMAND_BARRIER:
    case CL_COMMAND_MARKER:
    case CL_COMMAND_MIGRATE_MEM_OBJECTS:
    case CL_COMMAND_SVM_MAP:
    case CL_COMMAND_SVM_UNMAP:
    case CL_COMMAND_SVM_FREE:
        return true;
    default:
        return false;
    }
}
} // namespace NEO

View File

@@ -21,6 +21,8 @@ inline void releaseVirtualEvent(CommandQueue &commandQueue) {
// Overload taking a DeviceQueue: the body is empty, so calling it has no
// effect on the queue passed in.
inline void releaseVirtualEvent(DeviceQueue &commandQueue) {
}
bool isCommandWithoutKernel(uint32_t commandType);
template <typename QueueType>
void retainQueue(cl_command_queue commandQueue, cl_int &retVal) {
using BaseType = typename QueueType::BaseType;