mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-23 03:33:13 +08:00
Initial support for oneAPI Level Zero
Change-Id: I221df8427b1844237a4d9d900c58512706b0be0f
This commit is contained in:
109
level_zero/core/source/CMakeLists.txt
Normal file
109
level_zero/core/source/CMakeLists.txt
Normal file
@@ -0,0 +1,109 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Sources of the Level Zero core runtime that live in (or directly below) this
# directory. Entries containing ${BRANCH_DIR_SUFFIX} resolve to a
# branch-specific location chosen by the including build.
# The order of the entries is kept as-is on purpose.
set(L0_RUNTIME_SOURCES
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_kernel_properties.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/builtin_functions_lib.h
    ${CMAKE_CURRENT_SOURCE_DIR}/builtin_functions_lib_impl.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/builtin_functions_lib_impl.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_memory_manager.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/debug_manager.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/debugger.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/device.h
    ${CMAKE_CURRENT_SOURCE_DIR}/driver_handle.h
    ${CMAKE_CURRENT_SOURCE_DIR}/device_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/driver_handle_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/device_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/driver_handle_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/driver.h
    ${CMAKE_CURRENT_SOURCE_DIR}/driver_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/event.h
    ${CMAKE_CURRENT_SOURCE_DIR}/fence.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/fence.h
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel.h
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/image.h
    ${CMAKE_CURRENT_SOURCE_DIR}/image_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/image_hw.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_helper.h
    ${CMAKE_CURRENT_SOURCE_DIR}/module.h
    ${CMAKE_CURRENT_SOURCE_DIR}/module_build_log.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/module_build_log.h
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/module_extra_options.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/module_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/module_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler.h
    ${CMAKE_CURRENT_SOURCE_DIR}/sampler.h
    ${CMAKE_CURRENT_SOURCE_DIR}/sampler_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/sampler_hw.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/sampler_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/sampler_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers${BRANCH_DIR_SUFFIX}/hw_helpers.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_extended${BRANCH_DIR_SUFFIX}/cmdlist_extended.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_extended${BRANCH_DIR_SUFFIX}/cmdqueue_extended.inl
)

# Headers in this directory are part of the target's public include path.
target_include_directories(${TARGET_NAME_L0}
    PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/
)

# Project-provided helper; presumably descends into the child directories,
# which publish their sources through the properties collected below.
add_subdirectories()
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Project-provided helper; presumably appends the values stored in the listed
# properties to L0_RUNTIME_SOURCES - confirm against its definition.
append_sources_from_properties(L0_RUNTIME_SOURCES
    L0_API
    L0_SRCS_DLL
    L0_SOURCES_LINUX
    L0_SOURCES_WINDOWS
    L0_SRCS_COMPILER_INTERFACE
    L0_SRCS_DEBUGGER
    L0_SRCS_OCLOC_SHARED
)

# CMakeLists.txt is already the first entry of L0_RUNTIME_SOURCES, so it is not
# repeated here.
target_sources(${TARGET_NAME_L0}
    PRIVATE
    ${L0_RUNTIME_SOURCES}
)

if(UNIX)
    # NOTE(review): source-file properties are by default only visible to
    # targets created in the same directory; if ${TARGET_NAME_L0} is created
    # elsewhere this -Wall may never reach the compiler - confirm.
    set_source_files_properties(${L0_RUNTIME_SOURCES} PROPERTIES COMPILE_FLAGS -Wall)
endif()

# Fall back to version 1 when the including build did not choose one.
if(NOT DEFINED L0_DRIVER_VERSION)
    set(L0_DRIVER_VERSION 1)
endif()

# Put Driver version into define
configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/driver_version.h.in
    ${CMAKE_BINARY_DIR}/driver_version_l0.h
)

# Make our source files visible to parent
set_property(GLOBAL PROPERTY L0_RUNTIME_SOURCES ${L0_RUNTIME_SOURCES})
|
||||
14
level_zero/core/source/additional_kernel_properties.cpp
Normal file
14
level_zero/core/source/additional_kernel_properties.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Hook for contributing extra kernel properties. This implementation adds
// nothing: neither hwHelper nor pKernelProperties is read or written.
void DeviceImp::processAdditionalKernelProperties(NEO::HwHelper &hwHelper,
                                                  ze_device_kernel_properties_t *pKernelProperties) {
    // Intentionally empty.
}
|
||||
} // namespace L0
|
||||
56
level_zero/core/source/builtin_functions_lib.h
Normal file
56
level_zero/core/source/builtin_functions_lib.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace NEO {
|
||||
class BuiltIns;
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
struct Device;
|
||||
struct Kernel;
|
||||
|
||||
// Identifiers of the built-in kernels handled by BuiltinFunctionsLib.
// Enumerators are consecutive starting at 0; COUNT is not a kernel, it is the
// number of entries and is used to size and iterate per-builtin tables.
enum class Builtin : uint32_t {
    // Buffer copies
    CopyBufferBytes = 0u,
    CopyBufferToBufferSide,
    CopyBufferToBufferMiddle,

    // Image-to-image copy
    CopyImageRegion,

    // Buffer fills
    FillBufferImmediate,
    FillBufferSSHOffset,

    // Rectangular buffer copies
    CopyBufferRectBytes2d,
    CopyBufferRectBytes3d,

    // Buffer -> image copies
    CopyBufferToImage3dBytes,
    CopyBufferToImage3d2Bytes,
    CopyBufferToImage3d4Bytes,
    CopyBufferToImage3d8Bytes,
    CopyBufferToImage3d16Bytes,

    // Image -> buffer copies
    CopyImage3dToBufferBytes,
    CopyImage3dToBuffer2Bytes,
    CopyImage3dToBuffer4Bytes,
    CopyImage3dToBuffer8Bytes,
    CopyImage3dToBuffer16Bytes,

    COUNT
};
|
||||
|
||||
struct BuiltinFunctionsLib {
|
||||
virtual ~BuiltinFunctionsLib() = default;
|
||||
static std::unique_ptr<BuiltinFunctionsLib> create(Device *device,
|
||||
NEO::BuiltIns *builtins);
|
||||
|
||||
virtual Kernel *getFunction(Builtin func) = 0;
|
||||
virtual void initFunctions() = 0;
|
||||
virtual Kernel *getPageFaultFunction() = 0;
|
||||
virtual void initPageFaultFunction() = 0;
|
||||
|
||||
protected:
|
||||
BuiltinFunctionsLib() = default;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
157
level_zero/core/source/builtin_functions_lib_impl.cpp
Normal file
157
level_zero/core/source/builtin_functions_lib_impl.cpp
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/builtin_functions_lib_impl.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/module.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Factory: instantiates BuiltinFunctionsLibImpl and transfers ownership to the caller.
std::unique_ptr<BuiltinFunctionsLib> BuiltinFunctionsLib::create(Device *device, NEO::BuiltIns *builtins) {
    std::unique_ptr<BuiltinFunctionsLib> library{new BuiltinFunctionsLibImpl(device, builtins)};
    return library;
}
|
||||
|
||||
// Owns one built-in kernel together with the module it was created from.
struct BuiltinFunctionsLibImpl::BuiltinData {
    std::unique_ptr<Module> module;
    std::unique_ptr<Kernel> func;

    // Releases the kernel first and the module afterwards.
    ~BuiltinData() {
        func.reset();
        module.reset();
    }
};
|
||||
|
||||
void BuiltinFunctionsLibImpl::initFunctions() {
|
||||
for (uint32_t builtId = 0; builtId < static_cast<uint32_t>(Builtin::COUNT); builtId++) {
|
||||
const char *builtinName = nullptr;
|
||||
NEO::EBuiltInOps::Type builtin;
|
||||
|
||||
switch (static_cast<Builtin>(builtId)) {
|
||||
case Builtin::CopyBufferBytes:
|
||||
builtinName = "copyBufferToBufferBytesSingle";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToBuffer;
|
||||
break;
|
||||
case Builtin::CopyBufferToBufferSide:
|
||||
builtinName = "CopyBufferToBufferSideRegion";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToBuffer;
|
||||
break;
|
||||
case Builtin::CopyBufferToBufferMiddle:
|
||||
builtinName = "CopyBufferToBufferMiddleRegion";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToBuffer;
|
||||
break;
|
||||
case Builtin::CopyImageRegion:
|
||||
builtinName = "CopyImageToImage3d";
|
||||
builtin = NEO::EBuiltInOps::CopyImageToImage3d;
|
||||
break;
|
||||
case Builtin::FillBufferImmediate:
|
||||
builtinName = "FillBufferImmediate";
|
||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
||||
break;
|
||||
case Builtin::FillBufferSSHOffset:
|
||||
builtinName = "FillBufferSSHOffset";
|
||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
||||
break;
|
||||
case Builtin::CopyBufferRectBytes2d:
|
||||
builtinName = "CopyBufferRectBytes2d";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferRect;
|
||||
break;
|
||||
case Builtin::CopyBufferRectBytes3d:
|
||||
builtinName = "CopyBufferRectBytes3d";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferRect;
|
||||
break;
|
||||
case Builtin::CopyBufferToImage3dBytes:
|
||||
builtinName = "CopyBufferToImage3dBytes";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToImage3d;
|
||||
break;
|
||||
case Builtin::CopyBufferToImage3d2Bytes:
|
||||
builtinName = "CopyBufferToImage3d2Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToImage3d;
|
||||
break;
|
||||
case Builtin::CopyBufferToImage3d4Bytes:
|
||||
builtinName = "CopyBufferToImage3d4Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToImage3d;
|
||||
break;
|
||||
case Builtin::CopyBufferToImage3d8Bytes:
|
||||
builtinName = "CopyBufferToImage3d8Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToImage3d;
|
||||
break;
|
||||
case Builtin::CopyBufferToImage3d16Bytes:
|
||||
builtinName = "CopyBufferToImage3d16Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyBufferToImage3d;
|
||||
break;
|
||||
case Builtin::CopyImage3dToBufferBytes:
|
||||
builtinName = "CopyImage3dToBufferBytes";
|
||||
builtin = NEO::EBuiltInOps::CopyImage3dToBuffer;
|
||||
break;
|
||||
case Builtin::CopyImage3dToBuffer2Bytes:
|
||||
builtinName = "CopyImage3dToBuffer2Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyImage3dToBuffer;
|
||||
break;
|
||||
case Builtin::CopyImage3dToBuffer4Bytes:
|
||||
builtinName = "CopyImage3dToBuffer4Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyImage3dToBuffer;
|
||||
break;
|
||||
case Builtin::CopyImage3dToBuffer8Bytes:
|
||||
builtinName = "CopyImage3dToBuffer8Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyImage3dToBuffer;
|
||||
break;
|
||||
case Builtin::CopyImage3dToBuffer16Bytes:
|
||||
builtinName = "CopyImage3dToBuffer16Bytes";
|
||||
builtin = NEO::EBuiltInOps::CopyImage3dToBuffer;
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
};
|
||||
|
||||
builtins[builtId] = loadBuiltIn(builtin, builtinName);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the kernel stored for `func`. The slot is dereferenced without a
// check, so it must have been populated (see initFunctions()) beforehand.
Kernel *BuiltinFunctionsLibImpl::getFunction(Builtin func) {
    const auto &slot = builtins[static_cast<uint32_t>(func)];
    return slot->func.get();
}
|
||||
|
||||
// Loads the kernel used for page-fault handling into its own slot, separate
// from the table filled by initFunctions().
void BuiltinFunctionsLibImpl::initPageFaultFunction() {
    const char *kernelName = "CopyBufferToBufferSideRegion";
    pageFaultBuiltin = loadBuiltIn(NEO::EBuiltInOps::CopyBufferToBuffer, kernelName);
}
|
||||
|
||||
// Returns the page-fault kernel. The slot is dereferenced without a check, so
// initPageFaultFunction() must have run first.
Kernel *BuiltinFunctionsLibImpl::getPageFaultFunction() {
    BuiltinData &data = *pageFaultBuiltin;
    return data.func.get();
}
|
||||
|
||||
// Creates a module from the binary of NEO built-in `builtin` and then the
// kernel named `builtInName` inside it.
//
// builtin      NEO built-in operation whose binary code is requested.
// builtInName  name of the kernel to create from that module.
//
// Returns a BuiltinData owning both the module and the kernel. Module creation
// failure is checked with UNRECOVERABLE_IF. Kernel creation failure is only
// checked with DEBUG_BREAK_IF, so the returned BuiltinData may hold a null
// kernel when that check is inactive.
std::unique_ptr<BuiltinFunctionsLibImpl::BuiltinData> BuiltinFunctionsLibImpl::loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) {
    auto builtInCode = builtInsLib->getBuiltinsLib().getBuiltinCode(builtin, NEO::BuiltinCode::ECodeType::Binary, *device->getNEODevice());

    ze_result_t res;
    std::unique_ptr<Module> module;
    // Handles start out null so that a failed create call can never leave an
    // indeterminate value behind.
    ze_module_handle_t moduleHandle = nullptr;
    ze_module_desc_t moduleDesc = {ZE_MODULE_DESC_VERSION_CURRENT};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast<uint8_t *>(&builtInCode.resource[0]);
    moduleDesc.inputSize = builtInCode.resource.size();
    res = device->createModule(&moduleDesc, &moduleHandle, nullptr);
    UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS);

    module.reset(Module::fromHandle(moduleHandle));

    std::unique_ptr<Kernel> function;
    ze_kernel_handle_t functionHandle = nullptr;
    ze_kernel_desc_t functionDesc = {ZE_KERNEL_DESC_VERSION_CURRENT};
    functionDesc.pKernelName = builtInName;
    res = module->createKernel(&functionDesc, &functionHandle);
    DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS);
    UNUSED_VARIABLE(res);
    // If createKernel failed without writing the handle, this yields a null
    // kernel instead of wrapping garbage.
    function.reset(Kernel::fromHandle(functionHandle));
    return std::unique_ptr<BuiltinData>(new BuiltinData{std::move(module), std::move(function)});
}
|
||||
|
||||
} // namespace L0
|
||||
44
level_zero/core/source/builtin_functions_lib_impl.h
Normal file
44
level_zero/core/source/builtin_functions_lib_impl.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/builtin_functions_lib.h"
|
||||
|
||||
namespace NEO {
|
||||
namespace EBuiltInOps {
|
||||
using Type = uint32_t;
|
||||
}
|
||||
class BuiltIns;
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
struct BuiltinFunctionsLibImpl : BuiltinFunctionsLib {
|
||||
struct BuiltinData;
|
||||
BuiltinFunctionsLibImpl(Device *device, NEO::BuiltIns *builtInsLib)
|
||||
: device(device), builtInsLib(builtInsLib) {
|
||||
}
|
||||
~BuiltinFunctionsLibImpl() override {
|
||||
builtins->reset();
|
||||
pageFaultBuiltin.release();
|
||||
}
|
||||
|
||||
Kernel *getFunction(Builtin func) override;
|
||||
Kernel *getPageFaultFunction() override;
|
||||
void initFunctions() override;
|
||||
void initPageFaultFunction() override;
|
||||
std::unique_ptr<BuiltinFunctionsLibImpl::BuiltinData> loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<BuiltinData> builtins[static_cast<uint32_t>(Builtin::COUNT)];
|
||||
std::unique_ptr<BuiltinData> pageFaultBuiltin;
|
||||
|
||||
Device *device;
|
||||
NEO::BuiltIns *builtInsLib;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
94
level_zero/core/source/cmdlist.cpp
Normal file
94
level_zero/core/source/cmdlist.cpp
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
#include "opencl/source/device/device_info.h"
|
||||
|
||||
namespace L0 {
|
||||
// Destroys the immediate command queue if one is attached, then releases the
// allocations tracked by this list and forgets the stored printf kernels.
CommandList::~CommandList() {
    if (cmdQImmediate != nullptr) {
        cmdQImmediate->destroy();
    }

    removeDeallocationContainerData();
    removeHostPtrAllocations();
    printfFunctionContainer.clear();
}
|
||||
// Remembers `function` in printfFunctionContainer unless it is already there,
// so each kernel appears at most once.
void CommandList::storePrintfFunction(Kernel *function) {
    auto &stored = this->printfFunctionContainer;

    const bool alreadyStored = std::find(stored.begin(), stored.end(), function) != stored.end();
    if (alreadyStored) {
        return;
    }

    stored.push_back(function);
}
|
||||
|
||||
void CommandList::removeHostPtrAllocations() {
|
||||
auto memoryManager = device ? device->getDriverHandle()->getMemoryManager() : nullptr;
|
||||
for (auto &allocation : hostPtrMap) {
|
||||
UNRECOVERABLE_IF(memoryManager == nullptr);
|
||||
memoryManager->freeGraphicsMemory(allocation.second);
|
||||
}
|
||||
hostPtrMap.clear();
|
||||
}
|
||||
|
||||
void CommandList::removeDeallocationContainerData() {
|
||||
auto memoryManager = device ? device->getDriverHandle()->getMemoryManager() : nullptr;
|
||||
|
||||
auto container = commandContainer.getDeallocationContainer();
|
||||
for (auto deallocation : container) {
|
||||
DEBUG_BREAK_IF(deallocation == nullptr);
|
||||
UNRECOVERABLE_IF(memoryManager == nullptr);
|
||||
NEO::SvmAllocationData *allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(reinterpret_cast<void *>(deallocation->getGpuAddress()));
|
||||
if (allocData) {
|
||||
device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->remove(*allocData);
|
||||
}
|
||||
if (!((deallocation->getAllocationType() == NEO::GraphicsAllocation::AllocationType::INTERNAL_HEAP) ||
|
||||
(deallocation->getAllocationType() == NEO::GraphicsAllocation::AllocationType::LINEAR_STREAM))) {
|
||||
memoryManager->freeGraphicsMemory(deallocation);
|
||||
eraseDeallocationContainerEntry(deallocation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Removes the first occurrence of `allocation` from the command container's
// deallocation list; does nothing when it is not listed.
void CommandList::eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation) {
    auto &deallocations = commandContainer.getDeallocationContainer();

    auto position = std::find(deallocations.begin(), deallocations.end(), allocation);
    if (position == deallocations.end()) {
        return;
    }

    deallocations.erase(position);
}
|
||||
|
||||
// Removes the first occurrence of `allocation` from the command container's
// residency list; does nothing when it is not listed.
void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation) {
    auto &residency = commandContainer.getResidencyContainer();

    auto position = std::find(residency.begin(), residency.end(), allocation);
    if (position == residency.end()) {
        return;
    }

    residency.erase(position);
}
|
||||
|
||||
// Collects the preemption-relevant flags of `function` and of the device and
// lets NEO::PreemptionHelper derive the preemption mode for that kernel,
// starting from the device's preemption mode.
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *function) {
    const auto kernelAttributes = function->getImmutableData()->getDescriptor().kernelAttributes;
    const auto &workarounds = device->getHwInfo().workaroundTable;

    NEO::PreemptionFlags preemptionFlags = {};
    auto &bits = preemptionFlags.flags;

    // Kernel-provided inputs
    bits.disabledMidThreadPreemptionKernel = kernelAttributes.flags.requiresDisabledMidThreadPreemption;
    bits.usesFencesForReadWriteImages = kernelAttributes.flags.usesFencesForReadWriteImages;

    // Device-provided inputs
    bits.deviceSupportsVmePreemption = device->getDeviceInfo().vmeAvcSupportsPreemption;
    bits.disablePerCtxtPreemptionGranularityControl = workarounds.waDisablePerCtxtPreemptionGranularityControl;
    bits.disableLSQCROPERFforOCL = workarounds.waDisableLSQCROPERFforOCL;

    return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), preemptionFlags);
}
|
||||
|
||||
} // namespace L0
|
||||
192
level_zero/core/source/cmdlist.h
Normal file
192
level_zero/core/source/cmdlist.h
Normal file
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
#include "shared/source/command_stream/preemption_mode.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/kernel.h"
|
||||
#include <level_zero/ze_api.h>
|
||||
#include <level_zero/zet_api.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
struct _ze_command_list_handle_t {};
|
||||
|
||||
namespace L0 {
|
||||
struct EventPool;
|
||||
struct Event;
|
||||
struct Kernel;
|
||||
|
||||
struct CommandList : _ze_command_list_handle_t {
|
||||
static constexpr uint32_t maxNumInterfaceDescriptorsPerMediaInterfaceDescriptorLoad = 62u;
|
||||
static constexpr uint32_t defaultNumIddsPerBlock = maxNumInterfaceDescriptorsPerMediaInterfaceDescriptorLoad;
|
||||
static constexpr uint32_t commandListimmediateIddsPerBlock = 1u;
|
||||
|
||||
CommandList() {}
|
||||
CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {}
|
||||
|
||||
template <typename Type>
|
||||
struct Allocator {
|
||||
static CommandList *allocate(uint32_t numIddsPerBlock) { return new Type(numIddsPerBlock); }
|
||||
};
|
||||
|
||||
virtual ze_result_t close() = 0;
|
||||
virtual ze_result_t destroy() = 0;
|
||||
virtual ze_result_t appendEventReset(ze_event_handle_t hEvent) = 0;
|
||||
virtual ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
||||
const void **pRanges,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchFunction(ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchMultipleFunctionsIndirect(uint32_t numFunctions, const ze_kernel_handle_t *phFunctions,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size,
|
||||
ze_memory_advice_t advice) = 0;
|
||||
virtual ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0;
|
||||
virtual ze_result_t appendMemoryCopyRegion(void *dstPtr,
|
||||
const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch,
|
||||
uint32_t dstSlicePitch,
|
||||
const void *srcPtr,
|
||||
const ze_copy_region_t *srcRegion,
|
||||
uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hSignalEvent) = 0;
|
||||
virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern,
|
||||
size_t patternSize, size_t size, ze_event_handle_t hEvent) = 0;
|
||||
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
|
||||
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
|
||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0;
|
||||
virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0;
|
||||
virtual ze_result_t reset() = 0;
|
||||
|
||||
virtual ze_result_t appendMetricMemoryBarrier() = 0;
|
||||
virtual ze_result_t appendMetricTracerMarker(zet_metric_tracer_handle_t hMetricTracer,
|
||||
uint32_t value) = 0;
|
||||
virtual ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) = 0;
|
||||
virtual ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery,
|
||||
ze_event_handle_t hCompletionEvent) = 0;
|
||||
|
||||
virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0;
|
||||
virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0;
|
||||
virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0;
|
||||
virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0;
|
||||
virtual ze_result_t appendMIMath(void *aluArray, size_t aluCount) = 0;
|
||||
virtual ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) = 0;
|
||||
virtual ze_result_t appendMIBBEnd() = 0;
|
||||
virtual ze_result_t appendMINoop() = 0;
|
||||
|
||||
static CommandList *create(uint32_t productFamily, Device *device);
|
||||
static CommandList *createImmediate(uint32_t productFamily, Device *device,
|
||||
const ze_command_queue_desc_t *desc,
|
||||
bool internalUsage);
|
||||
|
||||
static CommandList *fromHandle(ze_command_list_handle_t handle) {
|
||||
return static_cast<CommandList *>(handle);
|
||||
}
|
||||
|
||||
inline ze_command_list_handle_t toHandle() { return this; }
|
||||
|
||||
uint32_t getCommandListPerThreadScratchSize() const {
|
||||
return commandListPerThreadScratchSize;
|
||||
}
|
||||
|
||||
NEO::PreemptionMode getCommandListPreemptionMode() const {
|
||||
return commandListPreemptionMode;
|
||||
}
|
||||
|
||||
NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *function);
|
||||
|
||||
std::vector<Kernel *> &getPrintfFunctionContainer() {
|
||||
return this->printfFunctionContainer;
|
||||
}
|
||||
|
||||
void storePrintfFunction(Kernel *function);
|
||||
void removeDeallocationContainerData();
|
||||
void removeHostPtrAllocations();
|
||||
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
|
||||
enum CommandListType : uint32_t {
|
||||
TYPE_REGULAR = 0u,
|
||||
TYPE_IMMEDIATE = 1u
|
||||
};
|
||||
|
||||
CommandQueue *cmdQImmediate = nullptr;
|
||||
uint32_t cmdListType = CommandListType::TYPE_REGULAR;
|
||||
const ze_command_queue_desc_t *cmdQImmediateDesc = nullptr;
|
||||
|
||||
Device *device = nullptr;
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
|
||||
virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
|
||||
virtual bool initialize(Device *device) = 0;
|
||||
virtual ~CommandList();
|
||||
NEO::CommandContainer commandContainer;
|
||||
|
||||
protected:
|
||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||
uint32_t commandListPerThreadScratchSize = 0u;
|
||||
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
|
||||
};
|
||||
|
||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||
extern CommandListAllocatorFn commandListFactory[];
|
||||
extern CommandListAllocatorFn commandListFactoryImmediate[];
|
||||
|
||||
template <uint32_t productFamily, typename CommandListType>
|
||||
struct CommandListPopulateFactory {
|
||||
CommandListPopulateFactory() {
|
||||
commandListFactory[productFamily] = CommandList::Allocator<CommandListType>::allocate;
|
||||
}
|
||||
};
|
||||
|
||||
template <uint32_t productFamily, typename CommandListType>
|
||||
struct CommandListImmediatePopulateFactory {
|
||||
CommandListImmediatePopulateFactory() {
|
||||
commandListFactoryImmediate[productFamily] = CommandList::Allocator<CommandListType>::allocate;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
57
level_zero/core/source/cmdlist_extended/cmdlist_extended.inl
Normal file
57
level_zero/core/source/cmdlist_extended/cmdlist_extended.inl
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegReg(uint32_t reg1, uint32_t reg2) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegMem(uint32_t reg1, uint64_t address) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMIStoreRegMem(uint32_t reg1, uint64_t address) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMIMath(void *aluArray, size_t aluCount) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMIBBStart(uint64_t address,
|
||||
size_t predication,
|
||||
bool secondLevel) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMIBBEnd() {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMINoop() {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
175
level_zero/core/source/cmdlist_hw.h
Normal file
175
level_zero/core/source/cmdlist_hw.h
Normal file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/builtin_functions_lib.h"
|
||||
#include "level_zero/core/source/cmdlist_imp.h"
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace NEO {
|
||||
enum class ImageType;
|
||||
}
|
||||
|
||||
namespace L0 {
|
||||
struct AlignedAllocationData {
|
||||
uintptr_t alignedAllocationPtr = 0u;
|
||||
size_t offset = 0u;
|
||||
NEO::GraphicsAllocation *alloc = nullptr;
|
||||
bool needsFlush = false;
|
||||
};
|
||||
|
||||
struct EventPool;
|
||||
struct Event;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct CommandListCoreFamily : CommandListImp {
|
||||
using BaseClass = CommandListImp;
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
using CommandListImp::CommandListImp;
|
||||
CommandListCoreFamily() {}
|
||||
CommandListCoreFamily(uint32_t numIddsPerBlock) : CommandListImp(numIddsPerBlock) {}
|
||||
|
||||
bool initialize(Device *device) override;
|
||||
virtual void programL3(bool isSLMused);
|
||||
|
||||
ze_result_t close() override;
|
||||
ze_result_t appendEventReset(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
const void **pRanges,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchFunction(ze_kernel_handle_t hFunction,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchMultipleFunctionsIndirect(uint32_t numFunctions,
|
||||
const ze_kernel_handle_t *phFunctions,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendMemAdvise(ze_device_handle_t hDevice,
|
||||
const void *ptr, size_t size,
|
||||
ze_memory_advice_t advice) override;
|
||||
ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
|
||||
NEO::GraphicsAllocation *srcptr,
|
||||
size_t size,
|
||||
bool flushHost) override;
|
||||
ze_result_t appendMemoryCopyRegion(void *dstPtr,
|
||||
const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch,
|
||||
uint32_t dstSlicePitch,
|
||||
const void *srcPtr,
|
||||
const ze_copy_region_t *srcRegion,
|
||||
uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hSignalEvent) override;
|
||||
ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) override;
|
||||
ze_result_t appendMemoryFill(void *ptr, const void *pattern,
|
||||
size_t patternSize, size_t size,
|
||||
ze_event_handle_t hEvent) override;
|
||||
|
||||
ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override;
|
||||
ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override;
|
||||
ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) override;
|
||||
ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) override;
|
||||
ze_result_t appendMIMath(void *aluArray, size_t aluCount) override;
|
||||
ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) override;
|
||||
ze_result_t appendMIBBEnd() override;
|
||||
ze_result_t appendMINoop() override;
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override;
|
||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||
ze_result_t reset() override;
|
||||
ze_result_t executeCommandListImmediate(bool performMigration) override;
|
||||
|
||||
protected:
|
||||
ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset, void *srcPtr,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset, uint32_t size,
|
||||
uint32_t elementSize, Builtin builtin);
|
||||
|
||||
ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch, size_t dstOffset,
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
|
||||
ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents);
|
||||
static constexpr uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize * sizeof(uint8_t);
|
||||
|
||||
static constexpr uint32_t regGlobalTimestamp = 0x2358;
|
||||
|
||||
ze_result_t appendLaunchFunctionWithParams(ze_kernel_handle_t hFunction,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool isIndirect, bool isPredicate);
|
||||
|
||||
ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions);
|
||||
|
||||
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
||||
const void **pRanges);
|
||||
|
||||
ze_result_t setGroupSizeIndirect(uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]);
|
||||
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
|
||||
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize);
|
||||
ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
struct CommandListProductFamily;
|
||||
|
||||
} // namespace L0
|
||||
1098
level_zero/core/source/cmdlist_hw.inl
Normal file
1098
level_zero/core/source/cmdlist_hw.inl
Normal file
File diff suppressed because it is too large
Load Diff
117
level_zero/core/source/cmdlist_hw_base.inl
Normal file
117
level_zero/core/source/cmdlist_hw_base.inl
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/unified_memory/unified_memory.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace L0 {
|
||||
struct DeviceImp;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchFunctionWithParams(ze_kernel_handle_t hFunction,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool isIndirect, bool isPredicate) {
|
||||
const auto function = Kernel::fromHandle(hFunction);
|
||||
UNRECOVERABLE_IF(function == nullptr);
|
||||
const auto functionImmutableData = function->getImmutableData();
|
||||
commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, function->getPerThreadScratchSize());
|
||||
|
||||
auto functionPreemptionMode = obtainFunctionPreemptionMode(function);
|
||||
commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode);
|
||||
|
||||
if (!isIndirect) {
|
||||
function->setGroupCount(pThreadGroupDimensions->groupCountX,
|
||||
pThreadGroupDimensions->groupCountY,
|
||||
pThreadGroupDimensions->groupCountZ);
|
||||
}
|
||||
|
||||
if (isIndirect && pThreadGroupDimensions) {
|
||||
prepareIndirectParams(pThreadGroupDimensions);
|
||||
}
|
||||
|
||||
auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
UnifiedMemoryControls unifiedMemoryControls = function->getUnifiedMemoryControls();
|
||||
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed) {
|
||||
device->getDriverHandle()->getSvmAllocsManager()->makeInternalAllocationsResident(*csr, unifiedMemoryControls.generateMask());
|
||||
}
|
||||
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer,
|
||||
reinterpret_cast<const void *>(pThreadGroupDimensions), isIndirect, isPredicate, function,
|
||||
0, device->getNEODevice(), commandListPreemptionMode);
|
||||
|
||||
if (hEvent) {
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
|
||||
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
|
||||
auto &residencyContainer = function->getResidencyContainer();
|
||||
for (auto resource : residencyContainer) {
|
||||
commandContainer.addToResidencyContainer(resource);
|
||||
}
|
||||
|
||||
if (functionImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
|
||||
storePrintfFunction(function);
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (!hEvent) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
if (!event->isTimestampEvent) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t timeStampAddress = 0;
|
||||
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||
if (beforeWalker) {
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_START);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, regGlobalTimestamp, timeStampAddress);
|
||||
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_START);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
||||
} else {
|
||||
|
||||
// Local Context End SRM
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
||||
|
||||
// Global End PC
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_END);
|
||||
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
*(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
|
||||
timeStampAddress,
|
||||
0llu,
|
||||
dcFlushEnable,
|
||||
device->getHwInfo());
|
||||
}
|
||||
}
|
||||
} // namespace L0
|
||||
85
level_zero/core/source/cmdlist_hw_immediate.h
Normal file
85
level_zero/core/source/cmdlist_hw_immediate.h
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct EventPool;
|
||||
struct Event;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
|
||||
using BaseClass::executeCommandListImmediate;
|
||||
|
||||
using BaseClass::BaseClass;
|
||||
CommandListCoreFamilyImmediate() {}
|
||||
CommandListCoreFamilyImmediate(uint32_t numIddsPerBlock) : CommandListCoreFamily<gfxCoreFamily>(numIddsPerBlock) {}
|
||||
|
||||
ze_result_t appendLaunchFunction(
|
||||
ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendLaunchFunctionIndirect(
|
||||
ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendBarrier(ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendMemoryCopy(void *dstptr,
|
||||
const void *srcptr,
|
||||
size_t size,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendMemoryCopyRegion(void *dstPtr,
|
||||
const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch,
|
||||
uint32_t dstSlicePitch,
|
||||
const void *srcPtr,
|
||||
const ze_copy_region_t *srcRegion,
|
||||
uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hSignalEvent) override;
|
||||
|
||||
ze_result_t appendMemoryFill(void *ptr, const void *pattern,
|
||||
size_t patternSize, size_t size,
|
||||
ze_event_handle_t hEvent) override;
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
|
||||
ze_result_t appendEventReset(ze_event_handle_t hEvent) override;
|
||||
|
||||
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) override;
|
||||
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override;
|
||||
|
||||
ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage,
|
||||
const void *srcPtr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendImageCopyToMemory(void *dstPtr,
|
||||
ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
struct CommandListImmediateProductFamily;
|
||||
|
||||
} // namespace L0
|
||||
153
level_zero/core/source/cmdlist_hw_immediate.inl
Normal file
153
level_zero/core/source/cmdlist_hw_immediate.inl
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.h"
|
||||
|
||||
namespace L0 {
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchFunction(
|
||||
ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchFunction(hFunction, pThreadGroupDimensions, hEvent, numWaitEvents, phWaitEvents);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchFunctionIndirect(
|
||||
ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchFunctionIndirect(hFunction, pDispatchArgumentsBuffer, hEvent, numWaitEvents, phWaitEvents);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
const void *srcptr,
|
||||
size_t size,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegion(void *dstPtr,
|
||||
const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch,
|
||||
uint32_t dstSlicePitch,
|
||||
const void *srcPtr,
|
||||
const ze_copy_region_t *srcRegion,
|
||||
uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hSignalEvent) {
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch, srcPtr, srcRegion, srcPitch, srcSlicePitch, hSignalEvent);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void *ptr, const void *pattern,
|
||||
size_t patternSize, size_t size,
|
||||
ze_event_handle_t hEvent) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hEvent);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hEvent);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_event_handle_t hEvent) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hEvent);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstptr, srcptr, size, flushHost);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(false);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phEvent);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMemory(ze_image_handle_t hDstImage,
|
||||
const void *srcPtr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hEvent, numWaitEvents, phWaitEvents);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemory(void *dstPtr,
|
||||
ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hEvent, numWaitEvents, phWaitEvents);
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
executeCommandListImmediate(true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
} // namespace L0
|
||||
108
level_zero/core/source/cmdlist_imp.cpp
Normal file
108
level_zero/core/source/cmdlist_imp.cpp
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/cmdlist_imp.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
#include "level_zero/tools/source/metrics/metric.h"
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
|
||||
CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};
|
||||
|
||||
// Deletes this command list object and reports success.
ze_result_t CommandListImp::destroy() {
    delete this; // no member may be accessed past this point
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Forwards to MetricQuery::appendMemoryBarrier() for this command list.
ze_result_t CommandListImp::appendMetricMemoryBarrier() {
    const ze_result_t status = MetricQuery::appendMemoryBarrier(*this);
    return status;
}
|
||||
|
||||
// Forwards to MetricQuery::appendTracerMarker() for this command list,
// passing the tracer handle and marker value through unchanged.
ze_result_t CommandListImp::appendMetricTracerMarker(zet_metric_tracer_handle_t hMetricTracer, uint32_t value) {
    const ze_result_t status = MetricQuery::appendTracerMarker(*this, hMetricTracer, value);
    return status;
}
|
||||
|
||||
// Resolves the metric query handle and asks the query to append its "begin"
// commands to this command list.
ze_result_t CommandListImp::appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) {
    auto metricQuery = MetricQuery::fromHandle(hMetricQuery);
    return metricQuery->appendBegin(*this);
}
|
||||
|
||||
// Resolves the metric query handle and asks the query to append its "end"
// commands to this command list, forwarding the completion event handle.
ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hCompletionEvent) {
    auto metricQuery = MetricQuery::fromHandle(hMetricQuery);
    return metricQuery->appendEnd(*this, hCompletionEvent);
}
|
||||
|
||||
// Creates and initializes a regular command list for the given product.
//
// productFamily  index into commandListFactory; values outside the table
//                yield nullptr.
// device         device the list is initialized against.
//
// Returns the new command list, or nullptr when no allocator is registered
// for the product or when initialize() reports failure (the partially set up
// object is destroyed in that case instead of being handed to the caller).
CommandList *CommandList::create(uint32_t productFamily, Device *device) {
    CommandListAllocatorFn allocator = nullptr;
    if (productFamily < IGFX_MAX_PRODUCT) {
        allocator = commandListFactory[productFamily];
    }

    CommandListImp *commandList = nullptr;
    if (allocator) {
        commandList = static_cast<CommandListImp *>((*allocator)(CommandList::defaultNumIddsPerBlock));

        if (!commandList->initialize(device)) {
            commandList->destroy();
            commandList = nullptr;
        }
    }
    return commandList;
}
|
||||
|
||||
// Creates an immediate command list together with the command queue it
// submits to.
//
// productFamily  index into commandListFactoryImmediate; values outside the
//                table yield nullptr.
// device         device the list and queue are created for.
// desc           queue descriptor, passed to CommandQueue::create() and
//                stored in the list.
// internalUsage  selects the device's internal engine instead of the default
//                engine for the queue's command stream receiver.
//
// Returns the new command list, or nullptr when no allocator is registered,
// the queue cannot be created, or the list fails to initialize. No queue is
// created when there is no allocator, and on every failure path whatever was
// created is destroyed.
CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device,
                                          const ze_command_queue_desc_t *desc,
                                          bool internalUsage) {

    // Resolve the allocator first so an unsupported product does not create
    // (and immediately tear down) a command queue.
    CommandListAllocatorFn allocator = nullptr;
    if (productFamily < IGFX_MAX_PRODUCT) {
        allocator = commandListFactoryImmediate[productFamily];
    }
    if (!allocator) {
        return nullptr;
    }

    auto deviceImp = static_cast<DeviceImp *>(device);
    NEO::CommandStreamReceiver *csr = nullptr;
    if (internalUsage) {
        csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver;
    } else {
        csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver;
    }

    auto commandQueue = CommandQueue::create(productFamily, device, csr, desc);
    if (!commandQueue) {
        return nullptr;
    }

    auto commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
    if (commandList && !commandList->initialize(device)) {
        commandList->destroy();
        commandList = nullptr;
    }

    if (!commandList) {
        commandQueue->destroy();
        return nullptr;
    }

    commandList->cmdQImmediate = commandQueue;
    commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
    // NOTE(review): this stores the caller's descriptor as passed in; if the
    // member is a pointer the descriptor must outlive the list - confirm.
    commandList->cmdQImmediateDesc = desc;
    commandList->commandListPreemptionMode = device->getDevicePreemptionMode();

    return commandList;
}
|
||||
|
||||
} // namespace L0
|
||||
31
level_zero/core/source/cmdlist_imp.h
Normal file
31
level_zero/core/source/cmdlist_imp.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct CommandListImp : CommandList {
|
||||
CommandListImp() {}
|
||||
CommandListImp(uint32_t numIddsPerBlock) : CommandList(numIddsPerBlock) {}
|
||||
ze_result_t destroy() override;
|
||||
|
||||
ze_result_t appendMetricMemoryBarrier() override;
|
||||
ze_result_t appendMetricTracerMarker(zet_metric_tracer_handle_t hMetricTracer,
|
||||
uint32_t value) override;
|
||||
ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) override;
|
||||
ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery,
|
||||
ze_event_handle_t hCompletionEvent) override;
|
||||
|
||||
protected:
|
||||
~CommandListImp() override = default;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
160
level_zero/core/source/cmdqueue.cpp
Normal file
160
level_zero/core/source/cmdqueue.cpp
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
#include "level_zero/core/source/cmdqueue_imp.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
|
||||
#include "hw_helpers.h"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {};
|
||||
|
||||
// Deletes this command queue object and reports success.
ze_result_t CommandQueueImp::destroy() {
    delete this; // no member may be accessed past this point
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
void CommandQueueImp::initialize() {
|
||||
buffers.initialize(device, totalCmdBufferSize);
|
||||
NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation();
|
||||
commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(),
|
||||
defaultQueueCmdBufferSize);
|
||||
UNRECOVERABLE_IF(commandStream == nullptr);
|
||||
commandStream->replaceGraphicsAllocation(bufferAllocation);
|
||||
}
|
||||
|
||||
void CommandQueueImp::reserveLinearStreamSize(size_t size) {
|
||||
UNRECOVERABLE_IF(commandStream == nullptr);
|
||||
if (commandStream->getAvailableSpace() < size) {
|
||||
buffers.switchBuffers(csr);
|
||||
NEO::GraphicsAllocation *nextBufferAllocation = buffers.getCurrentBufferAllocation();
|
||||
commandStream->replaceBuffer(nextBufferAllocation->getUnderlyingBuffer(),
|
||||
defaultQueueCmdBufferSize);
|
||||
commandStream->replaceGraphicsAllocation(nextBufferAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr) {
|
||||
UNRECOVERABLE_IF(csr == nullptr);
|
||||
|
||||
NEO::BatchBuffer batchBuffer(commandStream->getGraphicsAllocation(), offset, 0u, nullptr, false, false,
|
||||
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
|
||||
commandStream->getUsed(), commandStream, endingCmdPtr);
|
||||
|
||||
csr->submitBatchBuffer(batchBuffer, residencyContainer);
|
||||
buffers.setCurrentFlushStamp(csr->obtainCurrentFlushStamp());
|
||||
}
|
||||
|
||||
// Waits for the queue by delegating to synchronizeByPollingForTaskCount()
// with the same timeout.
ze_result_t CommandQueueImp::synchronize(uint32_t timeout) {
    const ze_result_t status = synchronizeByPollingForTaskCount(timeout);
    return status;
}
|
||||
|
||||
// Waits until the command stream receiver's tag reaches this queue's task
// count.
//
// timeout  passed to waitForCompletionWithTimeout(); the maximum uint32_t
//          value disables the timeout.
//
// Returns ZE_RESULT_NOT_READY if the tag is still below the awaited task
// count after waiting; otherwise prints pending printf output and returns
// ZE_RESULT_SUCCESS.
ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint32_t timeout) {
    UNRECOVERABLE_IF(csr == nullptr);

    // Read the task count once and use that value for both waits and for the
    // completion check, so all three refer to the same submission.
    const auto taskCountToWait = this->taskCount;

    waitForTaskCountWithKmdNotifyFallbackHelper(csr, taskCountToWait, 0, false, false);

    bool enableTimeout = (timeout != std::numeric_limits<uint32_t>::max());
    csr->waitForCompletionWithTimeout(enableTimeout, timeout, taskCountToWait);

    if (*csr->getTagAddress() < taskCountToWait) {
        return ZE_RESULT_NOT_READY;
    }

    printFunctionsPrintfOutput();

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
void CommandQueueImp::printFunctionsPrintfOutput() {
|
||||
size_t size = this->printfFunctionContainer.size();
|
||||
if (size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
this->printfFunctionContainer[i]->printPrintfOutput();
|
||||
}
|
||||
this->printfFunctionContainer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Creates and initializes a command queue for the given product.
// Returns nullptr when productFamily is outside commandQueueFactory or no
// allocator is registered for it.
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) {
    if (!(productFamily < IGFX_MAX_PRODUCT)) {
        return nullptr;
    }

    CommandQueueAllocatorFn allocateQueue = commandQueueFactory[productFamily];
    if (!allocateQueue) {
        return nullptr;
    }

    auto queue = static_cast<CommandQueueImp *>((*allocateQueue)(device, csr, desc));
    queue->initialize();
    return queue;
}
|
||||
|
||||
// Resolves the command queue handle to its CommandQueueImp and asks it to
// create a fence described by `desc`, returned through `phFence`.
ze_result_t fenceCreate(ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc,
                        ze_fence_handle_t *phFence) {
    CommandQueue *queue = CommandQueue::fromHandle(hCommandQueue);
    auto queueImp = static_cast<CommandQueueImp *>(queue);
    return queueImp->createFence(desc, phFence);
}
|
||||
|
||||
// Returns the `mode` field of the descriptor this queue was created with.
ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() {
    return this->desc.mode;
}
|
||||
|
||||
// Allocates and zero-fills both command-buffer slots and resets their
// flush stamps.
//
// sizeRequested is rounded up to a multiple of MemoryConstants::pageSize64k
// before allocation. A failed allocation is unrecoverable.
void CommandQueueImp::CommandBufferManager::initialize(Device *device, size_t sizeRequested) {
    size_t alignedSize = alignUp<size_t>(sizeRequested, MemoryConstants::pageSize64k);
    NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, alignedSize,
                                         NEO::GraphicsAllocation::AllocationType::COMMAND_BUFFER,
                                         device->isMultiDeviceCapable(),
                                         false,
                                         NEO::SubDevice::unspecifiedSubDeviceIndex};

    // Same steps for each slot, in slot order: allocate, verify, clear.
    for (uint32_t slot = BUFFER_ALLOCATION::FIRST; slot < BUFFER_ALLOCATION::COUNT; ++slot) {
        buffers[slot] = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);

        UNRECOVERABLE_IF(nullptr == buffers[slot]);
        memset(buffers[slot]->getUnderlyingBuffer(), 0, buffers[slot]->getUnderlyingBufferSize());
    }

    flushId[BUFFER_ALLOCATION::FIRST] = 0u;
    flushId[BUFFER_ALLOCATION::SECOND] = 0u;
}
|
||||
|
||||
// Hands both command-buffer allocations back to `memoryManager`,
// first slot before second.
void CommandQueueImp::CommandBufferManager::destroy(NEO::MemoryManager *memoryManager) {
    NEO::GraphicsAllocation *firstBuffer = buffers[BUFFER_ALLOCATION::FIRST];
    memoryManager->freeGraphicsMemory(firstBuffer);

    NEO::GraphicsAllocation *secondBuffer = buffers[BUFFER_ALLOCATION::SECOND];
    memoryManager->freeGraphicsMemory(secondBuffer);
}
|
||||
|
||||
// Flips the active slot (FIRST <-> SECOND). If the newly selected slot has a
// non-zero flush stamp recorded, waits on that stamp through `csr` before
// returning; `csr` must be non-null in that case.
void CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr) {
    bufferUse = (bufferUse == BUFFER_ALLOCATION::FIRST) ? BUFFER_ALLOCATION::SECOND
                                                        : BUFFER_ALLOCATION::FIRST;

    NEO::FlushStamp pendingStamp = flushId[bufferUse];
    if (pendingStamp == 0u) {
        // Nothing was recorded for this slot, so there is nothing to wait for.
        return;
    }

    UNRECOVERABLE_IF(csr == nullptr);
    csr->waitForFlushStamp(pendingStamp);
}
|
||||
|
||||
} // namespace L0
|
||||
76
level_zero/core/source/cmdqueue.h
Normal file
76
level_zero/core/source/cmdqueue.h
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include <level_zero/ze_common.h>
|
||||
#include <level_zero/ze_fence.h>
|
||||
|
||||
#include <atomic>
|
||||
|
||||
// Empty tag type behind ze_command_queue_handle_t; L0::CommandQueue derives
// from it so that queue objects can be handed out and taken back as handles.
struct _ze_command_queue_handle_t {
};
|
||||
|
||||
namespace NEO {
|
||||
class CommandStreamReceiver;
|
||||
}
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct CommandQueue : _ze_command_queue_handle_t {
|
||||
template <typename Type>
|
||||
struct Allocator {
|
||||
static CommandQueue *allocate(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) {
|
||||
return new Type(device, csr, desc);
|
||||
}
|
||||
};
|
||||
|
||||
virtual ~CommandQueue() = default;
|
||||
|
||||
virtual ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) = 0;
|
||||
virtual ze_result_t destroy() = 0;
|
||||
virtual ze_result_t executeCommandLists(uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
ze_fence_handle_t hFence, bool performMigration) = 0;
|
||||
virtual ze_result_t executeCommands(uint32_t numCommands,
|
||||
void *phCommands,
|
||||
ze_fence_handle_t hFence) = 0;
|
||||
virtual ze_result_t synchronize(uint32_t timeout) = 0;
|
||||
|
||||
static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc);
|
||||
|
||||
static CommandQueue *fromHandle(ze_command_queue_handle_t handle) {
|
||||
return static_cast<CommandQueue *>(handle);
|
||||
}
|
||||
|
||||
inline ze_command_queue_handle_t toHandle() { return this; }
|
||||
|
||||
inline void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) {
|
||||
commandQueuePreemptionMode = newPreemptionMode;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::atomic<uint32_t> commandQueuePerThreadScratchSize;
|
||||
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
|
||||
};
|
||||
|
||||
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc);
|
||||
extern CommandQueueAllocatorFn commandQueueFactory[];
|
||||
|
||||
template <uint32_t productFamily, typename CommandQueueType>
|
||||
struct CommandQueuePopulateFactory {
|
||||
CommandQueuePopulateFactory() {
|
||||
commandQueueFactory[productFamily] = CommandQueue::Allocator<CommandQueueType>::allocate;
|
||||
}
|
||||
};
|
||||
|
||||
ze_result_t fenceCreate(ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc,
|
||||
ze_fence_handle_t *phFence);
|
||||
|
||||
ze_result_t fenceDestroy(ze_fence_handle_t hFence);
|
||||
|
||||
} // namespace L0
|
||||
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommands(uint32_t numCommandGraphs,
|
||||
void *phCommands,
|
||||
ze_fence_handle_t hFence) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
} // namespace L0
|
||||
50
level_zero/core/source/cmdqueue_hw.h
Normal file
50
level_zero/core/source/cmdqueue_hw.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||
#include "shared/source/command_stream/submissions_aggregator.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/memory_constants.h"
|
||||
#include "shared/source/memory_manager/residency_container.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_imp.h"
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct CommandQueueHw : public CommandQueueImp {
|
||||
using CommandQueueImp::CommandQueueImp;
|
||||
|
||||
ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) override;
|
||||
ze_result_t destroy() override;
|
||||
ze_result_t executeCommandLists(uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
ze_fence_handle_t hFence, bool performMigration) override;
|
||||
ze_result_t executeCommands(uint32_t numCommands,
|
||||
void *phCommands,
|
||||
ze_fence_handle_t hFence) override;
|
||||
|
||||
void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) override;
|
||||
|
||||
void programGeneralStateBaseAddress(uint64_t gsba, NEO::LinearStream &commandStream);
|
||||
size_t estimateStateBaseAddressCmdSize();
|
||||
void programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream);
|
||||
|
||||
size_t estimateFrontEndCmdSize();
|
||||
size_t estimatePipelineSelect();
|
||||
void programPipelineSelect(NEO::LinearStream &commandStream);
|
||||
|
||||
void handleScratchSpace(NEO::ResidencyContainer &residency,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState);
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
316
level_zero/core/source/cmdqueue_hw.inl
Normal file
316
level_zero/core/source/cmdqueue_hw.inl
Normal file
@@ -0,0 +1,316 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/interlocked_max.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
#include "level_zero/core/source/cmdqueue_hw.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/fence.h"
|
||||
#include "level_zero/tools/source/metrics/metric.h"
|
||||
|
||||
#include <limits>
|
||||
#include <thread>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandQueueHw<gfxCoreFamily>::createFence(const ze_fence_desc_t *desc,
|
||||
ze_fence_handle_t *phFence) {
|
||||
*phFence = Fence::create(this, desc);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandQueueHw<gfxCoreFamily>::destroy() {
|
||||
delete commandStream;
|
||||
buffers.destroy(this->getDevice()->getDriverHandle()->getMemoryManager());
|
||||
delete this;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
|
||||
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
size_t spaceForResidency = 0;
|
||||
size_t preemptionSize = 0u;
|
||||
constexpr size_t residencyContainerSpaceForPreemption = 2;
|
||||
constexpr size_t residencyContainerSpaceForFence = 1;
|
||||
constexpr size_t residencyContainerSpaceForTagWrite = 1;
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
NEO::PreemptionMode statePreemption = commandQueuePreemptionMode;
|
||||
auto devicePreemption = device->getDevicePreemptionMode();
|
||||
if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) {
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandQueuePreemptionMode,
|
||||
devicePreemption) +
|
||||
NEO::PreemptionHelper::getRequiredPreambleSize<GfxFamily>(*neoDevice) +
|
||||
NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice);
|
||||
statePreemption = devicePreemption;
|
||||
}
|
||||
if (devicePreemption == NEO::PreemptionMode::MidThread) {
|
||||
spaceForResidency += residencyContainerSpaceForPreemption;
|
||||
}
|
||||
|
||||
bool directSubmissionEnabled = csr->isDirectSubmissionEnabled();
|
||||
|
||||
NEO::ResidencyContainer residencyContainer;
|
||||
L0::Fence *fence = nullptr;
|
||||
|
||||
device->activateMetricGroups();
|
||||
|
||||
size_t totalCmdBuffers = 0;
|
||||
for (auto i = 0u; i < numCommandLists; i++) {
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
|
||||
totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
|
||||
spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
preemptionSize += sizeof(PIPE_CONTROL);
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandListPreemption, statePreemption);
|
||||
statePreemption = commandListPreemption;
|
||||
}
|
||||
|
||||
interlockedMax(commandQueuePerThreadScratchSize, commandList->getCommandListPerThreadScratchSize());
|
||||
}
|
||||
|
||||
size_t linearStreamSizeEstimate = totalCmdBuffers * sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
if (directSubmissionEnabled) {
|
||||
linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_START);
|
||||
} else {
|
||||
linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_END);
|
||||
}
|
||||
|
||||
if (hFence) {
|
||||
fence = Fence::fromHandle(hFence);
|
||||
spaceForResidency += residencyContainerSpaceForFence;
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
||||
}
|
||||
|
||||
spaceForResidency += residencyContainerSpaceForTagWrite;
|
||||
|
||||
residencyContainer.reserve(spaceForResidency);
|
||||
|
||||
auto scratchSpaceController = csr->getScratchSpaceController();
|
||||
bool gsbaStateDirty = false;
|
||||
bool frontEndStateDirty = false;
|
||||
handleScratchSpace(residencyContainer,
|
||||
scratchSpaceController,
|
||||
gsbaStateDirty, frontEndStateDirty);
|
||||
|
||||
gsbaStateDirty |= !gsbaInit;
|
||||
frontEndStateDirty |= !frontEndInit;
|
||||
|
||||
if (!gpgpuEnabled) {
|
||||
linearStreamSizeEstimate += estimatePipelineSelect();
|
||||
}
|
||||
|
||||
if (frontEndStateDirty) {
|
||||
linearStreamSizeEstimate += estimateFrontEndCmdSize();
|
||||
}
|
||||
|
||||
if (gsbaStateDirty) {
|
||||
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
|
||||
}
|
||||
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
||||
|
||||
linearStreamSizeEstimate += preemptionSize;
|
||||
|
||||
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
|
||||
size_t padding = alignedSize - linearStreamSizeEstimate;
|
||||
reserveLinearStreamSize(alignedSize);
|
||||
NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize);
|
||||
|
||||
if (!gpgpuEnabled) {
|
||||
programPipelineSelect(child);
|
||||
}
|
||||
if (frontEndStateDirty) {
|
||||
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child);
|
||||
}
|
||||
if (gsbaStateDirty) {
|
||||
programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child);
|
||||
}
|
||||
|
||||
if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) {
|
||||
NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(child, *neoDevice, csr->getPreemptionAllocation());
|
||||
NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice);
|
||||
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
||||
devicePreemption,
|
||||
commandQueuePreemptionMode,
|
||||
csr->getPreemptionAllocation());
|
||||
commandQueuePreemptionMode = devicePreemption;
|
||||
statePreemption = commandQueuePreemptionMode;
|
||||
}
|
||||
|
||||
if (devicePreemption == NEO::PreemptionMode::MidThread) {
|
||||
residencyContainer.push_back(csr->getPreemptionAllocation());
|
||||
auto sipIsa = neoDevice->getBuiltIns()->getSipKernel(NEO::SipKernelType::Csr, *neoDevice).getSipAllocation();
|
||||
residencyContainer.push_back(sipIsa);
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
|
||||
auto cmdBufferCount = cmdBufferAllocations.size();
|
||||
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(child, false);
|
||||
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
||||
commandListPreemption,
|
||||
statePreemption,
|
||||
csr->getPreemptionAllocation());
|
||||
statePreemption = commandListPreemption;
|
||||
}
|
||||
|
||||
for (size_t iter = 0; iter < cmdBufferCount; iter++) {
|
||||
auto allocation = cmdBufferAllocations[iter];
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&child, allocation->getGpuAddress(), true);
|
||||
}
|
||||
|
||||
printfFunctionContainer.insert(printfFunctionContainer.end(),
|
||||
commandList->getPrintfFunctionContainer().begin(),
|
||||
commandList->getPrintfFunctionContainer().end());
|
||||
|
||||
NEO::PageFaultManager *pageFaultManager = nullptr;
|
||||
if (performMigration) {
|
||||
pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
|
||||
if (pageFaultManager == nullptr) {
|
||||
performMigration = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto alloc : commandList->commandContainer.getResidencyContainer()) {
|
||||
if (residencyContainer.end() ==
|
||||
std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) {
|
||||
residencyContainer.push_back(alloc);
|
||||
|
||||
if (performMigration) {
|
||||
if (alloc &&
|
||||
(alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
|
||||
alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
|
||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
commandQueuePreemptionMode = statePreemption;
|
||||
|
||||
if (hFence) {
|
||||
residencyContainer.push_back(&fence->getAllocation());
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo());
|
||||
}
|
||||
|
||||
dispatchTaskCountWrite(child, true);
|
||||
residencyContainer.push_back(csr->getTagAllocation());
|
||||
void *endingCmd = nullptr;
|
||||
if (directSubmissionEnabled) {
|
||||
endingCmd = child.getSpace(0);
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&child, 0ull, false);
|
||||
} else {
|
||||
MI_BATCH_BUFFER_END cmd = GfxFamily::cmdInitBatchBufferEnd;
|
||||
auto buffer = child.getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*(MI_BATCH_BUFFER_END *)buffer = cmd;
|
||||
}
|
||||
|
||||
if (padding) {
|
||||
void *paddingPtr = child.getSpace(padding);
|
||||
memset(paddingPtr, 0, padding);
|
||||
}
|
||||
|
||||
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), residencyContainer, endingCmd);
|
||||
|
||||
this->taskCount = csr->peekTaskCount();
|
||||
|
||||
csr->makeSurfacePackNonResident(residencyContainer);
|
||||
|
||||
if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) {
|
||||
this->synchronize(std::numeric_limits<uint32_t>::max());
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
UNRECOVERABLE_IF(csr == nullptr);
|
||||
NEO::PreambleHelper<GfxFamily>::programVFEState(&commandStream,
|
||||
device->getHwInfo(),
|
||||
commandQueuePerThreadScratchSize,
|
||||
scratchAddress,
|
||||
device->getMaxNumHwThreads(),
|
||||
csr->getOsContext().getEngineType());
|
||||
frontEndInit = true;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSize() {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
return NEO::PreambleHelper<GfxFamily>::getVFECommandsSize();
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelect() {
|
||||
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
return NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programPipelineSelect(NEO::LinearStream &commandStream) {
|
||||
NEO::PipelineSelectArgs args = {0, 0};
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
NEO::PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, args, device->getHwInfo());
|
||||
gpgpuEnabled = true;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
UNRECOVERABLE_IF(csr == nullptr);
|
||||
|
||||
auto taskCountToWrite = csr->peekTaskCount() + 1;
|
||||
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
gpuAddress, taskCountToWrite, true, device->getHwInfo());
|
||||
}
|
||||
} // namespace L0
|
||||
82
level_zero/core/source/cmdqueue_hw_base.inl
Normal file
82
level_zero/core/source/cmdqueue_hw_base.inl
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/interlocked_max.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/helpers/state_base_address.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/cmdqueue_hw.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/fence.h"
|
||||
#include "level_zero/tools/source/metrics/metric.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programGeneralStateBaseAddress(uint64_t gsba, NEO::LinearStream &commandStream) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
PIPE_CONTROL *pcCmd = commandStream.getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pcCmd = GfxFamily::cmdInitPipeControl;
|
||||
|
||||
pcCmd->setTextureCacheInvalidationEnable(true);
|
||||
pcCmd->setDcFlushEnable(true);
|
||||
pcCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
auto gmmHelper = device->getNEODevice()->getGmmHelper();
|
||||
|
||||
NEO::StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(commandStream,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
gsba,
|
||||
true,
|
||||
(device->getMOCS(true, false) >> 1),
|
||||
device->getDriverHandle()->getMemoryManager()->getInternalHeapBaseAddress(0),
|
||||
true,
|
||||
gmmHelper,
|
||||
false);
|
||||
|
||||
gsbaInit = true;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
constexpr size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL);
|
||||
return size;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &residency,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState) {
|
||||
|
||||
if (commandQueuePerThreadScratchSize > 0) {
|
||||
scratchController->setRequiredScratchSpace(nullptr, commandQueuePerThreadScratchSize, 0u, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
|
||||
residency.push_back(scratchAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
101
level_zero/core/source/cmdqueue_imp.h
Normal file
101
level_zero/core/source/cmdqueue_imp.h
Normal file
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/command_stream/submissions_aggregator.h"
|
||||
#include "shared/source/memory_manager/memory_constants.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
class LinearStream;
|
||||
class GraphicsAllocation;
|
||||
class MemoryManager;
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
struct CommandList;
|
||||
struct Kernel;
|
||||
struct CommandQueueImp : public CommandQueue {
|
||||
class CommandBufferManager {
|
||||
public:
|
||||
enum BUFFER_ALLOCATION : uint32_t {
|
||||
FIRST = 0,
|
||||
SECOND,
|
||||
COUNT
|
||||
};
|
||||
|
||||
void initialize(Device *device, size_t sizeRequested);
|
||||
void destroy(NEO::MemoryManager *memoryManager);
|
||||
void switchBuffers(NEO::CommandStreamReceiver *csr);
|
||||
|
||||
NEO::GraphicsAllocation *getCurrentBufferAllocation() {
|
||||
return buffers[bufferUse];
|
||||
}
|
||||
|
||||
void setCurrentFlushStamp(NEO::FlushStamp flushStamp) {
|
||||
flushId[bufferUse] = flushStamp;
|
||||
}
|
||||
|
||||
private:
|
||||
NEO::GraphicsAllocation *buffers[BUFFER_ALLOCATION::COUNT];
|
||||
NEO::FlushStamp flushId[BUFFER_ALLOCATION::COUNT];
|
||||
BUFFER_ALLOCATION bufferUse = BUFFER_ALLOCATION::FIRST;
|
||||
};
|
||||
static constexpr size_t defaultQueueCmdBufferSize = 128 * MemoryConstants::kiloByte;
|
||||
static constexpr size_t minCmdBufferPtrAlign = 8;
|
||||
static constexpr size_t totalCmdBufferSize =
|
||||
defaultQueueCmdBufferSize +
|
||||
MemoryConstants::cacheLineSize +
|
||||
NEO::CSRequirements::csOverfetchSize;
|
||||
|
||||
CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc)
|
||||
: device(device), csr(csr), desc(*desc), commandStream(nullptr) {
|
||||
std::atomic_init(&commandQueuePerThreadScratchSize, 0u);
|
||||
}
|
||||
|
||||
ze_result_t destroy() override;
|
||||
|
||||
ze_result_t synchronize(uint32_t timeout) override;
|
||||
|
||||
void initialize();
|
||||
|
||||
Device *getDevice() { return device; }
|
||||
|
||||
uint32_t getTaskCount() { return taskCount; }
|
||||
|
||||
NEO::CommandStreamReceiver *getCsr() { return csr; }
|
||||
|
||||
void reserveLinearStreamSize(size_t size);
|
||||
ze_command_queue_mode_t getSynchronousMode();
|
||||
virtual void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) = 0;
|
||||
|
||||
protected:
|
||||
void submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr);
|
||||
|
||||
ze_result_t synchronizeByPollingForTaskCount(uint32_t timeout);
|
||||
|
||||
void printFunctionsPrintfOutput();
|
||||
|
||||
Device *device;
|
||||
NEO::CommandStreamReceiver *csr;
|
||||
const ze_command_queue_desc_t desc;
|
||||
NEO::LinearStream *commandStream;
|
||||
uint32_t taskCount = 0;
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
bool gsbaInit = false;
|
||||
bool frontEndInit = false;
|
||||
bool gpgpuEnabled = false;
|
||||
CommandBufferManager buffers;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
15
level_zero/core/source/compiler_interface/CMakeLists.txt
Normal file
15
level_zero/core/source/compiler_interface/CMakeLists.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Files that make up the Level Zero compiler-interface sources in this
# directory (this list file included).
set(L0_SRCS_COMPILER_INTERFACE
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/default_l0_cache_config.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/default_l0_cache_config.h
    ${CMAKE_CURRENT_SOURCE_DIR}/l0_reg_path.h
)

# Publish the list as a global property of the same name so that it can be
# read from outside this directory scope.
set_property(GLOBAL PROPERTY L0_SRCS_COMPILER_INTERFACE "${L0_SRCS_COMPILER_INTERFACE}")
|
||||
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/compiler_interface/default_cache_config.h"
|
||||
|
||||
#include "level_zero/core/source/compiler_interface/default_l0_cache_config.h"
|
||||
|
||||
namespace NEO {
|
||||
// Supplies NEO's default compiler cache configuration by forwarding to the
// Level Zero specific provider.
CompilerCacheConfig getDefaultCompilerCacheConfig() {
    CompilerCacheConfig l0Config = L0::getDefaultL0CompilerCacheConfig();
    return l0Config;
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#define L0_CACHE_LOCATION "l0_c_cache"
|
||||
|
||||
#include "level_zero/core/source/compiler_interface/default_l0_cache_config.h"
|
||||
|
||||
#include "shared/source/utilities/debug_settings_reader.h"
|
||||
|
||||
#include "level_zero/core/source/compiler_interface/l0_reg_path.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Builds the Level Zero compiler cache configuration.
//
// The cache directory is read through an OS settings reader under the key
// registryPath + "l0_c_cache_dir", with L0_CACHE_LOCATION as the fallback
// value; the cache file extension is fixed to ".l0_c_cache".
NEO::CompilerCacheConfig getDefaultL0CompilerCacheConfig() {
    std::string keyName(registryPath);
    keyName.append("l0_c_cache_dir");

    std::unique_ptr<NEO::SettingsReader> settingsReader(NEO::SettingsReader::createOsReader(false, keyName));

    NEO::CompilerCacheConfig config;
    config.cacheDir = settingsReader->getSetting(settingsReader->appSpecificLocation(keyName),
                                                 static_cast<std::string>(L0_CACHE_LOCATION));
    config.cacheFileExtension = ".l0_c_cache";
    return config;
}
|
||||
} // namespace L0
|
||||
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/compiler_interface/compiler_cache.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
NEO::CompilerCacheConfig getDefaultL0CompilerCacheConfig();
|
||||
}
|
||||
11
level_zero/core/source/compiler_interface/l0_reg_path.h
Normal file
11
level_zero/core/source/compiler_interface/l0_reg_path.h
Normal file
@@ -0,0 +1,11 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
namespace L0 {
|
||||
extern const char *registryPath;
|
||||
} // namespace L0
|
||||
40
level_zero/core/source/cpu_page_fault_memory_manager.cpp
Normal file
40
level_zero/core/source/cpu_page_fault_memory_manager.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
#include "level_zero/core/source/driver_handle_imp.h"
|
||||
|
||||
namespace NEO {
|
||||
void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) {
|
||||
L0::DeviceImp *deviceImp = static_cast<L0::DeviceImp *>(device);
|
||||
|
||||
NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
|
||||
UNRECOVERABLE_IF(allocData == nullptr);
|
||||
|
||||
auto ret =
|
||||
deviceImp->pageFaultCommandList->appendPageFaultCopy(allocData->cpuAllocation,
|
||||
allocData->gpuAllocation,
|
||||
allocData->size, true);
|
||||
UNRECOVERABLE_IF(ret);
|
||||
}
|
||||
void PageFaultManager::transferToGpu(void *ptr, void *device) {
|
||||
L0::DeviceImp *deviceImp = static_cast<L0::DeviceImp *>(device);
|
||||
|
||||
NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
|
||||
UNRECOVERABLE_IF(allocData == nullptr);
|
||||
|
||||
auto ret =
|
||||
deviceImp->pageFaultCommandList->appendPageFaultCopy(allocData->gpuAllocation,
|
||||
allocData->cpuAllocation,
|
||||
allocData->size, false);
|
||||
UNRECOVERABLE_IF(ret);
|
||||
}
|
||||
} // namespace NEO
|
||||
14
level_zero/core/source/debug_manager.cpp
Normal file
14
level_zero/core/source/debug_manager.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
|
||||
#include "level_zero/core/source/compiler_interface/l0_reg_path.h"
|
||||
|
||||
namespace NEO {
|
||||
DebugSettingsManager<globalDebugFunctionalityLevel> DebugManager(L0::registryPath);
|
||||
}
|
||||
14
level_zero/core/source/debugger.cpp
Normal file
14
level_zero/core/source/debugger.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/debugger/debugger.h"
|
||||
|
||||
#include "level_zero/core/source/debugger/debugger_l0.h"
|
||||
|
||||
// Level Zero implementation of the debugger factory: always hands back a
// freshly constructed L0::DebuggerL0. `hwInfo` is not consulted here.
std::unique_ptr<NEO::Debugger> NEO::Debugger::create(HardwareInfo *hwInfo) {
    std::unique_ptr<NEO::Debugger> debugger = std::make_unique<L0::DebuggerL0>();
    return debugger;
}
|
||||
13
level_zero/core/source/debugger/CMakeLists.txt
Normal file
13
level_zero/core/source/debugger/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Files that make up the Level Zero debugger component (this directory).
set(L0_SRCS_DEBUGGER
    "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt"
    "${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.cpp"
    "${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.h"
)

# Mirror the list into a global property of the same name so it can be fetched
# with get_property() outside this directory scope.
set_property(GLOBAL PROPERTY L0_SRCS_DEBUGGER "${L0_SRCS_DEBUGGER}")
|
||||
14
level_zero/core/source/debugger/debugger_l0.cpp
Normal file
14
level_zero/core/source/debugger/debugger_l0.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/debugger/debugger_l0.h"
|
||||
|
||||
namespace L0 {
|
||||
bool DebuggerL0::isDebuggerActive() {
|
||||
return false;
|
||||
}
|
||||
} // namespace L0
|
||||
18
level_zero/core/source/debugger/debugger_l0.h
Normal file
18
level_zero/core/source/debugger/debugger_l0.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/debugger/debugger.h"
|
||||
|
||||
namespace L0 {
|
||||
class DebuggerL0 : public NEO::Debugger {
|
||||
public:
|
||||
bool isDebuggerActive() override;
|
||||
~DebuggerL0() override = default;
|
||||
};
|
||||
} // namespace L0
|
||||
114
level_zero/core/source/device.h
Normal file
114
level_zero/core/source/device.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/preemption_mode.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
|
||||
#include "level_zero/core/source/driver.h"
|
||||
#include "level_zero/core/source/driver_handle.h"
|
||||
#include <level_zero/ze_api.h>
|
||||
#include <level_zero/zet_api.h>
|
||||
|
||||
struct _ze_device_handle_t {};
|
||||
struct DeviceInfo;
|
||||
namespace NEO {
|
||||
class Device;
|
||||
class MemoryManager;
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
struct DriverHandle;
|
||||
struct BuiltinFunctionsLib;
|
||||
struct ExecutionEnvironment;
|
||||
struct MetricContext;
|
||||
|
||||
struct Device : _ze_device_handle_t {
|
||||
virtual uint32_t getRootDeviceIndex() = 0;
|
||||
virtual ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) = 0;
|
||||
virtual ze_result_t copyCommandList(ze_command_list_handle_t hCommandList,
|
||||
ze_command_list_handle_t *phCommandList) = 0;
|
||||
|
||||
virtual ze_result_t createCommandList(const ze_command_list_desc_t *desc,
|
||||
ze_command_list_handle_t *commandList) = 0;
|
||||
|
||||
virtual ze_result_t createCommandListImmediate(const ze_command_queue_desc_t *desc,
|
||||
ze_command_list_handle_t *commandList) = 0;
|
||||
|
||||
virtual ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc,
|
||||
ze_command_queue_handle_t *commandQueue) = 0;
|
||||
|
||||
virtual ze_result_t createEventPool(const ze_event_pool_desc_t *desc,
|
||||
ze_event_pool_handle_t *phEventPool) = 0;
|
||||
virtual ze_result_t createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) = 0;
|
||||
|
||||
virtual ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
|
||||
ze_module_build_log_handle_t *buildLog) = 0;
|
||||
virtual ze_result_t createSampler(const ze_sampler_desc_t *pDesc,
|
||||
ze_sampler_handle_t *phSampler) = 0;
|
||||
virtual ze_result_t evictImage(ze_image_handle_t hImage) = 0;
|
||||
virtual ze_result_t evictMemory(void *ptr, size_t size) = 0;
|
||||
virtual ze_result_t
|
||||
getComputeProperties(ze_device_compute_properties_t *pComputeProperties) = 0;
|
||||
virtual ze_result_t getP2PProperties(ze_device_handle_t hPeerDevice,
|
||||
ze_device_p2p_properties_t *pP2PProperties) = 0;
|
||||
virtual ze_result_t getKernelProperties(ze_device_kernel_properties_t *pKernelProperties) = 0;
|
||||
virtual ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) = 0;
|
||||
virtual ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) = 0;
|
||||
virtual ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) = 0;
|
||||
virtual ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) = 0;
|
||||
virtual ze_result_t makeImageResident(ze_image_handle_t hImage) = 0;
|
||||
virtual ze_result_t makeMemoryResident(void *ptr, size_t size) = 0;
|
||||
virtual ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) = 0;
|
||||
virtual ze_result_t setLastLevelCacheConfig(ze_cache_config_t cacheConfig) = 0;
|
||||
virtual ze_result_t getCacheProperties(ze_device_cache_properties_t *pCacheProperties) = 0;
|
||||
virtual ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) = 0;
|
||||
virtual ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) = 0;
|
||||
|
||||
virtual ze_result_t systemBarrier() = 0;
|
||||
|
||||
virtual ze_result_t registerCLMemory(cl_context context, cl_mem mem, void **ptr) = 0;
|
||||
virtual ze_result_t registerCLProgram(cl_context context, cl_program program,
|
||||
ze_module_handle_t *phModule) = 0;
|
||||
virtual ze_result_t registerCLCommandQueue(cl_context context, cl_command_queue commandQueue,
|
||||
ze_command_queue_handle_t *phCommandQueue) = 0;
|
||||
virtual ~Device() = default;
|
||||
|
||||
virtual void *getExecEnvironment() = 0;
|
||||
virtual BuiltinFunctionsLib *getBuiltinFunctionsLib() = 0;
|
||||
virtual uint32_t getMOCS(bool l3enabled, bool l1enabled) = 0;
|
||||
virtual uint32_t getMaxNumHwThreads() const = 0;
|
||||
|
||||
virtual NEO::HwHelper &getHwHelper() = 0;
|
||||
virtual bool isMultiDeviceCapable() const = 0;
|
||||
virtual const NEO::HardwareInfo &getHwInfo() const = 0;
|
||||
virtual NEO::OSInterface &getOsInterface() = 0;
|
||||
virtual uint32_t getPlatformInfo() const = 0;
|
||||
virtual MetricContext &getMetricContext() = 0;
|
||||
|
||||
virtual ze_result_t activateMetricGroups(uint32_t count,
|
||||
zet_metric_group_handle_t *phMetricGroups) = 0;
|
||||
virtual void activateMetricGroups() = 0;
|
||||
|
||||
virtual DriverHandle *getDriverHandle() = 0;
|
||||
virtual void setDriverHandle(DriverHandle *driverHandle) = 0;
|
||||
|
||||
static Device *fromHandle(ze_device_handle_t handle) { return static_cast<Device *>(handle); }
|
||||
|
||||
inline ze_device_handle_t toHandle() { return this; }
|
||||
|
||||
static Device *create(DriverHandle *driverHandle, NEO::Device *neoDevice);
|
||||
|
||||
virtual NEO::PreemptionMode getDevicePreemptionMode() const = 0;
|
||||
virtual const DeviceInfo &getDeviceInfo() const = 0;
|
||||
virtual NEO::Device *getNEODevice() = 0;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
603
level_zero/core/source/device_imp.cpp
Normal file
603
level_zero/core/source/device_imp.cpp
Normal file
@@ -0,0 +1,603 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/sub_device.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/kernel/grf_config.h"
|
||||
#include "shared/source/memory_manager/memory_constants.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
|
||||
#include "opencl/source/device/device_info.h"
|
||||
#include "opencl/source/device/device_info_map.h"
|
||||
#include "opencl/source/mem_obj/mem_obj.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
|
||||
#include "level_zero/core/source/builtin_functions_lib.h"
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/cmdqueue.h"
|
||||
#include "level_zero/core/source/driver_handle_imp.h"
|
||||
#include "level_zero/core/source/event.h"
|
||||
#include "level_zero/core/source/image.h"
|
||||
#include "level_zero/core/source/memory_operations_helper.h"
|
||||
#include "level_zero/core/source/module.h"
|
||||
#include "level_zero/core/source/printf_handler.h"
|
||||
#include "level_zero/core/source/sampler.h"
|
||||
#include "level_zero/tools/source/metrics/metric.h"
|
||||
|
||||
#include "hw_helpers.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
uint32_t DeviceImp::getRootDeviceIndex() {
|
||||
return neoDevice->getRootDeviceIndex();
|
||||
}
|
||||
|
||||
DriverHandle *DeviceImp::getDriverHandle() {
|
||||
return this->driverHandle;
|
||||
}
|
||||
|
||||
void DeviceImp::setDriverHandle(DriverHandle *driverHandle) {
|
||||
this->driverHandle = driverHandle;
|
||||
}
|
||||
|
||||
// Reports peer access as available only when one of the debug flags
// CreateMultipleRootDevices / CreateMultipleSubDevices is set to a positive
// value. `hPeerDevice` is not inspected. Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) {
    const bool multipleRootDevices = NEO::DebugManager.flags.CreateMultipleRootDevices.get() > 0;
    const bool multipleSubDevices = NEO::DebugManager.flags.CreateMultipleSubDevices.get() > 0;

    *value = multipleRootDevices || multipleSubDevices;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Copying command lists is not supported by this implementation.
ze_result_t DeviceImp::copyCommandList(ze_command_list_handle_t hCommandList,
                                       ze_command_list_handle_t *phCommandList) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Creates a command list for this device's product family and stores its
// handle in `*commandList`. `desc` is not consulted here.
ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
                                         ze_command_list_handle_t *commandList) {
    const auto &hwInfo = neoDevice->getHardwareInfo();
    *commandList = CommandList::create(hwInfo.platform.eProductFamily, this);
    return ZE_RESULT_SUCCESS;
}

// Creates an immediate command list described by `desc` and stores its handle
// in `*phCommandList`.
ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
                                                  ze_command_list_handle_t *phCommandList) {
    const auto &hwInfo = neoDevice->getHardwareInfo();
    *phCommandList = CommandList::createImmediate(hwInfo.platform.eProductFamily, this, desc, false);
    return ZE_RESULT_SUCCESS;
}

// Creates a command queue on the default engine's command stream receiver and
// stores its handle in `*commandQueue`.
ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
                                          ze_command_queue_handle_t *commandQueue) {
    const auto &hwInfo = neoDevice->getHardwareInfo();
    auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;

    *commandQueue = CommandQueue::create(hwInfo.platform.eProductFamily, this, defaultCsr, desc);
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Creates an event pool on this device and stores its handle in `*eventPool`.
ze_result_t DeviceImp::createEventPool(const ze_event_pool_desc_t *desc,
                                       ze_event_pool_handle_t *eventPool) {
    auto createdPool = EventPool::create(this, desc);
    *eventPool = createdPool;
    return ZE_RESULT_SUCCESS;
}

// Creates an image for this device's product family.
// Layouts from ZE_IMAGE_FORMAT_LAYOUT_Y8 upwards are rejected with
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE before anything is created.
ze_result_t DeviceImp::createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) {
    const bool layoutSupported =
        desc->format.layout < ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_Y8;
    if (!layoutSupported) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    const auto &hwInfo = neoDevice->getHardwareInfo();
    *phImage = Image::create(hwInfo.platform.eProductFamily, this, desc);
    return ZE_RESULT_SUCCESS;
}

// Creates a sampler for this device's product family and stores its handle in
// `*sampler`.
ze_result_t DeviceImp::createSampler(const ze_sampler_desc_t *desc,
                                     ze_sampler_handle_t *sampler) {
    const auto &hwInfo = neoDevice->getHardwareInfo();
    *sampler = Sampler::create(hwInfo.platform.eProductFamily, this, desc);
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Builds a module from `desc`.
//   module   - receives the module handle on success; untouched on failure
//   buildLog - optional; when non-null a build log object is created up front
//              and its handle is written here, also if the build then fails
// Returns ZE_RESULT_ERROR_MODULE_BUILD_FAILURE when Module::create yields null.
ze_result_t DeviceImp::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
                                    ze_module_build_log_handle_t *buildLog) {
    const bool wantsBuildLog = (buildLog != nullptr);

    ModuleBuildLog *log = nullptr;
    if (wantsBuildLog) {
        log = ModuleBuildLog::create();
        *buildLog = log->toHandle();
    }

    auto builtModule = Module::create(this, desc, neoDevice, log);
    if (builtModule != nullptr) {
        *module = builtModule;
        return ZE_RESULT_SUCCESS;
    }

    return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
}
|
||||
|
||||
// Evicts the graphics allocation backing `hImage` through the root device's
// memory-operations interface and maps the status to a ze_result_t.
ze_result_t DeviceImp::evictImage(ze_image_handle_t hImage) {
    auto imageAllocation = Image::fromHandle(hImage)->getAllocation();
    NEO::MemoryOperationsHandler *memoryOps =
        neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();

    return changeMemoryOperationStatusToL0ResultType(memoryOps->evict(*imageAllocation));
}

// Evicts the GPU allocation of the SVM allocation registered for `ptr`.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when `ptr` is not a known SVM
// allocation. `size` is not used.
ze_result_t DeviceImp::evictMemory(void *ptr, size_t size) {
    auto svmData = getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr);
    if (svmData == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    NEO::MemoryOperationsHandler *memoryOps =
        neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();

    return changeMemoryOperationStatusToL0ResultType(memoryOps->evict(*svmData->gpuAllocation));
}
|
||||
|
||||
// Fills `pComputeProperties` from the NEO device info: work-group limits,
// shared local memory size and the list of sub-group sizes. Group counts are
// reported as 0xffffffff in every dimension. Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::getComputeProperties(ze_device_compute_properties_t *pComputeProperties) {
    const auto &info = this->neoDevice->getDeviceInfo();
    auto &props = *pComputeProperties;

    props.maxTotalGroupSize = static_cast<uint32_t>(info.maxWorkGroupSize);

    props.maxGroupSizeX = static_cast<uint32_t>(info.maxWorkItemSizes[0]);
    props.maxGroupSizeY = static_cast<uint32_t>(info.maxWorkItemSizes[1]);
    props.maxGroupSizeZ = static_cast<uint32_t>(info.maxWorkItemSizes[2]);

    props.maxGroupCountX = 0xffffffff;
    props.maxGroupCountY = 0xffffffff;
    props.maxGroupCountZ = 0xffffffff;

    props.maxSharedLocalMemory = static_cast<uint32_t>(info.localMemSize);

    props.numSubGroupSizes = static_cast<uint32_t>(info.maxSubGroups.size());
    for (uint32_t index = 0; index < props.numSubGroupSizes; ++index) {
        props.subGroupSizes[index] = static_cast<uint32_t>(info.maxSubGroups[index]);
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Reports fixed peer-to-peer capabilities: access supported, atomics not
// supported. `hPeerDevice` is not inspected. Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::getP2PProperties(ze_device_handle_t hPeerDevice,
                                        ze_device_p2p_properties_t *pP2PProperties) {
    auto &props = *pP2PProperties;
    props.accessSupported = true;
    props.atomicsSupported = false;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Two-call query for device memory properties; exactly one entry is exposed.
//   *pCount == 0 : only the number of entries (1) is written back.
//   *pCount >= 1 : the count is clamped to 1 and `pMemProperties[0]` is filled
//                  from the NEO device info.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when entries are requested but
// `pMemProperties` is null (the count has already been clamped by then).
ze_result_t DeviceImp::getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) {
    const uint32_t requestedCount = *pCount;

    if (requestedCount == 0) {
        *pCount = 1;
        return ZE_RESULT_SUCCESS;
    }

    if (requestedCount > 1) {
        *pCount = 1;
    }

    if (pMemProperties == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    const auto &info = this->neoDevice->getDeviceInfo();
    pMemProperties->maxClockRate = info.maxClockFrequency;
    pMemProperties->maxBusWidth = info.addressBits;
    pMemProperties->totalSize = info.globalMemSize;

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Reports fixed memory access capabilities: host, device and single-device
// shared allocations get access + atomic access; cross-device shared and
// shared-system allocations get a value-initialized (empty) capability set.
ze_result_t DeviceImp::getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) {
    const auto accessWithAtomics =
        static_cast<ze_memory_access_capabilities_t>(ZE_MEMORY_ACCESS | ZE_MEMORY_ATOMIC_ACCESS);
    const ze_memory_access_capabilities_t noCapabilities{};

    auto &props = *pMemAccessProperties;
    props.hostAllocCapabilities = accessWithAtomics;
    props.deviceAllocCapabilities = accessWithAtomics;
    props.sharedSingleDeviceAllocCapabilities = accessWithAtomics;
    props.sharedCrossDeviceAllocCapabilities = noCapabilities;
    props.sharedSystemAllocCapabilities = noCapabilities;

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
static constexpr ze_fp_capabilities_t defaultFpFlags = static_cast<ze_fp_capabilities_t>(ZE_FP_CAPS_ROUND_TO_NEAREST |
|
||||
ZE_FP_CAPS_ROUND_TO_ZERO |
|
||||
ZE_FP_CAPS_ROUND_TO_INF |
|
||||
ZE_FP_CAPS_INF_NAN |
|
||||
ZE_FP_CAPS_DENORM |
|
||||
ZE_FP_CAPS_FMA);
|
||||
|
||||
ze_result_t DeviceImp::getKernelProperties(ze_device_kernel_properties_t *pKernelProperties) {
|
||||
const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
|
||||
const auto &deviceInfo = this->neoDevice->getDeviceInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
std::string ilVersion = deviceInfo.ilVersion;
|
||||
size_t majorVersionPos = ilVersion.find('_');
|
||||
size_t minorVersionPos = ilVersion.find('.');
|
||||
|
||||
if (majorVersionPos != std::string::npos && minorVersionPos != std::string::npos) {
|
||||
uint32_t majorSpirvVersion = static_cast<uint32_t>(std::stoul(ilVersion.substr(majorVersionPos + 1, minorVersionPos)));
|
||||
uint32_t minorSpirvVersion = static_cast<uint32_t>(std::stoul(ilVersion.substr(minorVersionPos + 1)));
|
||||
pKernelProperties->spirvVersionSupported = ZE_MAKE_VERSION(majorSpirvVersion, minorSpirvVersion);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
pKernelProperties->fp16Supported = true;
|
||||
pKernelProperties->int64AtomicsSupported = hardwareInfo.capabilityTable.ftrSupportsInteger64BitAtomics;
|
||||
pKernelProperties->fp64Supported = hardwareInfo.capabilityTable.ftrSupportsFP64;
|
||||
pKernelProperties->halfFpCapabilities = defaultFpFlags;
|
||||
pKernelProperties->singleFpCapabilities = hardwareInfo.capabilityTable.ftrSupports64BitMath ? ZE_FP_CAPS_ROUNDED_DIVIDE_SQRT : ZE_FP_CAPS_NONE;
|
||||
pKernelProperties->doubleFpCapabilities = hardwareInfo.capabilityTable.ftrSupportsFP64 ? defaultFpFlags : ZE_FP_CAPS_NONE;
|
||||
|
||||
pKernelProperties->nativeKernelSupported.id[0] = 0;
|
||||
|
||||
processAdditionalKernelProperties(hwHelper, pKernelProperties);
|
||||
|
||||
pKernelProperties->maxArgumentsSize = static_cast<uint32_t>(DeviceInfoTable::Map<CL_DEVICE_MAX_PARAMETER_SIZE>::getValue(this->neoDevice->getDeviceInfo()));
|
||||
|
||||
pKernelProperties->printfBufferSize = static_cast<uint32_t>(DeviceInfoTable::Map<CL_DEVICE_PRINTF_BUFFER_SIZE>::getValue(this->neoDevice->getDeviceInfo()));
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Fills `pDeviceProperties` from the NEO device info and hardware info.
// The uuid is zero-filled and then composed of vendor id, device id and root
// device index (4 bytes each, in that order). The name is "Intel(R) " followed
// by the render core family name and is always NUL-terminated.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) {
    const auto &deviceInfo = this->neoDevice->getDeviceInfo();
    const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
    auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    pDeviceProperties->type = ZE_DEVICE_TYPE_GPU;

    pDeviceProperties->vendorId = deviceInfo.vendorId;

    pDeviceProperties->deviceId = hardwareInfo.platform.usDeviceID;

    uint32_t rootDeviceIndex = this->neoDevice->getRootDeviceIndex();

    // Only the first 12 bytes are written below; clear the whole uuid first so
    // the remaining bytes are deterministic instead of caller-provided garbage.
    pDeviceProperties->uuid = {};
    memcpy_s(pDeviceProperties->uuid.id, sizeof(uint32_t), &pDeviceProperties->vendorId, sizeof(pDeviceProperties->vendorId));
    memcpy_s(pDeviceProperties->uuid.id + sizeof(uint32_t), sizeof(uint32_t), &pDeviceProperties->deviceId, sizeof(pDeviceProperties->deviceId));
    memcpy_s(pDeviceProperties->uuid.id + (2 * sizeof(uint32_t)), sizeof(uint32_t), &rootDeviceIndex, sizeof(rootDeviceIndex));

    pDeviceProperties->isSubdevice = isSubdevice;

    pDeviceProperties->subdeviceId = isSubdevice ? static_cast<NEO::SubDevice *>(neoDevice)->getSubDeviceIndex() : 0;

    pDeviceProperties->coreClockRate = deviceInfo.maxClockFrequency;

    pDeviceProperties->unifiedMemorySupported = true;

    pDeviceProperties->eccMemorySupported = static_cast<ze_bool_t>(DeviceInfoTable::Map<CL_DEVICE_ERROR_CORRECTION_SUPPORT>::getValue(this->neoDevice->getDeviceInfo()));

    pDeviceProperties->onDemandPageFaultsSupported = true;

    pDeviceProperties->maxCommandQueues = deviceInfo.maxOnDeviceQueues;

    pDeviceProperties->numAsyncComputeEngines = static_cast<uint32_t>(hwHelper.getGpgpuEngineInstances(hardwareInfo).size());

    pDeviceProperties->numAsyncCopyEngines = 1;

    pDeviceProperties->maxCommandQueuePriority = 0;

    pDeviceProperties->numThreadsPerEU = deviceInfo.numThreadsPerEU;

    pDeviceProperties->physicalEUSimdWidth = hwHelper.getMinimalSIMDSize();

    pDeviceProperties->numEUsPerSubslice = hardwareInfo.gtSystemInfo.MaxEuPerSubSlice;

    // Guard the division: a hardware info with no slices must not crash the query.
    const auto sliceCount = hardwareInfo.gtSystemInfo.SliceCount;
    pDeviceProperties->numSubslicesPerSlice = (sliceCount != 0) ? (hardwareInfo.gtSystemInfo.SubSliceCount / sliceCount) : 0;

    pDeviceProperties->numSlices = hardwareInfo.gtSystemInfo.SliceCount * this->numSubDevices;

    pDeviceProperties->timerResolution = static_cast<uint64_t>(DeviceInfoTable::Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION>::getValue(this->neoDevice->getDeviceInfo()));

    std::string name = "Intel(R) ";
    name += NEO::familyName[hardwareInfo.platform.eRenderCoreFamily];

    // Bound the copy by the destination buffer, not by the source length, and
    // truncate if needed so the result is always NUL-terminated.
    // NOTE(review): relies on `name` being a fixed-size char array in
    // ze_device_properties_t so that sizeof() yields its capacity - confirm.
    const size_t nameCapacity = sizeof(pDeviceProperties->name);
    const size_t copyLength = (name.length() < nameCapacity - 1) ? name.length() : (nameCapacity - 1);
    memcpy_s(pDeviceProperties->name, nameCapacity, name.c_str(), copyLength);
    pDeviceProperties->name[copyLength] = '\0';

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Two-call query for the sub-device handles of this device.
//   *pCount == 0 : only the number of sub-devices is written back.
//   otherwise    : *pCount is clamped to the number of sub-devices and that
//                  many handles are written to `phSubdevices`.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when handles are requested but
// `phSubdevices` is null.
ze_result_t DeviceImp::getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) {
    const auto availableCount = this->numSubDevices;

    if (*pCount == 0) {
        *pCount = availableCount;
        return ZE_RESULT_SUCCESS;
    }

    if (phSubdevices == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    if (*pCount > availableCount) {
        *pCount = availableCount;
    }

    const uint32_t handlesToWrite = *pCount;
    for (uint32_t index = 0; index < handlesToWrite; ++index) {
        phSubdevices[index] = this->subDevices[index];
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Makes the graphics allocation backing `hImage` resident through the root
// device's memory-operations interface and maps the status to a ze_result_t.
ze_result_t DeviceImp::makeImageResident(ze_image_handle_t hImage) {
    auto imageAllocation = Image::fromHandle(hImage)->getAllocation();
    NEO::MemoryOperationsHandler *memoryOps =
        neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();

    // The handler takes a range of allocations; pass a one-element view.
    ArrayRef<NEO::GraphicsAllocation *> allocations(&imageAllocation, 1);
    return changeMemoryOperationStatusToL0ResultType(memoryOps->makeResident(allocations));
}

// Makes the GPU allocation of the SVM allocation registered for `ptr`
// resident. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when `ptr` is not a known
// SVM allocation. `size` is not used.
ze_result_t DeviceImp::makeMemoryResident(void *ptr, size_t size) {
    auto svmData = getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr);
    if (svmData == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    NEO::MemoryOperationsHandler *memoryOps =
        neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();

    ArrayRef<NEO::GraphicsAllocation *> allocations(&svmData->gpuAllocation, 1);
    return changeMemoryOperationStatusToL0ResultType(memoryOps->makeResident(allocations));
}
|
||||
|
||||
// Changing the intermediate cache configuration is not supported.
ze_result_t DeviceImp::setIntermediateCacheConfig(ze_cache_config_t cacheConfig) {
    const ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return result;
}

// Changing the last-level cache configuration is not supported.
ze_result_t DeviceImp::setLastLevelCacheConfig(ze_cache_config_t cacheConfig) {
    const ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return result;
}
|
||||
|
||||
// Fills `pCacheProperties`: intermediate cache control is reported as
// unsupported with a cache-line size of 0; the last-level cache size comes
// from the L3 size in the GT system info (KiB scaled by KB) and its line size
// from the device info table. Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::getCacheProperties(ze_device_cache_properties_t *pCacheProperties) {
    const auto &hwInfo = this->getHwInfo();
    auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    auto &props = *pCacheProperties;

    // Intermediate cache.
    props.intermediateCacheControlSupported = false;
    props.intermediateCacheSize = getIntermediateCacheSize(hwInfo);
    props.intermediateCachelineSize = 0;

    // Last-level cache.
    props.lastLevelCacheSizeControlSupported = hwHelper.isL3Configurable(hwInfo);
    props.lastLevelCacheSize = static_cast<size_t>(hwInfo.gtSystemInfo.L3CacheSizeInKb * KB);
    props.lastLevelCachelineSize = static_cast<uint32_t>(
        DeviceInfoTable::Map<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>::getValue(this->neoDevice->getDeviceInfo()));

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Reports linear sampler filtering when the device supports images and no
// filtering otherwise. `desc` is not inspected. Always returns
// ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::imageGetProperties(const ze_image_desc_t *desc,
                                          ze_image_properties_t *pImageProperties) {
    const auto &info = this->neoDevice->getDeviceInfo();

    pImageProperties->samplerFilterFlags = info.imageSupport
                                               ? ZE_IMAGE_SAMPLER_FILTER_FLAGS_LINEAR
                                               : ZE_IMAGE_SAMPLER_FILTER_FLAGS_NONE;

    return ZE_RESULT_SUCCESS;
}

// Fills `pDeviceImageProperties` with the image limits taken from the NEO
// device info and the device info table. Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) {
    const auto &info = this->neoDevice->getDeviceInfo();
    auto &props = *pDeviceImageProperties;

    props.supported = info.imageSupport;
    props.maxImageDims1D = static_cast<uint32_t>(info.image2DMaxWidth);
    props.maxImageDims2D = static_cast<uint32_t>(info.image2DMaxHeight);
    props.maxImageDims3D = static_cast<uint32_t>(info.image3DMaxDepth);
    props.maxImageBufferSize = static_cast<uint64_t>(
        DeviceInfoTable::Map<CL_DEVICE_IMAGE_MAX_BUFFER_SIZE>::getValue(this->neoDevice->getDeviceInfo()));
    props.maxImageArraySlices = static_cast<uint32_t>(info.imageMaxArraySize);
    props.maxSamplers = static_cast<uint32_t>(
        DeviceInfoTable::Map<CL_DEVICE_MAX_SAMPLERS>::getValue(this->neoDevice->getDeviceInfo()));
    props.maxReadImageArgs = static_cast<uint32_t>(
        DeviceInfoTable::Map<CL_DEVICE_MAX_READ_IMAGE_ARGS>::getValue(this->neoDevice->getDeviceInfo()));
    props.maxWriteImageArgs = static_cast<uint32_t>(
        DeviceInfoTable::Map<CL_DEVICE_MAX_WRITE_IMAGE_ARGS>::getValue(this->neoDevice->getDeviceInfo()));

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
ze_result_t DeviceImp::systemBarrier() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }
|
||||
|
||||
ze_result_t DeviceImp::activateMetricGroups(uint32_t count,
|
||||
zet_metric_group_handle_t *phMetricGroups) {
|
||||
return metricContext->activateMetricGroupsDeferred(count, phMetricGroups);
|
||||
}
|
||||
|
||||
void *DeviceImp::getExecEnvironment() { return execEnvironment; }
|
||||
|
||||
BuiltinFunctionsLib *DeviceImp::getBuiltinFunctionsLib() { return builtins.get(); }
|
||||
|
||||
uint32_t DeviceImp::getMOCS(bool l3enabled, bool l1enabled) {
|
||||
return getHwHelper().getMocsIndex(*getNEODevice()->getGmmHelper(), l3enabled, l1enabled) << 1;
|
||||
}
|
||||
|
||||
NEO::HwHelper &DeviceImp::getHwHelper() {
|
||||
const auto &hardwareInfo = neoDevice->getHardwareInfo();
|
||||
return NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
}
|
||||
|
||||
NEO::OSInterface &DeviceImp::getOsInterface() { return *neoDevice->getOSTime()->getOSInterface(); }
|
||||
|
||||
uint32_t DeviceImp::getPlatformInfo() const {
|
||||
const auto &hardwareInfo = neoDevice->getHardwareInfo();
|
||||
return hardwareInfo.platform.eRenderCoreFamily;
|
||||
}
|
||||
|
||||
MetricContext &DeviceImp::getMetricContext() { return *metricContext; }
|
||||
|
||||
void DeviceImp::activateMetricGroups() {
|
||||
if (metricContext != nullptr) {
|
||||
metricContext->activateMetricGroups();
|
||||
}
|
||||
}
|
||||
uint32_t DeviceImp::getMaxNumHwThreads() const { return maxNumHwThreads; }
|
||||
|
||||
// Registers the storage of an OpenCL memory object with Level Zero by creating
// a managed allocation from the object's underlying buffer.
//   context - not used
//   mem     - OpenCL memory object, treated as a NEO::MemObj
//   ptr     - receives the underlying buffer of the new managed allocation
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT for a null `mem`/`ptr` or a memory
// object without a graphics allocation, ZE_RESULT_ERROR_UNKNOWN when the
// managed allocation could not be created, ZE_RESULT_SUCCESS otherwise.
ze_result_t DeviceImp::registerCLMemory(cl_context context, cl_mem mem, void **ptr) {
    if (mem == nullptr || ptr == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    NEO::MemObj *memObj = static_cast<NEO::MemObj *>(mem);
    NEO::GraphicsAllocation *graphicsAllocation = memObj->getGraphicsAllocation();
    DEBUG_BREAK_IF(graphicsAllocation == nullptr);
    // DEBUG_BREAK_IF alone does not stop execution here, so bail out
    // explicitly instead of dereferencing a null allocation.
    if (graphicsAllocation == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto allocation = getDriverHandle()->allocateManagedMemoryFromHostPtr(
        this, graphicsAllocation->getUnderlyingBuffer(),
        graphicsAllocation->getUnderlyingBufferSize(), nullptr);
    if (allocation == nullptr) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    *ptr = allocation->getUnderlyingBuffer();

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Creates a Level Zero module from the device binary of an OpenCL program.
//   context  - not used
//   program  - OpenCL program, treated as a NEO::Program
//   phModule - receives the handle of the created module
// Only programs for which getIsSpirV() is true are accepted; anything else, or
// a failing binary query, yields ZE_RESULT_ERROR_INVALID_ARGUMENT. Otherwise
// the result of createModule() is returned.
ze_result_t DeviceImp::registerCLProgram(cl_context context, cl_program program,
                                         ze_module_handle_t *phModule) {
    NEO::Program *neoProgram = static_cast<NEO::Program *>(program);

    if (!neoProgram->getIsSpirV()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    size_t deviceBinarySize = 0;
    if (0 != neoProgram->getInfo(CL_PROGRAM_BINARY_SIZES, sizeof(deviceBinarySize), &deviceBinarySize, nullptr)) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    std::vector<uint8_t> deviceBinary;
    deviceBinary.resize(deviceBinarySize);
    // CL_PROGRAM_BINARIES expects an array of destination pointers; a single
    // pointer to our buffer is passed.
    auto deviceBinaryPtr = deviceBinary.data();
    if (0 != neoProgram->getInfo(CL_PROGRAM_BINARIES, sizeof(void *), &deviceBinaryPtr, nullptr)) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // Value-initialize the descriptor so that every member that is not set
    // explicitly below is zero/null rather than indeterminate stack contents.
    ze_module_desc_t moduleDesc = {};
    moduleDesc.version = ZE_MODULE_DESC_VERSION_CURRENT;
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.inputSize = deviceBinarySize;
    moduleDesc.pInputModule = deviceBinary.data();
    moduleDesc.pBuildFlags = nullptr;

    return createModule(&moduleDesc, phModule, nullptr);
}
|
||||
|
||||
// Creates a Level Zero command queue on this device's default engine and
// returns it through `phCommandQueue`.
//
// context and commandQueue are not used by this implementation.
// Always returns ZE_RESULT_SUCCESS; the result of CommandQueue::create is
// stored without being checked.
ze_result_t DeviceImp::registerCLCommandQueue(cl_context context, cl_command_queue commandQueue,
                                              ze_command_queue_handle_t *phCommandQueue) {
    // Value-initialize so that fields not assigned below are zero instead of
    // indeterminate stack contents.
    ze_command_queue_desc_t desc = {};
    desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT;
    desc.flags = ZE_COMMAND_QUEUE_FLAG_NONE;
    desc.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
    auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
    *phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc);

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Returns the hardware info of the underlying NEO device.
const NEO::HardwareInfo &DeviceImp::getHwInfo() const {
    return neoDevice->getHardwareInfo();
}
|
||||
|
||||
bool DeviceImp::isMultiDeviceCapable() const {
|
||||
return neoDevice->getNumAvailableDevices() > 1u;
|
||||
}
|
||||
|
||||
// Builds a DeviceImp around `neoDevice` and returns it.
//
// driverHandle - driver handle stored on the new device (and on sub-devices).
// neoDevice    - NEO device to wrap; one internal reference is taken on it.
//
// Steps, in order:
//   1. store the driver handle and NEO device, take the internal reference;
//   2. create the metric context and builtin function library, cache the
//      max HW thread count;
//   3. when the NEO device reports more than one available device, create a
//      sub-device for each index and mark it as a sub-device;
//   4. initialize builtin functions when a compiler interface is available;
//   5. create the immediate page-fault command list when dual-storage shared
//      memory is supported (debug flag overrides the memory manager's answer).
//
// Returns nullptr when a sub-device could not be created; in that case the
// partially built device (and the sub-devices created so far) is destroyed.
Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice) {
    auto device = new DeviceImp;
    UNRECOVERABLE_IF(device == nullptr);

    device->setDriverHandle(driverHandle);

    device->neoDevice = neoDevice;
    neoDevice->incRefInternal();

    device->execEnvironment = (void *)neoDevice->getExecutionEnvironment();
    device->metricContext = MetricContext::create(*device);
    device->builtins = BuiltinFunctionsLib::create(
        device, neoDevice->getBuiltIns());
    device->maxNumHwThreads = NEO::HwHelper::getMaxThreadsForVfe(neoDevice->getHardwareInfo());

    if (device->neoDevice->getNumAvailableDevices() == 1) {
        device->numSubDevices = 0;
    } else {
        device->numSubDevices = device->neoDevice->getNumAvailableDevices();
        for (uint32_t i = 0; i < device->numSubDevices; i++) {
            ze_device_handle_t subDevice = Device::create(driverHandle,
                                                          device->neoDevice->getDeviceById(i));
            if (subDevice == nullptr) {
                // Do not leak the parent: make the counter match the
                // sub-devices actually stored so the destructor only touches
                // valid entries, then destroy the partially built device.
                device->numSubDevices = static_cast<uint32_t>(device->subDevices.size());
                delete device;
                return nullptr;
            }
            reinterpret_cast<DeviceImp *>(subDevice)->isSubdevice = true;
            device->subDevices.push_back(static_cast<Device *>(subDevice));
        }
    }

    if (neoDevice->getCompilerInterface()) {
        device->getBuiltinFunctionsLib()->initFunctions();
        device->getBuiltinFunctionsLib()->initPageFaultFunction();
    }

    auto supportDualStorageSharedMemory = device->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(device->neoDevice->getRootDeviceIndex());
    if (NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
        supportDualStorageSharedMemory = NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
    }

    if (supportDualStorageSharedMemory) {
        // Value-initialize: only version, ordinal and mode are assigned
        // below, the remaining fields must not be indeterminate.
        ze_command_queue_desc_t cmdQueueDesc = {};
        cmdQueueDesc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT;
        cmdQueueDesc.ordinal = 0;
        cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
        device->pageFaultCommandList =
            CommandList::createImmediate(
                device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true);
    }

    return device;
}
|
||||
|
||||
// Tears the device down: deletes owned sub-devices, destroys the page-fault
// command list if one was created, releases the metric context and builtin
// library, and finally drops the internal reference on the NEO device.
DeviceImp::~DeviceImp() {
    // Walk the container itself rather than indexing it with numSubDevices,
    // so a counter that disagrees with the vector can never read past its end.
    for (auto *subDevice : this->subDevices) {
        delete subDevice;
    }
    this->subDevices.clear();

    if (this->pageFaultCommandList) {
        this->pageFaultCommandList->destroy();
        this->pageFaultCommandList = nullptr;
    }

    metricContext.reset();
    builtins.reset();

    if (neoDevice) {
        neoDevice->decRefInternal();
    }
}
|
||||
|
||||
// Returns the preemption mode reported by the underlying NEO device.
NEO::PreemptionMode DeviceImp::getDevicePreemptionMode() const {
    return this->neoDevice->getPreemptionMode();
}
|
||||
|
||||
// Returns the device info structure of the underlying NEO device.
const DeviceInfo &DeviceImp::getDeviceInfo() const {
    return this->neoDevice->getDeviceInfo();
}
|
||||
|
||||
// Returns the wrapped NEO device pointer (not owned by the caller).
NEO::Device *DeviceImp::getNEODevice() {
    return this->neoDevice;
}
|
||||
|
||||
} // namespace L0
|
||||
95
level_zero/core/source/device_imp.h
Normal file
95
level_zero/core/source/device_imp.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/builtin_functions_lib.h"
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/driver_handle.h"
|
||||
#include "level_zero/tools/source/metrics/metric.h"
|
||||
#include "level_zero/tools/source/tracing/tracing.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct DeviceImp : public Device {
|
||||
uint32_t getRootDeviceIndex() override;
|
||||
ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) override;
|
||||
ze_result_t copyCommandList(ze_command_list_handle_t hCommandList,
|
||||
ze_command_list_handle_t *phCommandList) override;
|
||||
ze_result_t createCommandList(const ze_command_list_desc_t *desc,
|
||||
ze_command_list_handle_t *commandList) override;
|
||||
ze_result_t createCommandListImmediate(const ze_command_queue_desc_t *desc,
|
||||
ze_command_list_handle_t *phCommandList) override;
|
||||
ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc,
|
||||
ze_command_queue_handle_t *commandQueue) override;
|
||||
ze_result_t createEventPool(const ze_event_pool_desc_t *desc,
|
||||
ze_event_pool_handle_t *eventPool) override;
|
||||
ze_result_t createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) override;
|
||||
ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
|
||||
ze_module_build_log_handle_t *buildLog) override;
|
||||
ze_result_t createSampler(const ze_sampler_desc_t *pDesc,
|
||||
ze_sampler_handle_t *phSampler) override;
|
||||
ze_result_t evictImage(ze_image_handle_t hImage) override;
|
||||
ze_result_t evictMemory(void *ptr, size_t size) override;
|
||||
ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) override;
|
||||
ze_result_t getP2PProperties(ze_device_handle_t hPeerDevice,
|
||||
ze_device_p2p_properties_t *pP2PProperties) override;
|
||||
ze_result_t getKernelProperties(ze_device_kernel_properties_t *pKernelProperties) override;
|
||||
ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) override;
|
||||
ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) override;
|
||||
ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) override;
|
||||
ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) override;
|
||||
ze_result_t makeImageResident(ze_image_handle_t hImage) override;
|
||||
ze_result_t makeMemoryResident(void *ptr, size_t size) override;
|
||||
ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) override;
|
||||
ze_result_t setLastLevelCacheConfig(ze_cache_config_t cacheConfig) override;
|
||||
ze_result_t getCacheProperties(ze_device_cache_properties_t *pCacheProperties) override;
|
||||
ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) override;
|
||||
ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) override;
|
||||
ze_result_t systemBarrier() override;
|
||||
void *getExecEnvironment() override;
|
||||
BuiltinFunctionsLib *getBuiltinFunctionsLib() override;
|
||||
uint32_t getMOCS(bool l3enabled, bool l1enabled) override;
|
||||
NEO::HwHelper &getHwHelper() override;
|
||||
bool isMultiDeviceCapable() const override;
|
||||
const NEO::HardwareInfo &getHwInfo() const override;
|
||||
NEO::OSInterface &getOsInterface() override;
|
||||
uint32_t getPlatformInfo() const override;
|
||||
MetricContext &getMetricContext() override;
|
||||
uint32_t getMaxNumHwThreads() const override;
|
||||
ze_result_t registerCLMemory(cl_context context, cl_mem mem, void **ptr) override;
|
||||
ze_result_t registerCLProgram(cl_context context, cl_program program,
|
||||
ze_module_handle_t *phModule) override;
|
||||
ze_result_t registerCLCommandQueue(cl_context context, cl_command_queue commandQueue,
|
||||
ze_command_queue_handle_t *phCommandQueue) override;
|
||||
ze_result_t activateMetricGroups(uint32_t count,
|
||||
zet_metric_group_handle_t *phMetricGroups) override;
|
||||
|
||||
DriverHandle *getDriverHandle() override;
|
||||
void setDriverHandle(DriverHandle *driverHandle) override;
|
||||
NEO::PreemptionMode getDevicePreemptionMode() const override;
|
||||
const DeviceInfo &getDeviceInfo() const override;
|
||||
NEO::Device *getNEODevice() override;
|
||||
void activateMetricGroups() override;
|
||||
void processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_kernel_properties_t *pKernelProperties);
|
||||
|
||||
~DeviceImp() override;
|
||||
|
||||
NEO::Device *neoDevice = nullptr;
|
||||
bool isSubdevice = false;
|
||||
void *execEnvironment = nullptr;
|
||||
std::unique_ptr<BuiltinFunctionsLib> builtins = nullptr;
|
||||
std::unique_ptr<MetricContext> metricContext = nullptr;
|
||||
uint32_t maxNumHwThreads = 0;
|
||||
uint32_t numSubDevices = 0;
|
||||
std::vector<Device *> subDevices;
|
||||
DriverHandle *driverHandle = nullptr;
|
||||
CommandList *pageFaultCommandList = nullptr;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
11
level_zero/core/source/dll/CMakeLists.txt
Normal file
11
level_zero/core/source/dll/CMakeLists.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Files contributed by this directory, collected in L0_SRCS_DLL and then
# published as a global property of the same name.
set(L0_SRCS_DLL)
list(APPEND L0_SRCS_DLL
     ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
     ${CMAKE_CURRENT_SOURCE_DIR}/disallow_deferred_deleter.cpp
)
set_property(GLOBAL PROPERTY L0_SRCS_DLL "${L0_SRCS_DLL}")
|
||||
12
level_zero/core/source/dll/disallow_deferred_deleter.cpp
Normal file
12
level_zero/core/source/dll/disallow_deferred_deleter.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/deferred_deleter_helper.h"
|
||||
|
||||
// Definition of NEO::isDeferredDeleterEnabled() provided by this file:
// it unconditionally reports false.
bool NEO::isDeferredDeleterEnabled() {
|
||||
return false;
|
||||
} // end of NEO::isDeferredDeleterEnabled (closes the function, not a namespace)
|
||||
79
level_zero/core/source/driver.cpp
Normal file
79
level_zero/core/source/driver.cpp
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/driver.h"
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/driver_handle.h"
|
||||
#include "level_zero/core/source/driver_imp.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Driver handles handed out by driverHandleGet(); slot 0 is assigned in
// DriverImp::initialize().
ze_driver_handle_t GlobalDrivers[1];
|
||||
// Number of drivers reported by driverHandleGet().
uint32_t driverCount = 1;
|
||||
|
||||
void DriverImp::initialize(bool *result) {
|
||||
*result = false;
|
||||
|
||||
auto executionEnvironment = new NEO::ExecutionEnvironment();
|
||||
UNRECOVERABLE_IF(nullptr == executionEnvironment);
|
||||
|
||||
executionEnvironment->incRefInternal();
|
||||
auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment);
|
||||
executionEnvironment->decRefInternal();
|
||||
if (!devices.empty()) {
|
||||
GlobalDrivers[0] = DriverHandle::create(std::move(devices));
|
||||
if (GlobalDrivers[0]) {
|
||||
*result = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Outcome of the one-time initialization run from driverInit(); starts as false.
bool DriverImp::initStatus(false);
|
||||
|
||||
// Runs initialize() exactly once (guarded by initDriverOnce) and reports the
// recorded outcome on every call.
//
// flag - not used by this implementation.
//
// Returns ZE_RESULT_SUCCESS when initialization succeeded, otherwise
// ZE_RESULT_ERROR_UNINITIALIZED.
ze_result_t DriverImp::driverInit(ze_init_flag_t flag) {
    std::call_once(initDriverOnce, [this]() {
        bool initialized = false;
        this->initialize(&initialized);
        initStatus = initialized;
    });

    if (!initStatus) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Reports the number of drivers and/or copies driver handles to the caller.
//
// pCount          - in: capacity of phDriverHandles, or 0 to query the count;
//                   out: number of drivers available / written.
// phDriverHandles - destination array; must be non-null when *pCount != 0.
//
// *pCount is clamped to driverCount before the null check on phDriverHandles,
// so it may be updated even when ZE_RESULT_ERROR_INVALID_ARGUMENT is returned.
ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDriverHandles) {
    const bool countQuery = (*pCount == 0);
    if (countQuery) {
        *pCount = driverCount;
        return ZE_RESULT_SUCCESS;
    }

    if (*pCount > driverCount) {
        *pCount = driverCount;
    }

    if (phDriverHandles == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    uint32_t index = 0;
    while (index < *pCount) {
        phDriverHandles[index] = GlobalDrivers[index];
        ++index;
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// File-local DriverImp instance.
static DriverImp driverImp;
|
||||
// Driver::driver points at that instance.
Driver *Driver::driver = &driverImp;
|
||||
|
||||
// Entry point for driver initialization: forwards to driverInit() on the
// Driver returned by Driver::get().
ze_result_t init(ze_init_flag_t flag) {
    Driver *activeDriver = Driver::get();
    return activeDriver->driverInit(flag);
}
|
||||
|
||||
} // namespace L0
|
||||
27
level_zero/core/source/driver.h
Normal file
27
level_zero/core/source/driver.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
|
||||
namespace L0 {
|
||||
struct Driver {
|
||||
virtual ze_result_t driverInit(_ze_init_flag_t) = 0;
|
||||
virtual void initialize(bool *result) = 0;
|
||||
static Driver *get() { return driver; }
|
||||
|
||||
protected:
|
||||
static Driver *driver;
|
||||
};
|
||||
|
||||
// Initializes the driver; returns the result of Driver::driverInit().
ze_result_t init(_ze_init_flag_t);
|
||||
// Queries the driver count (when *pCount is 0) or copies driver handles into phDrivers.
ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDrivers);
|
||||
|
||||
// Number of driver handles available in GlobalDrivers.
extern uint32_t driverCount;
|
||||
// Storage for the driver handles returned by driverHandleGet().
extern ze_driver_handle_t GlobalDrivers[1];
|
||||
} // namespace L0
|
||||
73
level_zero/core/source/driver_handle.h
Normal file
73
level_zero/core/source/driver_handle.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include <level_zero/ze_api.h>
|
||||
|
||||
// Empty definition of the struct behind the API's driver handle type;
// L0::DriverHandle derives from it so handles and objects convert via static_cast.
struct _ze_driver_handle_t {};
|
||||
|
||||
namespace L0 {
|
||||
struct Device;
|
||||
|
||||
struct DriverHandle : _ze_driver_handle_t {
|
||||
virtual ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) = 0;
|
||||
virtual ze_result_t getProperties(ze_driver_properties_t *properties) = 0;
|
||||
virtual ze_result_t getApiVersion(ze_api_version_t *version) = 0;
|
||||
virtual ze_result_t getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) = 0;
|
||||
virtual ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) = 0;
|
||||
virtual ze_result_t getMemAllocProperties(const void *ptr,
|
||||
ze_memory_allocation_properties_t *pMemAllocProperties,
|
||||
ze_device_handle_t *phDevice) = 0;
|
||||
|
||||
virtual ze_result_t allocHostMem(ze_host_mem_alloc_flag_t flags, size_t size, size_t alignment, void **ptr) = 0;
|
||||
|
||||
virtual ze_result_t allocDeviceMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t flags, size_t size,
|
||||
size_t alignment, void **ptr) = 0;
|
||||
|
||||
virtual ze_result_t allocSharedMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t deviceFlags,
|
||||
ze_host_mem_alloc_flag_t hostFlags, size_t size, size_t alignment,
|
||||
void **ptr) = 0;
|
||||
virtual ze_result_t freeMem(const void *ptr) = 0;
|
||||
virtual NEO::MemoryManager *getMemoryManager() = 0;
|
||||
virtual void setMemoryManager(NEO::MemoryManager *memoryManager) = 0;
|
||||
virtual ze_result_t getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) = 0;
|
||||
virtual ze_result_t closeIpcMemHandle(const void *ptr) = 0;
|
||||
virtual ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) = 0;
|
||||
virtual ze_result_t openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle,
|
||||
ze_ipc_memory_flag_t flags, void **ptr) = 0;
|
||||
virtual ze_result_t createEventPool(const ze_event_pool_desc_t *desc,
|
||||
uint32_t numDevices,
|
||||
ze_device_handle_t *phDevices,
|
||||
ze_event_pool_handle_t *phEventPool) = 0;
|
||||
virtual ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) = 0;
|
||||
virtual NEO::GraphicsAllocation *allocateManagedMemoryFromHostPtr(Device *device, void *buffer,
|
||||
size_t size, struct CommandList *commandList) = 0;
|
||||
virtual NEO::GraphicsAllocation *allocateMemoryFromHostPtr(Device *device, const void *buffer, size_t size) = 0;
|
||||
virtual bool findAllocationDataForRange(const void *buffer,
|
||||
size_t size,
|
||||
NEO::SvmAllocationData **allocData) = 0;
|
||||
virtual std::vector<NEO::SvmAllocationData *> findAllocationsWithinRange(const void *buffer,
|
||||
size_t size,
|
||||
bool *allocationRangeCovered) = 0;
|
||||
|
||||
virtual NEO::SVMAllocsManager *getSvmAllocsManager() = 0;
|
||||
static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast<DriverHandle *>(handle); }
|
||||
inline ze_driver_handle_t toHandle() { return this; }
|
||||
|
||||
virtual ~DriverHandle() = default;
|
||||
|
||||
DriverHandle &operator=(const DriverHandle &) = delete;
|
||||
DriverHandle &operator=(DriverHandle &&) = delete;
|
||||
|
||||
static DriverHandle *create(std::vector<std::unique_ptr<NEO::Device>> devices);
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
293
level_zero/core/source/driver_handle_imp.cpp
Normal file
293
level_zero/core/source/driver_handle_imp.cpp
Normal file
@@ -0,0 +1,293 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/driver_handle_imp.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/os_library.h"
|
||||
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
|
||||
#include "driver_version_l0.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Returns the memory manager currently stored on this driver handle.
NEO::MemoryManager *DriverHandleImp::getMemoryManager() {
    return memoryManager;
}
|
||||
|
||||
// Replaces the memory manager pointer stored on this driver handle.
void DriverHandleImp::setMemoryManager(NEO::MemoryManager *memoryManager) {
|
||||
// `this->` is required: the parameter shadows the member of the same name.
this->memoryManager = memoryManager;
|
||||
}
|
||||
|
||||
// Returns the SVM allocations manager owned by this driver handle.
NEO::SVMAllocsManager *DriverHandleImp::getSvmAllocsManager() {
    return svmAllocsManager;
}
|
||||
|
||||
// Writes the supported API version (ZE_API_VERSION_1_0) to *version and
// returns ZE_RESULT_SUCCESS. `version` is dereferenced without a null check.
ze_result_t DriverHandleImp::getApiVersion(ze_api_version_t *version) {
|
||||
*version = ZE_API_VERSION_1_0;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Fills properties->driverVersion from the L0_PROJECT_VERSION_MAJOR/MINOR
// strings and returns ZE_RESULT_SUCCESS. No other field is written.
//
// NOTE(review): both strings are parsed with base 16; confirm that the
// version macros are really meant to be read as hexadecimal.
ze_result_t DriverHandleImp::getProperties(ze_driver_properties_t *properties) {
    const auto majorVersion = static_cast<uint32_t>(strtoul(L0_PROJECT_VERSION_MAJOR, nullptr, 16));
    const auto minorVersion = static_cast<uint32_t>(strtoul(L0_PROJECT_VERSION_MINOR, nullptr, 16));

    properties->driverVersion = ZE_MAKE_VERSION(majorVersion, minorVersion);

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Reports IPC capabilities: memory handles are supported, events are not.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t DriverHandleImp::getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) {
    pIPCProperties->memsSupported = true;
    pIPCProperties->eventsSupported = false;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Maps an internal unified-memory type to the API memory type.
// Shared, device and host unified memory map to their ZE_MEMORY_TYPE_*
// counterparts; every other value maps to ZE_MEMORY_TYPE_UNKNOWN.
inline ze_memory_type_t parseUSMType(InternalMemoryType memoryType) {
    if (memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
        return ZE_MEMORY_TYPE_SHARED;
    }
    if (memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) {
        return ZE_MEMORY_TYPE_DEVICE;
    }
    if (memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) {
        return ZE_MEMORY_TYPE_HOST;
    }
    return ZE_MEMORY_TYPE_UNKNOWN;
}
|
||||
|
||||
// Looks up `pFuncName` in the library held by osLibrary and stores the
// address in *pfunc.
//
// Returns ZE_RESULT_SUCCESS when an address was found, otherwise
// ZE_RESULT_ERROR_UNKNOWN (with *pfunc left as nullptr).
ze_result_t DriverHandleImp::getExtensionFunctionAddress(const char *pFuncName, void **pfunc) {
    const std::string functionName(pFuncName);
    *pfunc = this->osLibrary->getProcAddress(functionName);
    return (*pfunc == nullptr) ? ZE_RESULT_ERROR_UNKNOWN : ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Fills the type and id of the SVM allocation registered for `ptr`.
//
// ptr                 - pointer to look up in the SVM allocations container.
// pMemAllocProperties - receives the memory type and the GPU address as id.
// phDevice            - not written by this implementation.
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when `ptr` is not a known allocation.
ze_result_t DriverHandleImp::getMemAllocProperties(const void *ptr,
                                                   ze_memory_allocation_properties_t *pMemAllocProperties,
                                                   ze_device_handle_t *phDevice) {
    auto svmData = svmAllocsManager->getSVMAllocs()->get(ptr);
    if (!svmData) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    pMemAllocProperties->type = parseUSMType(svmData->memoryType);
    pMemAllocProperties->id = svmData->gpuAllocation->getGpuAddress();
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Destroys, in this order: every device created by initialize(), the SVM
// allocations manager, and the OS library object.
DriverHandleImp::~DriverHandleImp() {
    for (Device *ownedDevice : this->devices) {
        delete ownedDevice;
    }

    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete this->svmAllocsManager;
    this->svmAllocsManager = nullptr;

    delete this->osLibrary;
}
|
||||
|
||||
// Takes ownership of the given NEO devices and sets up the driver handle:
// memory manager (from the first device), SVM allocations manager, one L0
// device per NEO device, and the OS library used for extension lookups.
//
// Returns ZE_RESULT_SUCCESS on success, ZE_RESULT_ERROR_UNINITIALIZED when
// no device was supplied, the first device has no memory manager, or the OS
// library could not be loaded, and ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY when
// the SVM allocations manager could not be created.
ze_result_t DriverHandleImp::initialize(std::vector<std::unique_ptr<NEO::Device>> devices) {
    // devices[0] is read below; an empty vector must not reach that point.
    if (devices.empty()) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    this->memoryManager = devices[0]->getMemoryManager();
    if (this->memoryManager == nullptr) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    this->svmAllocsManager = new NEO::SVMAllocsManager(memoryManager);
    if (this->svmAllocsManager == nullptr) {
        return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
    }

    this->numDevices = static_cast<uint32_t>(devices.size());

    for (auto &neoDevice : devices) {
        // Ownership of the NEO device moves to the created L0 device.
        auto device = Device::create(this, neoDevice.release());
        this->devices.push_back(device);
    }

    this->osLibrary = NEO::OsLibrary::load("");
    // Check the pointer before calling through it, in case load() yields
    // nullptr rather than an unloaded library object.
    if (this->osLibrary == nullptr || this->osLibrary->isLoaded() == false) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Allocates a DriverHandleImp, initializes it with the given devices and
// returns it. When initialization fails the object is deleted and nullptr is
// returned. On success the memory manager is switched to force non-SVM
// handling for external host pointers.
DriverHandle *DriverHandle::create(std::vector<std::unique_ptr<NEO::Device>> devices) {
    auto *handle = new DriverHandleImp;
    UNRECOVERABLE_IF(nullptr == handle);

    const ze_result_t initResult = handle->initialize(std::move(devices));
    if (initResult == ZE_RESULT_SUCCESS) {
        handle->memoryManager->setForceNonSvmForExternalHostPtr(true);
        return handle;
    }

    delete handle;
    return nullptr;
}
|
||||
|
||||
// Reports the number of devices and/or copies device handles to the caller.
//
// pCount    - in: capacity of phDevices, or 0 to query the count;
//             out: number of devices available / written.
// phDevices - destination array; must be non-null when *pCount != 0.
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when handles are requested but
// phDevices is null.
ze_result_t DriverHandleImp::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) {
    if (*pCount == 0) {
        *pCount = this->numDevices;
        return ZE_RESULT_SUCCESS;
    }

    // Clamp the request to what exists (same rule as driverHandleGet), so the
    // copy loop below never reads past the end of `devices`.
    if (*pCount > this->numDevices) {
        *pCount = this->numDevices;
    }

    if (phDevices == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    for (uint32_t i = 0; i < *pCount; i++) {
        phDevices[i] = this->devices[i];
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
bool DriverHandleImp::findAllocationDataForRange(const void *buffer,
|
||||
size_t size,
|
||||
NEO::SvmAllocationData **allocData) {
|
||||
// Make sure the host buffer does not overlap any existing allocation
|
||||
const char *baseAddress = reinterpret_cast<const char *>(buffer);
|
||||
NEO::SvmAllocationData *beginAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress);
|
||||
NEO::SvmAllocationData *endAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress + size - 1);
|
||||
|
||||
if (allocData) {
|
||||
if (beginAllocData) {
|
||||
*allocData = beginAllocData;
|
||||
} else {
|
||||
*allocData = endAllocData;
|
||||
}
|
||||
}
|
||||
|
||||
// Return true if the whole range requested is covered by the same allocation
|
||||
if (beginAllocData && endAllocData &&
|
||||
(beginAllocData->gpuAllocation == endAllocData->gpuAllocation)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<NEO::SvmAllocationData *> DriverHandleImp::findAllocationsWithinRange(const void *buffer,
|
||||
size_t size,
|
||||
bool *allocationRangeCovered) {
|
||||
std::vector<NEO::SvmAllocationData *> allocDataArray;
|
||||
const char *baseAddress = reinterpret_cast<const char *>(buffer);
|
||||
// Check if the host buffer overlaps any existing allocation
|
||||
NEO::SvmAllocationData *beginAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress);
|
||||
NEO::SvmAllocationData *endAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress + size - 1);
|
||||
|
||||
// Add the allocation that matches the beginning address
|
||||
if (beginAllocData) {
|
||||
allocDataArray.push_back(beginAllocData);
|
||||
}
|
||||
// Add the allocation that matches the end address range if there was no beginning allocation
|
||||
// or the beginning allocation does not match the ending allocation
|
||||
if (endAllocData) {
|
||||
if ((beginAllocData && (beginAllocData->gpuAllocation != endAllocData->gpuAllocation)) ||
|
||||
!beginAllocData) {
|
||||
allocDataArray.push_back(endAllocData);
|
||||
}
|
||||
}
|
||||
|
||||
// Return true if the whole range requested is covered by the same allocation
|
||||
if (beginAllocData && endAllocData &&
|
||||
(beginAllocData->gpuAllocation == endAllocData->gpuAllocation)) {
|
||||
*allocationRangeCovered = true;
|
||||
} else {
|
||||
*allocationRangeCovered = false;
|
||||
}
|
||||
return allocDataArray;
|
||||
}
|
||||
|
||||
// Returns a graphics allocation covering the host range [buffer, buffer + size).
//
// - When one registered allocation already covers both ends of the range, it
//   is returned as is.
// - When registered allocations only partially overlap the range, each of
//   them is unregistered, freed and erased from `commandList`'s containers,
//   and the requested range is widened to include the freed allocation's
//   extent. `commandList` must be non-null in this case.
// - A new BUFFER_HOST_MEMORY allocation is then created for the (possibly
//   widened) range and registered in the SVM allocations container.
//
// Returns nullptr when the memory manager fails to create the allocation.
NEO::GraphicsAllocation *DriverHandleImp::allocateManagedMemoryFromHostPtr(Device *device, void *buffer,
                                                                           size_t size, struct CommandList *commandList) {
    bool fullyCovered = false;
    std::vector<NEO::SvmAllocationData *> overlapping = findAllocationsWithinRange(buffer, size, &fullyCovered);
    if (fullyCovered) {
        return overlapping[0]->gpuAllocation;
    }

    if (!overlapping.empty()) {
        UNRECOVERABLE_IF(commandList == nullptr);
        for (auto overlapData : overlapping) {
            // Read everything needed from the entry before it is removed.
            NEO::GraphicsAllocation *staleAllocation = overlapData->gpuAllocation;
            char *staleBegin = reinterpret_cast<char *>(staleAllocation->getGpuAddress());
            size_t staleSize = overlapData->size;

            device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->remove(*overlapData);
            memoryManager->freeGraphicsMemory(staleAllocation);
            commandList->eraseDeallocationContainerEntry(staleAllocation);
            commandList->eraseResidencyContainerEntry(staleAllocation);

            char *rangeBegin = reinterpret_cast<char *>(buffer);
            char *rangeEnd = rangeBegin + size;

            if (staleBegin < rangeBegin) {
                // Freed allocation starts earlier: move the start back to it.
                buffer = reinterpret_cast<void *>(staleBegin);
                size = ptrDiff(rangeEnd, staleBegin);
            } else {
                // Freed allocation may end later: extend the end to cover it.
                char *staleEnd = staleBegin + staleSize;
                if (staleEnd > rangeEnd) {
                    size = ptrDiff(staleEnd, rangeBegin);
                }
            }
        }
    }

    NEO::GraphicsAllocation *newAllocation = memoryManager->allocateGraphicsMemoryWithProperties(
        {0u, false, size, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false},
        buffer);
    if (newAllocation == nullptr) {
        return nullptr;
    }

    NEO::SvmAllocationData newEntry;
    newEntry.gpuAllocation = newAllocation;
    newEntry.cpuAllocation = nullptr;
    newEntry.size = size;
    newEntry.memoryType = InternalMemoryType::NOT_SPECIFIED;
    newEntry.device = nullptr;
    svmAllocsManager->getSVMAllocs()->insert(newEntry);

    return newAllocation;
}
|
||||
|
||||
// Creates an EXTERNAL_HOST_PTR graphics allocation for `buffer` with L3
// flushes required for both reads and writes.
//
// device - not used by this implementation.
// Aborts through UNRECOVERABLE_IF when the allocation cannot be created, so
// the returned pointer is never null.
NEO::GraphicsAllocation *DriverHandleImp::allocateMemoryFromHostPtr(Device *device, const void *buffer, size_t size) {
    NEO::AllocationProperties hostPtrProperties = {0u, false, size, NEO::GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};
    hostPtrProperties.flags.flushL3RequiredForWrite = true;
    hostPtrProperties.flags.flushL3RequiredForRead = true;

    auto hostPtrAllocation = memoryManager->allocateGraphicsMemoryWithProperties(hostPtrProperties, buffer);
    UNRECOVERABLE_IF(hostPtrAllocation == nullptr);

    return hostPtrAllocation;
}
|
||||
|
||||
// Creates an event pool on one device and returns the device's result.
//
// desc        - event pool descriptor, forwarded unchanged.
// numDevices  - number of entries in phDevices.
// phDevices   - devices that should see the pool; only the first is used.
// phEventPool - receives the created pool handle.
//
// When no device list is supplied (numDevices == 0 or phDevices == nullptr)
// the first device of this driver is used instead of reading phDevices[0].
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when no device is available at all.
ze_result_t DriverHandleImp::createEventPool(const ze_event_pool_desc_t *desc,
                                             uint32_t numDevices,
                                             ze_device_handle_t *phDevices,
                                             ze_event_pool_handle_t *phEventPool) {
    Device *device = nullptr;
    if (numDevices == 0 || phDevices == nullptr) {
        if (this->devices.empty()) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }
        device = this->devices[0];
    } else {
        device = Device::fromHandle(phDevices[0]);
    }

    return device->createEventPool(desc, phEventPool);
}
|
||||
|
||||
} // namespace L0
|
||||
69
level_zero/core/source/driver_handle_imp.h
Normal file
69
level_zero/core/source/driver_handle_imp.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/os_interface/os_library.h"
|
||||
|
||||
#include "level_zero/core/source/driver_handle.h"
|
||||
#include "level_zero/tools/source/tracing/tracing.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct DriverHandleImp : public DriverHandle {
|
||||
~DriverHandleImp() override;
|
||||
ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) override;
|
||||
ze_result_t getProperties(ze_driver_properties_t *properties) override;
|
||||
ze_result_t getApiVersion(ze_api_version_t *version) override;
|
||||
ze_result_t getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) override;
|
||||
ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) override;
|
||||
ze_result_t getMemAllocProperties(const void *ptr,
|
||||
ze_memory_allocation_properties_t *pMemAllocProperties,
|
||||
ze_device_handle_t *phDevice) override;
|
||||
|
||||
ze_result_t allocHostMem(ze_host_mem_alloc_flag_t flags, size_t size, size_t alignment, void **ptr) override;
|
||||
|
||||
ze_result_t allocDeviceMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t flags, size_t size,
|
||||
size_t alignment, void **ptr) override;
|
||||
|
||||
ze_result_t allocSharedMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t deviceFlags,
|
||||
ze_host_mem_alloc_flag_t hostFlags, size_t size, size_t alignment,
|
||||
void **ptr) override;
|
||||
|
||||
ze_result_t getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) override;
|
||||
ze_result_t freeMem(const void *ptr) override;
|
||||
NEO::MemoryManager *getMemoryManager() override;
|
||||
void setMemoryManager(NEO::MemoryManager *memoryManager) override;
|
||||
ze_result_t closeIpcMemHandle(const void *ptr) override;
|
||||
ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) override;
|
||||
ze_result_t openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle,
|
||||
ze_ipc_memory_flag_t flags, void **ptr) override;
|
||||
ze_result_t createEventPool(const ze_event_pool_desc_t *desc,
|
||||
uint32_t numDevices,
|
||||
ze_device_handle_t *phDevices,
|
||||
ze_event_pool_handle_t *phEventPool) override;
|
||||
ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) override;
|
||||
NEO::SVMAllocsManager *getSvmAllocsManager() override;
|
||||
ze_result_t initialize(std::vector<std::unique_ptr<NEO::Device>> devices);
|
||||
NEO::GraphicsAllocation *allocateManagedMemoryFromHostPtr(Device *device, void *buffer,
|
||||
size_t size, struct CommandList *commandList) override;
|
||||
NEO::GraphicsAllocation *allocateMemoryFromHostPtr(Device *device, const void *buffer, size_t size) override;
|
||||
bool findAllocationDataForRange(const void *buffer,
|
||||
size_t size,
|
||||
NEO::SvmAllocationData **allocData) override;
|
||||
std::vector<NEO::SvmAllocationData *> findAllocationsWithinRange(const void *buffer,
|
||||
size_t size,
|
||||
bool *allocationRangeCovered) override;
|
||||
|
||||
uint32_t numDevices = 0;
|
||||
std::vector<Device *> devices;
|
||||
NEO::MemoryManager *memoryManager = nullptr;
|
||||
NEO::SVMAllocsManager *svmAllocsManager = nullptr;
|
||||
NEO::OsLibrary *osLibrary = nullptr;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
26
level_zero/core/source/driver_imp.h
Normal file
26
level_zero/core/source/driver_imp.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/driver.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
class DriverImp : public Driver {
|
||||
public:
|
||||
ze_result_t driverInit(_ze_init_flag_t) override;
|
||||
|
||||
void initialize(bool *result) override;
|
||||
|
||||
protected:
|
||||
std::once_flag initDriverOnce;
|
||||
static bool initStatus;
|
||||
};
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/driver_version.h.in
Normal file
14
level_zero/core/source/driver_version.h.in
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef L0_DRIVER_VERSION_H
|
||||
#define L0_DRIVER_VERSION_H
|
||||
|
||||
#define L0_DRIVER_VERSION @L0_DRIVER_VERSION@
|
||||
|
||||
#endif /* L0_DRIVER_VERSION_H */
|
||||
|
||||
394
level_zero/core/source/event.cpp
Normal file
394
level_zero/core/source/event.cpp
Normal file
@@ -0,0 +1,394 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/event.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||
#include "shared/source/utilities/cpuintrinsics.h"
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/device_imp.h"
|
||||
#include "level_zero/tools/source/metrics/metric.h"
|
||||
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct EventImp : public Event {
|
||||
EventImp(EventPool *eventPool, int index, Device *device)
|
||||
: device(device), eventPool(eventPool) {}
|
||||
|
||||
~EventImp() override {}
|
||||
|
||||
ze_result_t hostSignal() override;
|
||||
|
||||
ze_result_t hostSynchronize(uint32_t timeout) override;
|
||||
|
||||
ze_result_t queryStatus() override {
|
||||
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
|
||||
auto alloc = &(this->eventPool->getAllocation());
|
||||
auto csr = static_cast<DeviceImp *>(this->device)->neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
if (metricTracer != nullptr) {
|
||||
*hostAddr = metricTracer->getNotificationState();
|
||||
}
|
||||
|
||||
csr->downloadAllocation(*alloc);
|
||||
|
||||
if (isTimestampEvent) {
|
||||
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
|
||||
|
||||
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
|
||||
hostAddr = reinterpret_cast<uint64_t *>(timeStampAddress);
|
||||
}
|
||||
|
||||
return *hostAddr == Event::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t reset() override;
|
||||
|
||||
ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) override;
|
||||
|
||||
Device *device;
|
||||
EventPool *eventPool;
|
||||
|
||||
protected:
|
||||
ze_result_t hostEventSetValue(uint32_t eventValue);
|
||||
ze_result_t hostEventSetValueTimestamps(uint32_t eventVal);
|
||||
void makeAllocationResident();
|
||||
};
|
||||
|
||||
struct EventPoolImp : public EventPool {
|
||||
EventPoolImp(Device *device, uint32_t count, ze_event_pool_flag_t flags) : device(device), count(count) {
|
||||
pool = std::vector<int>(this->count);
|
||||
eventPoolUsedCount = 0;
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
pool[i] = EventPool::EVENT_STATE_INITIAL;
|
||||
}
|
||||
|
||||
auto timestampMultiplier = 1;
|
||||
if (flags == ZE_EVENT_POOL_FLAG_TIMESTAMP) {
|
||||
isEventPoolUsedForTimestamp = true;
|
||||
timestampMultiplier = numEventTimestampTypes;
|
||||
}
|
||||
|
||||
NEO::AllocationProperties properties(
|
||||
device->getRootDeviceIndex(), count * eventSize * timestampMultiplier, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
|
||||
properties.alignment = eventAlignment;
|
||||
eventPoolAllocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
|
||||
UNRECOVERABLE_IF(eventPoolAllocation == nullptr);
|
||||
}
|
||||
|
||||
~EventPoolImp() override {
|
||||
device->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(eventPoolAllocation);
|
||||
eventPoolAllocation = nullptr;
|
||||
|
||||
eventTracker.clear();
|
||||
}
|
||||
|
||||
ze_result_t destroy() override;
|
||||
|
||||
size_t getPoolSize() override { return this->pool.size(); }
|
||||
uint32_t getPoolUsedCount() override { return eventPoolUsedCount; }
|
||||
|
||||
ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) override;
|
||||
|
||||
ze_result_t closeIpcHandle() override;
|
||||
|
||||
ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) override {
|
||||
*phEvent = Event::create(this, desc, this->getDevice());
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t reserveEventFromPool(int index, Event *event) override;
|
||||
|
||||
ze_result_t releaseEventToPool(Event *event) override;
|
||||
|
||||
uint32_t getEventSize() override { return eventSize; }
|
||||
|
||||
uint32_t getNumEventTimestampTypes() override { return numEventTimestampTypes; }
|
||||
|
||||
ze_result_t destroyPool() {
|
||||
if (eventPoolUsedCount != 0) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
pool.clear();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
Device *getDevice() override { return device; }
|
||||
|
||||
Device *device;
|
||||
uint32_t count;
|
||||
uint32_t eventPoolUsedCount;
|
||||
std::vector<int> pool;
|
||||
std::unordered_map<Event *, int> eventTracker;
|
||||
|
||||
std::queue<int> lastEventPoolOffsetUsed;
|
||||
|
||||
protected:
|
||||
const uint32_t eventSize = 64u;
|
||||
const uint32_t eventAlignment = 64u;
|
||||
|
||||
const uint32_t numEventTimestampTypes = 4u;
|
||||
};
|
||||
|
||||
// Builds an EventImp, reserves the slot desc->index in the pool for it,
// copies the scopes from the descriptor and resets it to its initial state.
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
    EventImp *newEvent = new EventImp(eventPool, desc->index, device);
    UNRECOVERABLE_IF(newEvent == nullptr);

    eventPool->reserveEventFromPool(desc->index, static_cast<Event *>(newEvent));

    newEvent->signalScope = desc->signal;
    newEvent->waitScope = desc->wait;

    // Events inherit the timestamp property from their pool.
    if (eventPool->isEventPoolUsedForTimestamp) {
        newEvent->isTimestampEvent = true;
    }

    // reset() reads the fields set above, so it must come last.
    newEvent->reset();

    return newEvent;
}
|
||||
|
||||
// Returns the allocation of the pool this event belongs to.
NEO::GraphicsAllocation &Event::getAllocation() {
    EventPool *owningPool = static_cast<EventImp *>(this)->eventPool;
    return owningPool->getAllocation();
}
|
||||
|
||||
uint64_t Event::getOffsetOfProfilingEvent(uint32_t profileEventType) {
|
||||
auto eventImp = static_cast<EventImp *>(this);
|
||||
auto eventSize = eventImp->eventPool->getEventSize();
|
||||
return (profileEventType * eventSize);
|
||||
}
|
||||
|
||||
// Returns the event's slot to its pool and deletes the event. If the pool
// does not accept the release, the event is left alive and an error is returned.
ze_result_t Event::destroy() {
    EventPool *owningPool = static_cast<EventImp *>(this)->eventPool;

    const ze_result_t releaseStatus = owningPool->releaseEventToPool(this);
    if (releaseStatus != ZE_RESULT_SUCCESS) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    delete this;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
void EventImp::makeAllocationResident() {
|
||||
auto deviceImp = static_cast<DeviceImp *>(this->device);
|
||||
NEO::MemoryOperationsHandler *memoryOperationsIface = deviceImp->neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();
|
||||
auto alloc = &(this->eventPool->getAllocation());
|
||||
|
||||
if (memoryOperationsIface) {
|
||||
memoryOperationsIface->makeResident(ArrayRef<NEO::GraphicsAllocation *>(&alloc, 1));
|
||||
}
|
||||
}
|
||||
|
||||
// Writes `eventVal` into every timestamp slot of this event, flushing each
// written cache line when the signal scope is not NONE, then makes the pool
// allocation resident.
ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
    const bool flushRequired = (this->signalScope != ZE_EVENT_SCOPE_FLAG_NONE);

    for (uint32_t timestampIndex = 0; timestampIndex < this->eventPool->getNumEventTimestampTypes(); ++timestampIndex) {
        const uint64_t slotAddress = reinterpret_cast<uint64_t>(hostAddress) + getOffsetOfProfilingEvent(timestampIndex);
        uint64_t *slot = reinterpret_cast<uint64_t *>(slotAddress);

        *slot = eventVal;

        if (flushRequired) {
            NEO::CpuIntrinsics::clFlush(slot);
        }
    }

    makeAllocationResident();

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Stores `eventVal` into the event's host-visible status word (and, for
// timestamp events, into every timestamp slot), makes the pool allocation
// resident and flushes the written cache line when the signal scope is not
// NONE. Aborts via UNRECOVERABLE_IF when the event has no host address.
ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) {
    // Validate the host address before anything writes through it; the
    // timestamp path below derives its pointers from the same address.
    auto hostAddr = static_cast<uint64_t *>(hostAddress);
    UNRECOVERABLE_IF(hostAddr == nullptr);

    if (isTimestampEvent) {
        hostEventSetValueTimestamps(eventVal);
    }

    *(hostAddr) = eventVal;

    makeAllocationResident();

    if (this->signalScope != ZE_EVENT_SCOPE_FLAG_NONE) {
        NEO::CpuIntrinsics::clFlush(hostAddr);
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
ze_result_t EventImp::hostSignal() {
|
||||
return hostEventSetValue(Event::STATE_SIGNALED);
|
||||
}
|
||||
|
||||
// Polls queryStatus() until the event is signaled or `timeout` elapses.
// The elapsed time is measured in nanoseconds and compared against `timeout`.
//  - AUB command stream receivers report success immediately.
//  - timeout == 0 performs a single query.
//  - timeout == UINT32_MAX never gives up.
// Returns the result of the last query when the wait times out.
ze_result_t EventImp::hostSynchronize(uint32_t timeout) {
    using Clock = std::chrono::high_resolution_clock;

    auto commandStreamReceiver = static_cast<DeviceImp *>(this->device)->neoDevice->getDefaultEngine().commandStreamReceiver;
    if (commandStreamReceiver->getType() == NEO::CommandStreamReceiverType::CSR_AUB) {
        return ZE_RESULT_SUCCESS;
    }

    if (timeout == 0) {
        return queryStatus();
    }

    const bool waitForever = (timeout == std::numeric_limits<uint32_t>::max());
    ze_result_t lastStatus = ZE_RESULT_NOT_READY;
    const Clock::time_point waitStart = Clock::now();

    for (;;) {
        lastStatus = queryStatus();
        if (lastStatus == ZE_RESULT_SUCCESS) {
            return ZE_RESULT_SUCCESS;
        }

        std::this_thread::yield();
        NEO::CpuIntrinsics::pause();

        if (waitForever) {
            continue;
        }

        const int64_t elapsedNs = std::chrono::duration_cast<std::chrono::nanoseconds>(Clock::now() - waitStart).count();
        if (elapsedNs >= timeout) {
            break;
        }
    }

    return lastStatus;
}
|
||||
|
||||
ze_result_t EventImp::reset() {
|
||||
return hostEventSetValue(Event::STATE_INITIAL);
|
||||
}
|
||||
|
||||
// Copies the 64-bit value stored in the slot for `timestampType` to `dstptr`.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when the event is not a timestamp
// event, when `dstptr` or the event's host address is null, or when
// `timestampType` does not name one of the pool's timestamp slots.
ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) {
    if (!this->isTimestampEvent) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    if (dstptr == nullptr || hostAddress == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // An out-of-range type would address memory beyond this event's slots.
    if (static_cast<uint32_t>(timestampType) >= this->eventPool->getNumEventTimestampTypes()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
    auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(timestampType);
    auto tsptr = reinterpret_cast<uint64_t *>(timeStampAddress);

    memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(tsptr), sizeof(uint64_t));

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Constructs an EventPoolImp from the count and flags in the descriptor.
EventPool *EventPool::create(Device *device, const ze_event_pool_desc_t *desc) {
    EventPoolImp *createdPool = new EventPoolImp(device, desc->count, desc->flags);
    UNRECOVERABLE_IF(createdPool == nullptr);
    return createdPool;
}
|
||||
|
||||
// Marks slot `index` as created, records the event in the tracker and assigns
// the event its host and GPU addresses inside the pool allocation.
// If the slot is already marked created, returns success without touching
// the event.
ze_result_t EventPoolImp::reserveEventFromPool(int index, Event *event) {
    if (pool[index] == EventPool::EVENT_STATE_CREATED) {
        return ZE_RESULT_SUCCESS;
    }

    pool[index] = EventPool::EVENT_STATE_CREATED;
    eventTracker.insert(std::make_pair(event, index));

    // Prefer an offset handed back by a previously released event; fall back
    // to the requested index when none is queued.
    if (!lastEventPoolOffsetUsed.empty()) {
        event->offsetUsed = lastEventPoolOffsetUsed.front();
        lastEventPoolOffsetUsed.pop();
    } else {
        event->offsetUsed = index;
    }

    int timestampMultiplier = 1;
    if (this->isEventPoolUsedForTimestamp) {
        timestampMultiplier = numEventTimestampTypes;
    }

    // Same expression as used for both addresses; evaluated once.
    const auto byteOffset = event->offsetUsed * eventSize * timestampMultiplier;

    const uint64_t hostBase = reinterpret_cast<uint64_t>(eventPoolAllocation->getUnderlyingBuffer());
    event->hostAddress = reinterpret_cast<void *>(hostBase + byteOffset);
    event->gpuAddress = eventPoolAllocation->getGpuAddress() + byteOffset;

    eventPoolUsedCount++;

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Gives an event's slot back to the pool: marks the slot destroyed, forgets
// the event, clears its addresses and queues its offset for reuse.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT for events this pool does not track.
ze_result_t EventPoolImp::releaseEventToPool(Event *event) {
    UNRECOVERABLE_IF(event == nullptr);

    auto trackedEntry = eventTracker.find(event);
    if (trackedEntry == eventTracker.end()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    const int slotIndex = trackedEntry->second;
    pool[slotIndex] = EventPool::EVENT_STATE_DESTROYED;
    eventTracker.erase(trackedEntry);

    event->hostAddress = nullptr;
    event->gpuAddress = 0;

    lastEventPoolOffsetUsed.push(event->offsetUsed);
    event->offsetUsed = -1;

    eventPoolUsedCount--;

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
ze_result_t EventPoolImp::getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
ze_result_t EventPoolImp::closeIpcHandle() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }
|
||||
|
||||
// Deletes the pool once destroyPool() succeeds; otherwise the pool stays
// alive and ZE_RESULT_ERROR_INVALID_ARGUMENT is returned.
ze_result_t EventPoolImp::destroy() {
    const ze_result_t poolStatus = this->destroyPool();
    if (poolStatus != ZE_RESULT_SUCCESS) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    delete this;

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
ze_result_t eventPoolOpenIpcHandle(ze_driver_handle_t hDriver, ze_ipc_event_pool_handle_t hIpc,
|
||||
ze_event_pool_handle_t *phEventPool) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
// Validates the requested slot against the pool and creates the event.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when desc->index is outside the
// pool or when the pool has no free slot left.
ze_result_t eventCreate(ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc,
                        ze_event_handle_t *phEvent) {
    EventPool *eventPool = EventPool::fromHandle(hEventPool);
    UNRECOVERABLE_IF(eventPool == nullptr);

    // Compare with >= instead of "> size - 1": the subtraction wraps around
    // for an empty pool and would accept every index.
    if (desc->index >= eventPool->getPoolSize()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    if ((eventPool->getPoolUsedCount() + 1) > eventPool->getPoolSize()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    return eventPool->createEvent(desc, phEvent);
}
|
||||
|
||||
ze_result_t eventDestroy(ze_event_handle_t hEvent) { return Event::fromHandle(hEvent)->destroy(); }
|
||||
|
||||
} // namespace L0
|
||||
111
level_zero/core/source/event.h
Normal file
111
level_zero/core/source/event.h
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/cmdlist.h"
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include "level_zero/core/source/driver_handle.h"
|
||||
#include <level_zero/ze_event.h>
|
||||
|
||||
struct _ze_event_handle_t {};
|
||||
|
||||
struct _ze_event_pool_handle_t {};
|
||||
|
||||
namespace L0 {
|
||||
typedef uint64_t FlushStamp;
|
||||
struct EventPool;
|
||||
struct MetricTracer;
|
||||
|
||||
struct Event : _ze_event_handle_t {
|
||||
virtual ~Event() = default;
|
||||
virtual ze_result_t destroy();
|
||||
virtual ze_result_t hostSignal() = 0;
|
||||
virtual ze_result_t hostSynchronize(uint32_t timeout) = 0;
|
||||
virtual ze_result_t queryStatus() = 0;
|
||||
virtual ze_result_t reset() = 0;
|
||||
virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0;
|
||||
|
||||
enum State : uint32_t {
|
||||
STATE_SIGNALED = 0u,
|
||||
STATE_CLEARED = static_cast<uint32_t>(-1),
|
||||
STATE_INITIAL = STATE_CLEARED
|
||||
};
|
||||
|
||||
static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device);
|
||||
|
||||
static Event *fromHandle(ze_event_handle_t handle) { return static_cast<Event *>(handle); }
|
||||
|
||||
inline ze_event_handle_t toHandle() { return this; }
|
||||
|
||||
NEO::GraphicsAllocation &getAllocation();
|
||||
|
||||
uint64_t getGpuAddress() { return gpuAddress; }
|
||||
uint64_t getOffsetOfProfilingEvent(uint32_t profileEventType);
|
||||
|
||||
void *hostAddress = nullptr;
|
||||
uint64_t gpuAddress;
|
||||
int offsetUsed = -1;
|
||||
|
||||
ze_event_scope_flag_t signalScope; // Saving scope for use later
|
||||
ze_event_scope_flag_t waitScope;
|
||||
|
||||
bool isTimestampEvent = false;
|
||||
|
||||
// Metric tracer instance associated with the event.
|
||||
MetricTracer *metricTracer = nullptr;
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *allocation = nullptr;
|
||||
};
|
||||
|
||||
struct EventPool : _ze_event_pool_handle_t {
|
||||
static EventPool *create(Device *device, const ze_event_pool_desc_t *desc);
|
||||
|
||||
virtual ~EventPool() = default;
|
||||
virtual ze_result_t destroy() = 0;
|
||||
virtual size_t getPoolSize() = 0;
|
||||
virtual uint32_t getPoolUsedCount() = 0;
|
||||
virtual ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) = 0;
|
||||
virtual ze_result_t closeIpcHandle() = 0;
|
||||
virtual ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) = 0;
|
||||
virtual ze_result_t reserveEventFromPool(int index, Event *event) = 0;
|
||||
virtual ze_result_t releaseEventToPool(Event *event) = 0;
|
||||
virtual Device *getDevice() = 0;
|
||||
|
||||
enum EventCreationState : int {
|
||||
EVENT_STATE_INITIAL = 0,
|
||||
EVENT_STATE_DESTROYED = EVENT_STATE_INITIAL,
|
||||
EVENT_STATE_CREATED = 1
|
||||
};
|
||||
|
||||
static EventPool *fromHandle(ze_event_pool_handle_t handle) {
|
||||
return static_cast<EventPool *>(handle);
|
||||
}
|
||||
|
||||
inline ze_event_pool_handle_t toHandle() { return this; }
|
||||
|
||||
NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; }
|
||||
|
||||
virtual uint32_t getEventSize() = 0;
|
||||
virtual uint32_t getNumEventTimestampTypes() = 0;
|
||||
|
||||
bool isEventPoolUsedForTimestamp = false;
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *eventPoolAllocation = nullptr;
|
||||
};
|
||||
|
||||
ze_result_t eventPoolOpenIpcHandle(ze_driver_handle_t hDriver, ze_ipc_event_pool_handle_t hIpc,
|
||||
ze_event_pool_handle_t *phEventPool);
|
||||
|
||||
ze_result_t eventCreate(ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc,
|
||||
ze_event_handle_t *phEvent);
|
||||
|
||||
ze_result_t eventDestroy(ze_event_handle_t hEvent);
|
||||
|
||||
} // namespace L0
|
||||
121
level_zero/core/source/fence.cpp
Normal file
121
level_zero/core/source/fence.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/fence.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/utilities/cpuintrinsics.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct FenceImp : public Fence {
|
||||
FenceImp(CommandQueueImp *cmdQueueImp) : cmdQueue(cmdQueueImp) {}
|
||||
|
||||
~FenceImp() override {
|
||||
cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(allocation);
|
||||
allocation = nullptr;
|
||||
}
|
||||
|
||||
ze_result_t destroy() override {
|
||||
delete this;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t hostSynchronize(uint32_t timeout) override;
|
||||
|
||||
ze_result_t queryStatus() override {
|
||||
auto csr = cmdQueue->getCsr();
|
||||
if (csr) {
|
||||
csr->downloadAllocation(*allocation);
|
||||
}
|
||||
|
||||
auto hostAddr = static_cast<uint64_t *>(allocation->getUnderlyingBuffer());
|
||||
return *hostAddr == Fence::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t reset() override;
|
||||
|
||||
static Fence *fromHandle(ze_fence_handle_t handle) { return static_cast<Fence *>(handle); }
|
||||
|
||||
inline ze_fence_handle_t toHandle() { return this; }
|
||||
|
||||
bool initialize();
|
||||
|
||||
protected:
|
||||
CommandQueueImp *cmdQueue;
|
||||
};
|
||||
|
||||
// Creates a FenceImp for the queue and initializes its backing allocation.
// `desc` is not read here.
Fence *Fence::create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc) {
    FenceImp *createdFence = new FenceImp(cmdQueue);
    UNRECOVERABLE_IF(createdFence == nullptr);

    createdFence->initialize();

    return createdFence;
}
|
||||
|
||||
bool FenceImp::initialize() {
|
||||
NEO::AllocationProperties properties(
|
||||
cmdQueue->getDevice()->getRootDeviceIndex(), 64u, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
|
||||
properties.alignment = 64u;
|
||||
allocation = cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
UNRECOVERABLE_IF(allocation == nullptr);
|
||||
|
||||
reset();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes Fence::STATE_CLEARED into the fence word and flushes its cache line.
ze_result_t FenceImp::reset() {
    uint64_t *fenceWord = static_cast<uint64_t *>(allocation->getUnderlyingBuffer());

    *fenceWord = Fence::STATE_CLEARED;
    NEO::CpuIntrinsics::clFlush(fenceWord);

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Polls queryStatus() until the fence is signaled or `timeout` elapses.
// The elapsed time is measured in nanoseconds and compared against `timeout`.
//  - AUB command stream receivers report success immediately.
//  - timeout == 0 performs a single query.
//  - timeout == UINT32_MAX never gives up.
// Returns the result of the last query when the wait times out.
ze_result_t FenceImp::hostSynchronize(uint32_t timeout) {
    std::chrono::high_resolution_clock::time_point time1, time2;
    int64_t timeDiff = 0;
    ze_result_t ret = ZE_RESULT_NOT_READY;

    // queryStatus() treats the queue's CSR as optional; do the same here
    // instead of dereferencing it unconditionally.
    auto csr = cmdQueue->getCsr();
    if (csr != nullptr && csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) {
        return ZE_RESULT_SUCCESS;
    }

    if (timeout == 0) {
        return queryStatus();
    }

    time1 = std::chrono::high_resolution_clock::now();
    while (timeDiff < timeout) {
        ret = queryStatus();
        if (ret == ZE_RESULT_SUCCESS) {
            return ZE_RESULT_SUCCESS;
        }

        std::this_thread::yield();
        NEO::CpuIntrinsics::pause();

        // Infinite wait: skip the clock so timeDiff never advances.
        if (timeout == std::numeric_limits<uint32_t>::max()) {
            continue;
        }

        time2 = std::chrono::high_resolution_clock::now();
        timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(time2 - time1).count();
    }

    return ret;
}
|
||||
|
||||
ze_result_t fenceDestroy(ze_fence_handle_t phFence) {
|
||||
return Fence::fromHandle(phFence)->destroy();
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
56
level_zero/core/source/fence.h
Normal file
56
level_zero/core/source/fence.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue.h"
|
||||
#include "level_zero/core/source/cmdqueue_imp.h"
|
||||
#include <level_zero/ze_fence.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
struct _ze_fence_handle_t {};
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct Fence : _ze_fence_handle_t {
|
||||
static Fence *create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc);
|
||||
virtual ~Fence() = default;
|
||||
virtual ze_result_t destroy() = 0;
|
||||
virtual ze_result_t hostSynchronize(uint32_t timeout) = 0;
|
||||
virtual ze_result_t queryStatus() = 0;
|
||||
virtual ze_result_t reset() = 0;
|
||||
|
||||
static Fence *fromHandle(ze_fence_handle_t handle) { return static_cast<Fence *>(handle); }
|
||||
|
||||
inline ze_fence_handle_t toHandle() { return this; }
|
||||
|
||||
enum State : uint32_t {
|
||||
STATE_SIGNALED = 0u,
|
||||
STATE_CLEARED = std::numeric_limits<uint32_t>::max(),
|
||||
STATE_INITIAL = STATE_CLEARED
|
||||
};
|
||||
|
||||
enum EnqueueState : uint32_t { ENQUEUE_NOT_READY = 0u,
|
||||
ENQUEUE_READY };
|
||||
|
||||
NEO::GraphicsAllocation &getAllocation() { return *allocation; }
|
||||
|
||||
uint64_t getGpuAddress() {
|
||||
UNRECOVERABLE_IF(allocation == nullptr);
|
||||
return allocation->getGpuAddress();
|
||||
}
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *allocation = nullptr;
|
||||
};
|
||||
|
||||
// Destroys the fence behind the handle. The handle is taken by value so that
// this declaration matches the definition in fence.cpp.
ze_result_t fenceDestroy(ze_fence_handle_t phFence);
|
||||
|
||||
} // namespace L0
|
||||
17
level_zero/core/source/gen11/CMakeLists.txt
Normal file
17
level_zero/core/source/gen11/CMakeLists.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_GEN11)
|
||||
set(HW_SOURCES_GEN11
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen11.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen11.cpp
|
||||
)
|
||||
add_subdirectories()
|
||||
|
||||
target_sources(${TARGET_NAME_L0} PRIVATE ${HW_SOURCES_GEN11})
|
||||
set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN11 ${HW_SOURCES_GEN11})
|
||||
endif()
|
||||
18
level_zero/core/source/gen11/cmdlist_gen11.inl
Normal file
18
level_zero/core/source/gen11/cmdlist_gen11.inl
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
const void **pRanges) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
|
||||
true);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
28
level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp
Normal file
28
level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
|
||||
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
typedef ICLFamily Family;
|
||||
|
||||
struct EnableL0Gen11 {
|
||||
EnableL0Gen11() {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableL0Gen11 enable;
|
||||
} // namespace NEO
|
||||
18
level_zero/core/source/gen11/icllp/CMakeLists.txt
Normal file
18
level_zero/core/source/gen11/icllp/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_ICLLP)
|
||||
set(HW_SOURCES_GEN11
|
||||
${HW_SOURCES_GEN11}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_icllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_icllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_icllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_icllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_icllp.cpp
|
||||
PARENT_SCOPE
|
||||
)
|
||||
endif()
|
||||
40
level_zero/core/source/gen11/icllp/cmdlist_icllp.cpp
Normal file
40
level_zero/core/source/gen11/icllp/cmdlist_icllp.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen11/hw_cmds.h"
|
||||
#include "shared/source/gen11/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_base.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/gen11/cmdlist_gen11.inl"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template struct CommandListCoreFamily<IGFX_GEN11_CORE>;
|
||||
template struct CommandListCoreFamilyImmediate<IGFX_GEN11_CORE>;
|
||||
|
||||
template <>
|
||||
struct CommandListProductFamily<IGFX_ICELAKE_LP> : public CommandListCoreFamily<IGFX_GEN11_CORE> {
|
||||
using CommandListCoreFamily::CommandListCoreFamily;
|
||||
};
|
||||
|
||||
static CommandListPopulateFactory<IGFX_ICELAKE_LP, CommandListProductFamily<IGFX_ICELAKE_LP>>
|
||||
populateICLLP;
|
||||
|
||||
template <>
|
||||
struct CommandListImmediateProductFamily<IGFX_ICELAKE_LP> : public CommandListCoreFamilyImmediate<IGFX_GEN11_CORE> {
|
||||
using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate;
|
||||
};
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_ICELAKE_LP, CommandListImmediateProductFamily<IGFX_ICELAKE_LP>>
|
||||
populateICLLPImmediate;
|
||||
|
||||
} // namespace L0
|
||||
21
level_zero/core/source/gen11/icllp/cmdqueue_icllp.cpp
Normal file
21
level_zero/core/source/gen11/icllp/cmdqueue_icllp.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen11/hw_cmds.h"
|
||||
#include "shared/source/gen11/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue_hw_base.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandQueuePopulateFactory<IGFX_ICELAKE_LP, CommandQueueHw<IGFX_GEN11_CORE>> populateICLLP;
|
||||
|
||||
} // namespace L0
|
||||
26
level_zero/core/source/gen11/icllp/image_icllp.cpp
Normal file
26
level_zero/core/source/gen11/icllp/image_icllp.cpp
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen11/hw_cmds.h"
|
||||
#include "shared/source/gen11/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_ICELAKE_LP> : public ImageCoreFamily<IGFX_GEN11_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
|
||||
bool initialize(Device *device, const ze_image_desc_t *desc) override {
|
||||
return ImageCoreFamily<IGFX_GEN11_CORE>::initialize(device, desc);
|
||||
};
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_ICELAKE_LP, ImageProductFamily<IGFX_ICELAKE_LP>> populateICLLP;
|
||||
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/gen11/icllp/kernel_icllp.cpp
Normal file
14
level_zero/core/source/gen11/icllp/kernel_icllp.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_ICELAKE_LP, KernelHw<IGFX_GEN11_CORE>> populateICLLP;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen11/icllp/sampler_icllp.cpp
Normal file
22
level_zero/core/source/gen11/icllp/sampler_icllp.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen11/hw_cmds.h"
|
||||
#include "shared/source/gen11/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/sampler_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct SamplerProductFamily<IGFX_ICELAKE_LP> : public SamplerCoreFamily<IGFX_GEN11_CORE> {
|
||||
using SamplerCoreFamily::SamplerCoreFamily;
|
||||
};
|
||||
|
||||
static SamplerPopulateFactory<IGFX_ICELAKE_LP, SamplerProductFamily<IGFX_ICELAKE_LP>> populateICLLP;
|
||||
|
||||
} // namespace L0
|
||||
19
level_zero/core/source/gen12lp/CMakeLists.txt
Normal file
19
level_zero/core/source/gen12lp/CMakeLists.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_GEN12LP)
  # Sources that belong to the Gen12LP core family itself. Product
  # subdirectories (e.g. tgllp) extend this list through PARENT_SCOPE.
  set(HW_SOURCES_GEN12LP
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
      ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen12lp.h
      ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/cache_flush_gen12lp.inl
      ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen12lp.cpp
  )

  # Project-provided helper (defined outside this file); must run before the
  # list is consumed below so that subdirectory additions are included.
  add_subdirectories()

  # Lets sources include "cache_flush_gen12lp.inl" without a path prefix.
  target_include_directories(${TARGET_NAME_L0}
                             PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/
  )

  # Attach the collected sources to the Level Zero target and publish the
  # same list through a global property.
  target_sources(${TARGET_NAME_L0} PRIVATE ${HW_SOURCES_GEN12LP})
  set_property(GLOBAL APPEND PROPERTY L0_HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP})
endif()
|
||||
26
level_zero/core/source/gen12lp/cmdlist_gen12lp.h
Normal file
26
level_zero/core/source/gen12lp/cmdlist_gen12lp.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.h"
|
||||
|
||||
#include "cache_flush_gen12lp.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
struct CommandListProductFamily : public CommandListCoreFamily<IGFX_GEN12LP_CORE> {
|
||||
using CommandListCoreFamily::CommandListCoreFamily;
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate<IGFX_GEN12LP_CORE> {
|
||||
using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate;
|
||||
};
|
||||
} // namespace L0
|
||||
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
const void **pRanges) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
|
||||
true);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
|
||||
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
typedef TGLLPFamily Family;
|
||||
|
||||
struct EnableL0Gen12LP {
|
||||
EnableL0Gen12LP() {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableL0Gen12LP enable;
|
||||
} // namespace NEO
|
||||
18
level_zero/core/source/gen12lp/tgllp/CMakeLists.txt
Normal file
18
level_zero/core/source/gen12lp/tgllp/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_TGLLP)
|
||||
set(HW_SOURCES_GEN12LP
|
||||
${HW_SOURCES_GEN12LP}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_tgllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_tgllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tgllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_tgllp.cpp
|
||||
PARENT_SCOPE
|
||||
)
|
||||
endif()
|
||||
27
level_zero/core/source/gen12lp/tgllp/cmdlist_tgllp.cpp
Normal file
27
level_zero/core/source/gen12lp/tgllp/cmdlist_tgllp.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_cmds.h"
|
||||
#include "shared/source/gen12lp/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_base.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
template struct CommandListCoreFamily<IGFX_GEN12LP_CORE>;
|
||||
|
||||
static CommandListPopulateFactory<IGFX_TIGERLAKE_LP, CommandListProductFamily<IGFX_TIGERLAKE_LP>>
|
||||
populateTGLLP;
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_TIGERLAKE_LP, CommandListImmediateProductFamily<IGFX_TIGERLAKE_LP>>
|
||||
populateTGLLPImmediate;
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp
Normal file
22
level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_cmds.h"
|
||||
#include "shared/source/gen12lp/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue_hw_base.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandQueuePopulateFactory<IGFX_TIGERLAKE_LP, CommandQueueHw<IGFX_GEN12LP_CORE>>
|
||||
populateTGLLP;
|
||||
|
||||
} // namespace L0
|
||||
26
level_zero/core/source/gen12lp/tgllp/image_tgllp.cpp
Normal file
26
level_zero/core/source/gen12lp/tgllp/image_tgllp.cpp
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_cmds.h"
|
||||
#include "shared/source/gen12lp/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_TIGERLAKE_LP> : public ImageCoreFamily<IGFX_GEN12LP_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
|
||||
bool initialize(Device *device, const ze_image_desc_t *desc) override {
|
||||
return ImageCoreFamily<IGFX_GEN12LP_CORE>::initialize(device, desc);
|
||||
};
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_TIGERLAKE_LP, ImageProductFamily<IGFX_TIGERLAKE_LP>> populateTGLLP;
|
||||
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/gen12lp/tgllp/kernel_tgllp.cpp
Normal file
14
level_zero/core/source/gen12lp/tgllp/kernel_tgllp.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_TIGERLAKE_LP, KernelHw<IGFX_GEN12LP_CORE>> populateTGLLP;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen12lp/tgllp/sampler_tgllp.cpp
Normal file
22
level_zero/core/source/gen12lp/tgllp/sampler_tgllp.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_cmds.h"
|
||||
#include "shared/source/gen12lp/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/sampler_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct SamplerProductFamily<IGFX_TIGERLAKE_LP> : public SamplerCoreFamily<IGFX_GEN12LP_CORE> {
|
||||
using SamplerCoreFamily::SamplerCoreFamily;
|
||||
};
|
||||
|
||||
static SamplerPopulateFactory<IGFX_TIGERLAKE_LP, SamplerProductFamily<IGFX_TIGERLAKE_LP>> populateTGLLP;
|
||||
|
||||
} // namespace L0
|
||||
15
level_zero/core/source/gen8/CMakeLists.txt
Normal file
15
level_zero/core/source/gen8/CMakeLists.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_GEN8)
|
||||
set(HW_SOURCES_GEN8
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen8.cpp
|
||||
)
|
||||
|
||||
target_sources(${TARGET_NAME_L0} PRIVATE ${HW_SOURCES_GEN8})
|
||||
set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN8 ${HW_SOURCES_GEN8})
|
||||
endif()
|
||||
28
level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp
Normal file
28
level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
|
||||
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
typedef BDWFamily Family;
|
||||
|
||||
struct EnableL0Gen8 {
|
||||
EnableL0Gen8() {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableL0Gen8 enable;
|
||||
} // namespace NEO
|
||||
18
level_zero/core/source/gen9/CMakeLists.txt
Normal file
18
level_zero/core/source/gen9/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_GEN9)
  # Sources that belong to the Gen9 core family itself. Product subdirectories
  # (cfl, glk, kbl, skl) extend this list through PARENT_SCOPE.
  # cmdlist_gen9.h is listed alongside cmdlist_gen9.inl so the header is part of
  # the target's source list, matching how gen12lp lists cmdlist_gen12lp.h.
  set(HW_SOURCES_GEN9
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
      ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen9.h
      ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen9.inl
      ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen9.cpp
  )

  # Project-provided helper (defined outside this file); must run before the
  # list is consumed below so that subdirectory additions are included.
  add_subdirectories()

  # Attach the collected sources to the Level Zero target and publish the
  # same list through a global property.
  target_sources(${TARGET_NAME_L0} PRIVATE ${HW_SOURCES_GEN9})
  set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN9 ${HW_SOURCES_GEN9})
endif()
|
||||
18
level_zero/core/source/gen9/cfl/CMakeLists.txt
Normal file
18
level_zero/core/source/gen9/cfl/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_CFL)
|
||||
set(HW_SOURCES_GEN9
|
||||
${HW_SOURCES_GEN9}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_cfl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_cfl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_cfl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_cfl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_cfl.cpp
|
||||
PARENT_SCOPE
|
||||
)
|
||||
endif()
|
||||
27
level_zero/core/source/gen9/cfl/cmdlist_cfl.cpp
Normal file
27
level_zero/core/source/gen9/cfl/cmdlist_cfl.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_base.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.h"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.inl"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandListPopulateFactory<IGFX_COFFEELAKE, CommandListProductFamily<IGFX_COFFEELAKE>>
|
||||
populateCFL;
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_COFFEELAKE, CommandListImmediateProductFamily<IGFX_COFFEELAKE>>
|
||||
populateCFLImmediate;
|
||||
} // namespace L0
|
||||
21
level_zero/core/source/gen9/cfl/cmdqueue_cfl.cpp
Normal file
21
level_zero/core/source/gen9/cfl/cmdqueue_cfl.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue_hw_base.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandQueuePopulateFactory<IGFX_COFFEELAKE, CommandQueueHw<IGFX_GEN9_CORE>> populateCFL;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/cfl/image_cfl.cpp
Normal file
22
level_zero/core/source/gen9/cfl/image_cfl.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_COFFEELAKE> : public ImageCoreFamily<IGFX_GEN9_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_COFFEELAKE, ImageProductFamily<IGFX_COFFEELAKE>> populateCFL;
|
||||
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/gen9/cfl/kernel_cfl.cpp
Normal file
14
level_zero/core/source/gen9/cfl/kernel_cfl.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_COFFEELAKE, KernelHw<IGFX_GEN9_CORE>> populateCFL;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/cfl/sampler_cfl.cpp
Normal file
22
level_zero/core/source/gen9/cfl/sampler_cfl.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/sampler_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct SamplerProductFamily<IGFX_COFFEELAKE> : public SamplerCoreFamily<IGFX_GEN9_CORE> {
|
||||
using SamplerCoreFamily::SamplerCoreFamily;
|
||||
};
|
||||
|
||||
static SamplerPopulateFactory<IGFX_COFFEELAKE, SamplerProductFamily<IGFX_COFFEELAKE>> populateCFL;
|
||||
|
||||
} // namespace L0
|
||||
39
level_zero/core/source/gen9/cmdlist_gen9.h
Normal file
39
level_zero/core/source/gen9/cmdlist_gen9.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.h"
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template struct CommandListCoreFamily<IGFX_GEN9_CORE>;
|
||||
template struct CommandListCoreFamilyImmediate<IGFX_GEN9_CORE>;
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
struct CommandListProductFamily : public CommandListCoreFamily<IGFX_GEN9_CORE> {
|
||||
using CommandListCoreFamily::CommandListCoreFamily;
|
||||
|
||||
void programL3(bool isSLMused) override {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<IGFX_GEN9_CORE>::GfxFamily;
|
||||
NEO::EncodeL3State<GfxFamily>::encode(commandContainer, isSLMused);
|
||||
}
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate<IGFX_GEN9_CORE> {
|
||||
using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
24
level_zero/core/source/gen9/cmdlist_gen9.inl
Normal file
24
level_zero/core/source/gen9/cmdlist_gen9.inl
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
const void **pRanges) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
|
||||
true);
|
||||
}
|
||||
} // namespace L0
|
||||
28
level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp
Normal file
28
level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
|
||||
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
typedef SKLFamily Family;
|
||||
|
||||
struct EnableL0Gen9 {
|
||||
EnableL0Gen9() {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableL0Gen9 enable;
|
||||
} // namespace NEO
|
||||
18
level_zero/core/source/gen9/glk/CMakeLists.txt
Normal file
18
level_zero/core/source/gen9/glk/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_GLK)
|
||||
set(HW_SOURCES_GEN9
|
||||
${HW_SOURCES_GEN9}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_glk.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_glk.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_glk.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_glk.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_glk.cpp
|
||||
PARENT_SCOPE
|
||||
)
|
||||
endif()
|
||||
29
level_zero/core/source/gen9/glk/cmdlist_glk.cpp
Normal file
29
level_zero/core/source/gen9/glk/cmdlist_glk.cpp
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_base.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.h"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.inl"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandListPopulateFactory<IGFX_GEMINILAKE, CommandListProductFamily<IGFX_GEMINILAKE>>
|
||||
populateGLK;
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_GEMINILAKE, CommandListImmediateProductFamily<IGFX_GEMINILAKE>>
|
||||
populateGLKImmediate;
|
||||
|
||||
} // namespace L0
|
||||
21
level_zero/core/source/gen9/glk/cmdqueue_glk.cpp
Normal file
21
level_zero/core/source/gen9/glk/cmdqueue_glk.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue_hw_base.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandQueuePopulateFactory<IGFX_GEMINILAKE, CommandQueueHw<IGFX_GEN9_CORE>> populateGLK;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/glk/image_glk.cpp
Normal file
22
level_zero/core/source/gen9/glk/image_glk.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_GEMINILAKE> : public ImageCoreFamily<IGFX_GEN9_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_GEMINILAKE, ImageProductFamily<IGFX_GEMINILAKE>> populateGLK;
|
||||
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/gen9/glk/kernel_glk.cpp
Normal file
14
level_zero/core/source/gen9/glk/kernel_glk.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_GEMINILAKE, KernelHw<IGFX_GEN9_CORE>> populateGLK;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/glk/sampler_glk.cpp
Normal file
22
level_zero/core/source/gen9/glk/sampler_glk.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/sampler_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct SamplerProductFamily<IGFX_GEMINILAKE> : public SamplerCoreFamily<IGFX_GEN9_CORE> {
|
||||
using SamplerCoreFamily::SamplerCoreFamily;
|
||||
};
|
||||
|
||||
static SamplerPopulateFactory<IGFX_GEMINILAKE, SamplerProductFamily<IGFX_GEMINILAKE>> populateGLK;
|
||||
|
||||
} // namespace L0
|
||||
18
level_zero/core/source/gen9/kbl/CMakeLists.txt
Normal file
18
level_zero/core/source/gen9/kbl/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_KBL)
|
||||
set(HW_SOURCES_GEN9
|
||||
${HW_SOURCES_GEN9}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_kbl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_kbl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_kbl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_kbl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_kbl.cpp
|
||||
PARENT_SCOPE
|
||||
)
|
||||
endif()
|
||||
26
level_zero/core/source/gen9/kbl/cmdlist_kbl.cpp
Normal file
26
level_zero/core/source/gen9/kbl/cmdlist_kbl.cpp
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_base.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.h"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.inl"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
static CommandListPopulateFactory<IGFX_KABYLAKE, CommandListProductFamily<IGFX_KABYLAKE>>
|
||||
populateKBL;
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_KABYLAKE, CommandListImmediateProductFamily<IGFX_KABYLAKE>>
|
||||
populateKBLImmediate;
|
||||
} // namespace L0
|
||||
21
level_zero/core/source/gen9/kbl/cmdqueue_kbl.cpp
Normal file
21
level_zero/core/source/gen9/kbl/cmdqueue_kbl.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue_hw_base.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandQueuePopulateFactory<IGFX_KABYLAKE, CommandQueueHw<IGFX_GEN9_CORE>> populateKBL;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/kbl/image_kbl.cpp
Normal file
22
level_zero/core/source/gen9/kbl/image_kbl.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_KABYLAKE> : public ImageCoreFamily<IGFX_GEN9_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_KABYLAKE, ImageProductFamily<IGFX_KABYLAKE>> populateKBL;
|
||||
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/gen9/kbl/kernel_kbl.cpp
Normal file
14
level_zero/core/source/gen9/kbl/kernel_kbl.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_KABYLAKE, KernelHw<IGFX_GEN9_CORE>> populateKBL;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/kbl/sampler_kbl.cpp
Normal file
22
level_zero/core/source/gen9/kbl/sampler_kbl.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/sampler_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct SamplerProductFamily<IGFX_KABYLAKE> : public SamplerCoreFamily<IGFX_GEN9_CORE> {
|
||||
using SamplerCoreFamily::SamplerCoreFamily;
|
||||
};
|
||||
|
||||
static SamplerPopulateFactory<IGFX_KABYLAKE, SamplerProductFamily<IGFX_KABYLAKE>> populateKBL;
|
||||
|
||||
} // namespace L0
|
||||
18
level_zero/core/source/gen9/skl/CMakeLists.txt
Normal file
18
level_zero/core/source/gen9/skl/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_SKL)
|
||||
set(HW_SOURCES_GEN9
|
||||
${HW_SOURCES_GEN9}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_skl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_skl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_skl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_skl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_skl.cpp
|
||||
PARENT_SCOPE
|
||||
)
|
||||
endif()
|
||||
27
level_zero/core/source/gen9/skl/cmdlist_skl.cpp
Normal file
27
level_zero/core/source/gen9/skl/cmdlist_skl.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_base.inl"
|
||||
#include "level_zero/core/source/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.h"
|
||||
#include "level_zero/core/source/gen9/cmdlist_gen9.inl"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandListPopulateFactory<IGFX_SKYLAKE, CommandListProductFamily<IGFX_SKYLAKE>> populateSKL;
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_SKYLAKE, CommandListImmediateProductFamily<IGFX_SKYLAKE>>
|
||||
populateSKLImmediate;
|
||||
|
||||
} // namespace L0
|
||||
21
level_zero/core/source/gen9/skl/cmdqueue_skl.cpp
Normal file
21
level_zero/core/source/gen9/skl/cmdqueue_skl.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue_hw_base.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static CommandQueuePopulateFactory<IGFX_SKYLAKE, CommandQueueHw<IGFX_GEN9_CORE>> populateSKL;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/skl/image_skl.cpp
Normal file
22
level_zero/core/source/gen9/skl/image_skl.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_SKYLAKE> : public ImageCoreFamily<IGFX_GEN9_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_SKYLAKE, ImageProductFamily<IGFX_SKYLAKE>> populateSKL;
|
||||
|
||||
} // namespace L0
|
||||
14
level_zero/core/source/gen9/skl/kernel_skl.cpp
Normal file
14
level_zero/core/source/gen9/skl/kernel_skl.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_SKYLAKE, KernelHw<IGFX_GEN9_CORE>> populateSKL;
|
||||
|
||||
} // namespace L0
|
||||
22
level_zero/core/source/gen9/skl/sampler_skl.cpp
Normal file
22
level_zero/core/source/gen9/skl/sampler_skl.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen9/hw_cmds.h"
|
||||
#include "shared/source/gen9/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/sampler_hw.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct SamplerProductFamily<IGFX_SKYLAKE> : public SamplerCoreFamily<IGFX_GEN9_CORE> {
|
||||
using SamplerCoreFamily::SamplerCoreFamily;
|
||||
};
|
||||
|
||||
static SamplerPopulateFactory<IGFX_SKYLAKE, SamplerProductFamily<IGFX_SKYLAKE>> populateSKL;
|
||||
|
||||
} // namespace L0
|
||||
26
level_zero/core/source/hw_helpers/hw_helpers.h
Normal file
26
level_zero/core/source/hw_helpers/hw_helpers.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// Guard against multiple inclusion: this header defines inline functions, so
// including it twice in one translation unit would otherwise be a redefinition.
#pragma once

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/hw_info.h"

#include <cstdint>

namespace L0 {

// Returns the intermediate cache size for the given hardware.
// This implementation ignores hwInfo and always reports 0.
inline uint64_t getIntermediateCacheSize(const NEO::HardwareInfo &hwInfo) {
    return 0u;
}

// Hook for waiting on a task count with a KMD-notify fallback.
// This implementation is intentionally empty: every argument is ignored and
// the call returns immediately without touching csr.
inline void waitForTaskCountWithKmdNotifyFallbackHelper(NEO::CommandStreamReceiver *csr,
                                                        uint32_t taskCountToWait,
                                                        NEO::FlushStamp flushStampToWait,
                                                        bool useQuickKmdSleep,
                                                        bool forcePowerSavingMode) {
}

} // namespace L0
|
||||
56
level_zero/core/source/image.h
Normal file
56
level_zero/core/source/image.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
|
||||
#include "level_zero/core/source/device.h"
|
||||
#include <level_zero/ze_image.h>
|
||||
|
||||
// Empty tag type behind the image handle. L0::Image derives from it, and
// Image::fromHandle() static_casts a ze_image_handle_t back to Image *.
// NOTE(review): assumes ze_image_handle_t is a pointer to this struct, as
// declared by the Level Zero API headers — confirm.
struct _ze_image_handle_t {};
|
||||
|
||||
namespace NEO {
// Forward declaration only; this header names the type in
// Image::getImageInfo() without needing its definition.
struct ImageInfo;
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct Image : _ze_image_handle_t {
|
||||
template <typename Type>
|
||||
struct Allocator {
|
||||
static Image *allocate() { return new Type(); }
|
||||
};
|
||||
|
||||
virtual ~Image() = default;
|
||||
virtual ze_result_t destroy() = 0;
|
||||
|
||||
static Image *create(uint32_t productFamily, Device *device, const ze_image_desc_t *desc);
|
||||
|
||||
virtual NEO::GraphicsAllocation *getAllocation() = 0;
|
||||
virtual void decoupleAllocation(NEO::CommandContainer &commandContainer) = 0;
|
||||
virtual void copySurfaceStateToSSH(void *surfaceStateHeap,
|
||||
const uint32_t surfaceStateOffset) = 0;
|
||||
virtual void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
|
||||
virtual size_t getSizeInBytes() = 0;
|
||||
virtual NEO::ImageInfo getImageInfo() = 0;
|
||||
virtual ze_image_desc_t getImageDesc() = 0;
|
||||
|
||||
static Image *fromHandle(ze_image_handle_t handle) { return static_cast<Image *>(handle); }
|
||||
|
||||
inline ze_image_handle_t toHandle() { return this; }
|
||||
};
|
||||
|
||||
using ImageAllocatorFn = Image *(*)();
|
||||
extern ImageAllocatorFn imageFactory[];
|
||||
|
||||
template <uint32_t productFamily, typename ImageType>
|
||||
struct ImagePopulateFactory {
|
||||
ImagePopulateFactory() { imageFactory[productFamily] = Image::Allocator<ImageType>::allocate; }
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user