mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
Optimize getRootDeviceIndex.
Virtual functions are not easily optimized by the compiler, so getRootDeviceIndex is now a regular (non-virtual) function. It is called very frequently, so it needs to be highly optimized. Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5baf25163f
commit
dc44b13b53
@@ -22,8 +22,6 @@
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
static_assert(NEO::HwInfoConfig::uuidSize == ZE_MAX_DEVICE_UUID_SIZE);
|
||||
|
||||
struct _ze_device_handle_t {};
|
||||
namespace NEO {
|
||||
class Device;
|
||||
|
||||
@@ -30,8 +30,8 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
|
||||
Device::Device(ExecutionEnvironment *executionEnvironment)
|
||||
: executionEnvironment(executionEnvironment) {
|
||||
Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex)
|
||||
: executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex) {
|
||||
this->executionEnvironment->incRefInternal();
|
||||
}
|
||||
|
||||
|
||||
@@ -108,7 +108,9 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
BuiltIns *getBuiltIns() const;
|
||||
void allocateSyncBufferHandler();
|
||||
|
||||
virtual uint32_t getRootDeviceIndex() const = 0;
|
||||
uint32_t getRootDeviceIndex() const {
|
||||
return this->rootDeviceIndex;
|
||||
}
|
||||
uint32_t getNumGenericSubDevices() const;
|
||||
Device *getSubDevice(uint32_t deviceId) const;
|
||||
Device *getNearestGenericSubDevice(uint32_t deviceId);
|
||||
@@ -134,7 +136,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
|
||||
protected:
|
||||
Device() = delete;
|
||||
Device(ExecutionEnvironment *executionEnvironment);
|
||||
Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex);
|
||||
|
||||
MOCKABLE_VIRTUAL void initializeCaps();
|
||||
|
||||
@@ -183,6 +185,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
bool hasGenericSubDevices = false;
|
||||
bool engineInstanced = false;
|
||||
bool rootCsrCreated = false;
|
||||
const uint32_t rootDeviceIndex;
|
||||
|
||||
SelectorCopyEngine selectorCopyEngine = {};
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield);
|
||||
|
||||
RootDevice::RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : Device(executionEnvironment), rootDeviceIndex(rootDeviceIndex) {}
|
||||
RootDevice::RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : Device(executionEnvironment, rootDeviceIndex) {}
|
||||
|
||||
RootDevice::~RootDevice() {
|
||||
if (getRootDeviceEnvironment().tagsManager) {
|
||||
@@ -29,10 +29,6 @@ RootDevice::~RootDevice() {
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t RootDevice::getRootDeviceIndex() const {
|
||||
return rootDeviceIndex;
|
||||
}
|
||||
|
||||
Device *RootDevice::getRootDevice() const {
|
||||
return const_cast<RootDevice *>(this);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@ class RootDevice : public Device {
|
||||
RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex);
|
||||
~RootDevice() override;
|
||||
|
||||
uint32_t getRootDeviceIndex() const override;
|
||||
Device *getRootDevice() const override;
|
||||
bool isSubDevice() const override { return false; }
|
||||
|
||||
@@ -26,7 +25,5 @@ class RootDevice : public Device {
|
||||
void createBindlessHeapsHelper() override;
|
||||
|
||||
void initializeRootCommandStreamReceiver();
|
||||
|
||||
const uint32_t rootDeviceIndex;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
namespace NEO {
|
||||
|
||||
SubDevice::SubDevice(ExecutionEnvironment *executionEnvironment, uint32_t subDeviceIndex, Device &rootDevice)
|
||||
: Device(executionEnvironment), rootDevice(static_cast<RootDevice &>(rootDevice)), subDeviceIndex(subDeviceIndex) {
|
||||
: Device(executionEnvironment, rootDevice.getRootDeviceIndex()), rootDevice(static_cast<RootDevice &>(rootDevice)), subDeviceIndex(subDeviceIndex) {
|
||||
UNRECOVERABLE_IF(rootDevice.isSubDevice());
|
||||
deviceBitfield = 0;
|
||||
deviceBitfield.set(subDeviceIndex);
|
||||
@@ -33,10 +33,6 @@ unique_ptr_if_unused<Device> SubDevice::decRefInternal() {
|
||||
return rootDevice.decRefInternal();
|
||||
}
|
||||
|
||||
uint32_t SubDevice::getRootDeviceIndex() const {
|
||||
return this->rootDevice.getRootDeviceIndex();
|
||||
}
|
||||
|
||||
uint32_t SubDevice::getSubDeviceIndex() const {
|
||||
return subDeviceIndex;
|
||||
}
|
||||
|
||||
@@ -17,8 +17,6 @@ class SubDevice : public Device {
|
||||
void incRefInternal() override;
|
||||
unique_ptr_if_unused<Device> decRefInternal() override;
|
||||
|
||||
uint32_t getRootDeviceIndex() const override;
|
||||
|
||||
Device *getRootDevice() const override;
|
||||
|
||||
uint32_t getSubDeviceIndex() const;
|
||||
|
||||
Reference in New Issue
Block a user