Optimize getRootDeviceIndex.

Virtual functions are not easily optimized by the compiler,
so make this a regular (non-virtual) function.
This function is called heavily, so it needs to be highly optimized.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2021-12-16 11:08:32 +00:00
committed by Compute-Runtime-Automation
parent 5baf25163f
commit dc44b13b53
7 changed files with 9 additions and 21 deletions

View File

@@ -22,8 +22,6 @@
#include "CL/cl.h"
static_assert(NEO::HwInfoConfig::uuidSize == ZE_MAX_DEVICE_UUID_SIZE);
struct _ze_device_handle_t {};
namespace NEO {
class Device;

View File

@@ -30,8 +30,8 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
uint32_t rootDeviceIndex,
const DeviceBitfield deviceBitfield);
Device::Device(ExecutionEnvironment *executionEnvironment)
: executionEnvironment(executionEnvironment) {
// Constructs the Device base: stores the execution environment pointer and the
// root device index in their members, then calls incRefInternal() on the
// execution environment.
Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex)
: executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex) {
this->executionEnvironment->incRefInternal();
}

View File

@@ -108,7 +108,9 @@ class Device : public ReferenceTrackedObject<Device> {
BuiltIns *getBuiltIns() const;
void allocateSyncBufferHandler();
virtual uint32_t getRootDeviceIndex() const = 0;
// Returns the root device index held in this Device's rootDeviceIndex member.
// Non-virtual and defined inline in the class body.
uint32_t getRootDeviceIndex() const {
return this->rootDeviceIndex;
}
uint32_t getNumGenericSubDevices() const;
Device *getSubDevice(uint32_t deviceId) const;
Device *getNearestGenericSubDevice(uint32_t deviceId);
@@ -134,7 +136,7 @@ class Device : public ReferenceTrackedObject<Device> {
protected:
Device() = delete;
Device(ExecutionEnvironment *executionEnvironment);
Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex);
MOCKABLE_VIRTUAL void initializeCaps();
@@ -183,6 +185,7 @@ class Device : public ReferenceTrackedObject<Device> {
bool hasGenericSubDevices = false;
bool engineInstanced = false;
bool rootCsrCreated = false;
const uint32_t rootDeviceIndex;
SelectorCopyEngine selectorCopyEngine = {};

View File

@@ -21,7 +21,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
uint32_t rootDeviceIndex,
const DeviceBitfield deviceBitfield);
RootDevice::RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : Device(executionEnvironment), rootDeviceIndex(rootDeviceIndex) {}
// Forwards both arguments to the Device base constructor; the body is empty.
RootDevice::RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : Device(executionEnvironment, rootDeviceIndex) {}
RootDevice::~RootDevice() {
if (getRootDeviceEnvironment().tagsManager) {
@@ -29,10 +29,6 @@ RootDevice::~RootDevice() {
}
}
// Out-of-line accessor that returns the rootDeviceIndex member.
uint32_t RootDevice::getRootDeviceIndex() const {
return rootDeviceIndex;
}
// Returns this object itself as a Device pointer. The const_cast strips the
// constness that `this` carries inside a const member function.
Device *RootDevice::getRootDevice() const {
return const_cast<RootDevice *>(this);
}

View File

@@ -17,7 +17,6 @@ class RootDevice : public Device {
RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex);
~RootDevice() override;
uint32_t getRootDeviceIndex() const override;
Device *getRootDevice() const override;
bool isSubDevice() const override { return false; }
@@ -26,7 +25,5 @@ class RootDevice : public Device {
void createBindlessHeapsHelper() override;
void initializeRootCommandStreamReceiver();
const uint32_t rootDeviceIndex;
};
} // namespace NEO

View File

@@ -14,7 +14,7 @@
namespace NEO {
SubDevice::SubDevice(ExecutionEnvironment *executionEnvironment, uint32_t subDeviceIndex, Device &rootDevice)
: Device(executionEnvironment), rootDevice(static_cast<RootDevice &>(rootDevice)), subDeviceIndex(subDeviceIndex) {
: Device(executionEnvironment, rootDevice.getRootDeviceIndex()), rootDevice(static_cast<RootDevice &>(rootDevice)), subDeviceIndex(subDeviceIndex) {
UNRECOVERABLE_IF(rootDevice.isSubDevice());
deviceBitfield = 0;
deviceBitfield.set(subDeviceIndex);
@@ -33,10 +33,6 @@ unique_ptr_if_unused<Device> SubDevice::decRefInternal() {
return rootDevice.decRefInternal();
}
// Out-of-line accessor that delegates to the rootDevice member and returns
// whatever index that root device reports.
uint32_t SubDevice::getRootDeviceIndex() const {
return this->rootDevice.getRootDeviceIndex();
}
// Returns the subDeviceIndex member, which the SubDevice constructor
// initializes from its subDeviceIndex argument.
uint32_t SubDevice::getSubDeviceIndex() const {
return subDeviceIndex;
}

View File

@@ -17,8 +17,6 @@ class SubDevice : public Device {
void incRefInternal() override;
unique_ptr_if_unused<Device> decRefInternal() override;
uint32_t getRootDeviceIndex() const override;
Device *getRootDevice() const override;
uint32_t getSubDeviceIndex() const;