From dc44b13b530af4c0ddc504c4395212f0d6d95e2c Mon Sep 17 00:00:00 2001 From: Michal Mrozek Date: Thu, 16 Dec 2021 11:08:32 +0000 Subject: [PATCH] Optimize getRootDeviceIndex. Virtual functions are not easily optimized by the compiler, so make this a regular non-virtual function. This function is called heavily, so it needs to be highly optimized. Signed-off-by: Michal Mrozek --- level_zero/core/source/device/device.h | 2 -- shared/source/device/device.cpp | 4 ++-- shared/source/device/device.h | 7 +++++-- shared/source/device/root_device.cpp | 6 +----- shared/source/device/root_device.h | 3 --- shared/source/device/sub_device.cpp | 6 +----- shared/source/device/sub_device.h | 2 -- 7 files changed, 9 insertions(+), 21 deletions(-) diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index 8020f59956..e304142413 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -22,8 +22,6 @@ #include "CL/cl.h" -static_assert(NEO::HwInfoConfig::uuidSize == ZE_MAX_DEVICE_UUID_SIZE); - struct _ze_device_handle_t {}; namespace NEO { class Device; diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 635f799243..61a6d2122b 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -30,8 +30,8 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); -Device::Device(ExecutionEnvironment *executionEnvironment) - : executionEnvironment(executionEnvironment) { +Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex) + : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex) { this->executionEnvironment->incRefInternal(); } diff --git a/shared/source/device/device.h b/shared/source/device/device.h index e3841258f0..2dbd5fb210 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -108,7 
+108,9 @@ class Device : public ReferenceTrackedObject { BuiltIns *getBuiltIns() const; void allocateSyncBufferHandler(); - virtual uint32_t getRootDeviceIndex() const = 0; + uint32_t getRootDeviceIndex() const { + return this->rootDeviceIndex; + } uint32_t getNumGenericSubDevices() const; Device *getSubDevice(uint32_t deviceId) const; Device *getNearestGenericSubDevice(uint32_t deviceId); @@ -134,7 +136,7 @@ class Device : public ReferenceTrackedObject { protected: Device() = delete; - Device(ExecutionEnvironment *executionEnvironment); + Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex); MOCKABLE_VIRTUAL void initializeCaps(); @@ -183,6 +185,7 @@ class Device : public ReferenceTrackedObject { bool hasGenericSubDevices = false; bool engineInstanced = false; bool rootCsrCreated = false; + const uint32_t rootDeviceIndex; SelectorCopyEngine selectorCopyEngine = {}; diff --git a/shared/source/device/root_device.cpp b/shared/source/device/root_device.cpp index aecad9c8e0..cc66f69a47 100644 --- a/shared/source/device/root_device.cpp +++ b/shared/source/device/root_device.cpp @@ -21,7 +21,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); -RootDevice::RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : Device(executionEnvironment), rootDeviceIndex(rootDeviceIndex) {} +RootDevice::RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : Device(executionEnvironment, rootDeviceIndex) {} RootDevice::~RootDevice() { if (getRootDeviceEnvironment().tagsManager) { @@ -29,10 +29,6 @@ RootDevice::~RootDevice() { } } -uint32_t RootDevice::getRootDeviceIndex() const { - return rootDeviceIndex; -} - Device *RootDevice::getRootDevice() const { return const_cast(this); } diff --git a/shared/source/device/root_device.h b/shared/source/device/root_device.h index eb9a0a7f1e..d191c9b844 100644 --- 
a/shared/source/device/root_device.h +++ b/shared/source/device/root_device.h @@ -17,7 +17,6 @@ class RootDevice : public Device { RootDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex); ~RootDevice() override; - uint32_t getRootDeviceIndex() const override; Device *getRootDevice() const override; bool isSubDevice() const override { return false; } @@ -26,7 +25,5 @@ class RootDevice : public Device { void createBindlessHeapsHelper() override; void initializeRootCommandStreamReceiver(); - - const uint32_t rootDeviceIndex; }; } // namespace NEO diff --git a/shared/source/device/sub_device.cpp b/shared/source/device/sub_device.cpp index abf3f1032d..a2c27013a5 100644 --- a/shared/source/device/sub_device.cpp +++ b/shared/source/device/sub_device.cpp @@ -14,7 +14,7 @@ namespace NEO { SubDevice::SubDevice(ExecutionEnvironment *executionEnvironment, uint32_t subDeviceIndex, Device &rootDevice) - : Device(executionEnvironment), rootDevice(static_cast(rootDevice)), subDeviceIndex(subDeviceIndex) { + : Device(executionEnvironment, rootDevice.getRootDeviceIndex()), rootDevice(static_cast(rootDevice)), subDeviceIndex(subDeviceIndex) { UNRECOVERABLE_IF(rootDevice.isSubDevice()); deviceBitfield = 0; deviceBitfield.set(subDeviceIndex); @@ -33,10 +33,6 @@ unique_ptr_if_unused SubDevice::decRefInternal() { return rootDevice.decRefInternal(); } -uint32_t SubDevice::getRootDeviceIndex() const { - return this->rootDevice.getRootDeviceIndex(); -} - uint32_t SubDevice::getSubDeviceIndex() const { return subDeviceIndex; } diff --git a/shared/source/device/sub_device.h b/shared/source/device/sub_device.h index 7fad321d1f..937d78cf39 100644 --- a/shared/source/device/sub_device.h +++ b/shared/source/device/sub_device.h @@ -17,8 +17,6 @@ class SubDevice : public Device { void incRefInternal() override; unique_ptr_if_unused decRefInternal() override; - uint32_t getRootDeviceIndex() const override; - Device *getRootDevice() const override; uint32_t 
getSubDeviceIndex() const;