mirror of
https://github.com/intel/llvm.git
synced 2026-01-19 01:15:50 +08:00
[StreamExecutor] Rename Executor to Device
Summary: This more clearly describes what the class is. Reviewers: jlebar Subscribers: jprice, parallel_libs-commits Differential Revision: https://reviews.llvm.org/D23851 llvm-svn: 279669
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
//===-- Executor.h - The Executor class -------------------------*- C++ -*-===//
|
||||
//===-- Device.h - The Device class -----------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@@ -8,12 +8,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// The Executor class which represents a single device of a specific platform.
|
||||
/// The Device class which represents a single device of a specific platform.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef STREAMEXECUTOR_EXECUTOR_H
|
||||
#define STREAMEXECUTOR_EXECUTOR_H
|
||||
#ifndef STREAMEXECUTOR_DEVICE_H
|
||||
#define STREAMEXECUTOR_DEVICE_H
|
||||
|
||||
#include "streamexecutor/KernelSpec.h"
|
||||
#include "streamexecutor/PlatformInterfaces.h"
|
||||
@@ -24,10 +24,10 @@ namespace streamexecutor {
|
||||
class KernelInterface;
|
||||
class Stream;
|
||||
|
||||
class Executor {
|
||||
class Device {
|
||||
public:
|
||||
explicit Executor(PlatformExecutor *PExecutor);
|
||||
virtual ~Executor();
|
||||
explicit Device(PlatformDevice *PDevice);
|
||||
virtual ~Device();
|
||||
|
||||
/// Gets the kernel implementation for the underlying platform.
|
||||
virtual Expected<std::unique_ptr<KernelInterface>>
|
||||
@@ -42,7 +42,7 @@ public:
|
||||
template <typename T>
|
||||
Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
|
||||
Expected<GlobalDeviceMemoryBase> MaybeBase =
|
||||
PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
|
||||
PDevice->allocateDeviceMemory(ElementCount * sizeof(T));
|
||||
if (!MaybeBase)
|
||||
return MaybeBase.takeError();
|
||||
return GlobalDeviceMemory<T>(*MaybeBase);
|
||||
@@ -50,7 +50,7 @@ public:
|
||||
|
||||
/// Frees memory previously allocated with allocateDeviceMemory.
|
||||
template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) {
|
||||
return PExecutor->freeDeviceMemory(Memory);
|
||||
return PDevice->freeDeviceMemory(Memory);
|
||||
}
|
||||
|
||||
/// Allocates an array of ElementCount entries of type T in host memory.
|
||||
@@ -59,7 +59,7 @@ public:
|
||||
/// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
|
||||
template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
|
||||
Expected<void *> MaybeMemory =
|
||||
PExecutor->allocateHostMemory(ElementCount * sizeof(T));
|
||||
PDevice->allocateHostMemory(ElementCount * sizeof(T));
|
||||
if (!MaybeMemory)
|
||||
return MaybeMemory.takeError();
|
||||
return static_cast<T *>(*MaybeMemory);
|
||||
@@ -67,7 +67,7 @@ public:
|
||||
|
||||
/// Frees memory previously allocated with allocateHostMemory.
|
||||
template <typename T> Error freeHostMemory(T *Memory) {
|
||||
return PExecutor->freeHostMemory(Memory);
|
||||
return PDevice->freeHostMemory(Memory);
|
||||
}
|
||||
|
||||
/// Registers a previously allocated host array of type T for asynchronous
|
||||
@@ -77,15 +77,15 @@ public:
|
||||
/// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
|
||||
template <typename T>
|
||||
Error registerHostMemory(T *Memory, size_t ElementCount) {
|
||||
return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T));
|
||||
return PDevice->registerHostMemory(Memory, ElementCount * sizeof(T));
|
||||
}
|
||||
|
||||
/// Unregisters host memory previously registered by registerHostMemory.
|
||||
template <typename T> Error unregisterHostMemory(T *Memory) {
|
||||
return PExecutor->unregisterHostMemory(Memory);
|
||||
return PDevice->unregisterHostMemory(Memory);
|
||||
}
|
||||
|
||||
/// \anchor ExecutorHostSyncCopyGroup
|
||||
/// \anchor DeviceHostSyncCopyGroup
|
||||
/// \name Host-synchronous device memory copying functions
|
||||
///
|
||||
/// These methods block the calling host thread while copying data to or from
|
||||
@@ -125,9 +125,9 @@ public:
|
||||
return make_error(
|
||||
"copying too many elements, " + llvm::Twine(ElementCount) +
|
||||
", to a host array of element count " + llvm::Twine(Dst.size()));
|
||||
return PExecutor->synchronousCopyD2H(
|
||||
Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0,
|
||||
ElementCount * sizeof(T));
|
||||
return PDevice->synchronousCopyD2H(Src.getBaseMemory(),
|
||||
Src.getElementOffset() * sizeof(T),
|
||||
Dst.data(), 0, ElementCount * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -179,9 +179,9 @@ public:
|
||||
llvm::Twine(ElementCount) +
|
||||
", to a device array of element count " +
|
||||
llvm::Twine(Dst.getElementCount()));
|
||||
return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
|
||||
Dst.getElementOffset() * sizeof(T),
|
||||
ElementCount * sizeof(T));
|
||||
return PDevice->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
|
||||
Dst.getElementOffset() * sizeof(T),
|
||||
ElementCount * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -234,7 +234,7 @@ public:
|
||||
llvm::Twine(ElementCount) +
|
||||
", to a device array of element count " +
|
||||
llvm::Twine(Dst.getElementCount()));
|
||||
return PExecutor->synchronousCopyD2D(
|
||||
return PDevice->synchronousCopyD2D(
|
||||
Src.getBaseMemory(), Src.getElementOffset() * sizeof(T),
|
||||
Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T),
|
||||
ElementCount * sizeof(T));
|
||||
@@ -292,9 +292,9 @@ public:
|
||||
///@} End host-synchronous device memory copying functions
|
||||
|
||||
private:
|
||||
PlatformExecutor *PExecutor;
|
||||
PlatformDevice *PDevice;
|
||||
};
|
||||
|
||||
} // namespace streamexecutor
|
||||
|
||||
#endif // STREAMEXECUTOR_EXECUTOR_H
|
||||
#endif // STREAMEXECUTOR_DEVICE_H
|
||||
@@ -54,13 +54,13 @@
|
||||
/// function as follows:
|
||||
/// \code
|
||||
/// namespace ccn = compiler_cuda_namespace;
|
||||
/// // Assumes Executor is a pointer to the StreamExecutor on which to
|
||||
/// // launch the kernel.
|
||||
/// // Assumes Device is a pointer to the Device on which to launch the
|
||||
/// // kernel.
|
||||
/// //
|
||||
/// // See KernelSpec.h for details on how the compiler can create a
|
||||
/// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below.
|
||||
/// Expected<ccn::SaxpyKernel> MaybeKernel =
|
||||
/// ccn::SaxpyKernel::create(Executor, ccn::SaxpyKernelLoaderSpec);
|
||||
/// ccn::SaxpyKernel::create(Device, ccn::SaxpyKernelLoaderSpec);
|
||||
/// if (!MaybeKernel) { /* Handle error */ }
|
||||
/// ccn::SaxpyKernel SaxpyKernel = *MaybeKernel;
|
||||
/// Launch(SaxpyKernel, A, X, Y);
|
||||
@@ -84,7 +84,7 @@
|
||||
|
||||
namespace streamexecutor {
|
||||
|
||||
class Executor;
|
||||
class Device;
|
||||
class KernelInterface;
|
||||
|
||||
/// The base class for device kernel functions.
|
||||
@@ -100,13 +100,13 @@ public:
|
||||
KernelBase &operator=(KernelBase &&) = default;
|
||||
~KernelBase();
|
||||
|
||||
/// Creates a kernel object from an Executor and a MultiKernelLoaderSpec.
|
||||
/// Creates a kernel object from a Device and a MultiKernelLoaderSpec.
|
||||
///
|
||||
/// The Executor knows which platform it belongs to and the
|
||||
/// The Device knows which platform it belongs to and the
|
||||
/// MultiKernelLoaderSpec knows how to find the kernel code for different
|
||||
/// platforms, so the combined information is enough to get the kernel code
|
||||
/// for the appropriate platform.
|
||||
static Expected<KernelBase> create(Executor *ParentExecutor,
|
||||
static Expected<KernelBase> create(Device *Dev,
|
||||
const MultiKernelLoaderSpec &Spec);
|
||||
|
||||
const std::string &getName() const { return Name; }
|
||||
@@ -116,11 +116,11 @@ public:
|
||||
KernelInterface *getImplementation() { return Implementation.get(); }
|
||||
|
||||
private:
|
||||
KernelBase(Executor *ParentExecutor, const std::string &Name,
|
||||
KernelBase(Device *Dev, const std::string &Name,
|
||||
const std::string &DemangledName,
|
||||
std::unique_ptr<KernelInterface> Implementation);
|
||||
|
||||
Executor *ParentExecutor;
|
||||
Device *TheDevice;
|
||||
std::string Name;
|
||||
std::string DemangledName;
|
||||
std::unique_ptr<KernelInterface> Implementation;
|
||||
@@ -136,9 +136,9 @@ public:
|
||||
TypedKernel &operator=(TypedKernel &&) = default;
|
||||
|
||||
/// Parameters here have the same meaning as in KernelBase::create.
|
||||
static Expected<TypedKernel> create(Executor *ParentExecutor,
|
||||
static Expected<TypedKernel> create(Device *Dev,
|
||||
const MultiKernelLoaderSpec &Spec) {
|
||||
auto MaybeBase = KernelBase::create(ParentExecutor, Spec);
|
||||
auto MaybeBase = KernelBase::create(Dev, Spec);
|
||||
if (!MaybeBase) {
|
||||
return MaybeBase.takeError();
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
|
||||
namespace streamexecutor {
|
||||
|
||||
class PlatformExecutor;
|
||||
class PlatformDevice;
|
||||
|
||||
/// Methods supported by device kernel function objects on all platforms.
|
||||
class KernelInterface {
|
||||
@@ -41,15 +41,14 @@ class KernelInterface {
|
||||
/// Platform-specific stream handle.
|
||||
class PlatformStreamHandle {
|
||||
public:
|
||||
explicit PlatformStreamHandle(PlatformExecutor *PExecutor)
|
||||
: PExecutor(PExecutor) {}
|
||||
explicit PlatformStreamHandle(PlatformDevice *PDevice) : PDevice(PDevice) {}
|
||||
|
||||
virtual ~PlatformStreamHandle();
|
||||
|
||||
PlatformExecutor *getExecutor() { return PExecutor; }
|
||||
PlatformDevice *getDevice() { return PDevice; }
|
||||
|
||||
private:
|
||||
PlatformExecutor *PExecutor;
|
||||
PlatformDevice *PDevice;
|
||||
};
|
||||
|
||||
/// Raw device methods that must be implemented by each platform.
|
||||
@@ -57,11 +56,11 @@ private:
|
||||
/// This class defines the platform interface that supports executing work on a
|
||||
/// device.
|
||||
///
|
||||
/// The public Executor and Stream classes have the type-safe versions of the
|
||||
/// The public Device and Stream classes have the type-safe versions of the
|
||||
/// functions in this interface.
|
||||
class PlatformExecutor {
|
||||
class PlatformDevice {
|
||||
public:
|
||||
virtual ~PlatformExecutor();
|
||||
virtual ~PlatformDevice();
|
||||
|
||||
virtual std::string getName() const = 0;
|
||||
|
||||
|
||||
@@ -12,19 +12,18 @@
|
||||
/// A Stream instance represents a queue of sequential, host-asynchronous work
|
||||
/// to be performed on a device.
|
||||
///
|
||||
/// To enqueue work on a device, first create a Executor instance for a
|
||||
/// given device and then use that Executor to create a Stream instance.
|
||||
/// The Stream instance will perform its work on the device managed by the
|
||||
/// Executor that created it.
|
||||
/// To enqueue work on a device, first create a Device instance then use that
|
||||
/// Device to create a Stream instance. The Stream instance will perform its
|
||||
/// work on the device managed by the Device object that created it.
|
||||
///
|
||||
/// The various "then" methods of the Stream object, such as thenCopyH2D and
|
||||
/// thenLaunch, may be used to enqueue work on the Stream, and the
|
||||
/// blockHostUntilDone() method may be used to block the host code until the
|
||||
/// Stream has completed all its work.
|
||||
///
|
||||
/// Multiple Stream instances can be created for the same Executor. This
|
||||
/// allows several independent streams of computation to be performed
|
||||
/// simultaneously on a single device.
|
||||
/// Multiple Stream instances can be created for the same Device. This allows
|
||||
/// several independent streams of computation to be performed simultaneously on
|
||||
/// a single device.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@@ -94,8 +93,8 @@ public:
|
||||
const ParameterTs &... Arguments) {
|
||||
auto ArgumentArray =
|
||||
make_kernel_argument_pack<ParameterTs...>(Arguments...);
|
||||
setError(PExecutor->launch(ThePlatformStream.get(), BlockSize, GridSize,
|
||||
Kernel, ArgumentArray));
|
||||
setError(PDevice->launch(ThePlatformStream.get(), BlockSize, GridSize,
|
||||
Kernel, ArgumentArray));
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -105,13 +104,13 @@ public:
|
||||
/// return without waiting for the operation to complete.
|
||||
///
|
||||
/// Any host memory used as a source or destination for one of these
|
||||
/// operations must be allocated with Executor::allocateHostMemory or
|
||||
/// registered with Executor::registerHostMemory. Otherwise, the enqueuing
|
||||
/// operation may block until the copy operation is fully complete.
|
||||
/// operations must be allocated with Device::allocateHostMemory or registered
|
||||
/// with Device::registerHostMemory. Otherwise, the enqueuing operation may
|
||||
/// block until the copy operation is fully complete.
|
||||
///
|
||||
/// The arguments and bounds checking for these methods match the API of the
|
||||
/// \ref ExecutorHostSyncCopyGroup
|
||||
/// "host-synchronous device memory copying functions" of Executor.
|
||||
/// \ref DeviceHostSyncCopyGroup
|
||||
/// "host-synchronous device memory copying functions" of Device.
|
||||
///@{
|
||||
|
||||
template <typename T>
|
||||
@@ -125,9 +124,9 @@ public:
|
||||
setError("copying too many elements, " + llvm::Twine(ElementCount) +
|
||||
", to a host array of element count " + llvm::Twine(Dst.size()));
|
||||
else
|
||||
setError(PExecutor->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
|
||||
Src.getElementOffset() * sizeof(T),
|
||||
Dst.data(), 0, ElementCount * sizeof(T)));
|
||||
setError(PDevice->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
|
||||
Src.getElementOffset() * sizeof(T), Dst.data(),
|
||||
0, ElementCount * sizeof(T)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -182,7 +181,7 @@ public:
|
||||
", to a device array of element count " +
|
||||
llvm::Twine(Dst.getElementCount()));
|
||||
else
|
||||
setError(PExecutor->copyH2D(
|
||||
setError(PDevice->copyH2D(
|
||||
ThePlatformStream.get(), Src.data(), 0, Dst.getBaseMemory(),
|
||||
Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
|
||||
return *this;
|
||||
@@ -238,7 +237,7 @@ public:
|
||||
", to a device array of element count " +
|
||||
llvm::Twine(Dst.getElementCount()));
|
||||
else
|
||||
setError(PExecutor->copyD2D(
|
||||
setError(PDevice->copyD2D(
|
||||
ThePlatformStream.get(), Src.getBaseMemory(),
|
||||
Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(),
|
||||
Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
|
||||
@@ -322,8 +321,8 @@ private:
|
||||
ErrorMessage = Message.str();
|
||||
}
|
||||
|
||||
/// The PlatformExecutor that supports the operations of this stream.
|
||||
PlatformExecutor *PExecutor;
|
||||
/// The PlatformDevice that supports the operations of this stream.
|
||||
PlatformDevice *PDevice;
|
||||
|
||||
/// The platform-specific stream handle for this instance.
|
||||
std::unique_ptr<PlatformStreamHandle> ThePlatformStream;
|
||||
|
||||
@@ -6,7 +6,7 @@ add_library(
|
||||
add_library(
|
||||
streamexecutor
|
||||
$<TARGET_OBJECTS:utils>
|
||||
Executor.cpp
|
||||
Device.cpp
|
||||
Kernel.cpp
|
||||
KernelSpec.cpp
|
||||
PackedKernelArgumentArray.cpp
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//===-- Executor.cpp - Executor implementation ----------------------------===//
|
||||
//===-- Device.cpp - Device implementation --------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@@ -8,11 +8,11 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// Implementation of Executor class internals.
|
||||
/// Implementation of Device class internals.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "streamexecutor/Executor.h"
|
||||
#include "streamexecutor/Device.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
@@ -23,17 +23,17 @@
|
||||
|
||||
namespace streamexecutor {
|
||||
|
||||
Executor::Executor(PlatformExecutor *PExecutor) : PExecutor(PExecutor) {}
|
||||
Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
|
||||
|
||||
Executor::~Executor() = default;
|
||||
Device::~Device() = default;
|
||||
|
||||
/// Creates a new Stream backed by a platform stream from this device.
///
/// If the platform cannot create a stream, its error is returned unchanged.
Expected<std::unique_ptr<Stream>> Device::createStream() {
  Expected<std::unique_ptr<PlatformStreamHandle>> MaybePlatformStream =
      PDevice->createStream();
  if (!MaybePlatformStream) {
    return MaybePlatformStream.takeError();
  }
  // The handle must refer back to the PlatformDevice that produced it.
  assert((*MaybePlatformStream)->getDevice() == PDevice &&
         "a device created a stream with a different stored device");
  return llvm::make_unique<Stream>(std::move(*MaybePlatformStream));
}
|
||||
@@ -13,31 +13,31 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "streamexecutor/Kernel.h"
|
||||
#include "streamexecutor/Executor.h"
|
||||
#include "streamexecutor/Device.h"
|
||||
#include "streamexecutor/PlatformInterfaces.h"
|
||||
|
||||
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
|
||||
|
||||
namespace streamexecutor {
|
||||
|
||||
KernelBase::KernelBase(Executor *ParentExecutor, const std::string &Name,
|
||||
KernelBase::KernelBase(Device *Dev, const std::string &Name,
|
||||
const std::string &DemangledName,
|
||||
std::unique_ptr<KernelInterface> Implementation)
|
||||
: ParentExecutor(ParentExecutor), Name(Name), DemangledName(DemangledName),
|
||||
: TheDevice(Dev), Name(Name), DemangledName(DemangledName),
|
||||
Implementation(std::move(Implementation)) {}
|
||||
|
||||
KernelBase::~KernelBase() = default;
|
||||
|
||||
Expected<KernelBase> KernelBase::create(Executor *ParentExecutor,
|
||||
Expected<KernelBase> KernelBase::create(Device *Dev,
|
||||
const MultiKernelLoaderSpec &Spec) {
|
||||
auto MaybeImplementation = ParentExecutor->getKernelImplementation(Spec);
|
||||
auto MaybeImplementation = Dev->getKernelImplementation(Spec);
|
||||
if (!MaybeImplementation) {
|
||||
return MaybeImplementation.takeError();
|
||||
}
|
||||
std::string Name = Spec.getKernelName();
|
||||
std::string DemangledName =
|
||||
llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr);
|
||||
KernelBase Instance(ParentExecutor, Name, DemangledName,
|
||||
KernelBase Instance(Dev, Name, DemangledName,
|
||||
std::move(*MaybeImplementation));
|
||||
return std::move(Instance);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,6 @@ namespace streamexecutor {
|
||||
|
||||
PlatformStreamHandle::~PlatformStreamHandle() = default;
|
||||
|
||||
PlatformExecutor::~PlatformExecutor() = default;
|
||||
PlatformDevice::~PlatformDevice() = default;
|
||||
|
||||
} // namespace streamexecutor
|
||||
|
||||
@@ -17,8 +17,7 @@
|
||||
namespace streamexecutor {
|
||||
|
||||
Stream::Stream(std::unique_ptr<PlatformStreamHandle> PStream)
|
||||
: PExecutor(PStream->getExecutor()), ThePlatformStream(std::move(PStream)) {
|
||||
}
|
||||
: PDevice(PStream->getDevice()), ThePlatformStream(std::move(PStream)) {}
|
||||
|
||||
Stream::~Stream() = default;
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
add_executable(
|
||||
executor_test
|
||||
ExecutorTest.cpp)
|
||||
device_test
|
||||
DeviceTest.cpp)
|
||||
target_link_libraries(
|
||||
executor_test
|
||||
device_test
|
||||
streamexecutor
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
add_test(ExecutorTest executor_test)
|
||||
add_test(DeviceTest device_test)
|
||||
|
||||
add_executable(
|
||||
kernel_test
|
||||
|
||||
476
parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp
Normal file
476
parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp
Normal file
@@ -0,0 +1,476 @@
|
||||
//===-- DeviceTest.cpp - Tests for Device ---------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This file contains the unit tests for Device code.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "streamexecutor/Device.h"
|
||||
#include "streamexecutor/PlatformInterfaces.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
namespace se = ::streamexecutor;
|
||||
|
||||
class MockPlatformDevice : public se::PlatformDevice {
|
||||
public:
|
||||
~MockPlatformDevice() override {}
|
||||
|
||||
std::string getName() const override { return "MockPlatformDevice"; }
|
||||
|
||||
se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
|
||||
createStream() override {
|
||||
return se::make_error("not implemented");
|
||||
}
|
||||
|
||||
se::Expected<se::GlobalDeviceMemoryBase>
|
||||
allocateDeviceMemory(size_t ByteCount) override {
|
||||
return se::GlobalDeviceMemoryBase(std::malloc(ByteCount));
|
||||
}
|
||||
|
||||
se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
|
||||
std::free(const_cast<void *>(Memory.getHandle()));
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
|
||||
return std::malloc(ByteCount);
|
||||
}
|
||||
|
||||
se::Error freeHostMemory(void *Memory) override {
|
||||
std::free(Memory);
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error registerHostMemory(void *, size_t) override {
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error unregisterHostMemory(void *) override {
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
|
||||
size_t SrcByteOffset, void *HostDst,
|
||||
size_t DstByteOffset,
|
||||
size_t ByteCount) override {
|
||||
std::memcpy(static_cast<char *>(HostDst) + DstByteOffset,
|
||||
static_cast<const char *>(DeviceSrc.getHandle()) +
|
||||
SrcByteOffset,
|
||||
ByteCount);
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
|
||||
se::GlobalDeviceMemoryBase DeviceDst,
|
||||
size_t DstByteOffset,
|
||||
size_t ByteCount) override {
|
||||
std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
|
||||
DstByteOffset,
|
||||
static_cast<const char *>(HostSrc) + SrcByteOffset, ByteCount);
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
|
||||
size_t DstByteOffset,
|
||||
const se::GlobalDeviceMemoryBase &DeviceSrc,
|
||||
size_t SrcByteOffset,
|
||||
size_t ByteCount) override {
|
||||
std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
|
||||
DstByteOffset,
|
||||
static_cast<const char *>(DeviceSrc.getHandle()) +
|
||||
SrcByteOffset,
|
||||
ByteCount);
|
||||
return se::Error::success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Test fixture to hold objects used by tests.
|
||||
class DeviceTest : public ::testing::Test {
|
||||
public:
|
||||
DeviceTest()
|
||||
: HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
|
||||
HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
|
||||
DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
|
||||
DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
|
||||
DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
|
||||
DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
|
||||
Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
|
||||
Device(&PDevice) {}
|
||||
|
||||
// Device memory is backed by host arrays.
|
||||
int HostA5[5];
|
||||
int HostB5[5];
|
||||
int HostA7[7];
|
||||
int HostB7[7];
|
||||
se::GlobalDeviceMemory<int> DeviceA5;
|
||||
se::GlobalDeviceMemory<int> DeviceB5;
|
||||
se::GlobalDeviceMemory<int> DeviceA7;
|
||||
se::GlobalDeviceMemory<int> DeviceB7;
|
||||
|
||||
// Host memory to be used as actual host memory.
|
||||
int Host5[5];
|
||||
int Host7[7];
|
||||
|
||||
MockPlatformDevice PDevice;
|
||||
se::Device Device;
|
||||
};
|
||||
|
||||
/// Expects that the se::Error produced by E reports success.
///
/// The value is captured once and consumed afterwards, in the same way as
/// EXPECT_ERROR, so an unexpected failure is recorded as a test failure rather
/// than being dropped unhandled.
/// NOTE(review): assumes se::Error follows llvm::Error's rule that a failure
/// value must be handled before destruction -- confirm.
#define EXPECT_NO_ERROR(E)                                                     \
  do {                                                                         \
    se::Error SeTestError_ = (E);                                              \
    EXPECT_FALSE(static_cast<bool>(SeTestError_));                             \
    consumeError(std::move(SeTestError_));                                     \
  } while (false)

/// Expects that the se::Error produced by E reports a failure, then consumes
/// it.
#define EXPECT_ERROR(E)                                                        \
  do {                                                                         \
    se::Error SeTestError_ = (E);                                              \
    EXPECT_TRUE(static_cast<bool>(SeTestError_));                              \
    consumeError(std::move(SeTestError_));                                     \
  } while (false)
|
||||
|
||||
using llvm::ArrayRef;
|
||||
using llvm::MutableArrayRef;
|
||||
|
||||
/// Allocates ten ints of device memory and frees them again.
TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) {
  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
      Device.allocateDeviceMemory<int>(10);
  // Stop here on failure: the dereference below is only valid when the
  // Expected holds a value.
  if (!MaybeMemory) {
    consumeError(MaybeMemory.takeError());
    FAIL() << "device memory allocation failed";
  }
  EXPECT_NO_ERROR(Device.freeDeviceMemory(*MaybeMemory));
}
|
||||
|
||||
/// Allocates ten ints of host memory through the device and frees them again.
TEST_F(DeviceTest, AllocateAndFreeHostMemory) {
  se::Expected<int *> MaybeMemory = Device.allocateHostMemory<int>(10);
  // Stop here on failure: the dereference below is only valid when the
  // Expected holds a value.
  if (!MaybeMemory) {
    consumeError(MaybeMemory.takeError());
    FAIL() << "host memory allocation failed";
  }
  EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory));
}
|
||||
|
||||
/// Registers a ten-element host buffer with the device and unregisters it.
TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) {
  std::vector<int> Buffer(10);
  int *Raw = Buffer.data();
  EXPECT_NO_ERROR(Device.registerHostMemory(Raw, 10));
  EXPECT_NO_ERROR(Device.unregisterHostMemory(Raw));
}
|
||||
|
||||
// D2H tests
|
||||
|
||||
/// Device-to-host copies into a MutableArrayRef with an explicit element count.
TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) {
  // All five elements of A5 go to Host5.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Only the first two elements of B5 are requested and checked.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
  for (int Index = 0; Index != 2; ++Index)
    EXPECT_EQ(HostB5[Index], Host5[Index]);

  // Seven elements do not fit in the five-element host destination.
  EXPECT_ERROR(
      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));

  // Seven elements are more than the five-element device source holds.
  EXPECT_ERROR(
      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));

  // Seven elements exceed both sides.
  EXPECT_ERROR(
      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
}
|
||||
|
||||
/// Device-to-host copies into a MutableArrayRef without an explicit count.
TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) {
  // Equal-sized source and destination.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Seven-element source, five-element destination.
  EXPECT_ERROR(
      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));

  // Five-element source, seven-element destination.
  EXPECT_ERROR(
      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
}
|
||||
|
||||
/// Device-to-host copies into a raw host pointer.
TEST_F(DeviceTest, SyncCopyD2HToPointer) {
  // Five elements from the five-element device buffer.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Seven elements are more than the device buffer holds.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7));
}
|
||||
|
||||
/// Device-slice-to-host copies into a MutableArrayRef with an explicit count.
TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
  // Elements 1..4 of A5 go to elements 1..4 of Host5.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(
      DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
  for (int Index = 1; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Two elements from a slice of B5 with its last element dropped.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
                                            MutableArrayRef<int>(Host5), 2));
  for (int Index = 0; Index != 2; ++Index)
    EXPECT_EQ(HostB5[Index], Host5[Index]);

  // Seven elements do not fit in the five-element host destination.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(),
                                         MutableArrayRef<int>(Host5), 7));

  // Seven elements are more than the five-element slice holds.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
                                         MutableArrayRef<int>(Host7), 7));

  // Seven elements exceed both sides.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
                                         MutableArrayRef<int>(Host5), 7));
}
|
||||
|
||||
/// Device-slice-to-host copies into a MutableArrayRef without a count.
TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) {
  // slice(1, 5) of A7 is compared against HostA7[1..5].
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
                                            MutableArrayRef<int>(Host5)));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA7[Index + 1], Host5[Index]);

  // A7 minus its last element is still larger than Host5.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
                                         MutableArrayRef<int>(Host5)));

  // Five-element slice, seven-element destination.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
                                         MutableArrayRef<int>(Host7)));
}
|
||||
|
||||
/// Device-slice-to-host copies into a raw host pointer.
TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) {
  // Elements 1..4 of A5 go to Host5 starting at index 1.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
                                            Host5 + 1, 4));
  for (int Index = 1; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Seven elements are more than the five-element slice holds.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
}
|
||||
|
||||
// H2D tests
|
||||
|
||||
/// Host-to-device copies from an ArrayRef with an explicit element count.
TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) {
  // All five elements of Host5 go to A5.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Only the first two elements are requested and checked.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
  for (int Index = 0; Index != 2; ++Index)
    EXPECT_EQ(HostB5[Index], Host5[Index]);

  // Seven elements do not fit in the five-element device destination.
  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));

  // Seven elements are more than the five-element host source holds.
  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));

  // Seven elements exceed both sides.
  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
}
|
||||
|
||||
/// Host-to-device copies from an ArrayRef without an explicit count.
TEST_F(DeviceTest, SyncCopyH2DToArrayRef) {
  // Equal-sized source and destination.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Five-element source, seven-element destination.
  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));

  // Seven-element source, five-element destination.
  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
}
|
||||
|
||||
/// Host-to-device copies from a raw host pointer.
TEST_F(DeviceTest, SyncCopyH2DToPointer) {
  // Five elements into the five-element device buffer.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Seven elements do not fit in the five-element device buffer.
  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7));
}
|
||||
|
||||
/// Host-to-device-slice copies from an ArrayRef with an explicit count.
TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) {
  // Elements 1..4 of Host5 go to elements 1..4 of A5.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
      ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
  for (int Index = 1; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Two elements into a slice of B5 with its last element dropped.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
  for (int Index = 0; Index != 2; ++Index)
    EXPECT_EQ(HostB5[Index], Host5[Index]);

  // Seven elements do not fit in the five-element device slice.
  EXPECT_ERROR(
      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));

  // Seven elements are more than the five-element host source holds.
  EXPECT_ERROR(
      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));

  // Seven elements exceed both sides.
  EXPECT_ERROR(
      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
}
|
||||
|
||||
/// Host-to-device-slice copies from an ArrayRef without an explicit count.
TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) {
  // Equal-sized source and destination slice.
  EXPECT_NO_ERROR(
      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Five-element source, seven-element destination slice.
  EXPECT_ERROR(
      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));

  // Seven-element source, five-element destination slice.
  EXPECT_ERROR(
      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
}
|
||||
|
||||
/// Host-to-device-slice copies from a raw host pointer.
TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) {
  // Five elements into the five-element device slice.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
  for (int Index = 0; Index != 5; ++Index)
    EXPECT_EQ(HostA5[Index], Host5[Index]);

  // Seven elements do not fit in the five-element device slice.
  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
}
|
||||
|
||||
// D2D tests
|
||||
|
||||
// Device-to-device copy between whole buffers, with an explicit count.
TEST_F(DeviceTest, SyncCopyD2DByCount) {
  // Full five-element copy: HostA5 and HostB5 must agree afterwards.
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Partial copy: only the first two elements are checked.
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // Seven-element requests involving a "5" buffer must each fail.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
}
|
||||
|
||||
// Device-to-device copy between whole buffers, without a count argument.
TEST_F(DeviceTest, SyncCopyD2D) {
  // Matching buffers copy successfully and end up element-wise equal.
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Mixing the "5" and "7" buffers in either direction must fail.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7));
}
|
||||
|
||||
// Device slice to whole device buffer, with an explicit count.
TEST_F(DeviceTest, SyncCopySliceD2DByCount) {
  // The source slice skips its first element, so source index I + 1 lands at
  // destination index I.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4));
  for (int I = 0; I != 4; ++I)
    EXPECT_EQ(HostA5[I + 1], HostB5[I]);

  // Partial copy from a slice with its last element dropped.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // Seven-element requests involving a "5" buffer must each fail.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
}
|
||||
|
||||
// Device slice to whole device buffer, without a count argument.
TEST_F(DeviceTest, SyncCopySliceD2D) {
  // DeviceA7 with its last two elements dropped is copied into DeviceB5; the
  // first five elements must then agree.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA7[I], HostB5[I]);

  // These slice/buffer pairings are expected to be rejected.
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));

  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
}
|
||||
|
||||
// Whole device buffer to device slice, with an explicit count.
TEST_F(DeviceTest, SyncCopyD2DSliceByCount) {
  // The destination slice skips its first two elements, so source index I
  // lands at destination index I + 2.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB7[I + 2]);

  // Partial copy into a slice with its last three elements dropped.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // Seven-element requests involving a "5" buffer must each fail.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
}
|
||||
|
||||
// Whole device buffer to device slice, without a count argument.
TEST_F(DeviceTest, SyncCopyD2DSlice) {
  // DeviceA5 is copied into DeviceB7 with its last two elements dropped; the
  // first five elements must then agree.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB7[I]);

  // Mixing the "5" and "7" buffers in either direction must fail.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));

  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
}
|
||||
|
||||
// Device slice to device slice, with an explicit count.
TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) {
  // Full five-element copy: HostA5 and HostB5 must agree afterwards.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Partial copy: only the first two elements are checked.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // Seven-element requests involving a "5" buffer must each fail.
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));

  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));

  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
}
|
||||
|
||||
// Device slice to device slice, without a count argument.
TEST_F(DeviceTest, SyncCopySliceD2DSlice) {
  // Matching slices copy successfully and end up element-wise equal.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Mixing the "5" and "7" slices in either direction must fail.
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));

  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
}
|
||||
|
||||
} // namespace
|
||||
@@ -1,478 +0,0 @@
|
||||
//===-- ExecutorTest.cpp - Tests for Executor -----------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This file contains the unit tests for Executor code.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "streamexecutor/Executor.h"
|
||||
#include "streamexecutor/PlatformInterfaces.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
namespace se = ::streamexecutor;
|
||||
|
||||
class MockPlatformExecutor : public se::PlatformExecutor {
|
||||
public:
|
||||
~MockPlatformExecutor() override {}
|
||||
|
||||
std::string getName() const override { return "MockPlatformExecutor"; }
|
||||
|
||||
se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
|
||||
createStream() override {
|
||||
return se::make_error("not implemented");
|
||||
}
|
||||
|
||||
se::Expected<se::GlobalDeviceMemoryBase>
|
||||
allocateDeviceMemory(size_t ByteCount) override {
|
||||
return se::GlobalDeviceMemoryBase(std::malloc(ByteCount));
|
||||
}
|
||||
|
||||
se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
|
||||
std::free(const_cast<void *>(Memory.getHandle()));
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
|
||||
return std::malloc(ByteCount);
|
||||
}
|
||||
|
||||
se::Error freeHostMemory(void *Memory) override {
|
||||
std::free(Memory);
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error registerHostMemory(void *, size_t) override {
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error unregisterHostMemory(void *) override {
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
|
||||
size_t SrcByteOffset, void *HostDst,
|
||||
size_t DstByteOffset,
|
||||
size_t ByteCount) override {
|
||||
std::memcpy(static_cast<char *>(HostDst) + DstByteOffset,
|
||||
static_cast<const char *>(DeviceSrc.getHandle()) +
|
||||
SrcByteOffset,
|
||||
ByteCount);
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
|
||||
se::GlobalDeviceMemoryBase DeviceDst,
|
||||
size_t DstByteOffset,
|
||||
size_t ByteCount) override {
|
||||
std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
|
||||
DstByteOffset,
|
||||
static_cast<const char *>(HostSrc) + SrcByteOffset, ByteCount);
|
||||
return se::Error::success();
|
||||
}
|
||||
|
||||
se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
|
||||
size_t DstByteOffset,
|
||||
const se::GlobalDeviceMemoryBase &DeviceSrc,
|
||||
size_t SrcByteOffset,
|
||||
size_t ByteCount) override {
|
||||
std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
|
||||
DstByteOffset,
|
||||
static_cast<const char *>(DeviceSrc.getHandle()) +
|
||||
SrcByteOffset,
|
||||
ByteCount);
|
||||
return se::Error::success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Test fixture to hold objects used by tests.
|
||||
class ExecutorTest : public ::testing::Test {
|
||||
public:
|
||||
ExecutorTest()
|
||||
: HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
|
||||
HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
|
||||
DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
|
||||
DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
|
||||
DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
|
||||
DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
|
||||
Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
|
||||
Executor(&PExecutor) {}
|
||||
|
||||
// Device memory is backed by host arrays.
|
||||
int HostA5[5];
|
||||
int HostB5[5];
|
||||
int HostA7[7];
|
||||
int HostB7[7];
|
||||
se::GlobalDeviceMemory<int> DeviceA5;
|
||||
se::GlobalDeviceMemory<int> DeviceB5;
|
||||
se::GlobalDeviceMemory<int> DeviceA7;
|
||||
se::GlobalDeviceMemory<int> DeviceB7;
|
||||
|
||||
// Host memory to be used as actual host memory.
|
||||
int Host5[5];
|
||||
int Host7[7];
|
||||
|
||||
MockPlatformExecutor PExecutor;
|
||||
se::Executor Executor;
|
||||
};
|
||||
|
||||
// Expects that the se::Error produced by E is a success value.
//
// The error is moved into a local and always consumed. Bool-testing a
// temporary and then dropping it leaves a failure value unhandled; assuming
// se::Error is llvm::Error (consumeError is found by ADL below), checked
// builds abort the whole test binary in that case instead of recording a
// single test failure. The stringified expression is streamed into the
// failure message so the offending call is still identifiable.
#define EXPECT_NO_ERROR(E)                                                     \
  do {                                                                         \
    se::Error E__ = (E);                                                       \
    EXPECT_FALSE(static_cast<bool>(E__)) << "unexpected error from: " #E;      \
    consumeError(std::move(E__));                                              \
  } while (false)

// Expects that the se::Error produced by E is a failure value, then consumes
// it so the error is not destroyed unhandled.
#define EXPECT_ERROR(E)                                                        \
  do {                                                                         \
    se::Error E__ = (E);                                                       \
    EXPECT_TRUE(static_cast<bool>(E__));                                       \
    consumeError(std::move(E__));                                              \
  } while (false)
|
||||
|
||||
using llvm::ArrayRef;
|
||||
using llvm::MutableArrayRef;
|
||||
|
||||
// Allocates ten ints of device memory through the executor and frees them.
TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) {
  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
      Executor.allocateDeviceMemory<int>(10);
  // ASSERT rather than EXPECT: the next statement dereferences MaybeMemory,
  // which is only valid when the allocation succeeded.
  ASSERT_TRUE(static_cast<bool>(MaybeMemory));
  EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory));
}
|
||||
|
||||
// Allocates ten ints of host memory through the executor and frees them.
TEST_F(ExecutorTest, AllocateAndFreeHostMemory) {
  se::Expected<int *> MaybeMemory = Executor.allocateHostMemory<int>(10);
  // ASSERT rather than EXPECT: the next statement dereferences MaybeMemory,
  // which is only valid when the allocation succeeded.
  ASSERT_TRUE(static_cast<bool>(MaybeMemory));
  EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory));
}
|
||||
|
||||
// Registers a ten-int host buffer with the executor, then unregisters it;
// both calls must report success.
TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) {
  std::vector<int> Data(10);

  EXPECT_NO_ERROR(Executor.registerHostMemory(Data.data(), 10));

  EXPECT_NO_ERROR(Executor.unregisterHostMemory(Data.data()));
}
|
||||
|
||||
// D2H tests
|
||||
|
||||
// Whole device buffer to host MutableArrayRef, with an explicit count.
TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) {
  // Full five-element copy: Host5 must then equal the array behind DeviceA5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Partial copy: only the first two elements are checked.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostB5[I], Host5[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
}
|
||||
|
||||
// Whole device buffer to host MutableArrayRef, without a count argument.
TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRef) {
  // Equal-sized source and destination copy successfully.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Five-versus-seven pairings must fail in both directions.
  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));

  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
}
|
||||
|
||||
// Whole device buffer to a raw host pointer.
TEST_F(ExecutorTest, SyncCopyD2HToPointer) {
  // Full five-element copy: Host5 must then equal the array behind DeviceA5.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host5, 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Seven elements cannot come out of the five-element DeviceA5.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host7, 7));
}
|
||||
|
||||
// Device slice to host MutableArrayRef, with an explicit count.
TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
  // Both sides skip their first element, so indices 1..4 must agree.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(
      DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
  for (int I = 1; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Partial copy from a slice with its last element dropped.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
                                              MutableArrayRef<int>(Host5), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostB5[I], Host5[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice(),
                                           MutableArrayRef<int>(Host5), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
                                           MutableArrayRef<int>(Host7), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
                                           MutableArrayRef<int>(Host5), 7));
}
|
||||
|
||||
// Device slice to host MutableArrayRef, without a count argument.
TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRef) {
  // slice(1, 5) of DeviceA7 is copied into Host5, so source index I + 1 is
  // compared against destination index I.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
                                              MutableArrayRef<int>(Host5)));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA7[I + 1], Host5[I]);

  // Slice and host array lengths that differ must be rejected.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
                                           MutableArrayRef<int>(Host5)));

  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
                                           MutableArrayRef<int>(Host7)));
}
|
||||
|
||||
// Device slice to a raw host pointer.
TEST_F(ExecutorTest, SyncCopyD2HSliceToPointer) {
  // Both sides skip their first element, so indices 1..4 must agree.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
                                              Host5 + 1, 4));
  for (int I = 1; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Seven elements cannot come out of the five-element DeviceA5 slice.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
}
|
||||
|
||||
// H2D tests
|
||||
|
||||
// Host ArrayRef to whole device buffer, with an explicit count.
TEST_F(ExecutorTest, SyncCopyH2DToArrayRefByCount) {
  // Full five-element copy: the array behind DeviceA5 must then equal Host5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Partial copy: only the first two elements are checked.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostB5[I], Host5[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));

  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));

  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
}
|
||||
|
||||
// Host ArrayRef to whole device buffer, without a count argument.
TEST_F(ExecutorTest, SyncCopyH2DToArrayRef) {
  // Equal-sized source and destination copy successfully.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Five-versus-seven pairings must fail in both directions.
  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));

  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
}
|
||||
|
||||
// Raw host pointer to whole device buffer.
TEST_F(ExecutorTest, SyncCopyH2DToPointer) {
  // Full five-element copy: the array behind DeviceA5 must then equal Host5.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5, 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Seven elements do not fit into the five-element DeviceA5.
  EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5, 7));
}
|
||||
|
||||
// Host ArrayRef to device slice, with an explicit count.
TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRefByCount) {
  // Both sides skip their first element, so indices 1..4 must agree.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
      ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
  for (int I = 1; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Partial copy into a slice with its last element dropped.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostB5[I], Host5[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
}
|
||||
|
||||
// Host ArrayRef to device slice, without a count argument.
TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRef) {
  // Equal-sized source and destination copy successfully.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Five-versus-seven pairings must fail in both directions.
  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));

  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
}
|
||||
|
||||
// Raw host pointer to device slice.
TEST_F(ExecutorTest, SyncCopyH2DSliceToPointer) {
  // Full five-element copy: the array behind DeviceA5 must then equal Host5.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], Host5[I]);

  // Seven elements do not fit into the five-element DeviceA5 slice.
  EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
}
|
||||
|
||||
// D2D tests
|
||||
|
||||
// Device-to-device copy between whole buffers, with an explicit count.
TEST_F(ExecutorTest, SyncCopyD2DByCount) {
  // Full five-element copy: the backing arrays must agree afterwards.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Partial copy: only the first two elements are checked.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
}
|
||||
|
||||
// Device-to-device copy between whole buffers, without a count argument.
TEST_F(ExecutorTest, SyncCopyD2D) {
  // Equal-sized buffers copy successfully and end up element-wise equal.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Five-versus-seven pairings must fail in both directions.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7));
}
|
||||
|
||||
// Device slice to whole device buffer, with an explicit count.
TEST_F(ExecutorTest, SyncCopySliceD2DByCount) {
  // The source slice skips its first element, so source index I + 1 lands at
  // destination index I.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1),
                                              DeviceB5, 4));
  for (int I = 0; I != 4; ++I)
    EXPECT_EQ(HostA5[I + 1], HostB5[I]);

  // Partial copy from a slice with its last element dropped.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1),
                                              DeviceB7, 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
}
|
||||
|
||||
// Device slice to whole device buffer, without a count argument.
TEST_F(ExecutorTest, SyncCopySliceD2D) {
  // DeviceA7 minus its last two elements is five long and matches DeviceB5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA7[I], HostB5[I]);

  // Slice and buffer lengths that differ must be rejected.
  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
}
|
||||
|
||||
// Whole device buffer to device slice, with an explicit count.
TEST_F(ExecutorTest, SyncCopyD2DSliceByCount) {
  // The destination slice skips its first two elements, so source index I
  // lands at destination index I + 2.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
      DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB7[I + 2]);

  // Partial copy into a slice with its last three elements dropped.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
      DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
}
|
||||
|
||||
// Whole device buffer to device slice, without a count argument.
TEST_F(ExecutorTest, SyncCopyD2DSlice) {
  // DeviceB7 minus its last two elements is five long and matches DeviceA5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB7[I]);

  // Five-versus-seven pairings must fail in both directions.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
}
|
||||
|
||||
// Device slice to device slice, with an explicit count.
TEST_F(ExecutorTest, SyncCopySliceD2DSliceByCount) {
  // Full five-element copy: the backing arrays must agree afterwards.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Partial copy: only the first two elements are checked.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
  for (int I = 0; I != 2; ++I)
    EXPECT_EQ(HostA7[I], HostB7[I]);

  // A count of seven must fail whenever either side (or both) holds five.
  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
}
|
||||
|
||||
// Device slice to device slice, without a count argument.
TEST_F(ExecutorTest, SyncCopySliceD2DSlice) {
  // Equal-sized slices copy successfully and end up element-wise equal.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
  for (int I = 0; I != 5; ++I)
    EXPECT_EQ(HostA5[I], HostB5[I]);

  // Five-versus-seven pairings must fail in both directions.
  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
}
|
||||
|
||||
} // namespace
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "streamexecutor/Executor.h"
|
||||
#include "streamexecutor/Device.h"
|
||||
#include "streamexecutor/Kernel.h"
|
||||
#include "streamexecutor/KernelSpec.h"
|
||||
#include "streamexecutor/PlatformInterfaces.h"
|
||||
@@ -27,7 +27,7 @@ namespace {
|
||||
|
||||
namespace se = ::streamexecutor;
|
||||
|
||||
// An Executor that returns a dummy KernelInterface.
|
||||
// A Device that returns a dummy KernelInterface.
|
||||
//
|
||||
// During construction it creates a unique_ptr to a dummy KernelInterface and it
|
||||
// also stores a separate copy of the raw pointer that is stored by that
|
||||
@@ -39,10 +39,10 @@ namespace se = ::streamexecutor;
|
||||
// object. The raw pointer copy can then be used to identify the unique_ptr in
|
||||
// its new location (by comparing the raw pointer with unique_ptr::get), to
|
||||
// verify that the unique_ptr ended up where it was supposed to be.
|
||||
class MockExecutor : public se::Executor {
|
||||
class MockDevice : public se::Device {
|
||||
public:
|
||||
MockExecutor()
|
||||
: se::Executor(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
|
||||
MockDevice()
|
||||
: se::Device(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
|
||||
Raw(Unique.get()) {}
|
||||
|
||||
// Moves the unique pointer into the returned se::Expected instance.
|
||||
@@ -51,7 +51,7 @@ public:
|
||||
// out.
|
||||
se::Expected<std::unique_ptr<se::KernelInterface>>
|
||||
getKernelImplementation(const se::MultiKernelLoaderSpec &) override {
|
||||
assert(Unique && "MockExecutor getKernelImplementation should not be "
|
||||
assert(Unique && "MockDevice getKernelImplementation should not be "
|
||||
"called more than once");
|
||||
return std::move(Unique);
|
||||
}
|
||||
@@ -79,15 +79,15 @@ TYPED_TEST_CASE(GetImplementationTest, GetImplementationTypes);
|
||||
|
||||
// Tests that the kernel create functions properly fetch the implementation
|
||||
// pointers for the kernel objects they construct from the passed-in
|
||||
// Executor objects.
|
||||
// Device objects.
|
||||
TYPED_TEST(GetImplementationTest, SetImplementationDuringCreate) {
|
||||
se::MultiKernelLoaderSpec Spec;
|
||||
MockExecutor MockExecutor;
|
||||
MockDevice Dev;
|
||||
|
||||
auto MaybeKernel = TypeParam::create(&MockExecutor, Spec);
|
||||
auto MaybeKernel = TypeParam::create(&Dev, Spec);
|
||||
EXPECT_TRUE(static_cast<bool>(MaybeKernel));
|
||||
se::KernelInterface *Implementation = MaybeKernel->getImplementation();
|
||||
EXPECT_EQ(MockExecutor.getRaw(), Implementation);
|
||||
EXPECT_EQ(Dev.getRaw(), Implementation);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "streamexecutor/Executor.h"
|
||||
#include "streamexecutor/Device.h"
|
||||
#include "streamexecutor/Kernel.h"
|
||||
#include "streamexecutor/KernelSpec.h"
|
||||
#include "streamexecutor/PlatformInterfaces.h"
|
||||
@@ -26,14 +26,14 @@ namespace {
|
||||
|
||||
namespace se = ::streamexecutor;
|
||||
|
||||
/// Mock PlatformExecutor that performs asynchronous memcpy operations by
|
||||
/// Mock PlatformDevice that performs asynchronous memcpy operations by
|
||||
/// ignoring the stream argument and calling std::memcpy on device memory
|
||||
/// handles.
|
||||
class MockPlatformExecutor : public se::PlatformExecutor {
|
||||
class MockPlatformDevice : public se::PlatformDevice {
|
||||
public:
|
||||
~MockPlatformExecutor() override {}
|
||||
~MockPlatformDevice() override {}
|
||||
|
||||
std::string getName() const override { return "MockPlatformExecutor"; }
|
||||
std::string getName() const override { return "MockPlatformDevice"; }
|
||||
|
||||
se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
|
||||
createStream() override {
|
||||
@@ -83,7 +83,7 @@ public:
|
||||
DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
|
||||
DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
|
||||
Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
|
||||
Stream(llvm::make_unique<se::PlatformStreamHandle>(&PExecutor)) {}
|
||||
Stream(llvm::make_unique<se::PlatformStreamHandle>(&PDevice)) {}
|
||||
|
||||
protected:
|
||||
// Device memory is backed by host arrays.
|
||||
@@ -100,7 +100,7 @@ protected:
|
||||
int Host5[5];
|
||||
int Host7[7];
|
||||
|
||||
MockPlatformExecutor PExecutor;
|
||||
MockPlatformDevice PDevice;
|
||||
se::Stream Stream;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user