[StreamExecutor] Rename Executor to Device

Summary: This more clearly describes what the class is.

Reviewers: jlebar

Subscribers: jprice, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23851

llvm-svn: 279669
This commit is contained in:
Jason Henline
2016-08-24 21:31:53 +00:00
parent 571a647853
commit bcc77b6249
14 changed files with 575 additions and 580 deletions

View File

@@ -1,4 +1,4 @@
//===-- Executor.h - The Executor class -------------------------*- C++ -*-===//
//===-- Device.h - The Device class -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
///
/// \file
/// The Executor class which represents a single device of a specific platform.
/// The Device class which represents a single device of a specific platform.
///
//===----------------------------------------------------------------------===//
#ifndef STREAMEXECUTOR_EXECUTOR_H
#define STREAMEXECUTOR_EXECUTOR_H
#ifndef STREAMEXECUTOR_DEVICE_H
#define STREAMEXECUTOR_DEVICE_H
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
@@ -24,10 +24,10 @@ namespace streamexecutor {
class KernelInterface;
class Stream;
class Executor {
class Device {
public:
explicit Executor(PlatformExecutor *PExecutor);
virtual ~Executor();
explicit Device(PlatformDevice *PDevice);
virtual ~Device();
/// Gets the kernel implementation for the underlying platform.
virtual Expected<std::unique_ptr<KernelInterface>>
@@ -42,7 +42,7 @@ public:
template <typename T>
Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
Expected<GlobalDeviceMemoryBase> MaybeBase =
PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
PDevice->allocateDeviceMemory(ElementCount * sizeof(T));
if (!MaybeBase)
return MaybeBase.takeError();
return GlobalDeviceMemory<T>(*MaybeBase);
@@ -50,7 +50,7 @@ public:
/// Frees memory previously allocated with allocateDeviceMemory.
template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) {
return PExecutor->freeDeviceMemory(Memory);
return PDevice->freeDeviceMemory(Memory);
}
/// Allocates an array of ElementCount entries of type T in host memory.
@@ -59,7 +59,7 @@ public:
/// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
Expected<void *> MaybeMemory =
PExecutor->allocateHostMemory(ElementCount * sizeof(T));
PDevice->allocateHostMemory(ElementCount * sizeof(T));
if (!MaybeMemory)
return MaybeMemory.takeError();
return static_cast<T *>(*MaybeMemory);
@@ -67,7 +67,7 @@ public:
/// Frees memory previously allocated with allocateHostMemory.
template <typename T> Error freeHostMemory(T *Memory) {
return PExecutor->freeHostMemory(Memory);
return PDevice->freeHostMemory(Memory);
}
/// Registers a previously allocated host array of type T for asynchronous
@@ -77,15 +77,15 @@ public:
/// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
template <typename T>
Error registerHostMemory(T *Memory, size_t ElementCount) {
return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T));
return PDevice->registerHostMemory(Memory, ElementCount * sizeof(T));
}
/// Unregisters host memory previously registered by registerHostMemory.
template <typename T> Error unregisterHostMemory(T *Memory) {
return PExecutor->unregisterHostMemory(Memory);
return PDevice->unregisterHostMemory(Memory);
}
/// \anchor ExecutorHostSyncCopyGroup
/// \anchor DeviceHostSyncCopyGroup
/// \name Host-synchronous device memory copying functions
///
/// These methods block the calling host thread while copying data to or from
@@ -125,9 +125,9 @@ public:
return make_error(
"copying too many elements, " + llvm::Twine(ElementCount) +
", to a host array of element count " + llvm::Twine(Dst.size()));
return PExecutor->synchronousCopyD2H(
Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0,
ElementCount * sizeof(T));
return PDevice->synchronousCopyD2H(Src.getBaseMemory(),
Src.getElementOffset() * sizeof(T),
Dst.data(), 0, ElementCount * sizeof(T));
}
template <typename T>
@@ -179,9 +179,9 @@ public:
llvm::Twine(ElementCount) +
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
Dst.getElementOffset() * sizeof(T),
ElementCount * sizeof(T));
return PDevice->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
Dst.getElementOffset() * sizeof(T),
ElementCount * sizeof(T));
}
template <typename T>
@@ -234,7 +234,7 @@ public:
llvm::Twine(ElementCount) +
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
return PExecutor->synchronousCopyD2D(
return PDevice->synchronousCopyD2D(
Src.getBaseMemory(), Src.getElementOffset() * sizeof(T),
Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T),
ElementCount * sizeof(T));
@@ -292,9 +292,9 @@ public:
///@} End host-synchronous device memory copying functions
private:
PlatformExecutor *PExecutor;
PlatformDevice *PDevice;
};
} // namespace streamexecutor
#endif // STREAMEXECUTOR_EXECUTOR_H
#endif // STREAMEXECUTOR_DEVICE_H

View File

@@ -54,13 +54,13 @@
/// function as follows:
/// \code
/// namespace ccn = compiler_cuda_namespace;
/// // Assumes Executor is a pointer to the StreamExecutor on which to
/// // launch the kernel.
/// // Assumes Device is a pointer to the Device on which to launch the
/// // kernel.
/// //
/// // See KernelSpec.h for details on how the compiler can create a
/// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below.
/// Expected<ccn::SaxpyKernel> MaybeKernel =
/// ccn::SaxpyKernel::create(Executor, ccn::SaxpyKernelLoaderSpec);
/// ccn::SaxpyKernel::create(Device, ccn::SaxpyKernelLoaderSpec);
/// if (!MaybeKernel) { /* Handle error */ }
/// ccn::SaxpyKernel SaxpyKernel = *MaybeKernel;
/// Launch(SaxpyKernel, A, X, Y);
@@ -84,7 +84,7 @@
namespace streamexecutor {
class Executor;
class Device;
class KernelInterface;
/// The base class for device kernel functions.
@@ -100,13 +100,13 @@ public:
KernelBase &operator=(KernelBase &&) = default;
~KernelBase();
/// Creates a kernel object from an Executor and a MultiKernelLoaderSpec.
/// Creates a kernel object from a Device and a MultiKernelLoaderSpec.
///
/// The Executor knows which platform it belongs to and the
/// The Device knows which platform it belongs to and the
/// MultiKernelLoaderSpec knows how to find the kernel code for different
/// platforms, so the combined information is enough to get the kernel code
/// for the appropriate platform.
static Expected<KernelBase> create(Executor *ParentExecutor,
static Expected<KernelBase> create(Device *Dev,
const MultiKernelLoaderSpec &Spec);
const std::string &getName() const { return Name; }
@@ -116,11 +116,11 @@ public:
KernelInterface *getImplementation() { return Implementation.get(); }
private:
KernelBase(Executor *ParentExecutor, const std::string &Name,
KernelBase(Device *Dev, const std::string &Name,
const std::string &DemangledName,
std::unique_ptr<KernelInterface> Implementation);
Executor *ParentExecutor;
Device *TheDevice;
std::string Name;
std::string DemangledName;
std::unique_ptr<KernelInterface> Implementation;
@@ -136,9 +136,9 @@ public:
TypedKernel &operator=(TypedKernel &&) = default;
/// Parameters here have the same meaning as in KernelBase::create.
static Expected<TypedKernel> create(Executor *ParentExecutor,
static Expected<TypedKernel> create(Device *Dev,
const MultiKernelLoaderSpec &Spec) {
auto MaybeBase = KernelBase::create(ParentExecutor, Spec);
auto MaybeBase = KernelBase::create(Dev, Spec);
if (!MaybeBase) {
return MaybeBase.takeError();
}

View File

@@ -31,7 +31,7 @@
namespace streamexecutor {
class PlatformExecutor;
class PlatformDevice;
/// Methods supported by device kernel function objects on all platforms.
class KernelInterface {
@@ -41,15 +41,14 @@ class KernelInterface {
/// Platform-specific stream handle.
class PlatformStreamHandle {
public:
explicit PlatformStreamHandle(PlatformExecutor *PExecutor)
: PExecutor(PExecutor) {}
explicit PlatformStreamHandle(PlatformDevice *PDevice) : PDevice(PDevice) {}
virtual ~PlatformStreamHandle();
PlatformExecutor *getExecutor() { return PExecutor; }
PlatformDevice *getDevice() { return PDevice; }
private:
PlatformExecutor *PExecutor;
PlatformDevice *PDevice;
};
/// Raw device methods that must be implemented by each platform.
@@ -57,11 +56,11 @@ private:
/// This class defines the platform interface that supports executing work on a
/// device.
///
/// The public Executor and Stream classes have the type-safe versions of the
/// The public Device and Stream classes have the type-safe versions of the
/// functions in this interface.
class PlatformExecutor {
class PlatformDevice {
public:
virtual ~PlatformExecutor();
virtual ~PlatformDevice();
virtual std::string getName() const = 0;

View File

@@ -12,19 +12,18 @@
/// A Stream instance represents a queue of sequential, host-asynchronous work
/// to be performed on a device.
///
/// To enqueue work on a device, first create a Executor instance for a
/// given device and then use that Executor to create a Stream instance.
/// The Stream instance will perform its work on the device managed by the
/// Executor that created it.
/// To enqueue work on a device, first create a Device instance then use that
/// Device to create a Stream instance. The Stream instance will perform its
/// work on the device managed by the Device object that created it.
///
/// The various "then" methods of the Stream object, such as thenCopyH2D and
/// thenLaunch, may be used to enqueue work on the Stream, and the
/// blockHostUntilDone() method may be used to block the host code until the
/// Stream has completed all its work.
///
/// Multiple Stream instances can be created for the same Executor. This
/// allows several independent streams of computation to be performed
/// simultaneously on a single device.
/// Multiple Stream instances can be created for the same Device. This allows
/// several independent streams of computation to be performed simultaneously on
/// a single device.
///
//===----------------------------------------------------------------------===//
@@ -94,8 +93,8 @@ public:
const ParameterTs &... Arguments) {
auto ArgumentArray =
make_kernel_argument_pack<ParameterTs...>(Arguments...);
setError(PExecutor->launch(ThePlatformStream.get(), BlockSize, GridSize,
Kernel, ArgumentArray));
setError(PDevice->launch(ThePlatformStream.get(), BlockSize, GridSize,
Kernel, ArgumentArray));
return *this;
}
@@ -105,13 +104,13 @@ public:
/// return without waiting for the operation to complete.
///
/// Any host memory used as a source or destination for one of these
/// operations must be allocated with Executor::allocateHostMemory or
/// registered with Executor::registerHostMemory. Otherwise, the enqueuing
/// operation may block until the copy operation is fully complete.
/// operations must be allocated with Device::allocateHostMemory or registered
/// with Device::registerHostMemory. Otherwise, the enqueuing operation may
/// block until the copy operation is fully complete.
///
/// The arguments and bounds checking for these methods match the API of the
/// \ref ExecutorHostSyncCopyGroup
/// "host-synchronous device memory copying functions" of Executor.
/// \ref DeviceHostSyncCopyGroup
/// "host-synchronous device memory copying functions" of Device.
///@{
template <typename T>
@@ -125,9 +124,9 @@ public:
setError("copying too many elements, " + llvm::Twine(ElementCount) +
", to a host array of element count " + llvm::Twine(Dst.size()));
else
setError(PExecutor->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
Src.getElementOffset() * sizeof(T),
Dst.data(), 0, ElementCount * sizeof(T)));
setError(PDevice->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
Src.getElementOffset() * sizeof(T), Dst.data(),
0, ElementCount * sizeof(T)));
return *this;
}
@@ -182,7 +181,7 @@ public:
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
else
setError(PExecutor->copyH2D(
setError(PDevice->copyH2D(
ThePlatformStream.get(), Src.data(), 0, Dst.getBaseMemory(),
Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
return *this;
@@ -238,7 +237,7 @@ public:
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
else
setError(PExecutor->copyD2D(
setError(PDevice->copyD2D(
ThePlatformStream.get(), Src.getBaseMemory(),
Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(),
Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
@@ -322,8 +321,8 @@ private:
ErrorMessage = Message.str();
}
/// The PlatformExecutor that supports the operations of this stream.
PlatformExecutor *PExecutor;
/// The PlatformDevice that supports the operations of this stream.
PlatformDevice *PDevice;
/// The platform-specific stream handle for this instance.
std::unique_ptr<PlatformStreamHandle> ThePlatformStream;

View File

@@ -6,7 +6,7 @@ add_library(
add_library(
streamexecutor
$<TARGET_OBJECTS:utils>
Executor.cpp
Device.cpp
Kernel.cpp
KernelSpec.cpp
PackedKernelArgumentArray.cpp

View File

@@ -1,4 +1,4 @@
//===-- Executor.cpp - Executor implementation ----------------------------===//
//===-- Device.cpp - Device implementation --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,11 +8,11 @@
//===----------------------------------------------------------------------===//
///
/// \file
/// Implementation of Executor class internals.
/// Implementation of Device class internals.
///
//===----------------------------------------------------------------------===//
#include "streamexecutor/Executor.h"
#include "streamexecutor/Device.h"
#include <cassert>
@@ -23,17 +23,17 @@
namespace streamexecutor {
Executor::Executor(PlatformExecutor *PExecutor) : PExecutor(PExecutor) {}
Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
Executor::~Executor() = default;
Device::~Device() = default;
Expected<std::unique_ptr<Stream>> Executor::createStream() {
Expected<std::unique_ptr<Stream>> Device::createStream() {
Expected<std::unique_ptr<PlatformStreamHandle>> MaybePlatformStream =
PExecutor->createStream();
PDevice->createStream();
if (!MaybePlatformStream) {
return MaybePlatformStream.takeError();
}
assert((*MaybePlatformStream)->getExecutor() == PExecutor &&
assert((*MaybePlatformStream)->getDevice() == PDevice &&
"an executor created a stream with a different stored executor");
return llvm::make_unique<Stream>(std::move(*MaybePlatformStream));
}

View File

@@ -13,31 +13,31 @@
//===----------------------------------------------------------------------===//
#include "streamexecutor/Kernel.h"
#include "streamexecutor/Executor.h"
#include "streamexecutor/Device.h"
#include "streamexecutor/PlatformInterfaces.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
namespace streamexecutor {
KernelBase::KernelBase(Executor *ParentExecutor, const std::string &Name,
KernelBase::KernelBase(Device *Dev, const std::string &Name,
const std::string &DemangledName,
std::unique_ptr<KernelInterface> Implementation)
: ParentExecutor(ParentExecutor), Name(Name), DemangledName(DemangledName),
: TheDevice(Dev), Name(Name), DemangledName(DemangledName),
Implementation(std::move(Implementation)) {}
KernelBase::~KernelBase() = default;
Expected<KernelBase> KernelBase::create(Executor *ParentExecutor,
Expected<KernelBase> KernelBase::create(Device *Dev,
const MultiKernelLoaderSpec &Spec) {
auto MaybeImplementation = ParentExecutor->getKernelImplementation(Spec);
auto MaybeImplementation = Dev->getKernelImplementation(Spec);
if (!MaybeImplementation) {
return MaybeImplementation.takeError();
}
std::string Name = Spec.getKernelName();
std::string DemangledName =
llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr);
KernelBase Instance(ParentExecutor, Name, DemangledName,
KernelBase Instance(Dev, Name, DemangledName,
std::move(*MaybeImplementation));
return std::move(Instance);
}

View File

@@ -18,6 +18,6 @@ namespace streamexecutor {
PlatformStreamHandle::~PlatformStreamHandle() = default;
PlatformExecutor::~PlatformExecutor() = default;
PlatformDevice::~PlatformDevice() = default;
} // namespace streamexecutor

View File

@@ -17,8 +17,7 @@
namespace streamexecutor {
Stream::Stream(std::unique_ptr<PlatformStreamHandle> PStream)
: PExecutor(PStream->getExecutor()), ThePlatformStream(std::move(PStream)) {
}
: PDevice(PStream->getDevice()), ThePlatformStream(std::move(PStream)) {}
Stream::~Stream() = default;

View File

@@ -1,12 +1,12 @@
add_executable(
executor_test
ExecutorTest.cpp)
device_test
DeviceTest.cpp)
target_link_libraries(
executor_test
device_test
streamexecutor
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
add_test(ExecutorTest executor_test)
add_test(DeviceTest device_test)
add_executable(
kernel_test

View File

@@ -0,0 +1,476 @@
//===-- DeviceTest.cpp - Tests for Device ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the unit tests for Device code.
///
//===----------------------------------------------------------------------===//
#include <cstdlib>
#include <cstring>
#include "streamexecutor/Device.h"
#include "streamexecutor/PlatformInterfaces.h"
#include "gtest/gtest.h"
namespace {
namespace se = ::streamexecutor;
/// A PlatformDevice test double implemented purely with host memory.
///
/// Allocations come from std::malloc, frees go to std::free, and every
/// synchronous copy is a std::memcpy between the raw handles at the requested
/// byte offsets. Stream creation always fails with "not implemented".
///
/// NOTE(review): Device::synchronousCopyD2D forwards its arguments as
/// (source, source offset, destination, destination offset), while the
/// override below names its first handle DeviceDst. The PlatformDevice
/// declaration is not visible here, so confirm which order is intended. The
/// D2D tests only compare the two buffers for equality and would not notice a
/// copy in the reverse direction.
class MockPlatformDevice : public se::PlatformDevice {
public:
  ~MockPlatformDevice() override = default;

  std::string getName() const override { return "MockPlatformDevice"; }

  /// Always fails; this mock does not provide streams.
  se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
  createStream() override {
    return se::make_error("not implemented");
  }

  /// Hands out a malloc'ed buffer of ByteCount bytes as "device" memory.
  se::Expected<se::GlobalDeviceMemoryBase>
  allocateDeviceMemory(size_t ByteCount) override {
    void *Handle = std::malloc(ByteCount);
    return se::GlobalDeviceMemoryBase(Handle);
  }

  /// Releases the handle stored in Memory with std::free.
  se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
    void *Handle = const_cast<void *>(Memory.getHandle());
    std::free(Handle);
    return se::Error::success();
  }

  /// Host memory is plain malloc'ed memory.
  se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
    return std::malloc(ByteCount);
  }

  se::Error freeHostMemory(void *Memory) override {
    std::free(Memory);
    return se::Error::success();
  }

  /// Registration is a no-op that always succeeds.
  se::Error registerHostMemory(void *, size_t) override {
    return se::Error::success();
  }

  /// Unregistration is a no-op that always succeeds.
  se::Error unregisterHostMemory(void *) override {
    return se::Error::success();
  }

  /// Copies ByteCount bytes from DeviceSrc's handle (plus SrcByteOffset) to
  /// HostDst (plus DstByteOffset).
  se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
                               size_t SrcByteOffset, void *HostDst,
                               size_t DstByteOffset,
                               size_t ByteCount) override {
    const char *From =
        static_cast<const char *>(DeviceSrc.getHandle()) + SrcByteOffset;
    char *To = static_cast<char *>(HostDst) + DstByteOffset;
    std::memcpy(To, From, ByteCount);
    return se::Error::success();
  }

  /// Copies ByteCount bytes from HostSrc (plus SrcByteOffset) to DeviceDst's
  /// handle (plus DstByteOffset).
  se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
                               se::GlobalDeviceMemoryBase DeviceDst,
                               size_t DstByteOffset,
                               size_t ByteCount) override {
    const char *From = static_cast<const char *>(HostSrc) + SrcByteOffset;
    char *To = static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
               DstByteOffset;
    std::memcpy(To, From, ByteCount);
    return se::Error::success();
  }

  /// Copies ByteCount bytes from DeviceSrc's handle (plus SrcByteOffset) to
  /// DeviceDst's handle (plus DstByteOffset).
  se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
                               size_t DstByteOffset,
                               const se::GlobalDeviceMemoryBase &DeviceSrc,
                               size_t SrcByteOffset,
                               size_t ByteCount) override {
    const char *From =
        static_cast<const char *>(DeviceSrc.getHandle()) + SrcByteOffset;
    char *To = static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
               DstByteOffset;
    std::memcpy(To, From, ByteCount);
    return se::Error::success();
  }
};
/// Test fixture to hold objects used by tests.
/// Fixture shared by all Device tests.
///
/// Members are initialized in declaration order: each backing host array is
/// filled before the GlobalDeviceMemory built from it, and PDevice is
/// constructed before the Device that stores a pointer to it.
class DeviceTest : public ::testing::Test {
public:
  // Device memory is backed by host arrays.
  int HostA5[5] = {0, 1, 2, 3, 4};
  int HostB5[5] = {5, 6, 7, 8, 9};
  int HostA7[7] = {10, 11, 12, 13, 14, 15, 16};
  int HostB7[7] = {17, 18, 19, 20, 21, 22, 23};

  se::GlobalDeviceMemory<int> DeviceA5 =
      se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5);
  se::GlobalDeviceMemory<int> DeviceB5 =
      se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5);
  se::GlobalDeviceMemory<int> DeviceA7 =
      se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7);
  se::GlobalDeviceMemory<int> DeviceB7 =
      se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7);

  // Host memory to be used as actual host memory.
  int Host5[5] = {24, 25, 26, 27, 28};
  int Host7[7] = {29, 30, 31, 32, 33, 34, 35};

  MockPlatformDevice PDevice;
  se::Device Device{&PDevice};
};
/// Expects that the se::Error produced by expression E is a success value.
///
/// The error is bound to a local and consumed after the check. Otherwise a
/// failure value would be destroyed unhandled, which aborts the process in
/// LLVM builds with error checking enabled, and the test would crash instead
/// of reporting a failed expectation.
#define EXPECT_NO_ERROR(E)                                                     \
  do {                                                                         \
    se::Error CheckedError = E;                                                \
    EXPECT_FALSE(static_cast<bool>(CheckedError));                             \
    consumeError(std::move(CheckedError));                                     \
  } while (false)

/// Expects that the se::Error produced by expression E is a failure value,
/// then consumes it so that it is not destroyed unhandled.
#define EXPECT_ERROR(E)                                                        \
  do {                                                                         \
    se::Error CheckedError = E;                                                \
    EXPECT_TRUE(static_cast<bool>(CheckedError));                              \
    consumeError(std::move(CheckedError));                                     \
  } while (false)
using llvm::ArrayRef;
using llvm::MutableArrayRef;
/// Allocates ten ints of device memory and frees them again.
TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) {
  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
      Device.allocateDeviceMemory<int>(10);
  if (!MaybeMemory) {
    // Dereferencing a failed Expected is invalid, so stop here. The error is
    // consumed first so that it is not destroyed unhandled.
    consumeError(MaybeMemory.takeError());
    FAIL() << "allocateDeviceMemory returned an error";
  }
  EXPECT_NO_ERROR(Device.freeDeviceMemory(*MaybeMemory));
}
/// Allocates ten ints of host memory and frees them again.
TEST_F(DeviceTest, AllocateAndFreeHostMemory) {
  se::Expected<int *> MaybeMemory = Device.allocateHostMemory<int>(10);
  if (!MaybeMemory) {
    // Dereferencing a failed Expected is invalid, so stop here. The error is
    // consumed first so that it is not destroyed unhandled.
    consumeError(MaybeMemory.takeError());
    FAIL() << "allocateHostMemory returned an error";
  }
  EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory));
}
/// Registers a ten-element host buffer and then unregisters the same pointer.
TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) {
  std::vector<int> Data(10);
  int *const HostPtr = Data.data();
  EXPECT_NO_ERROR(Device.registerHostMemory(HostPtr, 10));
  EXPECT_NO_ERROR(Device.unregisterHostMemory(HostPtr));
}
// D2H tests
/// Device memory -> MutableArrayRef with an explicit element count.
TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) {
  MutableArrayRef<int> Dst5(Host5);
  MutableArrayRef<int> Dst7(Host7);

  // Whole-array copy.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Dst5, 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Copy of a two-element prefix.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5, Dst5, 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);
  }

  // A count of 7 exceeds the destination, the source, or both.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7, Dst5, 7));
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Dst7, 7));
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Dst5, 7));
}
/// Device memory -> MutableArrayRef without an explicit element count.
TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) {
  MutableArrayRef<int> Dst5(Host5);
  MutableArrayRef<int> Dst7(Host7);

  // Matching sizes succeed.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Dst5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Mismatched sizes are rejected in either direction.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7, Dst5));
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Dst7));
}
/// Device memory -> raw host pointer with an element count.
TEST_F(DeviceTest, SyncCopyD2HToPointer) {
  // Five elements fit the five-element source.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Seven elements cannot come out of a five-element source.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7));
}
/// Device memory slice -> MutableArrayRef with an explicit element count.
TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
  MutableArrayRef<int> Dst5(Host5);
  MutableArrayRef<int> Dst7(Host7);
  MutableArrayRef<int> Dst5Tail(Host5 + 1, 4);

  // Last four elements of A5 into the last four slots of Host5.
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
                                            Dst5Tail, 4));
  for (int Idx = 1; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Two-element prefix of a slice that had its last element dropped.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1), Dst5, 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);
  }

  // A count of 7 exceeds the destination, the source, or both.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(), Dst5, 7));
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Dst7, 7));
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Dst5, 7));
}
/// Device memory slice -> MutableArrayRef without an explicit element count.
TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) {
  MutableArrayRef<int> Dst5(Host5);
  MutableArrayRef<int> Dst7(Host7);

  // Elements 1..5 of A7 fill Host5 exactly.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5), Dst5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA7[Idx + 1], Host5[Idx]);
  }

  // Slice and destination sizes differ.
  EXPECT_ERROR(
      Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1), Dst5));
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Dst7));
}
/// Device memory slice -> raw host pointer with an element count.
TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) {
  // Last four elements of A5 into the last four slots of Host5.
  int *const Dst5Tail = Host5 + 1;
  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
                                            Dst5Tail, 4));
  for (int Idx = 1; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Seven elements cannot come out of a five-element slice.
  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
}
// H2D tests
/// ArrayRef -> device memory with an explicit element count.
TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) {
  ArrayRef<int> Src5(Host5);
  ArrayRef<int> Src7(Host7);

  // Whole-array copy.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Src5, DeviceA5, 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Copy of a two-element prefix.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Src5, DeviceB5, 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);
  }

  // A count of 7 exceeds the destination, the source, or both.
  EXPECT_ERROR(Device.synchronousCopyH2D(Src7, DeviceA5, 7));
  EXPECT_ERROR(Device.synchronousCopyH2D(Src5, DeviceA7, 7));
  EXPECT_ERROR(Device.synchronousCopyH2D(Src5, DeviceA5, 7));
}
/// ArrayRef -> device memory without an explicit element count.
TEST_F(DeviceTest, SyncCopyH2DToArrayRef) {
  ArrayRef<int> Src5(Host5);
  ArrayRef<int> Src7(Host7);

  // Matching sizes succeed.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Src5, DeviceA5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Mismatched sizes are rejected in either direction.
  EXPECT_ERROR(Device.synchronousCopyH2D(Src5, DeviceA7));
  EXPECT_ERROR(Device.synchronousCopyH2D(Src7, DeviceA5));
}
/// Raw host pointer -> device memory with an element count.
TEST_F(DeviceTest, SyncCopyH2DToPointer) {
  // Five elements fit the five-element destination.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Seven elements do not fit a five-element destination.
  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7));
}
/// ArrayRef -> device memory slice with an explicit element count.
TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) {
  ArrayRef<int> Src5(Host5);
  ArrayRef<int> Src7(Host7);
  ArrayRef<int> Src5Tail(Host5 + 1, 4);

  // Last four elements of Host5 into the last four slots of A5.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
      Src5Tail, DeviceA5.asSlice().drop_front(1), 4));
  for (int Idx = 1; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Two-element prefix into a slice that had its last element dropped.
  EXPECT_NO_ERROR(
      Device.synchronousCopyH2D(Src5, DeviceB5.asSlice().drop_back(1), 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);
  }

  // A count of 7 exceeds the destination, the source, or both.
  EXPECT_ERROR(Device.synchronousCopyH2D(Src7, DeviceA5.asSlice(), 7));
  EXPECT_ERROR(Device.synchronousCopyH2D(Src5, DeviceA7.asSlice(), 7));
  EXPECT_ERROR(Device.synchronousCopyH2D(Src5, DeviceA5.asSlice(), 7));
}
/// ArrayRef -> device memory slice without an explicit element count.
TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) {
  ArrayRef<int> Src5(Host5);
  ArrayRef<int> Src7(Host7);

  // Matching sizes succeed.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Src5, DeviceA5.asSlice()));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Mismatched sizes are rejected in either direction.
  EXPECT_ERROR(Device.synchronousCopyH2D(Src5, DeviceA7.asSlice()));
  EXPECT_ERROR(Device.synchronousCopyH2D(Src7, DeviceA5.asSlice()));
}
/// Raw host pointer -> device memory slice with an element count.
TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) {
  // Five elements fit the five-element slice.
  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
  }

  // Seven elements do not fit a five-element slice.
  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
}
// D2D tests
/// Device memory -> device memory with an explicit element count.
TEST_F(DeviceTest, SyncCopyD2DByCount) {
  // Whole-array copy: afterwards both backing arrays must agree.
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);
  }

  // Two-element prefix copy.
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);
  }

  // A count of 7 exceeds at least one of the two arrays.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
}
/// Device memory -> device memory without an explicit element count.
TEST_F(DeviceTest, SyncCopyD2D) {
  // Equal sizes succeed and leave both backing arrays in agreement.
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);
  }

  // Mismatched sizes are rejected in either direction.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7));
}
/// Device memory slice -> device memory with an explicit element count.
TEST_F(DeviceTest, SyncCopySliceD2DByCount) {
  // Four elements starting at A5[1] are paired with B5[0..4).
  auto A5Tail = DeviceA5.asSlice().drop_front(1);
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(A5Tail, DeviceB5, 4));
  for (int Idx = 0; Idx != 4; ++Idx) {
    EXPECT_EQ(HostA5[Idx + 1], HostB5[Idx]);
  }

  // Two-element prefix of a slice that had its last element dropped.
  auto A7Head = DeviceA7.asSlice().drop_back(1);
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(A7Head, DeviceB7, 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);
  }

  // A count of 7 exceeds at least one side.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
}
/// Device memory slice -> device memory without an explicit element count.
TEST_F(DeviceTest, SyncCopySliceD2D) {
  // The first five elements of A7 match the size of B5.
  auto A7FirstFive = DeviceA7.asSlice().drop_back(2);
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(A7FirstFive, DeviceB5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA7[Idx], HostB5[Idx]);
  }

  // Slice and array sizes differ.
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
}
/// Device memory -> device memory slice with an explicit element count.
TEST_F(DeviceTest, SyncCopyD2DSliceByCount) {
  // All of A5 is paired with B7[2..7).
  auto B7Tail = DeviceB7.asSlice().drop_front(2);
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, B7Tail, 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], HostB7[Idx + 2]);
  }

  // Two-element prefix into a slice that had its last three elements dropped.
  auto B7Head = DeviceB7.asSlice().drop_back(3);
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, B7Head, 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);
  }

  // A count of 7 exceeds at least one side.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
}
/// Device memory -> device memory slice without an explicit element count.
TEST_F(DeviceTest, SyncCopyD2DSlice) {
  // The first five elements of B7 match the size of A5.
  auto B7FirstFive = DeviceB7.asSlice().drop_back(2);
  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, B7FirstFive));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], HostB7[Idx]);
  }

  // Array and slice sizes differ.
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
}
/// Device memory slice -> device memory slice with an explicit element count.
TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) {
  // Whole-slice copy: afterwards both backing arrays must agree.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);
  }

  // Two-element prefix copy.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
  for (int Idx = 0; Idx != 2; ++Idx) {
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);
  }

  // A count of 7 exceeds at least one of the two slices.
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
}
/// Device memory slice -> device memory slice without an explicit count.
TEST_F(DeviceTest, SyncCopySliceD2DSlice) {
  // Equal sizes succeed and leave both backing arrays in agreement.
  EXPECT_NO_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
  for (int Idx = 0; Idx != 5; ++Idx) {
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);
  }

  // Mismatched sizes are rejected in either direction.
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
  EXPECT_ERROR(
      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
}
} // namespace

View File

@@ -1,478 +0,0 @@
//===-- ExecutorTest.cpp - Tests for Executor -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the unit tests for Executor code.
///
//===----------------------------------------------------------------------===//
#include <cstdlib>
#include <cstring>
#include "streamexecutor/Executor.h"
#include "streamexecutor/PlatformInterfaces.h"
#include "gtest/gtest.h"
namespace {
namespace se = ::streamexecutor;
/// Mock platform executor used by the tests in this file.
///
/// "Device" memory is plain host memory here: allocation and release go
/// through std::malloc / std::free, and every synchronous copy is a
/// std::memcpy between the raw handles at the requested byte offsets. Stream
/// creation always returns an error, and host-memory registration and
/// unregistration simply report success.
class MockPlatformExecutor : public se::PlatformExecutor {
public:
  ~MockPlatformExecutor() override = default;

  std::string getName() const override { return "MockPlatformExecutor"; }

  /// Streams are not supported by this mock; always returns an error.
  se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
  createStream() override {
    return se::make_error("not implemented");
  }

  /// Wraps a freshly malloc'ed buffer of ByteCount bytes as device memory.
  se::Expected<se::GlobalDeviceMemoryBase>
  allocateDeviceMemory(size_t ByteCount) override {
    void *Handle = std::malloc(ByteCount);
    return se::GlobalDeviceMemoryBase(Handle);
  }

  /// Releases the handle of Memory with std::free.
  se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
    // getHandle() yields a const pointer; std::free needs a mutable one.
    void *Handle = const_cast<void *>(Memory.getHandle());
    std::free(Handle);
    return se::Error::success();
  }

  /// Host allocations are served directly by std::malloc.
  se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
    return std::malloc(ByteCount);
  }

  /// Releases host memory with std::free.
  se::Error freeHostMemory(void *Memory) override {
    std::free(Memory);
    return se::Error::success();
  }

  /// Registration does nothing in the mock and reports success.
  se::Error registerHostMemory(void *, size_t) override {
    return se::Error::success();
  }

  /// Unregistration does nothing in the mock and reports success.
  se::Error unregisterHostMemory(void *) override {
    return se::Error::success();
  }

  /// Copies ByteCount bytes from the device handle (plus SrcByteOffset) to
  /// HostDst (plus DstByteOffset).
  se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
                               size_t SrcByteOffset, void *HostDst,
                               size_t DstByteOffset,
                               size_t ByteCount) override {
    const char *From =
        static_cast<const char *>(DeviceSrc.getHandle()) + SrcByteOffset;
    char *To = static_cast<char *>(HostDst) + DstByteOffset;
    std::memcpy(To, From, ByteCount);
    return se::Error::success();
  }

  /// Copies ByteCount bytes from HostSrc (plus SrcByteOffset) to the device
  /// handle (plus DstByteOffset).
  se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
                               se::GlobalDeviceMemoryBase DeviceDst,
                               size_t DstByteOffset,
                               size_t ByteCount) override {
    const char *From = static_cast<const char *>(HostSrc) + SrcByteOffset;
    // The handle is exposed as const; cast it away to write through it.
    char *To = static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
               DstByteOffset;
    std::memcpy(To, From, ByteCount);
    return se::Error::success();
  }

  /// Copies ByteCount bytes between two device handles at the given byte
  /// offsets.
  se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
                               size_t DstByteOffset,
                               const se::GlobalDeviceMemoryBase &DeviceSrc,
                               size_t SrcByteOffset,
                               size_t ByteCount) override {
    const char *From =
        static_cast<const char *>(DeviceSrc.getHandle()) + SrcByteOffset;
    // The handle is exposed as const; cast it away to write through it.
    char *To = static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
               DstByteOffset;
    std::memcpy(To, From, ByteCount);
    return se::Error::success();
  }
};
/// Fixture shared by every test in this file.
///
/// Holds four host arrays that are wrapped as device memory, two additional
/// plain host arrays, and an se::Executor that sits on top of a
/// MockPlatformExecutor. Members are initialized in declaration order, so the
/// backing arrays exist before the device-memory wrappers that point at them
/// and PExecutor exists before the Executor that is given its address.
class ExecutorTest : public ::testing::Test {
public:
  ExecutorTest()
      : DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
        DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
        DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
        DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
        Executor(&PExecutor) {}

  // Host arrays acting as the backing store for the device memory below.
  int HostA5[5] = {0, 1, 2, 3, 4};
  int HostB5[5] = {5, 6, 7, 8, 9};
  int HostA7[7] = {10, 11, 12, 13, 14, 15, 16};
  int HostB7[7] = {17, 18, 19, 20, 21, 22, 23};

  // Device-memory views over the arrays above (5 or 7 elements each).
  se::GlobalDeviceMemory<int> DeviceA5;
  se::GlobalDeviceMemory<int> DeviceB5;
  se::GlobalDeviceMemory<int> DeviceA7;
  se::GlobalDeviceMemory<int> DeviceB7;

  // Arrays used as genuine host-side buffers in the copy tests.
  int Host5[5] = {24, 25, 26, 27, 28};
  int Host7[7] = {29, 30, 31, 32, 33, 34, 35};

  // The mock platform layer and the executor under test built on top of it.
  MockPlatformExecutor PExecutor;
  se::Executor Executor;
};
/// Expects that the se::Error produced by E is in the success state.
///
/// The value is moved into a local and consumed after the check, mirroring
/// EXPECT_ERROR. Assuming se::Error follows llvm::Error's must-be-handled
/// contract, this lets an unexpected failure be reported by gtest instead of
/// leaving an unhandled error behind when the temporary is destroyed.
#define EXPECT_NO_ERROR(E) \
  do { \
    se::Error ErrorUnderTest_ = (E); \
    EXPECT_FALSE(static_cast<bool>(ErrorUnderTest_)); \
    consumeError(std::move(ErrorUnderTest_)); \
  } while (false)

/// Expects that the se::Error produced by E is in the failure state, then
/// consumes it so that it counts as handled.
///
/// The local deliberately avoids a double underscore in its name; identifiers
/// containing "__" are reserved for the implementation.
#define EXPECT_ERROR(E) \
  do { \
    se::Error ErrorUnderTest_ = (E); \
    EXPECT_TRUE(static_cast<bool>(ErrorUnderTest_)); \
    consumeError(std::move(ErrorUnderTest_)); \
  } while (false)
// Bring the LLVM array-view types into scope under their short names; the copy
// tests below use them to wrap the fixture's host arrays.
using llvm::ArrayRef;
using llvm::MutableArrayRef;
// Allocates ten ints of device memory through the executor and frees them
// again, expecting both steps to succeed.
TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) {
  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
      Executor.allocateDeviceMemory<int>(10);
  // Fatal assertion: the dereference below is only valid when the allocation
  // succeeded, so the test must stop here on failure.
  ASSERT_TRUE(static_cast<bool>(MaybeMemory));
  EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory));
}
// Allocates ten ints of host memory through the executor and frees them again,
// expecting both steps to succeed.
TEST_F(ExecutorTest, AllocateAndFreeHostMemory) {
  se::Expected<int *> MaybeMemory = Executor.allocateHostMemory<int>(10);
  // Fatal assertion: the dereference below is only valid when the allocation
  // succeeded, so the test must stop here on failure.
  ASSERT_TRUE(static_cast<bool>(MaybeMemory));
  EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory));
}
// Registers a ten-element host buffer with the executor and unregisters it
// again, expecting both calls to succeed.
TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) {
  std::vector<int> Buffer(10);

  EXPECT_NO_ERROR(Executor.registerHostMemory(Buffer.data(), 10));
  EXPECT_NO_ERROR(Executor.unregisterHostMemory(Buffer.data()));
}
// D2H tests
// D2H copy of whole device memory into a MutableArrayRef with an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) {
  // Full copy: all five elements of DeviceA5 must show up in Host5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Partial copy: only the first two elements of DeviceB5 are checked.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
}
// D2H copy of whole device memory into a MutableArrayRef without an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRef) {
  // Equal sizes (5 and 5): the copy succeeds and Host5 matches HostA5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Differing sizes (7 vs 5 and 5 vs 7) are expected to produce an error.
  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));

  EXPECT_ERROR(
      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
}
// D2H copy of whole device memory into a raw host pointer.
TEST_F(ExecutorTest, SyncCopyD2HToPointer) {
  // Five elements from DeviceA5 must arrive in Host5.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host5, 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Asking for seven elements of the 5-element DeviceA5 is expected to fail.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host7, 7));
}
// D2H copy of a device memory slice into a MutableArrayRef with an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
  // Skip the first element on both sides and copy the remaining four.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(
      DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
  for (int Idx = 1; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Copy two elements out of DeviceB5 with its last element dropped.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
                                              MutableArrayRef<int>(Host5), 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice(),
                                           MutableArrayRef<int>(Host5), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
                                           MutableArrayRef<int>(Host7), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
                                           MutableArrayRef<int>(Host5), 7));
}
// D2H copy of a device memory slice into a MutableArrayRef without an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRef) {
  // slice(1, 5) of DeviceA7 is copied into Host5, so Host5[Idx] must equal
  // HostA7[Idx + 1].
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
                                              MutableArrayRef<int>(Host5)));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA7[Idx + 1], Host5[Idx]);

  // Slice and array of different lengths are expected to produce an error.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
                                           MutableArrayRef<int>(Host5)));

  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
                                           MutableArrayRef<int>(Host7)));
}
// D2H copy of a device memory slice into a raw host pointer.
TEST_F(ExecutorTest, SyncCopyD2HSliceToPointer) {
  // Skip the first element on both sides and copy the remaining four.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
                                              Host5 + 1, 4));
  for (int Idx = 1; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Asking for seven elements of the 5-element slice is expected to fail.
  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
}
// H2D tests
// H2D copy from an ArrayRef into whole device memory with an explicit element
// count.
TEST_F(ExecutorTest, SyncCopyH2DToArrayRefByCount) {
  // Full copy: afterwards the backing array HostA5 must match Host5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Partial copy: only the first two elements of HostB5 are checked.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));

  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));

  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
}
// H2D copy from an ArrayRef into whole device memory without an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyH2DToArrayRef) {
  // Equal sizes (5 and 5): the copy succeeds and HostA5 matches Host5.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Differing sizes (5 vs 7 and 7 vs 5) are expected to produce an error.
  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));

  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
}
// H2D copy from a raw host pointer into whole device memory.
TEST_F(ExecutorTest, SyncCopyH2DToPointer) {
  // Five elements from Host5 must arrive in DeviceA5's backing array.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5, 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Seven elements into the 5-element DeviceA5 is expected to fail.
  EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5, 7));
}
// H2D copy from an ArrayRef into a device memory slice with an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRefByCount) {
  // Skip the first element on both sides and copy the remaining four.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
      ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
  for (int Idx = 1; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Copy two elements into DeviceB5 with its last element dropped.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostB5[Idx], Host5[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
}
// H2D copy from an ArrayRef into a device memory slice without an explicit
// element count.
TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRef) {
  // Equal sizes (5 and 5): the copy succeeds and HostA5 matches Host5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Differing sizes (5 vs 7 and 7 vs 5) are expected to produce an error.
  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));

  EXPECT_ERROR(
      Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
}
// H2D copy from a raw host pointer into a device memory slice.
TEST_F(ExecutorTest, SyncCopyH2DSliceToPointer) {
  // Five elements from Host5 must arrive in DeviceA5's backing array.
  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], Host5[Idx]);

  // Seven elements into the 5-element slice is expected to fail.
  EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
}
// D2D tests
// D2D copy between two whole device memories with an explicit element count.
TEST_F(ExecutorTest, SyncCopyD2DByCount) {
  // Full 5-element copy: afterwards both backing arrays must agree.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);

  // Partial copy: only the first two elements of the 7-element arrays are
  // checked.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
}
// D2D copy between two whole device memories without an explicit element
// count.
TEST_F(ExecutorTest, SyncCopyD2D) {
  // Equal sizes (5 and 5): afterwards both backing arrays must agree.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);

  // Differing sizes (7 vs 5 and 5 vs 7) are expected to produce an error.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7));
}
// D2D copy with a slice as the first operand, whole device memory as the
// second, and an explicit element count.
TEST_F(ExecutorTest, SyncCopySliceD2DByCount) {
  // DeviceA5 minus its first element against DeviceB5: HostB5[Idx] must equal
  // HostA5[Idx + 1] for the four copied elements.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1),
                                              DeviceB5, 4));
  for (int Idx = 0; Idx < 4; ++Idx)
    EXPECT_EQ(HostA5[Idx + 1], HostB5[Idx]);

  // Two elements with DeviceA7's last element dropped from the slice.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1),
                                              DeviceB7, 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
}
// D2D copy with a slice as the first operand and whole device memory as the
// second, without an explicit element count.
TEST_F(ExecutorTest, SyncCopySliceD2D) {
  // The first five elements of DeviceA7 against the 5-element DeviceB5.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA7[Idx], HostB5[Idx]);

  // Operands of different lengths (6 vs 5 and 4 vs 7) are expected to produce
  // an error.
  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
}
// D2D copy with whole device memory as the first operand, a slice as the
// second, and an explicit element count.
TEST_F(ExecutorTest, SyncCopyD2DSliceByCount) {
  // DeviceA5 against DeviceB7 minus its first two elements: HostB7[Idx + 2]
  // must equal HostA5[Idx].
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
      DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], HostB7[Idx + 2]);

  // Two elements with DeviceB7's last three elements dropped from the slice.
  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
      DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
}
// D2D copy with whole device memory as the first operand and a slice as the
// second, without an explicit element count.
TEST_F(ExecutorTest, SyncCopyD2DSlice) {
  // DeviceA5 against the first five elements of DeviceB7.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], HostB7[Idx]);

  // Differing sizes (7 vs 5 and 5 vs 7) are expected to produce an error.
  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));

  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
}
// D2D copy between two slices with an explicit element count.
TEST_F(ExecutorTest, SyncCopySliceD2DSliceByCount) {
  // Full 5-element copy: afterwards both backing arrays must agree.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);

  // Partial copy: only the first two elements of the 7-element arrays are
  // checked.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
  for (int Idx = 0; Idx < 2; ++Idx)
    EXPECT_EQ(HostA7[Idx], HostB7[Idx]);

  // Each 7-element request below involves at least one 5-element side and is
  // expected to be rejected.
  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
}
// D2D copy between two slices without an explicit element count.
TEST_F(ExecutorTest, SyncCopySliceD2DSlice) {
  // Equal sizes (5 and 5): afterwards both backing arrays must agree.
  EXPECT_NO_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
  for (int Idx = 0; Idx < 5; ++Idx)
    EXPECT_EQ(HostA5[Idx], HostB5[Idx]);

  // Differing sizes (7 vs 5 and 5 vs 7) are expected to produce an error.
  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));

  EXPECT_ERROR(
      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
}
} // namespace

View File

@@ -14,7 +14,7 @@
#include <cassert>
#include "streamexecutor/Executor.h"
#include "streamexecutor/Device.h"
#include "streamexecutor/Kernel.h"
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
@@ -27,7 +27,7 @@ namespace {
namespace se = ::streamexecutor;
// An Executor that returns a dummy KernelInterface.
// A Device that returns a dummy KernelInterface.
//
// During construction it creates a unique_ptr to a dummy KernelInterface and it
// also stores a separate copy of the raw pointer that is stored by that
@@ -39,10 +39,10 @@ namespace se = ::streamexecutor;
// object. The raw pointer copy can then be used to identify the unique_ptr in
// its new location (by comparing the raw pointer with unique_ptr::get), to
// verify that the unique_ptr ended up where it was supposed to be.
class MockExecutor : public se::Executor {
class MockDevice : public se::Device {
public:
MockExecutor()
: se::Executor(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
MockDevice()
: se::Device(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
Raw(Unique.get()) {}
// Moves the unique pointer into the returned se::Expected instance.
@@ -51,7 +51,7 @@ public:
// out.
se::Expected<std::unique_ptr<se::KernelInterface>>
getKernelImplementation(const se::MultiKernelLoaderSpec &) override {
assert(Unique && "MockExecutor getKernelImplementation should not be "
assert(Unique && "MockDevice getKernelImplementation should not be "
"called more than once");
return std::move(Unique);
}
@@ -79,15 +79,15 @@ TYPED_TEST_CASE(GetImplementationTest, GetImplementationTypes);
// Tests that the kernel create functions properly fetch the implementation
// pointers for the kernel objects they construct from the passed-in
// Executor objects.
// Device objects.
TYPED_TEST(GetImplementationTest, SetImplementationDuringCreate) {
se::MultiKernelLoaderSpec Spec;
MockExecutor MockExecutor;
MockDevice Dev;
auto MaybeKernel = TypeParam::create(&MockExecutor, Spec);
auto MaybeKernel = TypeParam::create(&Dev, Spec);
EXPECT_TRUE(static_cast<bool>(MaybeKernel));
se::KernelInterface *Implementation = MaybeKernel->getImplementation();
EXPECT_EQ(MockExecutor.getRaw(), Implementation);
EXPECT_EQ(Dev.getRaw(), Implementation);
}
} // namespace

View File

@@ -14,7 +14,7 @@
#include <cstring>
#include "streamexecutor/Executor.h"
#include "streamexecutor/Device.h"
#include "streamexecutor/Kernel.h"
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
@@ -26,14 +26,14 @@ namespace {
namespace se = ::streamexecutor;
/// Mock PlatformExecutor that performs asynchronous memcpy operations by
/// Mock PlatformDevice that performs asynchronous memcpy operations by
/// ignoring the stream argument and calling std::memcpy on device memory
/// handles.
class MockPlatformExecutor : public se::PlatformExecutor {
class MockPlatformDevice : public se::PlatformDevice {
public:
~MockPlatformExecutor() override {}
~MockPlatformDevice() override {}
std::string getName() const override { return "MockPlatformExecutor"; }
std::string getName() const override { return "MockPlatformDevice"; }
se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
createStream() override {
@@ -83,7 +83,7 @@ public:
DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
Stream(llvm::make_unique<se::PlatformStreamHandle>(&PExecutor)) {}
Stream(llvm::make_unique<se::PlatformStreamHandle>(&PDevice)) {}
protected:
// Device memory is backed by host arrays.
@@ -100,7 +100,7 @@ protected:
int Host5[5];
int Host7[7];
MockPlatformExecutor PExecutor;
MockPlatformDevice PDevice;
se::Stream Stream;
};