Blame - test/gpu_basic.cu - platform/external/eigen

blob: 4298da3bb6e12138cbd3e2e3ccb77e42e2df7c39 [file] [log] [blame]

Yi Kong	2aab794	2022-02-25 16:32:14 +0800	[diff] [blame]	1	// This file is part of Eigen, a lightweight C++ template library
				2	// for linear algebra.
				3	//
				4	// Copyright (C) 2015-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
				5	//
				6	// This Source Code Form is subject to the terms of the Mozilla
				7	// Public License v. 2.0. If a copy of the MPL was not distributed
				8	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
				9
				10	// workaround issue between gcc >= 4.7 and cuda 5.5
				11	#if (defined __GNUC__) && (__GNUC__>4 \|\| __GNUC_MINOR__>=7)
				12	#undef _GLIBCXX_ATOMIC_BUILTINS
				13	#undef _GLIBCXX_USE_INT128
				14	#endif
				15
				16	#define EIGEN_TEST_NO_LONGDOUBLE
				17	#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
				18
				19	#include "main.h"
				20	#include "gpu_common.h"
				21
				22	// Check that dense modules can be properly parsed by nvcc
				23	#include <Eigen/Dense>
				24
				25	// struct Foo{
				26	// EIGEN_DEVICE_FUNC
				27	// void operator()(int i, const float* mats, float* vecs) const {
				28	// using namespace Eigen;
				29	// // Matrix3f M(data);
				30	// // Vector3f x(data+9);
				31	// // Map<Vector3f>(data+9) = M.inverse() * x;
				32	// Matrix3f M(mats+i/16);
				33	// Vector3f x(vecs+i*3);
				34	// // using std::min;
				35	// // using std::sqrt;
				36	// Map<Vector3f>(vecs+i3) << x.minCoeff(), 1, 2;// / x.dot(x);//(M.inverse() x) / x.x();
				37	// //x = x2 + x.y() x + x * x.maxCoeff() - x / x.sum();
				38	// }
				39	// };
				40
				41	template<typename T>
				42	struct coeff_wise {
				43	EIGEN_DEVICE_FUNC
				44	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				45	{
				46	using namespace Eigen;
				47	T x1(in+i);
				48	T x2(in+i+1);
				49	T x3(in+i+2);
				50	Map<T> res(out+i*T::MaxSizeAtCompileTime);
				51
				52	res.array() += (in[0] * x1 + x2).array() * x3.array();
				53	}
				54	};
				55
				56	template<typename T>
				57	struct complex_sqrt {
				58	EIGEN_DEVICE_FUNC
				59	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				60	{
				61	using namespace Eigen;
				62	typedef typename T::Scalar ComplexType;
				63	typedef typename T::Scalar::value_type ValueType;
				64	const int num_special_inputs = 18;
				65
				66	if (i == 0) {
				67	const ValueType nan = std::numeric_limits<ValueType>::quiet_NaN();
				68	typedef Eigen::Vector<ComplexType, num_special_inputs> SpecialInputs;
				69	SpecialInputs special_in;
				70	special_in.setZero();
				71	int idx = 0;
				72	special_in[idx++] = ComplexType(0, 0);
				73	special_in[idx++] = ComplexType(-0, 0);
				74	special_in[idx++] = ComplexType(0, -0);
				75	special_in[idx++] = ComplexType(-0, -0);
				76	// GCC's fallback sqrt implementation fails for inf inputs.
				77	// It is called when _GLIBCXX_USE_C99_COMPLEX is false or if
				78	// clang includes the GCC header (which temporarily disables
				79	// _GLIBCXX_USE_C99_COMPLEX)
				80	#if !defined(_GLIBCXX_COMPLEX) \|\| \
				81	(_GLIBCXX_USE_C99_COMPLEX && !defined(__CLANG_CUDA_WRAPPERS_COMPLEX))
				82	const ValueType inf = std::numeric_limits<ValueType>::infinity();
				83	special_in[idx++] = ComplexType(1.0, inf);
				84	special_in[idx++] = ComplexType(nan, inf);
				85	special_in[idx++] = ComplexType(1.0, -inf);
				86	special_in[idx++] = ComplexType(nan, -inf);
				87	special_in[idx++] = ComplexType(-inf, 1.0);
				88	special_in[idx++] = ComplexType(inf, 1.0);
				89	special_in[idx++] = ComplexType(-inf, -1.0);
				90	special_in[idx++] = ComplexType(inf, -1.0);
				91	special_in[idx++] = ComplexType(-inf, nan);
				92	special_in[idx++] = ComplexType(inf, nan);
				93	#endif
				94	special_in[idx++] = ComplexType(1.0, nan);
				95	special_in[idx++] = ComplexType(nan, 1.0);
				96	special_in[idx++] = ComplexType(nan, -1.0);
				97	special_in[idx++] = ComplexType(nan, nan);
				98
				99	Map<SpecialInputs> special_out(out);
				100	special_out = special_in.cwiseSqrt();
				101	}
				102
				103	T x1(in + i);
				104	Map<T> res(out + num_special_inputs + i*T::MaxSizeAtCompileTime);
				105	res = x1.cwiseSqrt();
				106	}
				107	};
				108
				109	template<typename T>
				110	struct complex_operators {
				111	EIGEN_DEVICE_FUNC
				112	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				113	{
				114	using namespace Eigen;
				115	typedef typename T::Scalar ComplexType;
				116	typedef typename T::Scalar::value_type ValueType;
				117	const int num_scalar_operators = 24;
				118	const int num_vector_operators = 23; // no unary + operator.
				119	int out_idx = i * (num_scalar_operators + num_vector_operators * T::MaxSizeAtCompileTime);
				120
				121	// Scalar operators.
				122	const ComplexType a = in[i];
				123	const ComplexType b = in[i + 1];
				124
				125	out[out_idx++] = +a;
				126	out[out_idx++] = -a;
				127
				128	out[out_idx++] = a + b;
				129	out[out_idx++] = a + numext::real(b);
				130	out[out_idx++] = numext::real(a) + b;
				131	out[out_idx++] = a - b;
				132	out[out_idx++] = a - numext::real(b);
				133	out[out_idx++] = numext::real(a) - b;
				134	out[out_idx++] = a * b;
				135	out[out_idx++] = a * numext::real(b);
				136	out[out_idx++] = numext::real(a) * b;
				137	out[out_idx++] = a / b;
				138	out[out_idx++] = a / numext::real(b);
				139	out[out_idx++] = numext::real(a) / b;
				140
				141	out[out_idx] = a; out[out_idx++] += b;
				142	out[out_idx] = a; out[out_idx++] -= b;
				143	out[out_idx] = a; out[out_idx++] *= b;
				144	out[out_idx] = a; out[out_idx++] /= b;
				145
				146	const ComplexType true_value = ComplexType(ValueType(1), ValueType(0));
				147	const ComplexType false_value = ComplexType(ValueType(0), ValueType(0));
				148	out[out_idx++] = (a == b ? true_value : false_value);
				149	out[out_idx++] = (a == numext::real(b) ? true_value : false_value);
				150	out[out_idx++] = (numext::real(a) == b ? true_value : false_value);
				151	out[out_idx++] = (a != b ? true_value : false_value);
				152	out[out_idx++] = (a != numext::real(b) ? true_value : false_value);
				153	out[out_idx++] = (numext::real(a) != b ? true_value : false_value);
				154
				155	// Vector versions.
				156	T x1(in + i);
				157	T x2(in + i + 1);
				158	const int res_size = T::MaxSizeAtCompileTime * num_scalar_operators;
				159	const int size = T::MaxSizeAtCompileTime;
				160	int block_idx = 0;
				161
				162	Map<VectorX<ComplexType>> res(out + out_idx, res_size);
				163	res.segment(block_idx, size) = -x1;
				164	block_idx += size;
				165
				166	res.segment(block_idx, size) = x1 + x2;
				167	block_idx += size;
				168	res.segment(block_idx, size) = x1 + x2.real();
				169	block_idx += size;
				170	res.segment(block_idx, size) = x1.real() + x2;
				171	block_idx += size;
				172	res.segment(block_idx, size) = x1 - x2;
				173	block_idx += size;
				174	res.segment(block_idx, size) = x1 - x2.real();
				175	block_idx += size;
				176	res.segment(block_idx, size) = x1.real() - x2;
				177	block_idx += size;
				178	res.segment(block_idx, size) = x1.array() * x2.array();
				179	block_idx += size;
				180	res.segment(block_idx, size) = x1.array() * x2.real().array();
				181	block_idx += size;
				182	res.segment(block_idx, size) = x1.real().array() * x2.array();
				183	block_idx += size;
				184	res.segment(block_idx, size) = x1.array() / x2.array();
				185	block_idx += size;
				186	res.segment(block_idx, size) = x1.array() / x2.real().array();
				187	block_idx += size;
				188	res.segment(block_idx, size) = x1.real().array() / x2.array();
				189	block_idx += size;
				190
				191	res.segment(block_idx, size) = x1; res.segment(block_idx, size) += x2;
				192	block_idx += size;
				193	res.segment(block_idx, size) = x1; res.segment(block_idx, size) -= x2;
				194	block_idx += size;
				195	res.segment(block_idx, size) = x1; res.segment(block_idx, size).array() *= x2.array();
				196	block_idx += size;
				197	res.segment(block_idx, size) = x1; res.segment(block_idx, size).array() /= x2.array();
				198	block_idx += size;
				199
				200	const T true_vector = T::Constant(true_value);
				201	const T false_vector = T::Constant(false_value);
				202	res.segment(block_idx, size) = (x1 == x2 ? true_vector : false_vector);
				203	block_idx += size;
				204	// Mixing types in equality comparison does not work.
				205	// res.segment(block_idx, size) = (x1 == x2.real() ? true_vector : false_vector);
				206	// block_idx += size;
				207	// res.segment(block_idx, size) = (x1.real() == x2 ? true_vector : false_vector);
				208	// block_idx += size;
				209	res.segment(block_idx, size) = (x1 != x2 ? true_vector : false_vector);
				210	block_idx += size;
				211	// res.segment(block_idx, size) = (x1 != x2.real() ? true_vector : false_vector);
				212	// block_idx += size;
				213	// res.segment(block_idx, size) = (x1.real() != x2 ? true_vector : false_vector);
				214	// block_idx += size;
				215	}
				216	};
				217
				218	template<typename T>
				219	struct replicate {
				220	EIGEN_DEVICE_FUNC
				221	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				222	{
				223	using namespace Eigen;
				224	T x1(in+i);
				225	int step = x1.size() * 4;
				226	int stride = 3 * step;
				227
				228	typedef Map<Array<typename T::Scalar,Dynamic,Dynamic> > MapType;
				229	MapType(out+istride+0step, x1.rows()2, x1.cols()2) = x1.replicate(2,2);
				230	MapType(out+istride+1step, x1.rows()3, x1.cols()) = in[i] x1.colwise().replicate(3);
				231	MapType(out+istride+2step, x1.rows(), x1.cols()3) = in[i] x1.rowwise().replicate(3);
				232	}
				233	};
				234
				235	template<typename T>
				236	struct alloc_new_delete {
				237	EIGEN_DEVICE_FUNC
				238	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				239	{
				240	int offset = 2iT::MaxSizeAtCompileTime;
				241	T* x = new T(in + offset);
				242	Eigen::Map<T> u(out + offset);
				243	u = *x;
				244	delete x;
				245
				246	offset += T::MaxSizeAtCompileTime;
				247	T* y = new T[1];
				248	y[0] = T(in + offset);
				249	Eigen::Map<T> v(out + offset);
				250	v = y[0];
				251	delete[] y;
				252	}
				253	};
				254
				255	template<typename T>
				256	struct redux {
				257	EIGEN_DEVICE_FUNC
				258	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				259	{
				260	using namespace Eigen;
				261	int N = 10;
				262	T x1(in+i);
				263	out[i*N+0] = x1.minCoeff();
				264	out[i*N+1] = x1.maxCoeff();
				265	out[i*N+2] = x1.sum();
				266	out[i*N+3] = x1.prod();
				267	out[i*N+4] = x1.matrix().squaredNorm();
				268	out[i*N+5] = x1.matrix().norm();
				269	out[i*N+6] = x1.colwise().sum().maxCoeff();
				270	out[i*N+7] = x1.rowwise().maxCoeff().sum();
				271	out[i*N+8] = x1.matrix().colwise().squaredNorm().sum();
				272	}
				273	};
				274
				275	template<typename T1, typename T2>
				276	struct prod_test {
				277	EIGEN_DEVICE_FUNC
				278	void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
				279	{
				280	using namespace Eigen;
				281	typedef Matrix<typename T1::Scalar, T1::RowsAtCompileTime, T2::ColsAtCompileTime> T3;
				282	T1 x1(in+i);
				283	T2 x2(in+i+1);
				284	Map<T3> res(out+i*T3::MaxSizeAtCompileTime);
				285	res += in[i] * x1 * x2;
				286	}
				287	};
				288
				289	template<typename T1, typename T2>
				290	struct diagonal {
				291	EIGEN_DEVICE_FUNC
				292	void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
				293	{
				294	using namespace Eigen;
				295	T1 x1(in+i);
				296	Map<T2> res(out+i*T2::MaxSizeAtCompileTime);
				297	res += x1.diagonal();
				298	}
				299	};
				300
				301	template<typename T>
				302	struct eigenvalues_direct {
				303	EIGEN_DEVICE_FUNC
				304	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				305	{
				306	using namespace Eigen;
				307	typedef Matrix<typename T::Scalar, T::RowsAtCompileTime, 1> Vec;
				308	T M(in+i);
				309	Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
				310	T A = M*M.adjoint();
				311	SelfAdjointEigenSolver<T> eig;
				312	eig.computeDirect(A);
				313	res = eig.eigenvalues();
				314	}
				315	};
				316
				317	template<typename T>
				318	struct eigenvalues {
				319	EIGEN_DEVICE_FUNC
				320	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				321	{
				322	using namespace Eigen;
				323	typedef Matrix<typename T::Scalar, T::RowsAtCompileTime, 1> Vec;
				324	T M(in+i);
				325	Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
				326	T A = M*M.adjoint();
				327	SelfAdjointEigenSolver<T> eig;
				328	eig.compute(A);
				329	res = eig.eigenvalues();
				330	}
				331	};
				332
				333	template<typename T>
				334	struct matrix_inverse {
				335	EIGEN_DEVICE_FUNC
				336	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				337	{
				338	using namespace Eigen;
				339	T M(in+i);
				340	Map<T> res(out+i*T::MaxSizeAtCompileTime);
				341	res = M.inverse();
				342	}
				343	};
				344
				345	template<typename T>
				346	struct numeric_limits_test {
				347	EIGEN_DEVICE_FUNC
				348	void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
				349	{
				350	EIGEN_UNUSED_VARIABLE(in)
				351	int out_idx = i * 5;
				352	out[out_idx++] = numext::numeric_limits<float>::epsilon();
				353	out[out_idx++] = (numext::numeric_limits<float>::max)();
				354	out[out_idx++] = (numext::numeric_limits<float>::min)();
				355	out[out_idx++] = numext::numeric_limits<float>::infinity();
				356	out[out_idx++] = numext::numeric_limits<float>::quiet_NaN();
				357	}
				358	};
				359
				360	template<typename Type1, typename Type2>
				361	bool verifyIsApproxWithInfsNans(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only
				362	{
				363	if (a.rows() != b.rows()) {
				364	return false;
				365	}
				366	if (a.cols() != b.cols()) {
				367	return false;
				368	}
				369	for (Index r = 0; r < a.rows(); ++r) {
				370	for (Index c = 0; c < a.cols(); ++c) {
				371	if (a(r, c) != b(r, c)
				372	&& !((numext::isnan)(a(r, c)) && (numext::isnan)(b(r, c)))
				373	&& !test_isApprox(a(r, c), b(r, c))) {
				374	return false;
				375	}
				376	}
				377	}
				378	return true;
				379	}
				380
				381	template<typename Kernel, typename Input, typename Output>
				382	void test_with_infs_nans(const Kernel& ker, int n, const Input& in, Output& out)
				383	{
				384	Output out_ref, out_gpu;
				385	#if !defined(EIGEN_GPU_COMPILE_PHASE)
				386	out_ref = out_gpu = out;
				387	#else
				388	EIGEN_UNUSED_VARIABLE(in);
				389	EIGEN_UNUSED_VARIABLE(out);
				390	#endif
				391	run_on_cpu (ker, n, in, out_ref);
				392	run_on_gpu(ker, n, in, out_gpu);
				393	#if !defined(EIGEN_GPU_COMPILE_PHASE)
				394	verifyIsApproxWithInfsNans(out_ref, out_gpu);
				395	#endif
				396	}
				397
				398	EIGEN_DECLARE_TEST(gpu_basic)
				399	{
				400	ei_test_init_gpu();
				401
				402	int nthreads = 100;
				403	Eigen::VectorXf in, out;
				404	Eigen::VectorXcf cfin, cfout;
				405
				406	#if !defined(EIGEN_GPU_COMPILE_PHASE)
				407	int data_size = nthreads * 512;
				408	in.setRandom(data_size);
				409	out.setConstant(data_size, -1);
				410	cfin.setRandom(data_size);
				411	cfout.setConstant(data_size, -1);
				412	#endif
				413
				414	CALL_SUBTEST( run_and_compare_to_gpu(coeff_wise<Vector3f>(), nthreads, in, out) );
				415	CALL_SUBTEST( run_and_compare_to_gpu(coeff_wise<Array44f>(), nthreads, in, out) );
				416
				417	#if !defined(EIGEN_USE_HIP)
				418	// FIXME
				419	// These subtests result in a compile failure on the HIP platform
				420	//
				421	// eigen-upstream/Eigen/src/Core/Replicate.h:61:65: error:
				422	// base class 'internal::dense_xpr_base<Replicate<Array<float, 4, 1, 0, 4, 1>, -1, -1> >::type'
				423	// (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor
				424	CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) );
				425	CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) );
				426
				427	// HIP does not support new/delete on device.
				428	CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) );
				429	#endif
				430
				431	CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) );
				432	CALL_SUBTEST( run_and_compare_to_gpu(redux<Matrix3f>(), nthreads, in, out) );
				433
				434	CALL_SUBTEST( run_and_compare_to_gpu(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) );
				435	CALL_SUBTEST( run_and_compare_to_gpu(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) );
				436
				437	CALL_SUBTEST( run_and_compare_to_gpu(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
				438	CALL_SUBTEST( run_and_compare_to_gpu(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );
				439
				440	CALL_SUBTEST( run_and_compare_to_gpu(matrix_inverse<Matrix2f>(), nthreads, in, out) );
				441	CALL_SUBTEST( run_and_compare_to_gpu(matrix_inverse<Matrix3f>(), nthreads, in, out) );
				442	CALL_SUBTEST( run_and_compare_to_gpu(matrix_inverse<Matrix4f>(), nthreads, in, out) );
				443
				444	CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues_direct<Matrix3f>(), nthreads, in, out) );
				445	CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues_direct<Matrix2f>(), nthreads, in, out) );
				446
				447	// Test std::complex.
				448	CALL_SUBTEST( run_and_compare_to_gpu(complex_operators<Vector3cf>(), nthreads, cfin, cfout) );
				449	CALL_SUBTEST( test_with_infs_nans(complex_sqrt<Vector3cf>(), nthreads, cfin, cfout) );
				450
				451	// numeric_limits
				452	CALL_SUBTEST( test_with_infs_nans(numeric_limits_test<Vector3f>(), 1, in, out) );
				453
				454	#if defined(__NVCC__)
				455	// FIXME
				456	// These subtests compiles only with nvcc and fail with HIPCC and clang-cuda
				457	CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix4f>(), nthreads, in, out) );
				458	typedef Matrix<float,6,6> Matrix6f;
				459	CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix6f>(), nthreads, in, out) );
				460	#endif
				461	}