| #pragma once |
#include <ATen/core/TensorAccessor.h>
#include <ATen/NumericUtils.h>
#include <c10/util/irange.h>

#include <algorithm>
#include <utility>
#include <vector>
| |
| namespace at::native { |
| |
| #ifdef CPU_CAPABILITY |
| inline namespace CPU_CAPABILITY { |
| #else |
| inline namespace DEFAULT { |
| #endif |
| |
| // Core topk loop, shared between CPU and QuantizedCPU |
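//
// Called once per batch of slices with the TensorIterator-style loop
// signature: `data[0]`/`data[1]` point at the output values/indices,
// `data[2]` at the input, `strides[i]` is operand i's stride across the
// `n` slices handled by this call, and the explicit `*_stride` arguments
// are the strides along the top-k dimension itself.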
| template <typename scalar_t, typename accscalar_t> |
| void topk_impl_loop( |
| const int64_t mode_values_stride, |
| const int64_t mode_indices_stride, |
| const int64_t tmp_values_stride, |
| const int64_t k, |
| const int64_t dim_size, |
| const bool largest, |
| const bool sorted, |
| char** data, const int64_t* strides, const int64_t n) { |
| |
  // If k is zero, the output values and indices are empty tensors,
  // so iterating over the other dims is pointless.
| if (k == 0) { |
| return; |
| } |
| using elem_t = std::pair<accscalar_t, int64_t>; |
| std::vector<elem_t> queue(dim_size); |
| for (const auto i : c10::irange(n)) { |
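    // 1-d accessor views over the i-th slice of each operand.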
| TensorAccessor<scalar_t, 1> mode_values( |
| reinterpret_cast<scalar_t*>(data[0] + i * strides[0]), |
| &k, &mode_values_stride); |
| TensorAccessor<int64_t, 1> mode_indices( |
| reinterpret_cast<int64_t*>(data[1] + i * strides[1]), |
| &k, &mode_indices_stride); |
    TensorAccessor<const scalar_t, 1> tmp_values(
        reinterpret_cast<const scalar_t*>(data[2] + i * strides[2]),
        &dim_size, &tmp_values_stride);
| |
    // partial_sort costs O(dim_size * log k), so it only pays off when k
    // is much smaller than the slice length; the 64x factor is a heuristic
    // cutoff. Otherwise use nth_element, which is O(dim_size) on average.
    const bool use_partial_sort = k * 64 <= dim_size;

    for (const auto j : c10::irange(dim_size)) {
      queue[j].first = tmp_values[j];
      queue[j].second = j;
    }
| |
    // Treat NaN as greater than any number, for NumPy compatibility: NaNs
    // surface in the result when largest=true and sort to the very end
    // when largest=false.
    const auto cmp_largest = [](const elem_t& x, const elem_t& y) -> bool {
      return (_isnan<accscalar_t>(x.first) && !_isnan<accscalar_t>(y.first)) || (x.first > y.first);
    };
    const auto cmp_smallest = [](const elem_t& x, const elem_t& y) -> bool {
      return (!_isnan<accscalar_t>(x.first) && _isnan<accscalar_t>(y.first)) || (x.first < y.first);
    };

    if (use_partial_sort) {
      // partial_sort leaves the first k elements fully sorted.
      if (largest) {
        std::partial_sort(queue.begin(), queue.begin() + k, queue.end(), cmp_largest);
      } else {
        std::partial_sort(queue.begin(), queue.begin() + k, queue.end(), cmp_smallest);
      }
    } else {
      // nth_element places the element at position k - 1 in its final
      // sorted position and partitions the rest around it, so when a
      // sorted result is requested only the first k - 1 elements still
      // need a full sort.
      if (largest) {
        std::nth_element(queue.begin(), queue.begin() + k - 1, queue.end(), cmp_largest);
        if (sorted) {
          std::sort(queue.begin(), queue.begin() + k - 1, cmp_largest);
        }
      } else {
        std::nth_element(queue.begin(), queue.begin() + k - 1, queue.end(), cmp_smallest);
        if (sorted) {
          std::sort(queue.begin(), queue.begin() + k - 1, cmp_smallest);
        }
      }
    }
| |
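    // Copy the selected top-k values and their original indices out.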
| for (const auto j : c10::irange(k)) { |
| mode_values[j] = queue[j].first; |
| mode_indices[j] = queue[j].second; |
| } |
| } |
| } |
| |
| } // namespace CPU_CAPABILITY |
| } // namespace at::native |