| // Copyright 2016 The Gemmlowp Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
#include <unistd.h>
#ifdef __APPLE__
#include <sys/time.h>
#endif

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <vector>

#include "streams.h"
| |
| #define MUL_OFFSET (3) |
| #define ADD_OFFSET (100) |
| |
| using namespace gemmlowp::meta; |
| |
// Fills a row-major test input.
//
// All rows * stride bytes of `data` are first set to 255. Then, for each row,
// the bytes at row * stride + 0 .. row * stride + (elements - 1) are
// overwritten with 0, 1, 2, ... (the column index modulo 256).
void prepare_row_major_data(int rows, int elements, int stride, std::uint8_t* data) {
  const std::uint8_t kFiller = 255;
  const int total_bytes = rows * stride;
  int offset = 0;
  while (offset < total_bytes) {
    data[offset++] = kFiller;
  }

  for (int row = 0; row < rows; ++row) {
    std::uint8_t* row_start = data + row * stride;
    for (int column = 0; column < elements; ++column) {
      row_start[column] = static_cast<std::uint8_t>(column % 256);
    }
  }
}
| |
// Fills a column-major test input.
//
// All elements * stride bytes of `data` are first set to 255. Then, for each
// element index e, the first `columns` bytes starting at e * stride are
// overwritten with e modulo 256.
void prepare_column_major_data(int columns, int elements, int stride,
                               std::uint8_t* data) {
  const std::uint8_t kFiller = 255;
  const int total_bytes = elements * stride;
  int offset = 0;
  while (offset < total_bytes) {
    data[offset++] = kFiller;
  }

  for (int element = 0; element < elements; ++element) {
    std::uint8_t* line_start = data + element * stride;
    const std::uint8_t value = static_cast<std::uint8_t>(element % 256);
    for (int column = 0; column < columns; ++column) {
      line_start[column] = value;
    }
  }
}
| |
// Debug helper: writes the first rows * ceil(elements / 8) * 8 bytes of
// `result` to std::cout as decimal integers, each followed by one space, then
// ends the line and flushes the stream.
void print_out(std::uint8_t* result, int rows, int elements) {
  const int padded_elements = ((elements + 7) / 8) * 8;
  const int byte_count = rows * padded_elements;
  const std::uint8_t* cursor = result;
  for (int remaining = byte_count; remaining > 0; --remaining, ++cursor) {
    const int value = *cursor;
    std::cout << value << " ";
  }
  std::cout << std::endl;
  std::cout.flush();
}
| |
| bool check(std::uint8_t* result, int rows, int elements) { |
| int chunks = elements / 8; |
| int leftover = elements % 8; |
| for (int i = 0; i < chunks; ++i) { |
| int chunk_index = i * rows * 8; |
| int chunk_start_value = i * 8; |
| for (int j = 0; j < rows; ++j) { |
| for (int k = 0; k < 8; ++k) { |
| if (result[chunk_index + j * 8 + k] != chunk_start_value + k) { |
| return false; |
| } |
| } |
| } |
| } |
| |
| int leftover_index = chunks * rows * 8; |
| int leftover_start_value = chunks * 8; |
| for (int i = 0; i < rows; ++i) { |
| for (int j = 0; j < leftover; ++j) { |
| if (result[leftover_index + i * 8 + j] != leftover_start_value + j) { |
| return false; |
| } |
| } |
| } |
| |
| int expected_sum = |
| ((elements * (elements - 1)) / 2) * MUL_OFFSET + ADD_OFFSET; |
| int sums_offset = rows * ((elements + 7) / 8) * 8; |
| std::int32_t* sums = reinterpret_cast<std::int32_t*>(result + sums_offset); |
| for (int i = 0; i < rows; ++i) { |
| if (sums[i] != expected_sum) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| template <int lanes, int leftover> |
| void test_2(std::uint8_t* in, std::uint8_t* out) { |
| for (int elements = 8; elements < 64; elements += 8) { |
| int all_elements = elements + leftover; |
| for (int stride = all_elements; stride < all_elements + 4; ++stride) { |
| RowMajorWithSum params; |
| params.count = all_elements; |
| params.stride = stride; |
| params.multiplicative_sum_offset = MUL_OFFSET; |
| params.additive_sum_offset = ADD_OFFSET; |
| |
| prepare_row_major_data(lanes, all_elements, stride, in); |
| Stream<std::uint8_t, lanes, 8, leftover, RowMajorWithSum>::Pack(in, params, |
| out); |
| if (check(out, lanes, all_elements)) { |
| // std::cout << "Row: " << lanes << "x8x" << leftover << " : " |
| // << all_elements << "@" << stride << " -- OK" << |
| // std::endl; |
| } else { |
| std::cout << "Row: " << lanes << "x8x" << leftover << " : " |
| << all_elements << "@" << stride << " -- ERROR" << std::endl; |
| std::cout << "Exiting." << std::endl; |
| std::exit(1); |
| } |
| } |
| |
| for (int stride = lanes; stride < lanes + 4; ++stride) { |
| ColumnMajorWithSum params; |
| params.count = all_elements; |
| params.stride = stride; |
| params.multiplicative_sum_offset = MUL_OFFSET; |
| params.additive_sum_offset = ADD_OFFSET; |
| |
| prepare_column_major_data(lanes, all_elements, stride, in); |
| Stream<std::uint8_t, lanes, 8, leftover, ColumnMajorWithSum>::Pack(in, params, |
| out); |
| if (check(out, lanes, all_elements)) { |
| // std::cout << "Column: " << lanes << "x8x" << leftover << " : " |
| // << all_elements << "@" << stride << " -- OK" << |
| // std::endl; |
| } else { |
| std::cout << "Column: " << lanes << "x8x" << leftover << " : " |
| << all_elements << "@" << stride << " -- ERROR" << std::endl; |
| std::cout << "Exiting." << std::endl; |
| std::exit(1); |
| } |
| } |
| } |
| } |
| |
| template <int lanes> |
| void test(std::uint8_t* in, std::uint8_t* out) { |
| test_2<lanes, 0>(in, out); |
| test_2<lanes, 1>(in, out); |
| test_2<lanes, 2>(in, out); |
| test_2<lanes, 3>(in, out); |
| test_2<lanes, 4>(in, out); |
| test_2<lanes, 5>(in, out); |
| test_2<lanes, 6>(in, out); |
| test_2<lanes, 7>(in, out); |
| } |
| |
| int main() { |
| std::unique_ptr<std::uint8_t> in(new std::uint8_t[128 * 1024]); |
| std::unique_ptr<std::uint8_t> out(new std::uint8_t[128 * 1024]); |
| |
| test<1>(in.get(), out.get()); |
| test<2>(in.get(), out.get()); |
| test<3>(in.get(), out.get()); |
| test<4>(in.get(), out.get()); |
| test<5>(in.get(), out.get()); |
| test<6>(in.get(), out.get()); |
| test<7>(in.get(), out.get()); |
| test<8>(in.get(), out.get()); |
| |
| std::cout << "Ok." << std::endl; |
| return 0; |
| } |