| // Adapted from https://github.com/Alexhuszagh/rust-lexical. |
| |
| //! Building-blocks for arbitrary-precision math. |
| //! |
| //! These algorithms assume little-endian order for the large integer |
| //! buffers, so for a `vec![0, 1, 2, 3]`, `3` is the most significant limb, |
| //! and `0` is the least significant limb. |
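//!
//! For example, with 32-bit limbs, `vec![0, 1, 2, 3]` represents the
//! value `0 + 1*2^32 + 2*2^64 + 3*2^96` (a worked example for
//! illustration; 64-bit limbs use `2^64` in place of `2^32`).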
| |
| use super::large_powers; |
| use super::num::*; |
| use super::small_powers::*; |
| use alloc::vec::Vec; |
| use core::{cmp, iter, mem}; |
| |
| // ALIASES |
| // ------- |
| |
| // Type for a single limb of the big integer. |
| // |
// A limb is analogous to a digit in base 10, except it stores 32-bit
// or 64-bit numbers instead.
//
// This list should cover all known 64-bit platforms supported by Rust.
| // https://forge.rust-lang.org/platform-support.html |
| // |
| // Platforms where native 128-bit multiplication is explicitly supported: |
| // - x86_64 (Supported via `MUL`). |
| // - mips64 (Supported via `DMULTU`, which `HI` and `LO` can be read-from). |
| // |
// Platforms where native 64-bit multiplication is supported, and
// where the hi-lo parts of a 64-bit multiplication can be extracted:
// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
| // |
// Platforms where native 128-bit multiplication is not supported,
// requiring software emulation:
// - sparc64 (`UMUL` only supports double-word arguments).
| |
| // 32-BIT LIMB |
| #[cfg(limb_width_32)] |
| pub type Limb = u32; |
| |
| #[cfg(limb_width_32)] |
| pub const POW5_LIMB: &[Limb] = &POW5_32; |
| |
| #[cfg(limb_width_32)] |
| pub const POW10_LIMB: &[Limb] = &POW10_32; |
| |
| #[cfg(limb_width_32)] |
| type Wide = u64; |
| |
| // 64-BIT LIMB |
| #[cfg(limb_width_64)] |
| pub type Limb = u64; |
| |
| #[cfg(limb_width_64)] |
| pub const POW5_LIMB: &[Limb] = &POW5_64; |
| |
| #[cfg(limb_width_64)] |
| pub const POW10_LIMB: &[Limb] = &POW10_64; |
| |
| #[cfg(limb_width_64)] |
| type Wide = u128; |
| |
| /// Cast to limb type. |
| #[inline] |
| pub(crate) fn as_limb<T: Integer>(t: T) -> Limb { |
| Limb::as_cast(t) |
| } |
| |
| /// Cast to wide type. |
| #[inline] |
| fn as_wide<T: Integer>(t: T) -> Wide { |
| Wide::as_cast(t) |
| } |
| |
| // SPLIT |
| // ----- |
| |
| /// Split u64 into limbs, in little-endian order. |
| #[inline] |
| #[cfg(limb_width_32)] |
| fn split_u64(x: u64) -> [Limb; 2] { |
| [as_limb(x), as_limb(x >> 32)] |
| } |
| |
| /// Split u64 into limbs, in little-endian order. |
| #[inline] |
| #[cfg(limb_width_64)] |
| fn split_u64(x: u64) -> [Limb; 1] { |
| [as_limb(x)] |
| } |
| |
| // HI64 |
| // ---- |
| |
| // NONZERO |
| |
/// Check if any limbs below the `rindex` most-significant limbs are non-zero.
| #[inline] |
| pub fn nonzero<T: Integer>(x: &[T], rindex: usize) -> bool { |
| let len = x.len(); |
| let slc = &x[..len - rindex]; |
| slc.iter().rev().any(|&x| x != T::ZERO) |
| } |
| |
| /// Shift 64-bit integer to high 64-bits. |
| #[inline] |
| fn u64_to_hi64_1(r0: u64) -> (u64, bool) { |
| debug_assert!(r0 != 0); |
| let ls = r0.leading_zeros(); |
| (r0 << ls, false) |
| } |
| |
| /// Shift 2 64-bit integers to high 64-bits. |
| #[inline] |
| fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) { |
| debug_assert!(r0 != 0); |
| let ls = r0.leading_zeros(); |
| let rs = 64 - ls; |
| let v = match ls { |
| 0 => r0, |
| _ => (r0 << ls) | (r1 >> rs), |
| }; |
| let n = r1 << ls != 0; |
| (v, n) |
| } |
| |
| /// Trait to export the high 64-bits from a little-endian slice. |
| trait Hi64<T>: AsRef<[T]> { |
| /// Get the hi64 bits from a 1-limb slice. |
| fn hi64_1(&self) -> (u64, bool); |
| |
| /// Get the hi64 bits from a 2-limb slice. |
| fn hi64_2(&self) -> (u64, bool); |
| |
| /// Get the hi64 bits from a 3-limb slice. |
| fn hi64_3(&self) -> (u64, bool); |
| |
| /// High-level exporter to extract the high 64 bits from a little-endian slice. |
| #[inline] |
| fn hi64(&self) -> (u64, bool) { |
| match self.as_ref().len() { |
| 0 => (0, false), |
| 1 => self.hi64_1(), |
| 2 => self.hi64_2(), |
| _ => self.hi64_3(), |
| } |
| } |
| } |
| |
| impl Hi64<u32> for [u32] { |
| #[inline] |
| fn hi64_1(&self) -> (u64, bool) { |
| debug_assert!(self.len() == 1); |
| let r0 = self[0] as u64; |
| u64_to_hi64_1(r0) |
| } |
| |
| #[inline] |
| fn hi64_2(&self) -> (u64, bool) { |
| debug_assert!(self.len() == 2); |
| let r0 = (self[1] as u64) << 32; |
| let r1 = self[0] as u64; |
| u64_to_hi64_1(r0 | r1) |
| } |
| |
| #[inline] |
| fn hi64_3(&self) -> (u64, bool) { |
| debug_assert!(self.len() >= 3); |
| let r0 = self[self.len() - 1] as u64; |
| let r1 = (self[self.len() - 2] as u64) << 32; |
| let r2 = self[self.len() - 3] as u64; |
| let (v, n) = u64_to_hi64_2(r0, r1 | r2); |
| (v, n || nonzero(self, 3)) |
| } |
| } |
| |
| impl Hi64<u64> for [u64] { |
| #[inline] |
| fn hi64_1(&self) -> (u64, bool) { |
| debug_assert!(self.len() == 1); |
| let r0 = self[0]; |
| u64_to_hi64_1(r0) |
| } |
| |
| #[inline] |
| fn hi64_2(&self) -> (u64, bool) { |
| debug_assert!(self.len() >= 2); |
| let r0 = self[self.len() - 1]; |
| let r1 = self[self.len() - 2]; |
| let (v, n) = u64_to_hi64_2(r0, r1); |
| (v, n || nonzero(self, 2)) |
| } |
| |
| #[inline] |
| fn hi64_3(&self) -> (u64, bool) { |
| self.hi64_2() |
| } |
| } |
| |
| // SCALAR |
| // ------ |
| |
// Scalar-to-scalar operations, used as building blocks for
// arbitrary-precision operations.
| |
| mod scalar { |
| use super::*; |
| |
| // ADDITION |
| |
/// Add two small integers and return the resulting value and whether overflow occurred.
| #[inline] |
| pub fn add(x: Limb, y: Limb) -> (Limb, bool) { |
| x.overflowing_add(y) |
| } |
| |
/// AddAssign two small integers and return whether overflow occurred.
| #[inline] |
| pub fn iadd(x: &mut Limb, y: Limb) -> bool { |
| let t = add(*x, y); |
| *x = t.0; |
| t.1 |
| } |
| |
| // SUBTRACTION |
| |
/// Subtract two small integers and return the resulting value and whether overflow occurred.
| #[inline] |
| pub fn sub(x: Limb, y: Limb) -> (Limb, bool) { |
| x.overflowing_sub(y) |
| } |
| |
/// SubAssign two small integers and return whether overflow occurred.
| #[inline] |
| pub fn isub(x: &mut Limb, y: Limb) -> bool { |
| let t = sub(*x, y); |
| *x = t.0; |
| t.1 |
| } |
| |
| // MULTIPLICATION |
| |
/// Multiply two small integers (with carry), returning the overflow
/// contribution.
///
/// Returns the (low, high) components.
| #[inline] |
| pub fn mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) { |
| // Cannot overflow, as long as wide is 2x as wide. This is because |
| // the following is always true: |
| // `Wide::max_value() - (Narrow::max_value() * Narrow::max_value()) >= Narrow::max_value()` |
| let z: Wide = as_wide(x) * as_wide(y) + as_wide(carry); |
| let bits = mem::size_of::<Limb>() * 8; |
| (as_limb(z), as_limb(z >> bits)) |
| } |
| |
/// MulAssign two small integers (with carry) and return the carry.
| #[inline] |
| pub fn imul(x: &mut Limb, y: Limb, carry: Limb) -> Limb { |
| let t = mul(*x, y, carry); |
| *x = t.0; |
| t.1 |
| } |
| } // scalar |
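
// A minimal worked example of the scalar carry behavior, valid for
// either limb width, since `(B - 1)^2 + (B - 1) == B^2 - B` for any
// base `B` (values chosen for illustration):
//
//     let max = Limb::max_value();
//     let (lo, hi) = scalar::mul(max, max, max);
//     assert_eq!((lo, hi), (0, max));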
| |
| // SMALL |
| // ----- |
| |
| // Large-to-small operations, to modify a big integer from a native scalar. |
| |
| mod small { |
| use super::*; |
| |
// ADDITION
| |
| /// Implied AddAssign implementation for adding a small integer to bigint. |
| /// |
/// Allows us to choose a start-index in x at which to add, so we can
/// increment from a non-zero starting limb.
| #[inline] |
| pub fn iadd_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) { |
| if x.len() <= xstart { |
| x.push(y); |
| } else { |
| // Initial add |
| let mut carry = scalar::iadd(&mut x[xstart], y); |
| |
| // Increment until overflow stops occurring. |
| let mut size = xstart + 1; |
| while carry && size < x.len() { |
| carry = scalar::iadd(&mut x[size], 1); |
| size += 1; |
| } |
| |
| // If we overflowed the buffer entirely, need to add 1 to the end |
| // of the buffer. |
| if carry { |
| x.push(1); |
| } |
| } |
| } |
| |
| /// AddAssign small integer to bigint. |
| #[inline] |
| pub fn iadd(x: &mut Vec<Limb>, y: Limb) { |
| iadd_impl(x, y, 0); |
| } |
| |
| // SUBTRACTION |
| |
| /// SubAssign small integer to bigint. |
| /// Does not do overflowing subtraction. |
| #[inline] |
| pub fn isub_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) { |
| debug_assert!(x.len() > xstart && (x[xstart] >= y || x.len() > xstart + 1)); |
| |
| // Initial subtraction |
| let mut carry = scalar::isub(&mut x[xstart], y); |
| |
// Decrement until underflow stops occurring.
| let mut size = xstart + 1; |
| while carry && size < x.len() { |
| carry = scalar::isub(&mut x[size], 1); |
| size += 1; |
| } |
| normalize(x); |
| } |
| |
| // MULTIPLICATION |
| |
| /// MulAssign small integer to bigint. |
| #[inline] |
| pub fn imul(x: &mut Vec<Limb>, y: Limb) { |
| // Multiply iteratively over all elements, adding the carry each time. |
| let mut carry: Limb = 0; |
| for xi in x.iter_mut() { |
| carry = scalar::imul(xi, y, carry); |
| } |
| |
| // Overflow of value, add to end. |
| if carry != 0 { |
| x.push(carry); |
| } |
| } |
| |
| /// Mul small integer to bigint. |
| #[inline] |
| pub fn mul(x: &[Limb], y: Limb) -> Vec<Limb> { |
| let mut z = Vec::<Limb>::default(); |
| z.extend_from_slice(x); |
| imul(&mut z, y); |
| z |
| } |
| |
| /// MulAssign by a power. |
| /// |
| /// Theoretically... |
| /// |
| /// Use an exponentiation by squaring method, since it reduces the time |
| /// complexity of the multiplication to ~`O(log(n))` for the squaring, |
| /// and `O(n*m)` for the result. Since `m` is typically a lower-order |
| /// factor, this significantly reduces the number of multiplications |
| /// we need to do. Iteratively multiplying by small powers follows |
| /// the nth triangular number series, which scales as `O(p^2)`, but |
| /// where `p` is `n+m`. In short, it scales very poorly. |
| /// |
/// Practically...
| /// |
| /// Exponentiation by Squaring: |
| /// running 2 tests |
| /// test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78) |
| /// test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007) |
| /// |
| /// Exponentiation by Iterative Small Powers: |
| /// running 2 tests |
| /// test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31) |
| /// test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47) |
| /// |
| /// Exponentiation by Iterative Large Powers (of 2): |
| /// running 2 tests |
| /// test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31) |
| /// test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47) |
| /// |
| /// Even using worst-case scenarios, exponentiation by squaring is |
| /// significantly slower for our workloads. Just multiply by small powers, |
| /// in simple cases, and use precalculated large powers in other cases. |
| pub fn imul_pow5(x: &mut Vec<Limb>, n: u32) { |
| use super::large::KARATSUBA_CUTOFF; |
| |
| let small_powers = POW5_LIMB; |
| let large_powers = large_powers::POW5; |
| |
| if n == 0 { |
// No exponent, just return.
// The small-powers branch below would otherwise multiply by
// `small_powers[0] == 1`, needlessly traversing the buffer, so
// return early instead.
| return; |
| } |
| |
// We want to use the asymptotically faster algorithm if we're going
// to be using Karatsuba multiplication sometime during the
// computation; otherwise, just multiply iteratively by small powers.
| let bit_length = 32 - n.leading_zeros() as usize; |
| debug_assert!(bit_length != 0 && bit_length <= large_powers.len()); |
| if x.len() + large_powers[bit_length - 1].len() < 2 * KARATSUBA_CUTOFF { |
| // We can use iterative small powers to make this faster for the |
| // easy cases. |
| |
| // Multiply by the largest small power until n < step. |
| let step = small_powers.len() - 1; |
| let power = small_powers[step]; |
| let mut n = n as usize; |
| while n >= step { |
| imul(x, power); |
| n -= step; |
| } |
| |
| // Multiply by the remainder. |
| imul(x, small_powers[n]); |
| } else { |
// In theory, this code should be asymptotically a lot faster;
// in practice, our small::imul seems to be the limiting step,
// and large imul is slow as well.
| |
| // Multiply by higher order powers. |
| let mut idx: usize = 0; |
| let mut bit: usize = 1; |
| let mut n = n as usize; |
| while n != 0 { |
| if n & bit != 0 { |
| debug_assert!(idx < large_powers.len()); |
| large::imul(x, large_powers[idx]); |
| n ^= bit; |
| } |
| idx += 1; |
| bit <<= 1; |
| } |
| } |
| } |
| |
| // BIT LENGTH |
| |
/// Get the number of leading zero bits in the most-significant limb.
| #[inline] |
| pub fn leading_zeros(x: &[Limb]) -> usize { |
| x.last().map_or(0, |x| x.leading_zeros() as usize) |
| } |
| |
| /// Calculate the bit-length of the big-integer. |
| #[inline] |
| pub fn bit_length(x: &[Limb]) -> usize { |
| let bits = mem::size_of::<Limb>() * 8; |
// Avoid overflowing; calculate via total number of bits
// minus leading zero bits.
| let nlz = leading_zeros(x); |
| bits.checked_mul(x.len()) |
| .map_or_else(usize::max_value, |v| v - nlz) |
| } |
| |
| // SHL |
| |
| /// Shift-left bits inside a buffer. |
| /// |
/// Assumes `n < Limb::BITS`, i.e., shifting bits within each limb.
| #[inline] |
| pub fn ishl_bits(x: &mut Vec<Limb>, n: usize) { |
// Need to shift by `n` bits, with `n < Limb::BITS`.
| let bits = mem::size_of::<Limb>() * 8; |
| debug_assert!(n < bits); |
| if n == 0 { |
| return; |
| } |
| |
// Internally, for each limb, we shift left by n, and OR in the bits
// shifted out of the previous (less-significant) limb.
// For example, with u8 limbs shown most-significant first, shifting
// left by 2 transforms:
//      b10100100 b01000010
// into:
//      b10 b10010001 b00001000
| let rshift = bits - n; |
| let lshift = n; |
| let mut prev: Limb = 0; |
| for xi in x.iter_mut() { |
| let tmp = *xi; |
| *xi <<= lshift; |
| *xi |= prev >> rshift; |
| prev = tmp; |
| } |
| |
// Push the final carry as a new most-significant limb, if it is non-zero.
| let carry = prev >> rshift; |
| if carry != 0 { |
| x.push(carry); |
| } |
| } |
| |
/// Shift-left `n` limbs inside a buffer.
| /// |
| /// Assumes `n` is not 0. |
| #[inline] |
| pub fn ishl_limbs(x: &mut Vec<Limb>, n: usize) { |
| debug_assert!(n != 0); |
| if !x.is_empty() { |
| x.reserve(n); |
| x.splice(..0, iter::repeat(0).take(n)); |
| } |
| } |
| |
| /// Shift-left buffer by n bits. |
| #[inline] |
| pub fn ishl(x: &mut Vec<Limb>, n: usize) { |
| let bits = mem::size_of::<Limb>() * 8; |
// Need to pad with zero-limbs for `n / Limb::BITS`, and shift-left
// with carry for the remaining `n % Limb::BITS` bits.
| let rem = n % bits; |
| let div = n / bits; |
| ishl_bits(x, rem); |
| if div != 0 { |
| ishl_limbs(x, div); |
| } |
| } |
| |
| // NORMALIZE |
| |
| /// Normalize the container by popping any leading zeros. |
| #[inline] |
| pub fn normalize(x: &mut Vec<Limb>) { |
// Pop any zero limbs from the most-significant end of the buffer,
// so the representation stays canonical.
| while x.last() == Some(&0) { |
| x.pop(); |
| } |
| } |
| } // small |
| |
| // LARGE |
| // ----- |
| |
// Large-to-large operations, to modify a big integer from another big integer.
| |
| mod large { |
| use super::*; |
| |
| // RELATIVE OPERATORS |
| |
| /// Compare `x` to `y`, in little-endian order. |
| #[inline] |
| pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering { |
| if x.len() > y.len() { |
| cmp::Ordering::Greater |
| } else if x.len() < y.len() { |
| cmp::Ordering::Less |
| } else { |
| let iter = x.iter().rev().zip(y.iter().rev()); |
| for (&xi, &yi) in iter { |
| if xi > yi { |
| return cmp::Ordering::Greater; |
| } else if xi < yi { |
| return cmp::Ordering::Less; |
| } |
| } |
| // Equal case. |
| cmp::Ordering::Equal |
| } |
| } |
| |
| /// Check if x is less than y. |
| #[inline] |
| pub fn less(x: &[Limb], y: &[Limb]) -> bool { |
| compare(x, y) == cmp::Ordering::Less |
| } |
| |
| /// Check if x is greater than or equal to y. |
| #[inline] |
| pub fn greater_equal(x: &[Limb], y: &[Limb]) -> bool { |
| !less(x, y) |
| } |
| |
| // ADDITION |
| |
| /// Implied AddAssign implementation for bigints. |
| /// |
/// Allows us to choose a start-index in x at which to add, so we can
/// avoid padding the buffer with zeros when not needed.
| pub fn iadd_impl(x: &mut Vec<Limb>, y: &[Limb], xstart: usize) { |
// The effective x buffer is from `xstart..x.len()`, so we need to
// treat that as the current range. If the effective y buffer is
// longer, resize x to `y.len() + xstart`.
| if y.len() > x.len() - xstart { |
| x.resize(y.len() + xstart, 0); |
| } |
| |
| // Iteratively add elements from y to x. |
| let mut carry = false; |
| for (xi, yi) in x[xstart..].iter_mut().zip(y.iter()) { |
// Only one of the two ops can overflow, since we add at most
// Limb::max_value() + Limb::max_value(). Add the previous carry,
// and store the current carry for the next limb.
| let mut tmp = scalar::iadd(xi, *yi); |
| if carry { |
| tmp |= scalar::iadd(xi, 1); |
| } |
| carry = tmp; |
| } |
| |
| // Overflow from the previous bit. |
| if carry { |
| small::iadd_impl(x, 1, y.len() + xstart); |
| } |
| } |
| |
| /// AddAssign bigint to bigint. |
| #[inline] |
| pub fn iadd(x: &mut Vec<Limb>, y: &[Limb]) { |
| iadd_impl(x, y, 0); |
| } |
| |
| /// Add bigint to bigint. |
| #[inline] |
| pub fn add(x: &[Limb], y: &[Limb]) -> Vec<Limb> { |
| let mut z = Vec::<Limb>::default(); |
| z.extend_from_slice(x); |
| iadd(&mut z, y); |
| z |
| } |
| |
| // SUBTRACTION |
| |
| /// SubAssign bigint to bigint. |
| pub fn isub(x: &mut Vec<Limb>, y: &[Limb]) { |
| // Basic underflow checks. |
| debug_assert!(greater_equal(x, y)); |
| |
// Iteratively subtract elements of y from x.
| let mut carry = false; |
| for (xi, yi) in x.iter_mut().zip(y.iter()) { |
// Only one of the two ops can underflow, since we subtract at
// most Limb::max_value() + 1. Subtract the previous borrow, and
// store the current borrow for the next limb.
| let mut tmp = scalar::isub(xi, *yi); |
| if carry { |
| tmp |= scalar::isub(xi, 1); |
| } |
| carry = tmp; |
| } |
| |
| if carry { |
| small::isub_impl(x, 1, y.len()); |
| } else { |
| small::normalize(x); |
| } |
| } |
| |
| // MULTIPLICATION |
| |
/// Number of limbs at which to bottom-out to asymptotically slower algorithms.
| /// |
| /// Karatsuba tends to out-perform long-multiplication at ~320-640 bits, |
| /// so we go halfway, while Newton division tends to out-perform |
| /// Algorithm D at ~1024 bits. We can toggle this for optimal performance. |
| pub const KARATSUBA_CUTOFF: usize = 32; |
| |
| /// Grade-school multiplication algorithm. |
| /// |
/// Slow, naive algorithm, using the limb as the digit base and just
/// shifting left for each iteration. This could be optimized with
/// numerous other algorithms, but it's extremely simple, and works in
/// O(n*m) time, which is fine by me. Each of the `m` iterations
/// requires `n` multiplications and `n` additions: classic
/// grade-school multiplication.
| fn long_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> { |
| // Using the immutable value, multiply by all the scalars in y, using |
| // the algorithm defined above. Use a single buffer to avoid |
| // frequent reallocations. Handle the first case to avoid a redundant |
| // addition, since we know y.len() >= 1. |
| let mut z: Vec<Limb> = small::mul(x, y[0]); |
| z.resize(x.len() + y.len(), 0); |
| |
| // Handle the iterative cases. |
| for (i, &yi) in y[1..].iter().enumerate() { |
| let zi: Vec<Limb> = small::mul(x, yi); |
| iadd_impl(&mut z, &zi, i + 1); |
| } |
| |
| small::normalize(&mut z); |
| |
| z |
| } |
| |
/// Split a buffer at index `m`, into (lo, hi) slices.
| #[inline] |
| pub fn karatsuba_split(z: &[Limb], m: usize) -> (&[Limb], &[Limb]) { |
| (&z[..m], &z[m..]) |
| } |
| |
| /// Karatsuba multiplication algorithm with roughly equal input sizes. |
| /// |
| /// Assumes `y.len() >= x.len()`. |
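///
/// With `x = xl + xh*B^m` and `y = yl + yh*B^m` (where `B` is the limb
/// base `2^Limb::BITS`), the product expands to:
///
/// `x*y = z0 + (z1 - z0 - z2)*B^m + z2*B^(2m)`
///
/// where `z0 = xl*yl`, `z2 = xh*yh`, and `z1 = (xl + xh)*(yl + yh)`,
/// so only three half-size multiplications are needed.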
| fn karatsuba_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> { |
| if y.len() <= KARATSUBA_CUTOFF { |
// Bottom-out to long multiplication for small cases.
| long_mul(x, y) |
| } else if x.len() < y.len() / 2 { |
| karatsuba_uneven_mul(x, y) |
| } else { |
| // Do our 3 multiplications. |
| let m = y.len() / 2; |
| let (xl, xh) = karatsuba_split(x, m); |
| let (yl, yh) = karatsuba_split(y, m); |
| let sumx = add(xl, xh); |
| let sumy = add(yl, yh); |
| let z0 = karatsuba_mul(xl, yl); |
| let mut z1 = karatsuba_mul(&sumx, &sumy); |
| let z2 = karatsuba_mul(xh, yh); |
// Properly scale z1, which is `z1 - z2 - z0`.
| isub(&mut z1, &z2); |
| isub(&mut z1, &z0); |
| |
// Create our result, which is equal to, in little-endian order:
// [z0, z1 - z2 - z0, z2]
// z1 must be shifted m limbs (a factor of 2^(m*Limb::BITS)) over.
// z2 must be shifted 2*m limbs (a factor of 2^(2*m*Limb::BITS)) over.
| let len = z0.len().max(m + z1.len()).max(2 * m + z2.len()); |
| let mut result = z0; |
| result.reserve_exact(len - result.len()); |
| iadd_impl(&mut result, &z1, m); |
| iadd_impl(&mut result, &z2, 2 * m); |
| |
| result |
| } |
| } |
| |
| /// Karatsuba multiplication algorithm where y is substantially larger than x. |
| /// |
| /// Assumes `y.len() >= x.len()`. |
| fn karatsuba_uneven_mul(x: &[Limb], mut y: &[Limb]) -> Vec<Limb> { |
| let mut result = Vec::<Limb>::default(); |
| result.resize(x.len() + y.len(), 0); |
| |
| // This effectively is like grade-school multiplication between |
| // two numbers, except we're using splits on `y`, and the intermediate |
| // step is a Karatsuba multiplication. |
| let mut start = 0; |
| while !y.is_empty() { |
| let m = x.len().min(y.len()); |
| let (yl, yh) = karatsuba_split(y, m); |
| let prod = karatsuba_mul(x, yl); |
| iadd_impl(&mut result, &prod, start); |
| y = yh; |
| start += m; |
| } |
| small::normalize(&mut result); |
| |
| result |
| } |
| |
| /// Forwarder to the proper Karatsuba algorithm. |
| #[inline] |
| fn karatsuba_mul_fwd(x: &[Limb], y: &[Limb]) -> Vec<Limb> { |
| if x.len() < y.len() { |
| karatsuba_mul(x, y) |
| } else { |
| karatsuba_mul(y, x) |
| } |
| } |
| |
| /// MulAssign bigint to bigint. |
| #[inline] |
| pub fn imul(x: &mut Vec<Limb>, y: &[Limb]) { |
| if y.len() == 1 { |
| small::imul(x, y[0]); |
| } else { |
// Forward to the Karatsuba algorithm, which bottoms out to
// grade-school multiplication below the cutoff; the single-limb
// case above avoids allocating a new buffer.
| *x = karatsuba_mul_fwd(x, y); |
| } |
| } |
| } // large |
| |
| // TRAITS |
| // ------ |
| |
/// Trait for shared operations on big integers.
///
/// None of these are implemented using the standard operator traits,
/// since these are very expensive operations, and we want their use
/// to be deliberate and explicit.
| pub(crate) trait Math: Clone + Sized + Default { |
| // DATA |
| |
| /// Get access to the underlying data |
| fn data(&self) -> &Vec<Limb>; |
| |
| /// Get access to the underlying data |
| fn data_mut(&mut self) -> &mut Vec<Limb>; |
| |
| // RELATIVE OPERATIONS |
| |
| /// Compare self to y. |
| #[inline] |
| fn compare(&self, y: &Self) -> cmp::Ordering { |
| large::compare(self.data(), y.data()) |
| } |
| |
| // PROPERTIES |
| |
/// Get the high 64 bits from the bigint, and whether any lower bits are non-zero.
| #[inline] |
| fn hi64(&self) -> (u64, bool) { |
| self.data().as_slice().hi64() |
| } |
| |
/// Calculate the bit-length of the big-integer.
/// Returns usize::max_value() if the bit count overflows,
/// i.e., if `self.data().len() * Limb::BITS` exceeds `usize::max_value()`.
| #[inline] |
| fn bit_length(&self) -> usize { |
| small::bit_length(self.data()) |
| } |
| |
| // INTEGER CONVERSIONS |
| |
| /// Create new big integer from u64. |
| #[inline] |
| fn from_u64(x: u64) -> Self { |
| let mut v = Self::default(); |
| let slc = split_u64(x); |
| v.data_mut().extend_from_slice(&slc); |
| v.normalize(); |
| v |
| } |
| |
| // NORMALIZE |
| |
| /// Normalize the integer, so any leading zero values are removed. |
| #[inline] |
| fn normalize(&mut self) { |
| small::normalize(self.data_mut()); |
| } |
| |
| // ADDITION |
| |
| /// AddAssign small integer. |
| #[inline] |
| fn iadd_small(&mut self, y: Limb) { |
| small::iadd(self.data_mut(), y); |
| } |
| |
| // MULTIPLICATION |
| |
| /// MulAssign small integer. |
| #[inline] |
| fn imul_small(&mut self, y: Limb) { |
| small::imul(self.data_mut(), y); |
| } |
| |
| /// Multiply by a power of 2. |
| #[inline] |
| fn imul_pow2(&mut self, n: u32) { |
| self.ishl(n as usize); |
| } |
| |
| /// Multiply by a power of 5. |
| #[inline] |
| fn imul_pow5(&mut self, n: u32) { |
| small::imul_pow5(self.data_mut(), n); |
| } |
| |
| /// MulAssign by a power of 10. |
| #[inline] |
| fn imul_pow10(&mut self, n: u32) { |
| self.imul_pow5(n); |
| self.imul_pow2(n); |
| } |
| |
| // SHIFTS |
| |
| /// Shift-left the entire buffer n bits. |
| #[inline] |
| fn ishl(&mut self, n: usize) { |
| small::ishl(self.data_mut(), n); |
| } |
| } |