strength_reduce/
long_division.rs1extern crate core;
2
3const U32_MAX: u64 = core::u32::MAX as u64;
4const U64_MAX: u128 = core::u64::MAX as u128;
5
6use ::StrengthReducedU64;
7use ::long_multiplication;
8
9#[inline]
12fn divide_128_by_64_preshifted(numerator_hi: u64, numerator_lo: u64, divisor: u64) -> u64 {
13 let numerator_mid = (numerator_lo >> 32) as u128;
14 let numerator_lo = numerator_lo as u32 as u128;
15 let divisor_full_128 = divisor as u128;
16 let divisor_hi = divisor >> 32;
17
18 let full_upper_numerator = ((numerator_hi as u128) << 32) | numerator_mid;
22 let mut quotient_hi = core::cmp::min(numerator_hi / divisor_hi, U32_MAX);
23 let mut product_hi = quotient_hi as u128 * divisor_full_128;
24
25 while product_hi > full_upper_numerator {
28 quotient_hi -= 1;
29 product_hi -= divisor_full_128;
30 }
31 let remainder_hi = full_upper_numerator - product_hi;
32
33
34 let full_lower_numerator = (remainder_hi << 32) | numerator_lo;
36 let mut quotient_lo = core::cmp::min((remainder_hi as u64) / divisor_hi, U32_MAX);
37 let mut product_lo = quotient_lo as u128 * divisor_full_128;
38
39 while product_lo > full_lower_numerator {
41 quotient_lo -= 1;
42 product_lo -= divisor_full_128;
43 }
44
45 (quotient_hi << 32) | quotient_lo
47}
48
49#[inline]
53fn divide_128_by_64_preshifted_reduced(numerator_hi: u64, numerator_lo: u64, divisor_hi: StrengthReducedU64, divisor_full: u64) -> u64 {
54 let numerator_mid = (numerator_lo >> 32) as u128;
55 let numerator_lo = numerator_lo as u32 as u128;
56 let divisor_full_128 = divisor_full as u128;
57
58 let full_upper_numerator = ((numerator_hi as u128) << 32) | numerator_mid;
62 let mut quotient_hi = core::cmp::min(numerator_hi / divisor_hi, U32_MAX);
63 let mut product_hi = quotient_hi as u128 * divisor_full_128;
64
65 while product_hi > full_upper_numerator {
68 quotient_hi -= 1;
69 product_hi -= divisor_full_128;
70 }
71 let full_upper_remainder = full_upper_numerator - product_hi;
72
73
74 let full_lower_numerator = (full_upper_remainder << 32) | numerator_lo;
76 let mut quotient_lo = core::cmp::min((full_upper_remainder as u64) / divisor_hi, U32_MAX);
77 let mut product_lo = quotient_lo as u128 * divisor_full_128;
78
79 while product_lo > full_lower_numerator {
81 quotient_lo -= 1;
82 product_lo -= divisor_full_128;
83 }
84
85 (quotient_hi << 32) | quotient_lo
87}
88
89pub fn divide_128(numerator: u128, divisor: u128) -> u128 {
91 if divisor <= U64_MAX {
92 let divisor64 = divisor as u64;
93 let upper_numerator = (numerator >> 64) as u64;
94 if divisor64 > upper_numerator {
95 divide_128_by_64_helper(numerator, divisor64) as u128
96 }
97 else {
98 let upper_quotient = upper_numerator / divisor64;
99 let upper_remainder = upper_numerator - upper_quotient * divisor64;
100
101 let intermediate_numerator = ((upper_remainder as u128) << 64) | (numerator as u64 as u128);
102 let lower_quotient = divide_128_by_64_helper(intermediate_numerator, divisor64);
103
104 ((upper_quotient as u128) << 64) | (lower_quotient as u128)
105 }
106 }
107 else {
108 let shift_size = divisor.leading_zeros();
109 let shifted_divisor = divisor << shift_size;
110
111 let shifted_numerator = numerator >> 1;
112
113 let upper_quotient = divide_128_by_64_helper(shifted_numerator, (shifted_divisor >> 64) as u64);
114 let mut quotient = upper_quotient >> (63 - shift_size);
115 if quotient > 0 {
116 quotient -= 1;
117 }
118
119 let remainder = numerator - quotient as u128 * divisor;
120 if remainder >= divisor {
121 quotient += 1;
122 }
123 quotient as u128
124 }
125}
126
127fn divide_128_by_64_helper(numerator: u128, divisor: u64) -> u64 {
129 assert!(divisor > (numerator >> 64) as u64, "The numerator is too large for the denominator; the quotient might not fit inside a u64.");
133
134 if divisor <= U32_MAX {
135 return divide_128_by_32_helper(numerator, divisor as u32);
136 }
137
138 let shift_size = divisor.leading_zeros();
139 let shifted_divisor = divisor << shift_size;
140 let shifted_numerator = numerator << shift_size;
141 let divisor_hi = shifted_divisor >> 32;
142 let divisor_lo = shifted_divisor as u32 as u64;
143
144 let numerator_hi : u64 = (shifted_numerator >> 64) as u64;
146 let numerator_mid : u64 = (shifted_numerator >> 32) as u32 as u64;
147 let numerator_lo : u64 = shifted_numerator as u32 as u64;
148
149 let mut quotient_hi = core::cmp::min(numerator_hi / divisor_hi, U32_MAX);
161 let mut partial_remainder_hi = numerator_hi - quotient_hi * divisor_hi;
162
163 while partial_remainder_hi <= U32_MAX && quotient_hi * divisor_lo > (partial_remainder_hi << 32) | numerator_mid {
169 quotient_hi -= 1;
170 partial_remainder_hi += divisor_hi;
171 }
172
173 let full_remainder_hi = ((partial_remainder_hi << 32) | numerator_mid).wrapping_sub(quotient_hi * divisor_lo);
181
182 let mut quotient_lo = core::cmp::min(full_remainder_hi / divisor_hi, U32_MAX);
183 let mut partial_remainder_lo = full_remainder_hi - quotient_lo * divisor_hi;
184
185 while partial_remainder_lo <= U32_MAX && quotient_lo * divisor_lo > (partial_remainder_lo << 32) | numerator_lo {
187 quotient_lo -= 1;
188 partial_remainder_lo += divisor_hi;
189 }
190
191 (quotient_hi << 32) | quotient_lo
193}
194
195
196fn divide_128_by_32_helper(numerator: u128, divisor: u32) -> u64 {
198 assert!(divisor as u64 > (numerator >> 64) as u64, "The numerator is too large for the denominator; the quotient might not fit inside a u64.");
202
203 let shift_size = divisor.leading_zeros();
204 let shifted_divisor = (divisor << shift_size) as u64;
205 let shifted_numerator = numerator << (shift_size + 32);
206
207 let numerator_hi : u64 = (shifted_numerator >> 64) as u64;
209 let numerator_mid : u64 = (shifted_numerator >> 32) as u32 as u64;
210
211 let quotient_hi = numerator_hi / shifted_divisor;
223 let remainder_hi = numerator_hi - quotient_hi * shifted_divisor;
224
225 let final_numerator = (remainder_hi) << 32 | numerator_mid;
233 let quotient_lo = final_numerator / shifted_divisor;
234
235 (quotient_hi << 32) | quotient_lo
237}
238
239#[inline(never)]
240fn long_division(numerator_slice: &[u64], reduced_divisor: &StrengthReducedU64, quotient: &mut [u64]) {
241 let mut remainder = 0;
242 for (numerator_element, quotient_element) in numerator_slice.iter().zip(quotient.iter_mut()).rev() {
243 if remainder > 0 {
244 let upper_numerator = (remainder << 32) | (*numerator_element >> 32);
247 let (upper_quotient, upper_remainder) = StrengthReducedU64::div_rem(upper_numerator, *reduced_divisor);
248
249 let lower_numerator = (upper_remainder << 32) | (*numerator_element as u32 as u64);
250 let (lower_quotient, lower_remainder) = StrengthReducedU64::div_rem(lower_numerator, *reduced_divisor);
251
252 *quotient_element = (upper_quotient << 32) | lower_quotient;
253 remainder = lower_remainder;
254 } else {
255 let (digit_quotient, digit_remainder) = StrengthReducedU64::div_rem(*numerator_element, *reduced_divisor);
257
258 *quotient_element = digit_quotient;
259 remainder = digit_remainder;
260 }
261 }
262}
263
264#[inline]
265fn normalize_slice(input: &mut [u64]) -> &mut [u64] {
266 let input_len = input.len();
267 let trailing_zero_chunks = input.iter().rev().take_while(|e| **e == 0).count();
268 &mut input[..input_len - trailing_zero_chunks]
269}
270
271#[inline]
272fn is_slice_greater(a: &[u64], b: &[u64]) -> bool {
273 if a.len() > b.len() {
274 return true;
275 }
276 if b.len() > a.len() {
277 return false;
278 }
279
280 for (&ai, &bi) in a.iter().zip(b.iter()).rev() {
281 if ai < bi {
282 return false;
283 }
284 if ai > bi {
285 return true;
286 }
287 }
288 false
289}
290#[inline]
292fn sub_assign(a: &mut [u64], b: &[u64]) {
293 let mut borrow: i128 = 0;
294
295 let (a_lo, a_hi) = a.split_at_mut(b.len());
297
298 for (a, b) in a_lo.iter_mut().zip(b) {
299 borrow += *a as i128;
300 borrow -= *b as i128;
301 *a = borrow as u64;
302 borrow >>= 64;
303 }
304
305 let mut a_element = a_hi.iter_mut();
307 while borrow != 0 {
308 let a_element = a_element.next().expect("borrow underflow during sub_assign");
309 borrow += *a_element as i128;
310
311 *a_element = borrow as u64;
312 borrow >>= 64;
313 }
314}
315
316pub(crate) fn divide_128_max_by_64(divisor: u64) -> u128 {
317 let quotient_hi = core::u64::MAX / divisor;
318 let remainder_hi = core::u64::MAX - quotient_hi * divisor;
319
320 let leading_zeros = divisor.leading_zeros();
321 let quotient_lo = if leading_zeros >= 32 {
322 let numerator_mid = (remainder_hi << 32) | core::u32::MAX as u64;
323 let quotient_mid = numerator_mid / divisor;
324 let remainder_mid = numerator_mid - quotient_mid * divisor;
325
326 let numerator_lo = (remainder_mid << 32) | core::u32::MAX as u64;
327 let quotient_lo = numerator_lo / divisor;
328
329 (quotient_mid << 32) | quotient_lo
330 }
331 else {
332 let numerator_hi = if leading_zeros > 0 { (remainder_hi << leading_zeros) | (core::u64::MAX >> (64 - leading_zeros)) } else { remainder_hi };
333 let numerator_lo = core::u64::MAX << leading_zeros;
334
335 divide_128_by_64_preshifted(numerator_hi, numerator_lo, divisor << leading_zeros)
336 };
337 ((quotient_hi as u128) << 64) | (quotient_lo as u128)
338}
339
340fn divide_256_max_by_32(divisor: u32) -> (u128, u128) {
341 let reduced_divisor = StrengthReducedU64::new(divisor as u64);
342 let mut numerator_chunks = [core::u64::MAX; 4];
343 let mut quotient_chunks = [0; 4];
344 long_division(&mut numerator_chunks, &reduced_divisor, &mut quotient_chunks);
345
346 let quotient_lo = (quotient_chunks[0] as u128) | ((quotient_chunks[1] as u128) << 64);
348 let quotient_hi = (quotient_chunks[2] as u128) | ((quotient_chunks[3] as u128) << 64);
349
350 (quotient_hi, quotient_lo)
351}
352
353pub(crate) fn divide_256_max_by_128(divisor: u128) -> (u128, u128) {
354 let leading_zeros = divisor.leading_zeros();
355
356 if leading_zeros >= 96 {
358 return divide_256_max_by_32(divisor as u32);
359 }
360
361 let empty_divisor_chunks = (leading_zeros / 64) as usize;
362 let shift_amount = leading_zeros % 64;
363
364 let divisor_shifted = divisor << shift_amount;
366 let divisor_chunks = [
367 divisor_shifted as u64,
368 (divisor_shifted >> 64) as u64,
369 ];
370 let divisor_slice = &divisor_chunks[..(divisor_chunks.len() - empty_divisor_chunks)];
371
372 let reduced_divisor_hi = StrengthReducedU64::new(*divisor_slice.last().unwrap() >> 32);
375 let divisor_hi = *divisor_slice.last().unwrap();
376
377 let mut numerator_chunks = [core::u64::MAX; 5];
379 let mut numerator_max_idx = if shift_amount > 0 {
380 numerator_chunks[4] >>= 64 - shift_amount;
381 numerator_chunks[0] <<= shift_amount;
382 5
383 }
384 else {
385 4
386 };
387
388 let num_quotient_chunks = 3 + empty_divisor_chunks;
390 let mut quotient_chunks = [0; 4];
391 for quotient_idx in (0..num_quotient_chunks).rev() {
392 let numerator_slice = &mut numerator_chunks[..numerator_max_idx];
400 let numerator_start_idx = quotient_idx + divisor_slice.len() - 1;
401 if numerator_start_idx >= numerator_slice.len() {
402 continue;
403 }
404
405
406 {
408 let numerator_hi = if numerator_slice.len() - numerator_start_idx > 1 { numerator_slice[numerator_start_idx + 1] } else { 0 };
410 let numerator_lo = numerator_slice[numerator_start_idx];
411 let mut sub_quotient = divide_128_by_64_preshifted_reduced(numerator_hi, numerator_lo, reduced_divisor_hi, divisor_hi);
412
413 let mut tmp_product = [0; 3];
414 long_multiplication::long_multiply(&divisor_slice, sub_quotient, &mut tmp_product);
415 let sub_product = normalize_slice(&mut tmp_product);
416
417 while is_slice_greater(sub_product, &numerator_slice[quotient_idx..]) {
420 sub_assign(sub_product, &divisor_slice);
421 sub_quotient -= 1;
422 }
423
424 quotient_chunks[quotient_idx] = sub_quotient;
426 sub_assign(&mut numerator_slice[quotient_idx..], sub_product);
427 }
428
429
430 numerator_max_idx -= numerator_slice.iter().rev().take_while(|e| **e == 0).count();
432 }
433
434
435 let quotient_lo = (quotient_chunks[0] as u128)
437 | ((quotient_chunks[1] as u128) << 64);
438 let quotient_hi = (quotient_chunks[2] as u128)
439 | ((quotient_chunks[3] as u128) << 64);
440
441 (quotient_hi, quotient_lo)
442}
443
444
445
446#[cfg(test)]
447mod unit_tests {
448 use num_bigint::BigUint;
449
450 #[test]
451 fn test_divide_128_by_64() {
452 for divisor in core::u64::MAX..=core::u64::MAX {
453 let divisor_128 = core::u64::MAX as u128;
454
455 let numerator = divisor_128 * divisor_128 + (divisor_128 - 1);
456 let expected_quotient = numerator / divisor as u128;
458 assert!(expected_quotient == core::u64::MAX as u128);
459
460 let actual_quotient = super::divide_128_by_64_helper(numerator as u128, divisor);
461
462
463
464 let expected_upper = (expected_quotient >> 32) as u64;
465 let expected_lower = expected_quotient as u32 as u64;
466 let actual_upper = (actual_quotient >> 32) as u64;
467 let actual_lower = actual_quotient as u32 as u64;
468
469 assert_eq!(expected_upper, actual_upper, "wrong quotient for {}/{}", numerator, divisor);
470 assert_eq!(expected_lower, actual_lower, "wrong quotient for {}/{}", numerator, divisor);
471 }
473 }
474
475 fn test_divisor_128(divisor: u128) {
476 let big_numerator = BigUint::from_slice(&[core::u32::MAX; 8]);
477 let big_quotient = big_numerator / divisor;
478
479 let (actual64_hi, actual64_lo) = super::divide_256_max_by_128(divisor);
481
482 let actual64_big = (BigUint::from(actual64_hi) << 128) | BigUint::from(actual64_lo);
484
485 assert_eq!(big_quotient, actual64_big, "Actual64 quotient didn't match expected quotient for max/{}", divisor);
487 }
488
489 #[allow(unused_imports)]
490 use rand::{rngs::StdRng, SeedableRng, distributions::Distribution, distributions::Uniform};
491
492 #[test]
493 fn test_max_256() {
494 let log2_tests_per_bit = 6;
495
496 for divisor in 1..(1 << log2_tests_per_bit) {
497 test_divisor_128(divisor);
498 }
499
500 let mut gen = StdRng::seed_from_u64(5673573);
501 for bits in log2_tests_per_bit..128 {
502 let lower_start = 1 << bits;
503 let lower_stop = lower_start + (1 << (log2_tests_per_bit - 3));
504 let upper_stop = 1u128.checked_shl(bits + 1).map_or(core::u128::MAX, |v| v - 1);
505 let upper_start = upper_stop - (1 << (log2_tests_per_bit - 3)) + 1;
506
507 for divisor in lower_start..lower_stop {
508 test_divisor_128(divisor);
509 }
510 for divisor in upper_start..=upper_stop {
511 test_divisor_128(divisor);
512 }
513
514 let random_count = 1 << log2_tests_per_bit;
515 let dist = Uniform::new(lower_stop + 1, upper_start);
516 for _ in 0..random_count {
517 let divisor = dist.sample(&mut gen);
518 test_divisor_128(divisor);
519 }
520 }
521 }
522}