@@ -8,6 +8,8 @@
 
 #include "bv_utils.h"
 
+#include <util/arith_tools.h>
+
 #include <utility>
 
 bvt bv_utilst::build_constant(const mp_integer &n, std::size_t width)
@@ -782,7 +784,7 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // trees (and also the default addition scheme), but isn't consistently more
 // performant with simple partial-product generation. Only when using
 // higher-radix multipliers does the combination appear to perform better.
-#define DADDA_TREE
+// #define DADDA_TREE
 
 // The following examples demonstrate the performance differences (with a
 // time-out of 7200 seconds):
@@ -924,7 +926,7 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // with Dadda's reduction yields the most consistent performance improvement
 // while not regressing substantially in the matrix of different benchmarks
 // and solvers (CaDiCaL and MiniSat2).
-#define RADIX_MULTIPLIER 8
+// #define RADIX_MULTIPLIER 8
 
 #ifdef RADIX_MULTIPLIER
 static bvt unsigned_multiply_by_3(propt &prop, const bvt &op)
@@ -1861,6 +1863,155 @@ bvt bv_utilst::unsigned_karatsuba_multiplier(const bvt &_op0, const bvt &_op1)
   return add(z0, z1);
 }
 
+bvt bv_utilst::unsigned_toom_cook_multiplier(const bvt &_op0, const bvt &_op1)
+{
+  PRECONDITION(!_op0.empty());
+  PRECONDITION(!_op1.empty());
+
+  if(_op1.size() == 1)
+    return unsigned_multiplier(_op0, _op1);
+
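+  // This is a Toom-Cook-style multiplier: the operands are split into
+  // GROUP_SIZE-bit digits, making the product a polynomial in
+  // x = 2^GROUP_SIZE whose coefficients d[i] are fresh variables. The d[i]
+  // are constrained by evaluating the polynomial identity
+  // a(x) * b(x) = r(x) at sufficiently many points; the interpolation step
+  // of Toom-Cook is thus left to the SAT solver.
+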
+  // break up _op0, _op1 into groups of at most GROUP_SIZE bits
+  PRECONDITION(_op0.size() == _op1.size());
+#define GROUP_SIZE 8
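+  // each coefficient d[i] is a sum of at most ceil(width / GROUP_SIZE)
+  // products of two GROUP_SIZE-bit digits, so d_bits bits suffice to store
+  // it without overflow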
+  const std::size_t d_bits =
+    2 * GROUP_SIZE +
+    2 * address_bits((_op0.size() + GROUP_SIZE - 1) / GROUP_SIZE);
+  std::vector<bvt> a, b, c_ops, d;
+  for(std::size_t i = 0; i < _op0.size(); i += GROUP_SIZE)
+  {
+    std::size_t u = std::min(i + GROUP_SIZE, _op0.size());
+    a.emplace_back(_op0.begin() + i, _op0.begin() + u);
+    b.emplace_back(_op1.begin() + i, _op1.begin() + u);
+
+    c_ops.push_back(zeros(i));
+    d.push_back(prop.new_variables(d_bits));
+    c_ops.back().insert(c_ops.back().end(), d.back().begin(), d.back().end());
+    c_ops.back() = zero_extension(c_ops.back(), _op0.size());
+  }
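+  // the coefficients d[a.size()] ... d[2 * a.size() - 2] only contribute to
+  // bits beyond the result width, so they get no c_ops entry, but they are
+  // still needed for the evaluation constraints below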
+  for(std::size_t i = a.size(); i < 2 * a.size() - 1; ++i)
+  {
+    d.push_back(prop.new_variables(d_bits));
+  }
+
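+  // evaluation at x = 0: the constant coefficient d[0] must equal a[0] * b[0]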
+  // r(0)
+  bvt r_0 = d[0];
+  prop.l_set_to_true(equal(
+    r_0,
+    unsigned_multiplier(
+      zero_extension(a[0], r_0.size()), zero_extension(b[0], r_0.size()))));
+
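+  // Evaluate at the points x = 2^(j-1) and x = -2^(j-1) for
+  // j = 1, ..., a.size() - 1; together with x = 0 this yields
+  // 2 * a.size() - 1 points, one per unknown coefficient. Powers of two are
+  // cheap evaluation points as multiplying by them amounts to shifting. All
+  // equalities are over bit vectors truncated to the result width, which is
+  // consistent with the final product being computed modulo 2^width.
+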
+  for(std::size_t j = 1; j < a.size(); ++j)
+  {
+    // r(2^(j-1))
+    bvt r_j = zero_extension(
+      d[0], std::min(_op0.size(), d[0].size() + (j - 1) * (d.size() - 1)));
+    for(std::size_t i = 1; i < d.size(); ++i)
+    {
+      r_j = add(
+        r_j,
+        shift(
+          zero_extension(d[i], r_j.size()), shiftt::SHIFT_LEFT, (j - 1) * i));
+    }
+
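+    // Split a and b into even- and odd-indexed digits so that
+    // a(x) = a_even(x^2) + x * a_odd(x^2); the sums a_even, a_odd, b_even,
+    // b_odd are then shared between the evaluations at 2^(j-1) and -2^(j-1).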
+    bvt a_even = zero_extension(a[0], r_j.size());
+    for(std::size_t i = 2; i < a.size(); i += 2)
+    {
+      a_even = add(
+        a_even,
+        shift(
+          zero_extension(a[i], a_even.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * i));
+    }
+    bvt a_odd = zero_extension(a[1], r_j.size());
+    for(std::size_t i = 3; i < a.size(); i += 2)
+    {
+      a_odd = add(
+        a_odd,
+        shift(
+          zero_extension(a[i], a_odd.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * (i - 1)));
+    }
+    bvt b_even = zero_extension(b[0], r_j.size());
+    for(std::size_t i = 2; i < b.size(); i += 2)
+    {
+      b_even = add(
+        b_even,
+        shift(
+          zero_extension(b[i], b_even.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * i));
+    }
+    bvt b_odd = zero_extension(b[1], r_j.size());
+    for(std::size_t i = 3; i < b.size(); i += 2)
+    {
+      b_odd = add(
+        b_odd,
+        shift(
+          zero_extension(b[i], b_odd.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * (i - 1)));
+    }
+
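+    // constrain r(2^(j-1)) = a(2^(j-1)) * b(2^(j-1))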
+    prop.l_set_to_true(equal(
+      r_j,
+      unsigned_multiplier(
+        add(a_even, shift(a_odd, shiftt::SHIFT_LEFT, j - 1)),
+        add(b_even, shift(b_odd, shiftt::SHIFT_LEFT, j - 1)))));
+
+    // r(-2^(j-1))
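+    // odd powers of -2^(j-1) are negative, so the coefficients d[i] with odd
+    // index i are subtracted rather than added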
+    bvt r_minus_j = zero_extension(
+      d[0], std::min(_op0.size(), d[0].size() + (j - 1) * (d.size() - 1)));
+    for(std::size_t i = 1; i < d.size(); ++i)
+    {
+      if(i % 2 == 1)
+      {
+        r_minus_j = sub(
+          r_minus_j,
+          shift(
+            zero_extension(d[i], r_minus_j.size()),
+            shiftt::SHIFT_LEFT,
+            (j - 1) * i));
+      }
+      else
+      {
+        r_minus_j = add(
+          r_minus_j,
+          shift(
+            zero_extension(d[i], r_minus_j.size()),
+            shiftt::SHIFT_LEFT,
+            (j - 1) * i));
+      }
+    }
+
+    prop.l_set_to_true(equal(
+      r_minus_j,
+      unsigned_multiplier(
+        sub(a_even, shift(a_odd, shiftt::SHIFT_LEFT, j - 1)),
+        sub(b_even, shift(b_odd, shiftt::SHIFT_LEFT, j - 1)))));
+  }
+
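+  // the product is the sum of the coefficients shifted into place, i.e.,
+  // the sum over i of d[i] * 2^(i * GROUP_SIZE), truncated to the result
+  // width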
+  if(c_ops.empty())
+    return zeros(_op0.size());
+  else
+  {
+#ifdef WALLACE_TREE
+    return wallace_tree(c_ops);
+#elif defined(DADDA_TREE)
+    return dadda_tree(c_ops);
+#else
+    bvt product = c_ops.front();
+
+    for(auto it = std::next(c_ops.begin()); it != c_ops.end(); ++it)
+      product = add(product, *it);
+
+    return product;
+#endif
+  }
+}
+
 bvt bv_utilst::unsigned_multiplier_no_overflow(
   const bvt &op0,
   const bvt &op1)
@@ -1913,6 +2064,8 @@ bvt bv_utilst::signed_multiplier(const bvt &op0, const bvt &op1)
 
 #ifdef USE_KARATSUBA
   bvt result = unsigned_karatsuba_multiplier(neg0, neg1);
+#elif defined(USE_TOOM_COOK)
+  bvt result = unsigned_toom_cook_multiplier(neg0, neg1);
 #else
   bvt result=unsigned_multiplier(neg0, neg1);
 #endif
@@ -1986,6 +2139,9 @@ bvt bv_utilst::multiplier(
 #ifdef USE_KARATSUBA
   case representationt::UNSIGNED:
     return unsigned_karatsuba_multiplier(op0, op1);
+#elif defined(USE_TOOM_COOK)
+  case representationt::UNSIGNED:
+    return unsigned_toom_cook_multiplier(op0, op1);
 #else
   case representationt::UNSIGNED: return unsigned_multiplier(op0, op1);
 #endif