Skip to content

Commit

Permalink
Use libc's lgamma/tgamma instead of custom implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
skirpichev committed Feb 8, 2023
1 parent d202b5c commit 7d14105
Showing 1 changed file with 12 additions and 264 deletions.
276 changes: 12 additions & 264 deletions Modules/mathmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,6 @@ get_math_module_state(PyObject *module)
return (math_module_state *)state;
}

/*
sin(pi*x), giving accurate results for all finite x (especially x
integral or close to an integer). This is here for use in the
reflection formula for the gamma function. It conforms to IEEE
754-2008 for finite arguments, but not for infinities or nans.
*/

/* pi, written with more digits than a double can hold so the literal rounds
   to the nearest representable value.  Used by m_sinpi and by the reflection
   formula for negative arguments. */
static const double pi = 3.141592653589793238462643383279502884197;
/* Natural logarithm of pi; used by the reflection formula in m_lgamma. */
static const double logpi = 1.144729885849400174143427351353058711647;

/* Version of PyFloat_AsDouble() with in-line fast paths
for exact floats and integers. Gives a substantial
speed improvement for extracting float arguments.
Expand All @@ -124,162 +114,6 @@ static const double logpi = 1.144729885849400174143427351353058711647;
} \
}

/* Compute sin(pi*x) for finite x.

   |x| is first reduced modulo 2, then the reduced value is assigned to the
   nearest multiple of 0.5 so that sin/cos is only ever evaluated on a small
   argument.  The sign of x is re-applied at the very end. */
static double
m_sinpi(double x)
{
    /* callers must never pass an infinity or a nan */
    assert(Py_IS_FINITE(x));

    double reduced = fmod(fabs(x), 2.0);
    int half_steps = (int)round(2.0*reduced);
    double value;

    assert(0 <= half_steps && half_steps <= 4);
    if (half_steps == 0) {
        value = sin(pi*reduced);
    }
    else if (half_steps == 1) {
        value = cos(pi*(reduced-0.5));
    }
    else if (half_steps == 2) {
        /* Do not rewrite this as -sin(pi*(reduced-1.0)): that form yields
           -0.0 rather than 0.0 when reduced == 1.0. */
        value = sin(pi*(1.0-reduced));
    }
    else if (half_steps == 3) {
        value = -cos(pi*(reduced-1.5));
    }
    else if (half_steps == 4) {
        value = sin(pi*(reduced-2.0));
    }
    else {
        Py_UNREACHABLE();
    }
    return copysign(1.0, x)*value;
}

/* Implementation of the real gamma function. In extensive but non-exhaustive
random tests, this function proved accurate to within <= 10 ulps across the
entire float domain. Note that accuracy may depend on the quality of the
system math functions, the pow function in particular. Special cases
follow C99 annex F. The parameters and method are tailored to platforms
whose double format is the IEEE 754 binary64 format.
Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
and g=6.024680040776729583740234375; these parameters are amongst those
used by the Boost library. Following Boost (again), we re-express the
Lanczos sum as a rational function, and compute it that way. The
coefficients below were computed independently using MPFR, and have been
double-checked against the coefficients in the Boost source code.
For x < 0.0 we use the reflection formula.
There's one minor tweak that deserves explanation: Lanczos' formula for
Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
values, x+g-0.5 can be represented exactly. However, in cases where it
can't be represented exactly the small error in x+g-0.5 can be magnified
significantly by the pow and exp calls, especially for large x. A cheap
correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
involved in the computation of x+g-0.5 (that is, e = computed value of
x+g-0.5 - exact value of x+g-0.5). Here's the proof:
Correction factor
-----------------
Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
double, and e is tiny. Then:
pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
= pow(y, x-0.5)/exp(y) * C,
where the correction_factor C is given by
C = pow(1-e/y, x-0.5) * exp(e)
Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(e) ~ 1+e, so:
C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
Note that for accuracy, when computing r*C it's better to do
r + e*g/y*r;
than
r * (1 + e*g/y);
since the addition in the latter throws away most of the bits of
information in e*g/y.
*/

/* Number of coefficients in each of the two polynomials (numerator and
   denominator) that make up the rational form of the Lanczos sum. */
#define LANCZOS_N 13
/* The Lanczos parameter g, and the same value with 0.5 subtracted. */
static const double lanczos_g = 6.024680040776729583740234375;
static const double lanczos_g_minus_half = 5.524680040776729583740234375;
/* Numerator coefficients, lowest-degree term first: lanczos_sum treats
   lanczos_num_coeffs[i] as the coefficient of x**i. */
static const double lanczos_num_coeffs[LANCZOS_N] = {
23531376880.410759688572007674451636754734846804940,
42919803642.649098768957899047001988850926355848959,
35711959237.355668049440185451547166705960488635843,
17921034426.037209699919755754458931112671403265390,
6039542586.3520280050642916443072979210699388420708,
1439720407.3117216736632230727949123939715485786772,
248874557.86205415651146038641322942321632125127801,
31426415.585400194380614231628318205362874684987640,
2876370.6289353724412254090516208496135991145378768,
186056.26539522349504029498971604569928220784236328,
8071.6720023658162106380029022722506138218516325024,
210.82427775157934587250973392071336271166969580291,
2.5066282746310002701649081771338373386264310793408
};

/* denominator is x*(x+1)*...*(x+LANCZOS_N-2); as with the numerator,
   lanczos_den_coeffs[i] is the coefficient of x**i */
static const double lanczos_den_coeffs[LANCZOS_N] = {
0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};

/* gamma values for small positive integers, 1 through NGAMMA_INTEGRAL:
   gamma_integral[k-1] holds Gamma(k) == (k-1)! for k = 1..NGAMMA_INTEGRAL */
#define NGAMMA_INTEGRAL 23
static const double gamma_integral[NGAMMA_INTEGRAL] = {
1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
1307674368000.0, 20922789888000.0, 355687428096000.0,
6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
51090942171709440000.0, 1124000727777607680000.0,
};

/* Evaluate Lanczos' sum L_g(x) for positive x as the quotient of two
   degree-(LANCZOS_N-1) polynomials. */

static double
lanczos_sum(double x)
{
    double numer = 0.0;
    double denom = 0.0;

    assert(x > 0.0);

    if (x < 5.0) {
        /* Small x: plain Horner evaluation, highest-degree coefficient
           first. */
        int k = LANCZOS_N;
        while (--k >= 0) {
            numer = numer * x + lanczos_num_coeffs[k];
            denom = denom * x + lanczos_den_coeffs[k];
        }
        return numer/denom;
    }

    /* Larger x: evaluating the polynomials directly risks overflow, so both
       are scaled by x**(1-LANCZOS_N) and evaluated as polynomials in 1/x
       instead, which also reduces the error for larger x.  The cutoff of
       5.0 is somewhat arbitrary; smaller cutoffs gave lower accuracy in
       tests. */
    for (int k = 0; k < LANCZOS_N; k++) {
        numer = numer / x + lanczos_num_coeffs[k];
        denom = denom / x + lanczos_den_coeffs[k];
    }
    return numer/denom;
}

/* Constant for +infinity, generated in the same way as float('inf'). */

static double
Expand Down Expand Up @@ -309,113 +143,46 @@ m_nan(void)

#endif

/*
gamma: the real gamma function.
*/

static double
m_tgamma(double x)
m_gamma(double x)
{
double absx, r, y, z, sqrtpow;

/* special cases */
if (!Py_IS_FINITE(x)) {
if (Py_IS_NAN(x) || x > 0.0)
return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
return x; /* gamma(nan) = nan, gamma(inf) = inf */
else {
errno = EDOM;
return Py_NAN; /* tgamma(-inf) = nan, invalid */
return Py_NAN; /* gamma(-inf) = nan, invalid */
}
}
if (x == 0.0) {
errno = EDOM;
/* tgamma(+-0.0) = +-inf, divide-by-zero */
/* gamma(+-0.0) = +-inf, divide-by-zero */
return copysign(Py_HUGE_VAL, x);
}

/* integer arguments */
if (x == floor(x)) {
if (x < 0.0) {
errno = EDOM; /* tgamma(n) = nan, invalid for */
errno = EDOM; /* gamma(n) = nan, invalid for */
return Py_NAN; /* negative integers n */
}
if (x <= NGAMMA_INTEGRAL)
return gamma_integral[(int)x - 1];
}
absx = fabs(x);

/* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
if (absx < 1e-20) {
r = 1.0/x;
if (Py_IS_INFINITY(r))
errno = ERANGE;
return r;
}

/* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
x > 200, and underflows to +-0.0 for x < -200, not a negative
integer. */
if (absx > 200.0) {
if (x < 0.0) {
return 0.0/m_sinpi(x);
}
else {
errno = ERANGE;
return Py_HUGE_VAL;
}
}

y = absx + lanczos_g_minus_half;
/* compute error in sum */
if (absx > lanczos_g_minus_half) {
/* note: the correction can be foiled by an optimizing
compiler that (incorrectly) thinks that an expression like
a + b - a - b can be optimized to 0.0. This shouldn't
happen in a standards-conforming compiler. */
double q = y - absx;
z = q - lanczos_g_minus_half;
}
else {
double q = y - lanczos_g_minus_half;
z = q - absx;
}
z = z * lanczos_g / y;
if (x < 0.0) {
r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
r -= z * r;
if (absx < 140.0) {
r /= pow(y, absx - 0.5);
}
else {
sqrtpow = pow(y, absx / 2.0 - 0.25);
r /= sqrtpow;
r /= sqrtpow;
}
}
else {
r = lanczos_sum(absx) / exp(y);
r += z * r;
if (absx < 140.0) {
r *= pow(y, absx - 0.5);
}
else {
sqrtpow = pow(y, absx / 2.0 - 0.25);
r *= sqrtpow;
r *= sqrtpow;
}
}
if (Py_IS_INFINITY(r))
errno = ERANGE;
return r;
return tgamma(x);
}

/*
lgamma: natural log of the absolute value of the Gamma function.
For large arguments, Lanczos' formula works extremely well here.
*/

static double
m_lgamma(double x)
{
double r;
double absx;

/* special cases */
if (!Py_IS_FINITE(x)) {
if (Py_IS_NAN(x))
Expand All @@ -430,28 +197,9 @@ m_lgamma(double x)
errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
return Py_HUGE_VAL; /* integers n <= 0 */
}
else {
return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
}
}

absx = fabs(x);
/* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
if (absx < 1e-20)
return -log(absx);

/* Lanczos' formula. We could save a fraction of a ulp in accuracy by
having a second set of numerator coefficients for lanczos_sum that
absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
subtraction below; it's probably not worth it. */
r = log(lanczos_sum(absx)) - lanczos_g;
r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
if (x < 0.0)
/* Use reflection formula to get value for negative x. */
r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
if (Py_IS_INFINITY(r))
errno = ERANGE;
return r;
return lgamma(x);
}

/*
Expand Down Expand Up @@ -1159,7 +907,7 @@ math_floor(PyObject *module, PyObject *number)
return PyLong_FromDouble(floor(x));
}

FUNC1A(gamma, m_tgamma,
FUNC1A(gamma, m_gamma,
"gamma($module, x, /)\n--\n\n"
"Gamma function at x.")
FUNC1A(lgamma, m_lgamma,
Expand Down

0 comments on commit 7d14105

Please sign in to comment.