Skip to main content

rust_igraph/algorithms/properties/
power_law_fit.rs

1//! Power-law fitting (ALGO-PR-019).
2//!
3//! Pure-Rust port of `igraph_power_law_fit()` from
4//! `references/igraph/src/misc/power_law_fit.c`, which wraps the bundled
5//! `plfit` library (`references/igraph/vendor/plfit/`). Implements the
6//! Clauset–Shalizi–Newman maximum-likelihood method for fitting a
7//! power-law tail `P(X = x) ∝ x^(-alpha)` for `x >= xmin`.
8//!
9//! Two regimes, selected exactly as upstream:
10//! - **continuous**: closed-form MLE `alpha = 1 + m / Σ log(x_i / xmin)`.
11//! - **discrete**: `alpha` maximises the zeta log-likelihood
12//!   `-alpha·Σ log(x_i) - m·ln ζ(alpha, xmin)` (upstream uses L-BFGS; we
13//!   use a derivative-free golden-section search on this strictly concave
14//!   objective, which converges to the same maximiser).
15//!
16//! When `xmin < 0` the optimal `xmin` is chosen to minimise the
17//! Kolmogorov–Smirnov statistic `D`, faithfully reproducing the
18//! deterministic continuous stratified-sampling search and the discrete
19//! unique-value scan. The p-value is **not** computed (upstream sets
20//! `PLFIT_P_VALUE_SKIP` in the default fit), matching `igraph`.
21//!
22//! The Hurwitz zeta `ζ(s, q) = Σ_{k>=0} (k + q)^(-s)` is evaluated by the
23//! Euler–Maclaurin summation used in `vendor/plfit/hzeta.c`.
24
25#![allow(
26    // Sample counts and ranks are well below 2^52, so the f64 casts are exact.
27    clippy::cast_precision_loss,
28    // Integrality and exact-cutoff tests are intentional bit-exact comparisons,
29    // matching the upstream plfit logic.
30    clippy::float_cmp,
31    // The Euler–Maclaurin coefficients are transcribed verbatim from
32    // vendor/plfit/hzeta.c; the surplus digits keep the port visually faithful.
33    clippy::excessive_precision,
34    // Faithful numeric ports: terse single-letter names (a/b/c/d, s/q, m/n) and
35    // the coefficient-indexing loop mirror the C source and read clearest as-is.
36    clippy::many_single_char_names,
37    clippy::needless_range_loop
38)]
39
40use crate::core::{IgraphError, IgraphResult};
41
42/// Result of fitting a power-law distribution to a sample.
43///
44/// Mirrors the externally observable fields of `igraph_plfit_result_t`.
45#[derive(Debug, Clone, Copy, PartialEq)]
46pub struct PowerLawFitResult {
47    /// `true` if a continuous model was fitted, `false` for discrete.
48    pub continuous: bool,
49    /// The exponent of the fitted power-law (the scaling parameter).
50    pub alpha: f64,
51    /// The lower cutoff `xmin`: the power-law holds for `x >= xmin`.
52    pub xmin: f64,
53    /// Log-likelihood of the fitted parameters on the sample.
54    pub log_likelihood: f64,
55    /// Kolmogorov–Smirnov test statistic between fit and sample.
56    pub ks_statistic: f64,
57}
58
59// ---------------------------------------------------------------------------
60// Hurwitz zeta via Euler–Maclaurin (port of hsl_sf_hzeta_e, hzeta.c).
61// ---------------------------------------------------------------------------
62
63const EM_SHIFT: usize = 10;
64const EM_ORDER: usize = 32;
65const DBL_EPSILON: f64 = 2.220_446_049_250_313_1e-16;
66const LOG_DBL_MIN: f64 = -7.083_964_185_322_640_8e2;
67const LOG_DBL_MAX: f64 = 7.097_827_128_933_839_7e2;
68
69/// `B_{2j} / (2j)!` Euler–Maclaurin coefficients (`coeffs[0] = 1`).
70const EM_COEFFS: [f64; EM_ORDER + 1] = [
71    1.0,
72    1.0 / 12.0,
73    -1.0 / 720.0,
74    1.0 / 30240.0,
75    -1.0 / 1_209_600.0,
76    1.0 / 47_900_160.0,
77    -691.0 / 1_307_674_368_000.0,
78    1.0 / 74_724_249_600.0,
79    -3.389_680_296_322_582_8e-13,
80    8.586_062_056_277_844_5e-15,
81    -2.174_868_698_558_061_9e-16,
82    5.509_002_828_360_230e-18,
83    -1.395_446_468_581_252_3e-19,
84    3.534_707_039_629_467_5e-21,
85    -8.953_517_427_037_547e-23,
86    2.267_952_452_337_683e-24,
87    -5.744_790_668_872_202_5e-26,
88    1.455_172_475_614_865e-27,
89    -3.685_994_940_665_31e-29,
90    9.336_734_257_095_045e-31,
91    -2.365_022_415_700_63e-32,
92    5.990_671_762_482_134e-34,
93    -1.517_454_884_468_290_3e-35,
94    3.843_758_125_454_188e-37,
95    -9.736_353_072_646_691e-39,
96    2.466_247_044_200_681e-40,
97    -6.247_076_741_820_744e-42,
98    1.582_403_024_464_491_4e-43,
99    -4.008_273_685_948_936e-45,
100    1.015_307_585_556_955_6e-46,
101    -2.571_804_158_241_871_7e-48,
102    6.514_456_035_233_815e-50,
103    -1.650_130_990_689_652_4e-51,
104];
105
106/// Hurwitz zeta `ζ(s, q) = Σ_{k>=0} (k + q)^(-s)` for `s > 1`, `q > 0`.
107fn hzeta(s: f64, q: f64) -> IgraphResult<f64> {
108    if s <= 1.0 || q <= 0.0 {
109        return Err(IgraphError::InvalidArgument(
110            "power_law_fit: Hurwitz zeta requires s > 1 and q > 0".into(),
111        ));
112    }
113
114    let ln_term0 = -s * q.ln();
115    if ln_term0 < LOG_DBL_MIN + 1.0 {
116        return Err(IgraphError::InvalidArgument(
117            "power_law_fit: underflow while evaluating Hurwitz zeta".into(),
118        ));
119    }
120    if ln_term0 > LOG_DBL_MAX - 1.0 {
121        return Err(IgraphError::InvalidArgument(
122            "power_law_fit: overflow while evaluating Hurwitz zeta".into(),
123        ));
124    }
125
126    let max_bits = 54.0;
127    // Fast paths for large s / small q (q >= 1 in our callers, so these
128    // are inert there, but kept for faithfulness on general input).
129    if ((max_bits < s) && (q < 1.0)) || ((0.5 * max_bits < s) && (q < 0.25)) {
130        return Ok(q.powf(-s));
131    }
132    if (0.5 * max_bits < s) && (q < 1.0) {
133        let a0 = q.powf(-s);
134        let p1 = (q / (1.0 + q)).powf(s);
135        let p2 = (q / (2.0 + q)).powf(s);
136        return Ok(a0 * (1.0 + p1 + p2));
137    }
138
139    // Euler–Maclaurin summation. Terms are accumulated, then re-summed in
140    // reverse for floating-point accuracy, exactly as in hzeta.c.
141    let qshift = EM_SHIFT as f64 + q;
142    let inv_qshift = 1.0 / qshift;
143    let sqr_inv_qshift = inv_qshift * inv_qshift;
144    let inv_sm1 = 1.0 / (s - 1.0);
145    let pmax = qshift.powf(-s);
146
147    let mut terms: Vec<f64> = Vec::with_capacity(EM_SHIFT + EM_ORDER + 2);
148
149    for j in 0..EM_SHIFT {
150        terms.push((j as f64 + q).powf(-s));
151    }
152    terms.push(0.5 * pmax);
153    terms.push(pmax * qshift * inv_sm1);
154
155    let mut tscp = s;
156    let mut scp = tscp;
157    let mut pcp = pmax * inv_qshift;
158    let mut ratio = scp * pcp;
159    let mut last_j = EM_ORDER;
160    let mut ans: f64 = terms.iter().sum();
161
162    for j in 1..=EM_ORDER {
163        let delta = EM_COEFFS[j] * ratio;
164        terms.push(delta);
165        ans += delta;
166        tscp += 1.0;
167        scp *= tscp;
168        tscp += 1.0;
169        scp *= tscp;
170        pcp *= sqr_inv_qshift;
171        ratio = scp * pcp;
172        if (delta / ans).abs() < 0.5 * DBL_EPSILON {
173            last_j = j;
174            break;
175        }
176    }
177    let _ = last_j;
178
179    let mut acc = 0.0;
180    for &t in terms.iter().rev() {
181        acc += t;
182    }
183    Ok(acc)
184}
185
186/// `ln ζ(s, q)`. For the moderate `(alpha, xmin)` range that arises in
187/// power-law fitting, `ln(hzeta)` is fully accurate.
188fn lnhzeta(s: f64, q: f64) -> IgraphResult<f64> {
189    Ok(hzeta(s, q)?.ln())
190}
191
192// ---------------------------------------------------------------------------
193// Continuous case.
194// ---------------------------------------------------------------------------
195
196/// MLE of `alpha` on a sorted slice whose elements are all `>= xmin`.
197fn estimate_alpha_continuous(cut: &[f64], xmin: f64) -> IgraphResult<f64> {
198    if cut.is_empty() {
199        return Err(IgraphError::InvalidArgument(
200            "power_law_fit: no data point was larger than xmin".into(),
201        ));
202    }
203    let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
204    Ok(1.0 + (cut.len() as f64) / logsum)
205}
206
207/// KS statistic for the continuous fit on a sorted cut slice.
208fn ks_continuous(cut: &[f64], alpha: f64, xmin: f64) -> f64 {
209    let n = cut.len() as f64;
210    let mut d_max = 0.0_f64;
211    for (m, &x) in cut.iter().enumerate() {
212        let d = (1.0 - (xmin / x).powf(alpha - 1.0) - (m as f64) / n).abs();
213        if d > d_max {
214            d_max = d;
215        }
216    }
217    d_max
218}
219
220/// Continuous log-likelihood on a sorted cut slice.
221fn log_likelihood_continuous(cut: &[f64], alpha: f64, xmin: f64) -> f64 {
222    let m = cut.len() as f64;
223    let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
224    let c = (alpha - 1.0) / xmin;
225    -alpha * logsum + c.ln() * m
226}
227
228fn finite_size_correction(alpha: f64, n: usize) -> f64 {
229    let nf = n as f64;
230    alpha * (nf - 1.0) / nf + 1.0 / nf
231}
232
233/// One xmin probe for the continuous search: returns `(alpha, D)`.
234fn eval_continuous_probe(sorted: &[f64], start: usize) -> IgraphResult<(f64, f64)> {
235    let cut = &sorted[start..];
236    let xmin = sorted[start];
237    let alpha = estimate_alpha_continuous(cut, xmin)?;
238    let d = ks_continuous(cut, alpha, xmin);
239    Ok((alpha, d))
240}
241
242/// Deterministic linear scan over candidate start indices; the last probe
243/// is excluded (matches `i < num_probes - 1` upstream). Returns the best
244/// `(start_index, alpha, xmin, D)`.
245fn linear_scan_continuous(
246    sorted: &[f64],
247    probe_starts: &[usize],
248) -> IgraphResult<Option<(usize, f64, f64)>> {
249    let mut best: Option<(usize, f64, f64)> = None;
250    let mut best_d = f64::MAX;
251    if probe_starts.len() < 2 {
252        return Ok(None);
253    }
254    for &start in &probe_starts[..probe_starts.len() - 1] {
255        let (alpha, d) = eval_continuous_probe(sorted, start)?;
256        if d < best_d {
257            best_d = d;
258            best = Some((start, alpha, d));
259        }
260    }
261    Ok(best)
262}
263
264/// Indices in `sorted` where each distinct value first appears.
265fn unique_starts(sorted: &[f64]) -> Vec<usize> {
266    let mut starts = Vec::new();
267    let mut prev = f64::NAN;
268    for (i, &x) in sorted.iter().enumerate() {
269        if i == 0 || x != prev {
270            starts.push(i);
271            prev = x;
272        }
273    }
274    starts
275}
276
277fn fit_continuous_search(sorted: &[f64]) -> IgraphResult<PowerLawFitResult> {
278    let n = sorted.len();
279    let uniques = unique_starts(sorted);
280    let num_uniques = uniques.len();
281
282    // Stratified sampling (PLFIT_STRATIFIED_SAMPLING), deterministic.
283    let mut best: Option<(usize, f64, f64)> = None;
284    if num_uniques >= 50 {
285        let subdivision = 10usize;
286        let num_strata = num_uniques / subdivision;
287        let strata: Vec<usize> = (0..num_strata).map(|i| uniques[i * subdivision]).collect();
288        if let Some((bstart, balpha, bd)) = linear_scan_continuous(sorted, &strata)? {
289            // Locate the winning stratum, then scan the window around it.
290            let si = (0..num_strata)
291                .find(|&i| sorted[strata[i]] == sorted[bstart])
292                .unwrap_or(0);
293            let lo = if si > 0 { (si - 1) * subdivision } else { 0 };
294            let mut count = 0usize;
295            if si != 0 {
296                count += subdivision;
297            }
298            if si != num_strata - 1 {
299                count += subdivision;
300            }
301            if count > 0 {
302                let hi = (lo + count).min(num_uniques);
303                let window = &uniques[lo..hi];
304                best = linear_scan_continuous(sorted, window)?;
305            }
306            if best.is_none() {
307                best = Some((bstart, balpha, bd));
308            }
309        }
310    }
311
312    // Fallback: full linear scan over all unique values.
313    if best.is_none() {
314        best = linear_scan_continuous(sorted, &uniques)?;
315    }
316
317    let (start, alpha, d) = best.ok_or_else(|| {
318        IgraphError::InvalidArgument("power_law_fit: could not fit continuous power-law".into())
319    })?;
320    let xmin = sorted[start];
321    let best_n = n - start;
322    let cut = &sorted[start..];
323
324    let final_alpha = if n < 50 {
325        finite_size_correction(alpha, best_n)
326    } else {
327        alpha
328    };
329    let l = log_likelihood_continuous(cut, final_alpha, xmin);
330
331    Ok(PowerLawFitResult {
332        continuous: true,
333        alpha: final_alpha,
334        xmin,
335        log_likelihood: l,
336        ks_statistic: d,
337    })
338}
339
340fn fit_continuous_fixed(sorted: &[f64], xmin: f64) -> IgraphResult<PowerLawFitResult> {
341    if xmin <= 0.0 {
342        return Err(IgraphError::InvalidArgument(
343            "power_law_fit: xmin must be greater than zero".into(),
344        ));
345    }
346    let start = sorted.partition_point(|&x| x < xmin);
347    let cut = &sorted[start..];
348    let alpha = estimate_alpha_continuous(cut, xmin)?;
349    let d = ks_continuous(cut, alpha, xmin);
350    let total_n = sorted.len();
351    let final_alpha = if total_n < 50 {
352        finite_size_correction(alpha, cut.len())
353    } else {
354        alpha
355    };
356    let l = log_likelihood_continuous(cut, final_alpha, xmin);
357    Ok(PowerLawFitResult {
358        continuous: true,
359        alpha: final_alpha,
360        xmin,
361        log_likelihood: l,
362        ks_statistic: d,
363    })
364}
365
366// ---------------------------------------------------------------------------
367// Discrete case.
368// ---------------------------------------------------------------------------
369
370/// Maximise the discrete log-likelihood over `alpha > 1` by golden-section
371/// search on the strictly concave objective. Replaces upstream L-BFGS.
372fn estimate_alpha_discrete(cut: &[f64], xmin: f64) -> IgraphResult<f64> {
373    if cut.is_empty() {
374        return Err(IgraphError::InvalidArgument(
375            "power_law_fit: no data point was larger than xmin".into(),
376        ));
377    }
378    let logsum: f64 = cut.iter().map(|&x| x.ln()).sum();
379    let m = cut.len() as f64;
380
381    // Negative log-likelihood to minimise.
382    let nll = |alpha: f64| -> IgraphResult<f64> { Ok(alpha * logsum + m * lnhzeta(alpha, xmin)?) };
383
384    let inv_phi = (5.0_f64.sqrt() - 1.0) / 2.0; // 1/φ
385    let mut a = 1.0 + 1e-8;
386    let mut b = 64.0;
387    let mut c = b - inv_phi * (b - a);
388    let mut d = a + inv_phi * (b - a);
389    let mut fc = nll(c)?;
390    let mut fd = nll(d)?;
391
392    for _ in 0..200 {
393        if (b - a).abs() < 1e-11 {
394            break;
395        }
396        if fc < fd {
397            b = d;
398            d = c;
399            fd = fc;
400            c = b - inv_phi * (b - a);
401            fc = nll(c)?;
402        } else {
403            a = c;
404            c = d;
405            fc = fd;
406            d = a + inv_phi * (b - a);
407            fd = nll(d)?;
408        }
409    }
410    Ok(f64::midpoint(a, b))
411}
412
413/// KS statistic for the discrete fit on a sorted cut slice.
414fn ks_discrete(cut: &[f64], alpha: f64, xmin: f64) -> IgraphResult<f64> {
415    let n = cut.len() as f64;
416    let lnz_xmin = lnhzeta(alpha, xmin)?;
417    let mut d_max = 0.0_f64;
418    let mut i = 0usize;
419    let mut m = 0usize;
420    while i < cut.len() {
421        let x = cut[i];
422        let d = ((lnhzeta(alpha, x)? - lnz_xmin).exp_m1() + (m as f64) / n).abs();
423        if d > d_max {
424            d_max = d;
425        }
426        while i < cut.len() && cut[i] == x {
427            i += 1;
428            m += 1;
429        }
430    }
431    Ok(d_max)
432}
433
434/// Discrete log-likelihood on a sorted cut slice.
435fn log_likelihood_discrete(cut: &[f64], alpha: f64, xmin: f64) -> IgraphResult<f64> {
436    let logsum: f64 = cut.iter().map(|&x| x.ln()).sum();
437    let m = cut.len() as f64;
438    Ok(-alpha * logsum - m * lnhzeta(alpha, xmin)?)
439}
440
441fn fit_discrete_fixed(sorted: &[f64], xmin: f64) -> IgraphResult<PowerLawFitResult> {
442    if xmin < 1.0 {
443        return Err(IgraphError::InvalidArgument(
444            "power_law_fit: xmin must be at least 1".into(),
445        ));
446    }
447    let start = sorted.partition_point(|&x| x < xmin);
448    let cut = &sorted[start..];
449    let alpha = estimate_alpha_discrete(cut, xmin)?;
450    let d = ks_discrete(cut, alpha, xmin)?;
451    let total_n = sorted.len();
452    let final_alpha = if total_n < 50 {
453        finite_size_correction(alpha, cut.len())
454    } else {
455        alpha
456    };
457    let l = log_likelihood_discrete(cut, final_alpha, xmin)?;
458    Ok(PowerLawFitResult {
459        continuous: false,
460        alpha: final_alpha,
461        xmin,
462        log_likelihood: l,
463        ks_statistic: d,
464    })
465}
466
467fn fit_discrete_search(sorted: &[f64]) -> IgraphResult<PowerLawFitResult> {
468    let n = sorted.len();
469    // Skip values < 1 (invalid as discrete xmin candidates).
470    let first = sorted.partition_point(|&x| x < 1.0);
471    if first >= n {
472        return Err(IgraphError::InvalidArgument(
473            "power_law_fit: no data point was at least 1".into(),
474        ));
475    }
476
477    // Candidate xmin values: the distinct values from `first`, but the top
478    // two distinct values are excluded so at least three remain (mirrors
479    // the `end_xmin` trimming in plfit_discrete).
480    let uniques = unique_starts(&sorted[first..]);
481    // Map back to absolute indices.
482    let abs_uniques: Vec<usize> = uniques.iter().map(|&u| u + first).collect();
483    let num_candidates = abs_uniques.len().saturating_sub(2);
484
485    let mut best: Option<(usize, f64, f64)> = None;
486    let mut best_d = f64::MAX;
487    for &start in &abs_uniques[..num_candidates] {
488        let cut = &sorted[start..];
489        let xmin = sorted[start];
490        let alpha = estimate_alpha_discrete(cut, xmin)?;
491        let d = ks_discrete(cut, alpha, xmin)?;
492        if d < best_d {
493            best_d = d;
494            best = Some((start, alpha, d));
495        }
496    }
497
498    let (start, alpha, d) = best.ok_or_else(|| {
499        IgraphError::InvalidArgument("power_law_fit: could not fit discrete power-law".into())
500    })?;
501    let xmin = sorted[start];
502    let cut = &sorted[start..];
503    let final_alpha = if n < 50 {
504        finite_size_correction(alpha, cut.len())
505    } else {
506        alpha
507    };
508    let l = log_likelihood_discrete(cut, final_alpha, xmin)?;
509    Ok(PowerLawFitResult {
510        continuous: false,
511        alpha: final_alpha,
512        xmin,
513        log_likelihood: l,
514        ks_statistic: d,
515    })
516}
517
518// ---------------------------------------------------------------------------
519// Public entry point.
520// ---------------------------------------------------------------------------
521
522/// Fit a power-law distribution to a sample of numbers.
523///
524/// Counterpart of `igraph_power_law_fit()`. `P(X = x)` is assumed
525/// proportional to `x^(-alpha)` for `x >= xmin`.
526///
527/// - If `xmin >= 0`, that cutoff is fixed and only `alpha` is fitted.
528/// - If `xmin < 0`, the optimal cutoff is found by minimising the
529///   Kolmogorov–Smirnov statistic.
530/// - If `force_continuous` is `false` and every sample is an integer, a
531///   discrete model is fitted; otherwise a continuous one. Setting
532///   `force_continuous = true` always fits the continuous model.
533///
534/// The p-value is not computed (matching `igraph`'s default).
535///
536/// # Errors
537///
538/// - `InvalidArgument` if `data` is empty, if a fixed `xmin` is out of
539///   range (`<= 0` continuous, `< 1` discrete), or if no sample reaches
540///   `xmin`.
541///
542/// # Examples
543///
544/// ```
545/// use rust_igraph::power_law_fit;
546///
547/// // A continuous power-law tail; fit alpha with a fixed cutoff.
548/// let data = vec![1.0, 2.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0];
549/// let fit = power_law_fit(&data, 1.0, true).unwrap();
550/// assert!(fit.continuous);
551/// assert!(fit.alpha > 1.0);
552/// assert_eq!(fit.xmin, 1.0);
553/// ```
554pub fn power_law_fit(
555    data: &[f64],
556    xmin: f64,
557    force_continuous: bool,
558) -> IgraphResult<PowerLawFitResult> {
559    if data.is_empty() {
560        return Err(IgraphError::InvalidArgument(
561            "power_law_fit: no data points".into(),
562        ));
563    }
564
565    // Decide discrete vs continuous exactly as upstream.
566    let discrete = if force_continuous {
567        false
568    } else {
569        data.iter().all(|&x| x.trunc() == x)
570    };
571
572    let mut sorted = data.to_vec();
573    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
574
575    if discrete {
576        if xmin >= 0.0 {
577            fit_discrete_fixed(&sorted, xmin)
578        } else {
579            fit_discrete_search(&sorted)
580        }
581    } else if xmin >= 0.0 {
582        fit_continuous_fixed(&sorted, xmin)
583    } else {
584        fit_continuous_search(&sorted)
585    }
586}
587
588#[cfg(test)]
589mod tests {
590    use super::*;
591
592    const TOL: f64 = 1e-9;
593
594    #[test]
595    fn hzeta_matches_riemann_zeta() {
596        // ζ(s, 1) is the Riemann zeta. Known closed forms:
597        //   ζ(2) = π²/6, ζ(4) = π⁴/90.
598        let z2 = hzeta(2.0, 1.0).expect("zeta(2,1)");
599        let z4 = hzeta(4.0, 1.0).expect("zeta(4,1)");
600        assert!((z2 - std::f64::consts::PI.powi(2) / 6.0).abs() < 1e-12);
601        assert!((z4 - std::f64::consts::PI.powi(4) / 90.0).abs() < 1e-12);
602    }
603
604    #[test]
605    fn hzeta_hurwitz_shift_identity() {
606        // ζ(s, 2) = ζ(s, 1) - 1, since the q=2 series drops the k=0 term.
607        let s = 3.5;
608        let a = hzeta(s, 1.0).expect("zeta(s,1)");
609        let b = hzeta(s, 2.0).expect("zeta(s,2)");
610        assert!((a - b - 1.0).abs() < 1e-12);
611    }
612
613    #[test]
614    fn hzeta_rejects_bad_args() {
615        assert!(hzeta(1.0, 1.0).is_err());
616        assert!(hzeta(2.0, 0.0).is_err());
617    }
618
619    #[test]
620    fn continuous_closed_form_alpha() {
621        // With a fixed xmin and n >= 50 (no finite-size correction), alpha is
622        // the exact closed-form MLE 1 + n / Σ log(x_i / xmin).
623        let data: Vec<f64> = (0..60).map(|i| 1.0 + f64::from(i)).collect();
624        let xmin = 1.0;
625        let cut: Vec<f64> = data.iter().copied().filter(|&x| x >= xmin).collect();
626        let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
627        let expected = 1.0 + (cut.len() as f64) / logsum;
628        let fit = power_law_fit(&data, xmin, true).expect("fit");
629        assert!(fit.continuous);
630        assert!((fit.alpha - expected).abs() < TOL);
631        assert_eq!(fit.xmin, 1.0);
632    }
633
634    #[test]
635    fn finite_size_correction_applies_below_50() {
636        // n < 50 triggers the (n-1)/n alpha rescale on the cut count.
637        let data: Vec<f64> = (0..10).map(|i| 1.0 + f64::from(i)).collect();
638        let xmin = 1.0;
639        let cut: Vec<f64> = data.iter().copied().filter(|&x| x >= xmin).collect();
640        let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
641        let raw = 1.0 + (cut.len() as f64) / logsum;
642        let m = cut.len() as f64;
643        let corrected = raw * (m - 1.0) / m + 1.0 / m;
644        let fit = power_law_fit(&data, xmin, true).expect("fit");
645        assert!((fit.alpha - corrected).abs() < TOL);
646    }
647
648    #[test]
649    fn discrete_detected_for_integer_data() {
650        let data = vec![1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
651        let fit = power_law_fit(&data, 1.0, false).expect("fit");
652        assert!(!fit.continuous);
653        assert!(fit.alpha > 1.0);
654    }
655
656    #[test]
657    fn force_continuous_overrides_integer_detection() {
658        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
659        let fit = power_law_fit(&data, 1.0, true).expect("fit");
660        assert!(fit.continuous);
661    }
662
663    #[test]
664    fn non_integer_forces_continuous() {
665        let data = vec![1.5, 2.0, 3.0, 4.0, 5.5];
666        let fit = power_law_fit(&data, 1.0, false).expect("fit");
667        assert!(fit.continuous);
668    }
669
670    #[test]
671    fn empty_data_errors() {
672        assert!(power_law_fit(&[], -1.0, false).is_err());
673    }
674
675    #[test]
676    fn continuous_xmin_must_be_positive() {
677        let data = vec![1.5, 2.0, 3.0];
678        assert!(power_law_fit(&data, 0.0, true).is_err());
679    }
680
681    #[test]
682    fn discrete_xmin_must_be_at_least_one() {
683        let data = vec![1.0, 2.0, 3.0, 4.0];
684        assert!(power_law_fit(&data, 0.5, false).is_err());
685    }
686
687    #[test]
688    fn ks_statistic_in_unit_interval() {
689        let data: Vec<f64> = (1..=100).map(f64::from).collect();
690        let fit = power_law_fit(&data, -1.0, true).expect("fit");
691        assert!(fit.ks_statistic >= 0.0 && fit.ks_statistic <= 1.0);
692    }
693}