1#![allow(
26 clippy::cast_precision_loss,
28 clippy::float_cmp,
31 clippy::excessive_precision,
34 clippy::many_single_char_names,
37 clippy::needless_range_loop
38)]
39
40use crate::core::{IgraphError, IgraphResult};
41
42#[derive(Debug, Clone, Copy, PartialEq)]
46pub struct PowerLawFitResult {
47 pub continuous: bool,
49 pub alpha: f64,
51 pub xmin: f64,
53 pub log_likelihood: f64,
55 pub ks_statistic: f64,
57}
58
59const EM_SHIFT: usize = 10;
64const EM_ORDER: usize = 32;
65const DBL_EPSILON: f64 = 2.220_446_049_250_313_1e-16;
66const LOG_DBL_MIN: f64 = -7.083_964_185_322_640_8e2;
67const LOG_DBL_MAX: f64 = 7.097_827_128_933_839_7e2;
68
69const EM_COEFFS: [f64; EM_ORDER + 1] = [
71 1.0,
72 1.0 / 12.0,
73 -1.0 / 720.0,
74 1.0 / 30240.0,
75 -1.0 / 1_209_600.0,
76 1.0 / 47_900_160.0,
77 -691.0 / 1_307_674_368_000.0,
78 1.0 / 74_724_249_600.0,
79 -3.389_680_296_322_582_8e-13,
80 8.586_062_056_277_844_5e-15,
81 -2.174_868_698_558_061_9e-16,
82 5.509_002_828_360_230e-18,
83 -1.395_446_468_581_252_3e-19,
84 3.534_707_039_629_467_5e-21,
85 -8.953_517_427_037_547e-23,
86 2.267_952_452_337_683e-24,
87 -5.744_790_668_872_202_5e-26,
88 1.455_172_475_614_865e-27,
89 -3.685_994_940_665_31e-29,
90 9.336_734_257_095_045e-31,
91 -2.365_022_415_700_63e-32,
92 5.990_671_762_482_134e-34,
93 -1.517_454_884_468_290_3e-35,
94 3.843_758_125_454_188e-37,
95 -9.736_353_072_646_691e-39,
96 2.466_247_044_200_681e-40,
97 -6.247_076_741_820_744e-42,
98 1.582_403_024_464_491_4e-43,
99 -4.008_273_685_948_936e-45,
100 1.015_307_585_556_955_6e-46,
101 -2.571_804_158_241_871_7e-48,
102 6.514_456_035_233_815e-50,
103 -1.650_130_990_689_652_4e-51,
104];
105
106fn hzeta(s: f64, q: f64) -> IgraphResult<f64> {
108 if s <= 1.0 || q <= 0.0 {
109 return Err(IgraphError::InvalidArgument(
110 "power_law_fit: Hurwitz zeta requires s > 1 and q > 0".into(),
111 ));
112 }
113
114 let ln_term0 = -s * q.ln();
115 if ln_term0 < LOG_DBL_MIN + 1.0 {
116 return Err(IgraphError::InvalidArgument(
117 "power_law_fit: underflow while evaluating Hurwitz zeta".into(),
118 ));
119 }
120 if ln_term0 > LOG_DBL_MAX - 1.0 {
121 return Err(IgraphError::InvalidArgument(
122 "power_law_fit: overflow while evaluating Hurwitz zeta".into(),
123 ));
124 }
125
126 let max_bits = 54.0;
127 if ((max_bits < s) && (q < 1.0)) || ((0.5 * max_bits < s) && (q < 0.25)) {
130 return Ok(q.powf(-s));
131 }
132 if (0.5 * max_bits < s) && (q < 1.0) {
133 let a0 = q.powf(-s);
134 let p1 = (q / (1.0 + q)).powf(s);
135 let p2 = (q / (2.0 + q)).powf(s);
136 return Ok(a0 * (1.0 + p1 + p2));
137 }
138
139 let qshift = EM_SHIFT as f64 + q;
142 let inv_qshift = 1.0 / qshift;
143 let sqr_inv_qshift = inv_qshift * inv_qshift;
144 let inv_sm1 = 1.0 / (s - 1.0);
145 let pmax = qshift.powf(-s);
146
147 let mut terms: Vec<f64> = Vec::with_capacity(EM_SHIFT + EM_ORDER + 2);
148
149 for j in 0..EM_SHIFT {
150 terms.push((j as f64 + q).powf(-s));
151 }
152 terms.push(0.5 * pmax);
153 terms.push(pmax * qshift * inv_sm1);
154
155 let mut tscp = s;
156 let mut scp = tscp;
157 let mut pcp = pmax * inv_qshift;
158 let mut ratio = scp * pcp;
159 let mut last_j = EM_ORDER;
160 let mut ans: f64 = terms.iter().sum();
161
162 for j in 1..=EM_ORDER {
163 let delta = EM_COEFFS[j] * ratio;
164 terms.push(delta);
165 ans += delta;
166 tscp += 1.0;
167 scp *= tscp;
168 tscp += 1.0;
169 scp *= tscp;
170 pcp *= sqr_inv_qshift;
171 ratio = scp * pcp;
172 if (delta / ans).abs() < 0.5 * DBL_EPSILON {
173 last_j = j;
174 break;
175 }
176 }
177 let _ = last_j;
178
179 let mut acc = 0.0;
180 for &t in terms.iter().rev() {
181 acc += t;
182 }
183 Ok(acc)
184}
185
186fn lnhzeta(s: f64, q: f64) -> IgraphResult<f64> {
189 Ok(hzeta(s, q)?.ln())
190}
191
192fn estimate_alpha_continuous(cut: &[f64], xmin: f64) -> IgraphResult<f64> {
198 if cut.is_empty() {
199 return Err(IgraphError::InvalidArgument(
200 "power_law_fit: no data point was larger than xmin".into(),
201 ));
202 }
203 let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
204 Ok(1.0 + (cut.len() as f64) / logsum)
205}
206
207fn ks_continuous(cut: &[f64], alpha: f64, xmin: f64) -> f64 {
209 let n = cut.len() as f64;
210 let mut d_max = 0.0_f64;
211 for (m, &x) in cut.iter().enumerate() {
212 let d = (1.0 - (xmin / x).powf(alpha - 1.0) - (m as f64) / n).abs();
213 if d > d_max {
214 d_max = d;
215 }
216 }
217 d_max
218}
219
220fn log_likelihood_continuous(cut: &[f64], alpha: f64, xmin: f64) -> f64 {
222 let m = cut.len() as f64;
223 let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
224 let c = (alpha - 1.0) / xmin;
225 -alpha * logsum + c.ln() * m
226}
227
228fn finite_size_correction(alpha: f64, n: usize) -> f64 {
229 let nf = n as f64;
230 alpha * (nf - 1.0) / nf + 1.0 / nf
231}
232
233fn eval_continuous_probe(sorted: &[f64], start: usize) -> IgraphResult<(f64, f64)> {
235 let cut = &sorted[start..];
236 let xmin = sorted[start];
237 let alpha = estimate_alpha_continuous(cut, xmin)?;
238 let d = ks_continuous(cut, alpha, xmin);
239 Ok((alpha, d))
240}
241
242fn linear_scan_continuous(
246 sorted: &[f64],
247 probe_starts: &[usize],
248) -> IgraphResult<Option<(usize, f64, f64)>> {
249 let mut best: Option<(usize, f64, f64)> = None;
250 let mut best_d = f64::MAX;
251 if probe_starts.len() < 2 {
252 return Ok(None);
253 }
254 for &start in &probe_starts[..probe_starts.len() - 1] {
255 let (alpha, d) = eval_continuous_probe(sorted, start)?;
256 if d < best_d {
257 best_d = d;
258 best = Some((start, alpha, d));
259 }
260 }
261 Ok(best)
262}
263
264fn unique_starts(sorted: &[f64]) -> Vec<usize> {
266 let mut starts = Vec::new();
267 let mut prev = f64::NAN;
268 for (i, &x) in sorted.iter().enumerate() {
269 if i == 0 || x != prev {
270 starts.push(i);
271 prev = x;
272 }
273 }
274 starts
275}
276
277fn fit_continuous_search(sorted: &[f64]) -> IgraphResult<PowerLawFitResult> {
278 let n = sorted.len();
279 let uniques = unique_starts(sorted);
280 let num_uniques = uniques.len();
281
282 let mut best: Option<(usize, f64, f64)> = None;
284 if num_uniques >= 50 {
285 let subdivision = 10usize;
286 let num_strata = num_uniques / subdivision;
287 let strata: Vec<usize> = (0..num_strata).map(|i| uniques[i * subdivision]).collect();
288 if let Some((bstart, balpha, bd)) = linear_scan_continuous(sorted, &strata)? {
289 let si = (0..num_strata)
291 .find(|&i| sorted[strata[i]] == sorted[bstart])
292 .unwrap_or(0);
293 let lo = if si > 0 { (si - 1) * subdivision } else { 0 };
294 let mut count = 0usize;
295 if si != 0 {
296 count += subdivision;
297 }
298 if si != num_strata - 1 {
299 count += subdivision;
300 }
301 if count > 0 {
302 let hi = (lo + count).min(num_uniques);
303 let window = &uniques[lo..hi];
304 best = linear_scan_continuous(sorted, window)?;
305 }
306 if best.is_none() {
307 best = Some((bstart, balpha, bd));
308 }
309 }
310 }
311
312 if best.is_none() {
314 best = linear_scan_continuous(sorted, &uniques)?;
315 }
316
317 let (start, alpha, d) = best.ok_or_else(|| {
318 IgraphError::InvalidArgument("power_law_fit: could not fit continuous power-law".into())
319 })?;
320 let xmin = sorted[start];
321 let best_n = n - start;
322 let cut = &sorted[start..];
323
324 let final_alpha = if n < 50 {
325 finite_size_correction(alpha, best_n)
326 } else {
327 alpha
328 };
329 let l = log_likelihood_continuous(cut, final_alpha, xmin);
330
331 Ok(PowerLawFitResult {
332 continuous: true,
333 alpha: final_alpha,
334 xmin,
335 log_likelihood: l,
336 ks_statistic: d,
337 })
338}
339
340fn fit_continuous_fixed(sorted: &[f64], xmin: f64) -> IgraphResult<PowerLawFitResult> {
341 if xmin <= 0.0 {
342 return Err(IgraphError::InvalidArgument(
343 "power_law_fit: xmin must be greater than zero".into(),
344 ));
345 }
346 let start = sorted.partition_point(|&x| x < xmin);
347 let cut = &sorted[start..];
348 let alpha = estimate_alpha_continuous(cut, xmin)?;
349 let d = ks_continuous(cut, alpha, xmin);
350 let total_n = sorted.len();
351 let final_alpha = if total_n < 50 {
352 finite_size_correction(alpha, cut.len())
353 } else {
354 alpha
355 };
356 let l = log_likelihood_continuous(cut, final_alpha, xmin);
357 Ok(PowerLawFitResult {
358 continuous: true,
359 alpha: final_alpha,
360 xmin,
361 log_likelihood: l,
362 ks_statistic: d,
363 })
364}
365
366fn estimate_alpha_discrete(cut: &[f64], xmin: f64) -> IgraphResult<f64> {
373 if cut.is_empty() {
374 return Err(IgraphError::InvalidArgument(
375 "power_law_fit: no data point was larger than xmin".into(),
376 ));
377 }
378 let logsum: f64 = cut.iter().map(|&x| x.ln()).sum();
379 let m = cut.len() as f64;
380
381 let nll = |alpha: f64| -> IgraphResult<f64> { Ok(alpha * logsum + m * lnhzeta(alpha, xmin)?) };
383
384 let inv_phi = (5.0_f64.sqrt() - 1.0) / 2.0; let mut a = 1.0 + 1e-8;
386 let mut b = 64.0;
387 let mut c = b - inv_phi * (b - a);
388 let mut d = a + inv_phi * (b - a);
389 let mut fc = nll(c)?;
390 let mut fd = nll(d)?;
391
392 for _ in 0..200 {
393 if (b - a).abs() < 1e-11 {
394 break;
395 }
396 if fc < fd {
397 b = d;
398 d = c;
399 fd = fc;
400 c = b - inv_phi * (b - a);
401 fc = nll(c)?;
402 } else {
403 a = c;
404 c = d;
405 fc = fd;
406 d = a + inv_phi * (b - a);
407 fd = nll(d)?;
408 }
409 }
410 Ok(f64::midpoint(a, b))
411}
412
413fn ks_discrete(cut: &[f64], alpha: f64, xmin: f64) -> IgraphResult<f64> {
415 let n = cut.len() as f64;
416 let lnz_xmin = lnhzeta(alpha, xmin)?;
417 let mut d_max = 0.0_f64;
418 let mut i = 0usize;
419 let mut m = 0usize;
420 while i < cut.len() {
421 let x = cut[i];
422 let d = ((lnhzeta(alpha, x)? - lnz_xmin).exp_m1() + (m as f64) / n).abs();
423 if d > d_max {
424 d_max = d;
425 }
426 while i < cut.len() && cut[i] == x {
427 i += 1;
428 m += 1;
429 }
430 }
431 Ok(d_max)
432}
433
434fn log_likelihood_discrete(cut: &[f64], alpha: f64, xmin: f64) -> IgraphResult<f64> {
436 let logsum: f64 = cut.iter().map(|&x| x.ln()).sum();
437 let m = cut.len() as f64;
438 Ok(-alpha * logsum - m * lnhzeta(alpha, xmin)?)
439}
440
441fn fit_discrete_fixed(sorted: &[f64], xmin: f64) -> IgraphResult<PowerLawFitResult> {
442 if xmin < 1.0 {
443 return Err(IgraphError::InvalidArgument(
444 "power_law_fit: xmin must be at least 1".into(),
445 ));
446 }
447 let start = sorted.partition_point(|&x| x < xmin);
448 let cut = &sorted[start..];
449 let alpha = estimate_alpha_discrete(cut, xmin)?;
450 let d = ks_discrete(cut, alpha, xmin)?;
451 let total_n = sorted.len();
452 let final_alpha = if total_n < 50 {
453 finite_size_correction(alpha, cut.len())
454 } else {
455 alpha
456 };
457 let l = log_likelihood_discrete(cut, final_alpha, xmin)?;
458 Ok(PowerLawFitResult {
459 continuous: false,
460 alpha: final_alpha,
461 xmin,
462 log_likelihood: l,
463 ks_statistic: d,
464 })
465}
466
467fn fit_discrete_search(sorted: &[f64]) -> IgraphResult<PowerLawFitResult> {
468 let n = sorted.len();
469 let first = sorted.partition_point(|&x| x < 1.0);
471 if first >= n {
472 return Err(IgraphError::InvalidArgument(
473 "power_law_fit: no data point was at least 1".into(),
474 ));
475 }
476
477 let uniques = unique_starts(&sorted[first..]);
481 let abs_uniques: Vec<usize> = uniques.iter().map(|&u| u + first).collect();
483 let num_candidates = abs_uniques.len().saturating_sub(2);
484
485 let mut best: Option<(usize, f64, f64)> = None;
486 let mut best_d = f64::MAX;
487 for &start in &abs_uniques[..num_candidates] {
488 let cut = &sorted[start..];
489 let xmin = sorted[start];
490 let alpha = estimate_alpha_discrete(cut, xmin)?;
491 let d = ks_discrete(cut, alpha, xmin)?;
492 if d < best_d {
493 best_d = d;
494 best = Some((start, alpha, d));
495 }
496 }
497
498 let (start, alpha, d) = best.ok_or_else(|| {
499 IgraphError::InvalidArgument("power_law_fit: could not fit discrete power-law".into())
500 })?;
501 let xmin = sorted[start];
502 let cut = &sorted[start..];
503 let final_alpha = if n < 50 {
504 finite_size_correction(alpha, cut.len())
505 } else {
506 alpha
507 };
508 let l = log_likelihood_discrete(cut, final_alpha, xmin)?;
509 Ok(PowerLawFitResult {
510 continuous: false,
511 alpha: final_alpha,
512 xmin,
513 log_likelihood: l,
514 ks_statistic: d,
515 })
516}
517
518pub fn power_law_fit(
555 data: &[f64],
556 xmin: f64,
557 force_continuous: bool,
558) -> IgraphResult<PowerLawFitResult> {
559 if data.is_empty() {
560 return Err(IgraphError::InvalidArgument(
561 "power_law_fit: no data points".into(),
562 ));
563 }
564
565 let discrete = if force_continuous {
567 false
568 } else {
569 data.iter().all(|&x| x.trunc() == x)
570 };
571
572 let mut sorted = data.to_vec();
573 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
574
575 if discrete {
576 if xmin >= 0.0 {
577 fit_discrete_fixed(&sorted, xmin)
578 } else {
579 fit_discrete_search(&sorted)
580 }
581 } else if xmin >= 0.0 {
582 fit_continuous_fixed(&sorted, xmin)
583 } else {
584 fit_continuous_search(&sorted)
585 }
586}
587
588#[cfg(test)]
589mod tests {
590 use super::*;
591
592 const TOL: f64 = 1e-9;
593
594 #[test]
595 fn hzeta_matches_riemann_zeta() {
596 let z2 = hzeta(2.0, 1.0).expect("zeta(2,1)");
599 let z4 = hzeta(4.0, 1.0).expect("zeta(4,1)");
600 assert!((z2 - std::f64::consts::PI.powi(2) / 6.0).abs() < 1e-12);
601 assert!((z4 - std::f64::consts::PI.powi(4) / 90.0).abs() < 1e-12);
602 }
603
604 #[test]
605 fn hzeta_hurwitz_shift_identity() {
606 let s = 3.5;
608 let a = hzeta(s, 1.0).expect("zeta(s,1)");
609 let b = hzeta(s, 2.0).expect("zeta(s,2)");
610 assert!((a - b - 1.0).abs() < 1e-12);
611 }
612
613 #[test]
614 fn hzeta_rejects_bad_args() {
615 assert!(hzeta(1.0, 1.0).is_err());
616 assert!(hzeta(2.0, 0.0).is_err());
617 }
618
619 #[test]
620 fn continuous_closed_form_alpha() {
621 let data: Vec<f64> = (0..60).map(|i| 1.0 + f64::from(i)).collect();
624 let xmin = 1.0;
625 let cut: Vec<f64> = data.iter().copied().filter(|&x| x >= xmin).collect();
626 let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
627 let expected = 1.0 + (cut.len() as f64) / logsum;
628 let fit = power_law_fit(&data, xmin, true).expect("fit");
629 assert!(fit.continuous);
630 assert!((fit.alpha - expected).abs() < TOL);
631 assert_eq!(fit.xmin, 1.0);
632 }
633
634 #[test]
635 fn finite_size_correction_applies_below_50() {
636 let data: Vec<f64> = (0..10).map(|i| 1.0 + f64::from(i)).collect();
638 let xmin = 1.0;
639 let cut: Vec<f64> = data.iter().copied().filter(|&x| x >= xmin).collect();
640 let logsum: f64 = cut.iter().map(|&x| (x / xmin).ln()).sum();
641 let raw = 1.0 + (cut.len() as f64) / logsum;
642 let m = cut.len() as f64;
643 let corrected = raw * (m - 1.0) / m + 1.0 / m;
644 let fit = power_law_fit(&data, xmin, true).expect("fit");
645 assert!((fit.alpha - corrected).abs() < TOL);
646 }
647
648 #[test]
649 fn discrete_detected_for_integer_data() {
650 let data = vec![1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
651 let fit = power_law_fit(&data, 1.0, false).expect("fit");
652 assert!(!fit.continuous);
653 assert!(fit.alpha > 1.0);
654 }
655
656 #[test]
657 fn force_continuous_overrides_integer_detection() {
658 let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
659 let fit = power_law_fit(&data, 1.0, true).expect("fit");
660 assert!(fit.continuous);
661 }
662
663 #[test]
664 fn non_integer_forces_continuous() {
665 let data = vec![1.5, 2.0, 3.0, 4.0, 5.5];
666 let fit = power_law_fit(&data, 1.0, false).expect("fit");
667 assert!(fit.continuous);
668 }
669
670 #[test]
671 fn empty_data_errors() {
672 assert!(power_law_fit(&[], -1.0, false).is_err());
673 }
674
675 #[test]
676 fn continuous_xmin_must_be_positive() {
677 let data = vec![1.5, 2.0, 3.0];
678 assert!(power_law_fit(&data, 0.0, true).is_err());
679 }
680
681 #[test]
682 fn discrete_xmin_must_be_at_least_one() {
683 let data = vec![1.0, 2.0, 3.0, 4.0];
684 assert!(power_law_fit(&data, 0.5, false).is_err());
685 }
686
687 #[test]
688 fn ks_statistic_in_unit_interval() {
689 let data: Vec<f64> = (1..=100).map(f64::from).collect();
690 let fit = power_law_fit(&data, -1.0, true).expect("fit");
691 assert!(fit.ks_statistic >= 0.0 && fit.ks_statistic <= 1.0);
692 }
693}