桌面平台CPU性能测试(单核)裸机程序:C++计算圆周率500位
本帖最后由 星空天神 于 2024-12-14 17:34 编辑看前面有人用js和rust实现,试了试c++. 需要boost库和MPIR c++17
#include <boost/multiprecision/gmp.hpp>
#include <boost/math/special_functions/gamma.hpp>
#include <iostream>
#include <string>
#include <string_view>
#include <chrono>
using namespace boost::multiprecision;
using namespace std::chrono;
void pi500()
{
const auto start = steady_clock::now();
mpf_float::default_precision(1000);
size_t bit = 500;
mpf_float polygon{ 6 }, r{ 1 }, M{1};
auto i = 0;
while(true)
{
mpf_float G = sqrt(pow(r, 2) - pow(M / 2, 2));
mpf_float j = r - G;
mpf_float m = sqrt(pow(M / 2, 2) + pow(j, 2));
polygon *= 2;
mpf_float perimeter = m * polygon;
mpf_float pi = perimeter / 2;
M = m;
i++;
auto piStr = pi.str();
std::string_view piStrV{ piStr.c_str(),std::min(bit + 2,piStr.size())};
constexpr const char* PI{ "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989" };
if (piStrV == std::string_view(PI, bit + 2))
break;
}
const auto end = steady_clock::now();
duration<double, std::milli> diff = end - start;
std::cout <<"step " << i << '\n';
std::cout << "cost " << diff.count() <<"ms"<< '\n';
}
// Runs the benchmark once, then blocks until a line is read from standard
// input so the console window stays open.
int main()
{
    pi500();

    std::string pause;
    std::getline(std::cin, pause);
    return 0;
}
锐龙5900x
step 830
cost 26.8446ms
M3 max开O3优化是7.75 ms
论坛助手,iPhone 另外老哥你应该循环个三五百次取平均,不然只跑一次十几二十毫秒调度器都反应不过来测不准的
c++: 什么叫真正的高性能语言啊(战术后仰
论坛助手,iPhone 9800X3D PBO+200 金核银核-15,其余-25
step 830
cost 19.9704ms 本帖最后由 Xerxes_2 于 2024-12-15 11:55 编辑
目前大数字运算最快的还是 MPFR 吧,用 Rust 调 MPFR 再优化算法的话可以做到几十 us
一样的暴力算法
use rug::{
Float, Integer,
float::Round,
ops::{AddAssignRound, DivAssignRound},
};
/// Benchmarks the polygon-doubling approximation of pi 100 times and prints the
/// mean, minimum, maximum and standard deviation of the run times.
///
/// Each run starts from a hexagon inscribed in the unit circle (side length 1)
/// and doubles the number of sides until the decimal text of the estimate
/// matches the first `BIT + 2` characters ("3." plus `BIT` digits) of the
/// reference value.
fn main() {
    println!("开始计算, 100次……");
    let mut res = Vec::with_capacity(100);
    const BIT: usize = 500;
    // Reference expansion of pi; only its first `BIT + 2` bytes are compared.
    const PI: &str = "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989";
    for _ in 0..100 {
        let start = std::time::Instant::now();
        let mut polygon = Integer::from(6);
        const R: &Integer = Integer::ONE;
        // Current side length, held with 1669 bits of precision.
        let mut big_m = Float::with_val(1669, 1);
        loop {
            // big_m <- (M / 2)^2
            big_m.square_mut();
            big_m.div_assign_round(4, Round::Nearest);
            // Distance from the centre to the midpoint of the side.
            let big_g = (R - big_m.to_owned()).sqrt();
            // Gap between that midpoint and the circle.
            let j = R - big_g;
            // big_m <- sqrt((M / 2)^2 + j^2): side of the polygon with twice the sides.
            big_m.add_assign_round(j.square(), Round::Nearest);
            big_m.sqrt_mut();
            polygon *= 2;
            let perimeter = big_m.to_owned() * &polygon;
            let mut pi = perimeter;
            pi.div_assign_round(2, Round::Nearest);
            let pi_string = pi.to_string();
            if let Some(slice) = pi_string.get(0..BIT + 2) {
                // Compare against the same-length prefix of the reference; the
                // whole 1000-digit literal could never equal a 502-byte slice.
                if slice == &PI[..BIT + 2] {
                    res.push(start.elapsed());
                    break;
                }
            }
        }
    }
    let sum = res.iter().sum::<std::time::Duration>();
    let min = res.iter().min().unwrap();
    let max = res.iter().max().unwrap();
    let avg = sum / res.len() as u32;
    println!("平均耗时: {:?}", avg);
    println!("最短耗时: {:?}", min);
    println!("最长耗时: {:?}", max);
    // Population variance of the run times, in milliseconds squared.
    let variance = res
        .iter()
        .map(|x| {
            let x = x.as_secs_f64() * 1_000.0;
            (x - avg.as_secs_f64() * 1_000.0).powi(2)
        })
        .sum::<f64>()
        / res.len() as f64;
    println!("标准差: {}ms", variance.sqrt());
    println!("按Enter退出……");
    // Wait for Enter so the console window stays open.
    let _ = std::io::stdin().read_line(&mut String::new());
}
开始计算, 100次……
平均耗时: 3.645485ms
最短耗时: 3.428667ms
最长耗时: 5.163209ms
标准差: 0.30984631479181735ms
按Enter退出……
—— 来自 Google Pixel 9 Pro, Android 15, 鹅球 v3.3.95-debug
最近都把泥潭当成leetcode了?怎么全在算圆周率
本帖最后由 星空天神 于 2024-12-15 16:00 编辑
lshzh.hi 发表于 2024-12-15 06:17
另外老哥你应该循环个三五百次取平均,不然只跑一次十几二十毫秒调度器都反应不过来测不准的
c++: 什么叫 ...
改了改,跑100次. 加了使用mpfr的优化逻辑.不过win上mpfr只有个很老的13年前的库,跑下来平均7.7ms懒得去编译最新版了.
#include <boost/multiprecision/gmp.hpp>
#include <boost/multiprecision/mpfr.hpp>
#include <boost/math/special_functions/gamma.hpp>
#include <iostream>
#include <string>
#include <string_view>
#include <chrono>
using namespace boost::multiprecision;
using namespace std::chrono;
double pi500_v1()
{
const auto start = steady_clock::now();
mpfr_float::default_precision(1000);
size_t bit = 500;
mpfr_floatpolygon{ 6 }, r{ 1 }, M{ 1 };
auto i = 0;
while(true)
{
mpfr_floatG = sqrt(pow(r, 2) - pow(M / 2, 2));
mpfr_floatj = r - G;
mpfr_floatm = sqrt(pow(M / 2, 2) + pow(j, 2));
polygon *= 2;
mpfr_floatperimeter = m * polygon;
mpfr_floatpi = perimeter / 2;
M = m;
i++;
auto piStr = pi.str();
std::string_view piStrV{ piStr.c_str(),std::min(bit + 2,piStr.size())};
constexpr const char* PI{ "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989" };
if (piStrV == std::string_view(PI, bit + 2))
break;
}
const auto end = steady_clock::now();
duration<double, std::milli> diff = end - start;
return diff.count();
}
double pi500_v2()
{
const auto start = steady_clock::now();
mpfr_float::default_precision(1000);
size_t bit = 500;
size_t i{ 0 };
mpfr_float len{ 1 };
mpfr_float a{ 1 };
while (true)
{
len = 2 - sqrt(4 - len);
a *= 2;
mpfr_float pi = sqrt(len) * a * 3;
i++;
auto piStr = pi.str(bit + 2);
std::string_view piStrV{ piStr.c_str(),std::min(bit + 2,piStr.size()) };
constexpr const char* PI{ "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989" };
if (piStrV == std::string_view(PI, bit + 2))
break;
}
const auto end = steady_clock::now();
duration<double, std::milli> diff = end - start;
return diff.count();
}
#include <vector>
#include <numeric>
#include <algorithm>
constexpr uint32_t loop{ 100 };
// Runs pi500_v2 `loop` times, stores each run's duration (milliseconds) and
// prints the maximum, minimum and average. Then waits for a line on standard
// input so the console window stays open.
int main()
{
    std::vector<double> results;
    results.resize(loop);
    for (uint32_t i{0}; i < loop; i++)
        results[i] = pi500_v2(); // store per-run timing in its own slot
    std::cout << "max cost " << *std::max_element(results.begin(), results.end()) << "ms" << '\n';
    std::cout << "min cost " << *std::min_element(results.begin(), results.end()) << "ms" << '\n';
    std::cout << "avg cost " << std::reduce(results.begin(), results.end()) / loop << "ms" << '\n';
    std::string str;
    std::getline(std::cin, str);
    return 0;
}
Jet.Black 发表于 2024-12-15 11:47
最近都把泥潭当成leetcode了?怎么全在算圆周率
讲道理,这种帖子在你谭还算高质量有信息量的帖子。 本帖最后由 phorcys02 于 2024-12-15 15:49 编辑
第一版
step 830
cost 19.6639ms//-Ofast
cost 19.8149ms//-O2
cost 20.0936ms//-O3
cost 24.1712ms// 无优化
第二版
//无优化
max cost 19.009ms
min cost 18.2873ms
avg cost 18.3688ms
//-O2
max cost 16.579ms
min cost 15.5659ms
avg cost 15.7237ms
//-O3
max cost 15.6974ms
min cost 15.5868ms
avg cost 15.6042ms
//-Ofast
max cost 15.6873ms
min cost 15.569ms
avg cost 15.5864ms
龙芯3C6000
c/c++ 运行太快,误差其实有点大,大头可能在其他地方咯...
phorcys02 发表于 2024-12-15 15:46
第一版
step 830
cost 19.6639ms//-Ofast
大头其实是在字符串生成和比较上 星空天神 发表于 2024-12-15 15:55
大头其实是在字符串生成和比较上
msys2上的 libgmp 应该都带了吧
https://packages.msys2.org/packages/mingw-w64-x86_64-gmp
Xerxes_2 发表于 2024-12-15 15:58
msys2上的 libgmp 应该都带了吧
https://packages.msys2.org/packages/mingw-w64-x86_64-gmp
不想整了,那位用m3 max的老哥试试吧,估计他能压到3ms以内 星空天神 发表于 2024-12-15 16:03
不想整了,那位用m3 max的老哥试试吧,估计他能压到3ms以内
我不知道他怎么编译的反正我这里编不起来,C++的编译报错也根本看不懂 m4 max
无优化
max cost 5.14612ms
min cost 3.59454ms
avg cost 3.93417ms
phorcys02 发表于 2024-12-15 15:46
第一版
step 830
cost 19.6639ms//-Ofast
龙芯3A4000
无优化
max cost 25.6228ms
min cost 25.2168ms
avg cost 25.3106ms
-O2
max cost 20.1907ms
min cost 19.8703ms
avg cost 19.9706ms
-O3
max cost 20.2468ms
min cost 19.8624ms
avg cost 19.962ms
-Ofast
max cost 20.226ms
min cost 19.904ms
avg cost 19.9884ms 又找了找圆周率算法发现了很多专业选手
专业选手1 梅钦公式
#include <iostream>
#include <string>
#include <chrono>
using namespace std::chrono;
constexpr const int32_t bit{500};
// Computes pi with Machin's formula, pi = 16*atan(1/5) - 4*atan(1/239),
// using fixed-point arithmetic in base M = 10000 (four decimal digits per
// array element), and returns the elapsed wall-clock time in milliseconds.
//
// pi[0] holds the integer part and pi[1..] the fractional limbs; N is
// bit / 4 + 3 limbs. To print the digits, emit "3." followed by
// pi[1] .. pi[N-3], each zero-padded to four digits.
double pi500_pro_v1()
{
    const auto start = steady_clock::now();
    // a[s] / b[s] is the first term of each arctangent series:
    // 80/25 = 16/5 (s = 1) and 956/57121 = 4/239 (s = 0); b[s] is the
    // divisor applied between consecutive terms (5^2 and 239^2).
    int32_t a[2] = {956, 80}, b[2] = {57121, 25}, i = 0, j, k, p, q, r, s = 2, t, u, v, N{bit}, M = 10000;
    N = N / 4 + 3;
    int32_t *pi = new int32_t[N], *e = new int32_t[N];
    while (i < N)
        pi[i++] = 0;
    // s runs 1, 0: first add the atan(1/5) series, then subtract atan(1/239).
    while (--s + 1)
    {
        // e starts as the integer a[s]; k (the sign flag) starts as s.
        for (*e = a[k = s], i = N; --i;)
            e[i] = 0;
        // Per term: e /= b[s] (long division, remainder r), u = e / q limb by
        // limb (remainder v), and pi += or -= u. i skips leading zero limbs of
        // e; q walks the odd denominators 1, 3, 5, ...; k alternates the sign.
        for (q = 1; j = i - 1, i < N; e[i] ? 0 : ++i, q += 2, k = !k)
            for (r = v = 0; ++j < N; pi[j] += k ? u : -u)
                u = (t = v * M + (e[j] = (p = r * M + e[j]) / b[s])) / q, r = p % b[s], v = t % q;
    }
    // Normalise the fractional limbs into [0, M) from least to most
    // significant. s is -1 here, so the initial carry is -1.
    while (--i)
        (pi[i] = (t = pi[i] + s) % M) < 0 ? pi[i] += M, s = t / M - 1 : s = t / M;
    delete[] pi, delete[] e;
    const auto end = steady_clock::now();
    duration<double, std::milli> diff = end - start;
    return diff.count();
}
#include <vector>
#include <numeric>
#include <algorithm>
constexpr uint32_t loop{100};
// Runs pi500_pro_v1 `loop` times, stores each run's duration (milliseconds)
// and prints the maximum, minimum and average. Then waits for a line on
// standard input so the console window stays open.
int main()
{
    std::vector<double> results;
    results.resize(loop);
    for (uint32_t i{0}; i < loop; i++)
        results[i] = pi500_pro_v1(); // store per-run timing in its own slot
    std::cout << "max cost " << *std::max_element(results.begin(), results.end()) << "ms" << '\n';
    std::cout << "min cost " << *std::min_element(results.begin(), results.end()) << "ms" << '\n';
    std::cout << "avg cost " << std::reduce(results.begin(), results.end()) / loop << "ms" << '\n';
    std::string str;
    std::getline(std::cin, str);
    return 0;
}
6800u
max cost 0.2177ms
min cost 0.1604ms
avg cost 0.169834ms
本帖最后由 0WHan0 于 2024-12-16 16:56 编辑
星空天神 发表于 2024-12-16 15:32
又找了找圆周率算法发现了很多专业选手
专业选手1 梅钦公式
3A4000
g++ 14.2.0 -Ofast
max cost 0.756162ms
min cost 0.702652ms
avg cost 0.704955ms
clang++ 18.1.8 -Ofast
max cost 0.399699ms
min cost 0.388413ms
avg cost 0.389215ms
Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz
第二版:
max cost 13.3654ms
min cost 6.5841ms
avg cost 6.73705ms
编译参数:c++ -O3 -I`brew --prefix boost`/include `pkg-config --cflags --libs mpfr gmp` foo.cpp
clang++ 19.1.5
===
step 830
cost 16.5006ms #M4 Pro
cost 23.7123ms #M2 Max
===
max cost 8.85958ms #M4 Pro
min cost 3.04883ms
avg cost 3.48595ms
max cost 10.331ms #M2 Max
min cost 4.22104ms
avg cost 4.62733ms
===
g++ 14.2.0
===
step 830
cost 12.122ms #M4 Pro
cost 12.699ms #M2 Max
===
max cost 6.224ms #M4 Pro
min cost 3.139ms
avg cost 3.41423ms
max cost 8.466ms #M2 Max
min cost 4.335ms
avg cost 4.62142ms
=== 本帖最后由 Xerxes_2 于 2024-12-16 20:13 编辑
星空天神 发表于 2024-12-16 15:32
又找了找圆周率算法发现了很多专业选手
专业选手1 梅钦公式
为啥比 Rust 慢这么多写傻了
M2 Pro 跑了下已经微秒级了
开始计算, 100次……
平均耗时: 159.107µs
最短耗时: 118.125µs
最长耗时: 241.458µs
标准差: 0.03174605343078096ms
按Enter退出……
use std::time::Duration;
const BIT: usize = 500;
const ITER: usize = 10000;

/// Computes pi with Machin's formula, `pi = 16*atan(1/5) - 4*atan(1/239)`,
/// in fixed-point arithmetic with base 10000 (four decimal digits per limb).
///
/// Returns `digits / 4 + 3` limbs: element 0 is the integer part and the
/// following elements are the fractional digits, each normalised to
/// `0..10000`. The last limbs act as guard digits and may be inexact.
fn machin_pi_limbs(digits: usize) -> Vec<i32> {
    const BASE: i32 = 10_000;
    // (first numerator, divisor between terms, whether the series is added):
    // 80/25 = 16/5 with 5^2, and 956/57121 = 4/239 with 239^2.
    const SERIES: [(i32, i32, bool); 2] = [(80, 25, true), (956, 57_121, false)];

    let n = digits / 4 + 3;
    let mut pi = vec![0_i32; n];
    let mut e = vec![0_i32; n];

    for (numerator, divisor, added) in SERIES {
        let mut add = added;
        e.fill(0);
        e[0] = numerator;
        // Odd denominator of the current arctangent term: 1, 3, 5, ...
        let mut q = 1;
        // Index of the first limb of `e` that may still be non-zero.
        let mut i = 0;
        while i < n {
            let mut r = 0; // running remainder of e / divisor
            let mut v = 0; // running remainder of (e / divisor) / q
            for (e_limb, pi_limb) in e.iter_mut().zip(pi.iter_mut()).skip(i) {
                let p = r * BASE + *e_limb;
                *e_limb = p / divisor;
                r = p % divisor;
                let t = v * BASE + *e_limb;
                let u = t / q;
                v = t % q;
                *pi_limb += if add { u } else { -u };
            }
            // Skip a leading limb once it has been divided down to zero.
            if e[i] == 0 {
                i += 1;
            }
            q += 2;
            add = !add;
        }
    }

    // Normalise the fractional limbs into 0..BASE, least significant first.
    // NOTE(review): the initial carry of -1 is kept from the posted code; it
    // subtracts one unit in the last place.
    let mut carry = -1;
    for limb in pi.iter_mut().skip(1).rev() {
        let t = *limb + carry;
        *limb = t % BASE;
        if *limb < 0 {
            *limb += BASE;
            carry = t / BASE - 1;
        } else {
            carry = t / BASE;
        }
    }
    pi
}

/// Times one computation of pi to `BIT` decimal digits and returns the
/// elapsed wall-clock time.
///
/// To print the result, emit "3." followed by limbs `1..N - 2` of
/// [`machin_pi_limbs`], each formatted with `{:04}`.
fn pi500_pro_v1() -> Duration {
    let start = std::time::Instant::now();
    let limbs = machin_pi_limbs(BIT);
    // Keep the result observable so the optimizer cannot discard the timed work.
    std::hint::black_box(&limbs);
    start.elapsed()
}
/// Runs `pi500_pro_v1` `ITER` times and prints the mean, minimum, maximum and
/// standard deviation (in microseconds) of the run times, then waits for
/// Enter before exiting.
fn main() {
    println!("开始计算, {ITER}次……");
    let samples: Vec<Duration> = (0..ITER).map(|_| pi500_pro_v1()).collect();

    let total: Duration = samples.iter().sum();
    let fastest = samples.iter().min().unwrap();
    let slowest = samples.iter().max().unwrap();
    let mean = total / samples.len() as u32;
    println!("平均耗时: {:?}", mean);
    println!("最短耗时: {:?}", fastest);
    println!("最长耗时: {:?}", slowest);

    // Population variance of the run times, in microseconds squared.
    let mean_us = mean.as_secs_f64() * 1_000_000.0;
    let squared_deviations = samples
        .iter()
        .map(|sample| (sample.as_secs_f64() * 1_000_000.0 - mean_us).powi(2));
    let variance = squared_deviations.sum::<f64>() / samples.len() as f64;
    println!("标准差: {}µs", variance.sqrt());

    println!("按Enter退出……");
    // Wait for Enter so the console window stays open.
    let mut discard = String::new();
    let _ = std::io::stdin().read_line(&mut discard);
}
本帖最后由 星空天神 于 2024-12-16 19:57 编辑
Xerxes_2 发表于 2024-12-16 19:46
为啥比 Rust 慢这么多
M2 Pro 跑了下已经微秒级了
你自己再想想160微秒等于多少毫秒?
5900x
max cost 138.6us
min cost 102.2us
avg cost 107.606us
—— 来自 OnePlus LE2120, Android 14上的 S1Next-鹅版 v2.5.4
本帖最后由 Xerxes_2 于 2024-12-16 20:08 编辑
星空天神 发表于 2024-12-16 19:51
你自己再想想160微秒等于多少毫秒?
—— 来自 OnePlus LE2120, Android 14上的 S1Next-鹅版 v2.5.4 ...
草,脑子写傻了,确实差不多
时间太短了得改到 10000 次了
m2 pro
开始计算, 10000次……
平均耗时: 124.634µs
最短耗时: 117.833µs
最长耗时: 328.917µs
标准差: 13.893461866467945µs
按Enter退出……
5900x
开始计算, 10000次……
平均耗时: 98.975µs
最短耗时: 98.2µs
最长耗时: 220µs
标准差: 5.06429921114463µs
按Enter退出……
hgfdsa 发表于 2024-12-16 20:27
几十年前superpi都是跑1M,你都上c++了,好歹跑个1W位吧。
https://github.com/sekika/compute-pi
On a MacBook Air (Apple M1, 16 GB), pi to 1 million digits was computed in 1.5 seconds, and to 320 million digits in 24 minutes
你要的 1M
页:
[1]