看一个cpp
的例子
考虑以下类似 ls 的代码 minils.c
#include <dirent.h> //包含使用的Opendir readdir closedir 函数
#include <error.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h> // struct stat and the stat()/fstatat() calls used to query file or directory status

/*
 * Print one line per entry of the directory `path`, formatted as
 * "<size in bytes> <entry name>". The listing is not recursive.
 *
 * path: directory to list.
 *
 * Errors are reported with perror() and are non-fatal: if the directory
 * cannot be opened the function returns without printing anything; if a
 * single entry cannot be stat'ed that entry is skipped.
 */
void list_dir(const char *path) {
  DIR *dir_p = opendir(path);
  if (dir_p == NULL) {
    perror("open path failed");
    return;
  }

  /*
   * Example directory layout. ".." is the parent directory and "." is the
   * directory itself; both are skipped below (they must also be skipped
   * whenever a directory is searched recursively).
   *
   * ├── .
   * ├── ..
   * ├── demo-zeros
   * ├── learn_perf
   * │   ├── log.sh
   * │   ├── ls-mini
   * │   ├── ls-mini.c
   * │   └── test.c
   * ├── output
   * ├── perf.data
   * ├── perf.data.old
   * ├── test
   * └── test.cc
   *
   * readdir() hands back the entries one at a time: demo-zeros, learn_perf,
   * output, ...
   */
  struct dirent *dir_entry;
  while ((dir_entry = readdir(dir_p)) != NULL) {
    const char *filename = dir_entry->d_name;

    /* Skip the current-directory and parent-directory entries. */
    if (strcmp(filename, ".") == 0 || strcmp(filename, "..") == 0) {
      continue;
    }

    /*
     * Resolve the entry relative to the directory that was opened, not the
     * process's current working directory; a plain stat(filename) is only
     * correct when path is ".". fstatat() returns -1 on failure, otherwise
     * it fills file_stat.
     */
    struct stat file_stat;
    if (fstatat(dirfd(dir_p), filename, &file_stat, 0) == -1) {
      perror("stat failed");
      continue;
    }

    /* Size in bytes, then the entry name. */
    printf("%lld %s\n", (long long)file_stat.st_size, filename);
  }

  /* Release the directory stream; without this every call leaks it. */
  closedir(dir_p);
}

/* List the current working directory. */
int main(void) {
  list_dir(".");
  return 0;
}
#output
filament@black:~$ ./learn_perf/mini_ls
220 .bash_logout
4096 .debug
94860 perf.data.old
40960000 output
2147483648 demo-zeros
1222 .bash_history
807 .profile
4096 .cache
148828 perf.data
183 .wget-hsts
16464 test
4096 learn_perf
4096 .local
124 test.cc
4096 .vscode-server
20 .lesshst
4096 .dotnet
3771 .bashrc
使用perf进行分析
对我们的./mini_ls
的数据
 Performance counter stats for './mini_ls':

              0.94 msec task-clock          #    0.468 CPUs utilized
                 0      context-switches    #    0.000 /sec
                 0      cpu-migrations      #    0.000 /sec
                61      page-faults         #   64.980 K/sec
            737982      cycles              #    0.786 GHz
            712467      instructions        #    0.97  insn per cycle
            149577      branches            #  159.337 M/sec
              5528      branch-misses       #    3.70% of all branches
           3689890      slots               #    3.931 G/sec
            795858      topdown-retiring    #     21.6% retiring
            477515      topdown-bad-spec    #     12.9% bad speculation
           1562776      topdown-fe-bound    #     42.4% frontend bound
            853739      topdown-be-bound    #     23.1% backend bound

       0.002005034 seconds time elapsed

       0.002259000 seconds user
       0.000000000 seconds sys
对系统自带的 ls 的数据
 Performance counter stats for 'ls':

              1.87 msec task-clock          #    0.563 CPUs utilized
                 0      context-switches    #    0.000 /sec
                 0      cpu-migrations      #    0.000 /sec
               103      page-faults         #   55.160 K/sec
           1478451      cycles              #    0.792 GHz
           1652087      instructions        #    1.12  insn per cycle
            346600      branches            #  185.615 M/sec
             11215      branch-misses       #    3.24% of all branches
           7392255      slots               #    3.959 G/sec
           1797332      topdown-retiring    #     24.3% retiring
           1072601      topdown-bad-spec    #     14.5% bad speculation
           3043869      topdown-fe-bound    #     41.2% frontend bound
           1478451      topdown-be-bound    #     20.0% backend bound

       0.003314592 seconds time elapsed

       0.000000000 seconds user
       0.003587000 seconds sys
性能对比:./mini_ls vs. ls (标准)
统计指标 | ./mini_ls (自定义) | ls (标准) | 性能差异(mini_ls vs ls) | 结论/意义 |
---|---|---|---|---|
实际运行时间 (Elapsed Time) | 0.002005034 s | 0.003314592 s | 🔻 快 39.5% | mini_ls 完成任务所需总时间更短。 |
任务时钟 (task-clock) | 0.94 msec | 1.87 msec | 🔻 少 49.8% | 进程在 CPU 上花费的时间更少。 |
指令数 (instructions) | 712,467 | 1,652,087 | 🔻 少 56.9% | mini_ls 执行的操作步骤(指令)远少于 ls 。 |
周期数 (cycles) | 737,982 | 1,478,451 | 🔻 少 50.1% | 占用的 CPU 周期数更少。 |
IPC (insn per cycle) | 0.97 | 1.12 | 🔺 低 13.4% | mini_ls 每个周期执行的指令数较低,效率略低,可能是前端瓶颈(FE-Bound)所致。 |
分支预测错误 (branch-misses) | 5,528 (3.70%) | 11,215 (3.24%) | 🔻 数量少一半,但百分比更高。 | mini_ls 的分支预测失败率略高。 |
前端瓶颈 (topdown-fe-bound) | 42.4% | 41.2% | 🔺 略高 | CPU 等待指令从前端获取或解码的时间占比略高。 |
后端瓶颈 (topdown-be-bound) | 23.1% | 20.0% | 🔺 略高 | CPU 等待后端资源(如内存、执行单元)的时间占比略高。 |
页错误 (page-faults) | 61 | 103 | 🔻 少 40.8% | 触发的缺页异常更少,即需要内核新建映射或调入的内存页更少(注意:这不是“访问内存页面的次数”)。 |
另一个使用rust的例子
/// Runs an empty loop 200,000 times and returns; it performs no other work.
fn test2() {
    // 200,000 iterations
    const ITERATIONS: i32 = 200_000;
    for _step in 0..ITERATIONS {
        ()
    }
}
fn test1() {//10万次for _ in 0..100_000 {()}test2();
}
/// Entry point: calls `test1` ten times in a row.
fn main() {
    const ROUNDS: i32 = 10;
    for _round in 0..ROUNDS {
        test1();
    }
}
perf命令
sudo perf record --call-graph=dwarf ./target/debug/mytest
--call-graph
作用
- 启用调用图(call graph)记录
- 记录函数调用关系,而不仅仅是函数执行时间
dwarf
调试格式
- DWARF:Debugging With Attributed Record Formats
- 优势:
- 利用编译器生成的调试信息
- 能够处理复杂的栈帧布局
sudo perf record --call-graph=dwarf ./mytest
sudo perf report --stdio
读取得到的perf.data
数据然后格式化输出
filament@black:~/learn_perf$ sudo perf report --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 146 of event 'cycles'
# Event count (approx.): 34204409
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... .................... .........................................................................................................
#42.01% 42.01% mytest mytest [.] <core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next|---<core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next33.61% 33.61% mytest mytest [.] <i32 as core::iter::range::Step>::forward_unchecked|---<i32 as core::iter::range::Step>::forward_unchecked10.38% 10.38% mytest mytest [.] core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next|---core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next6.20% 6.20% mytest mytest [.] mytest::test2|---mytest::test23.79% 3.79% mytest mytest [.] mytest::test1|---mytest::test12.57% 0.00% mytest [kernel.kallsyms] [k] entry_SYSCALL_64_after_hwframe|---entry_SYSCALL_64_after_hwframedo_syscall_64| --2.24%--x64_sys_call| --0.82%--__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region2.57% 0.00% mytest [kernel.kallsyms] [k] do_syscall_64|---do_syscall_64| --2.24%--x64_sys_call| --0.82%--__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region2.24% 0.00% mytest [kernel.kallsyms] [k] x64_sys_call|---x64_sys_call| --0.82%--__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region1.96% 0.00% mytest ld-linux-x86-64.so.2 [.] _start|---_start| --1.63%--_dl_start_dl_start_final (inlined)_dl_sysdep_startdl_main| --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_start|---_dl_start_dl_start_final (inlined)_dl_sysdep_startdl_main| --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_sysdep_start|---_dl_sysdep_startdl_main| --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63% 0.00% mytest ld-linux-x86-64.so.2 [.] 
dl_main|---dl_main| --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_start_final (inlined)|---_dl_start_final (inlined)_dl_sysdep_startdl_main| --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.96% 0.00% mytest mytest [.] std::rt::lang_start_internal|---std::rt::lang_start_internalstd::panic::catch_unwind (inlined)std::panicking::catch_unwind (inlined)std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] std::panic::catch_unwind (inlined)|---std::panic::catch_unwind (inlined)std::panicking::catch_unwind (inlined)std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] std::panicking::catch_unwind (inlined)|---std::panicking::catch_unwind (inlined)std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] std::panicking::catch_unwind::do_call (inlined)|---std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] std::rt::lang_start_internal::{{closure}} (inlined)|---std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] 
std::rt::init (inlined)|---std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] std::sys::pal::unix::init (inlined)|---std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96% 0.00% mytest mytest [.] std::sys::pal::unix::stack_overflow::imp::init (inlined)|---std::sys::pal::unix::stack_overflow::imp::init (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_map_object_deps|---_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_catch_exception|---_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] openaux|---openaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_map_object|---_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_map_object_from_fd|---_dl_map_object_from_fd_dl_map_segments (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_map_segments (inlined)|---_dl_map_segments (inlined)0.82% 0.00% mytest ld-linux-x86-64.so.2 [.] 
__mmap64 (inlined)|---__mmap64 (inlined)__mmap64 (inlined)entry_SYSCALL_64_after_hwframedo_syscall_64x64_sys_call__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82% 0.00% mytest [kernel.kallsyms] [k] __x64_sys_mmap|---__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82% 0.00% mytest [kernel.kallsyms] [k] ksys_mmap_pgoff|---ksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82% 0.00% mytest [kernel.kallsyms] [k] vm_mmap_pgoff|---vm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82% 0.00% mytest [kernel.kallsyms] [k] do_mmap|---do_mmapmmap_region__mmap_region0.82% 0.00% mytest [kernel.kallsyms] [k] mmap_region|---mmap_region__mmap_region0.82% 0.00% mytest [kernel.kallsyms] [k] __mmap_region|---__mmap_region0.73% 0.00% mytest [kernel.kallsyms] [k] asm_exc_page_fault|---asm_exc_page_fault0.49% 0.49% mytest [kernel.kallsyms] [k] sigaction_compat_abi0.49% 0.00% mytest libc.so.6 [.] __GI___libc_sigaction (inlined)0.49% 0.00% mytest [kernel.kallsyms] [k] __x64_sys_rt_sigaction0.49% 0.00% mytest [kernel.kallsyms] [k] do_sigaction0.47% 0.47% mytest [kernel.kallsyms] [k] down_write_killable0.47% 0.00% mytest mytest [.] std::sys::pal::unix::stack_overflow::imp::install_main_guard (inlined)0.47% 0.00% mytest mytest [.] std::sys::pal::unix::stack_overflow::imp::install_main_guard_linux (inlined)0.47% 0.00% mytest mytest [.] std::sys::pal::unix::stack_overflow::imp::stack_start_aligned (inlined)0.47% 0.00% mytest mytest [.] std::sys::pal::unix::stack_overflow::imp::get_stack_start (inlined)0.47% 0.00% mytest libc.so.6 [.] __pthread_getattr_np (inlined)0.47% 0.00% mytest libc.so.6 [.] _IO_new_fopen (inlined)0.47% 0.00% mytest libc.so.6 [.] __fopen_internal (inlined)0.47% 0.00% mytest libc.so.6 [.] __GI___libc_malloc (inlined)0.47% 0.00% mytest libc.so.6 [.] tcache_init (inlined)0.47% 0.00% mytest libc.so.6 [.] tcache_init (inlined)0.47% 0.00% mytest libc.so.6 [.] _int_malloc0.47% 0.00% mytest libc.so.6 [.] 
sysmalloc0.47% 0.00% mytest libc.so.6 [.] __glibc_morecore (inlined)0.47% 0.00% mytest libc.so.6 [.] __GI___sbrk (inlined)0.47% 0.00% mytest libc.so.6 [.] __brk0.47% 0.00% mytest [kernel.kallsyms] [k] __x64_sys_brk0.46% 0.00% mytest [kernel.kallsyms] [k] __x64_sys_execve0.46% 0.00% mytest [kernel.kallsyms] [k] do_execveat_common.isra.00.46% 0.00% mytest [kernel.kallsyms] [k] bprm_execve0.46% 0.00% mytest [kernel.kallsyms] [k] bprm_execve.part.00.46% 0.00% mytest [kernel.kallsyms] [k] exec_binprm0.46% 0.00% mytest [kernel.kallsyms] [k] search_binary_handler0.46% 0.00% mytest [kernel.kallsyms] [k] load_elf_binary0.46% 0.46% mytest libc.so.6 [.] __libc_early_init0.44% 0.44% mytest ld-linux-x86-64.so.2 [.] do_lookup_x0.44% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_relocate_object0.44% 0.00% mytest ld-linux-x86-64.so.2 [.] elf_dynamic_do_Rela (inlined)0.44% 0.00% mytest ld-linux-x86-64.so.2 [.] elf_machine_rela (inlined)0.44% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_lookup_symbol_x0.42% 0.42% mytest [kernel.kallsyms] [k] vma_set_page_prot0.40% 0.40% mytest [kernel.kallsyms] [k] vma_interval_tree_insert0.40% 0.00% mytest ld-linux-x86-64.so.2 [.] _dl_map_segment (inlined)0.40% 0.00% mytest [kernel.kallsyms] [k] vma_link0.40% 0.00% mytest [kernel.kallsyms] [k] __vma_link_file0.37% 0.37% mytest [kernel.kallsyms] [k] sync_regs0.37% 0.00% mytest ld-linux-x86-64.so.2 [.] 
elf_get_dynamic_info (inlined)0.33% 0.33% mytest [kernel.kallsyms] [k] truncate_inode_pages_range0.33% 0.00% mytest [kernel.kallsyms] [k] syscall_exit_to_user_mode0.33% 0.00% mytest [kernel.kallsyms] [k] exit_to_user_mode_prepare0.33% 0.00% mytest [kernel.kallsyms] [k] exit_to_user_mode_loop0.33% 0.00% mytest [kernel.kallsyms] [k] task_work_run0.33% 0.00% mytest [kernel.kallsyms] [k] ____fput0.33% 0.00% mytest [kernel.kallsyms] [k] __fput0.33% 0.00% mytest [kernel.kallsyms] [k] dput0.33% 0.00% mytest [kernel.kallsyms] [k] dentry_kill0.33% 0.00% mytest [kernel.kallsyms] [k] __dentry_kill0.33% 0.00% mytest [kernel.kallsyms] [k] dentry_unlink_inode0.33% 0.00% mytest [kernel.kallsyms] [k] iput0.33% 0.00% mytest [kernel.kallsyms] [k] evict0.33% 0.00% mytest [kernel.kallsyms] [k] truncate_inode_pages_final0.27% 0.27% mytest [kernel.kallsyms] [k] __clear_user0.27% 0.00% mytest [kernel.kallsyms] [k] clear_user0.19% 0.19% mytest [kernel.kallsyms] [k] clear_page_erms0.19% 0.00% mytest [kernel.kallsyms] [k] setup_arg_pages0.19% 0.00% mytest [kernel.kallsyms] [k] shift_arg_pages0.19% 0.00% mytest [kernel.kallsyms] [k] move_page_tables0.19% 0.00% mytest [kernel.kallsyms] [k] move_page_tables.part.00.19% 0.00% mytest [kernel.kallsyms] [k] __pte_alloc0.19% 0.00% mytest [kernel.kallsyms] [k] pte_alloc_one0.19% 0.00% mytest [kernel.kallsyms] [k] alloc_pages0.19% 0.00% mytest [kernel.kallsyms] [k] __alloc_pages0.19% 0.00% mytest [kernel.kallsyms] [k] get_page_from_freelist0.18% 0.00% perf-ex [kernel.kallsyms] [k] entry_SYSCALL_64_after_hwframe0.18% 0.00% perf-ex [kernel.kallsyms] [k] do_syscall_640.18% 0.00% perf-ex [kernel.kallsyms] [k] x64_sys_call0.18% 0.00% perf-ex [kernel.kallsyms] [k] __x64_sys_execve0.18% 0.00% perf-ex [kernel.kallsyms] [k] do_execveat_common.isra.00.18% 0.00% perf-ex [kernel.kallsyms] [k] bprm_execve0.18% 0.00% perf-ex [kernel.kallsyms] [k] bprm_execve.part.00.18% 0.00% perf-ex [kernel.kallsyms] [k] exec_binprm0.18% 0.00% perf-ex [kernel.kallsyms] [k] 
search_binary_handler0.18% 0.00% perf-ex [kernel.kallsyms] [k] load_elf_binary0.18% 0.00% perf-ex [kernel.kallsyms] [k] begin_new_exec0.18% 0.00% perf-ex [kernel.kallsyms] [k] perf_event_exec0.11% 0.11% perf-ex [kernel.kallsyms] [k] send_call_function_single_ipi0.11% 0.00% perf-ex [kernel.kallsyms] [k] asm_sysvec_apic_timer_interrupt0.11% 0.00% perf-ex [kernel.kallsyms] [k] sysvec_apic_timer_interrupt0.11% 0.00% perf-ex [kernel.kallsyms] [k] __sysvec_apic_timer_interrupt0.11% 0.00% perf-ex [kernel.kallsyms] [k] hrtimer_interrupt0.11% 0.00% perf-ex [kernel.kallsyms] [k] __hrtimer_run_queues0.11% 0.00% perf-ex [kernel.kallsyms] [k] tick_sched_timer0.11% 0.00% perf-ex [kernel.kallsyms] [k] tick_sched_handle0.11% 0.00% perf-ex [kernel.kallsyms] [k] update_process_times0.11% 0.00% perf-ex [kernel.kallsyms] [k] scheduler_tick0.11% 0.00% perf-ex [kernel.kallsyms] [k] trigger_load_balance0.11% 0.00% perf-ex [kernel.kallsyms] [k] nohz_balancer_kick0.11% 0.00% perf-ex [kernel.kallsyms] [k] smp_call_function_single_async0.11% 0.00% perf-ex [kernel.kallsyms] [k] generic_exec_single0.06% 0.00% perf-ex [kernel.kallsyms] [k] native_write_msr0.06% 0.00% perf-ex [kernel.kallsyms] [k] ctx_resched0.06% 0.00% perf-ex [kernel.kallsyms] [k] x86_pmu_enable0.06% 0.00% perf-ex [kernel.kallsyms] [k] intel_pmu_enable_all0.04% 0.04% perf-ex [kernel.kallsyms] [k] nmi_restore0.01% 0.01% perf-ex [kernel.kallsyms] [k] native_flush_tlb_one_user0.00% 0.00% perf-ex [kernel.kallsyms] [k] native_sched_clock0.00% 0.00% perf-ex [kernel.kallsyms] [k] its_return_thunk#
# (Cannot load tips.txt file, please install perf!)
#
当前目录如下
mytest
│ ├── Cargo.lock
│ ├── Cargo.toml
│ ├── out.folded
│ ├── perf.data
│ ├── perf.data.old
│ ├── src
│ │ └── main.rs
│ └── target├── mytest├── FlameGraph
│ ├── README.md
│ ├── aix-perf.pl
│ ├── demos
进入我们的mytest
文件夹
perf script | ~/FlameGraph/stackcollapse-perf.pl > out.folded
- perf script:将 perf.data 中的二进制采样数据解析成文本形式的调用栈输出
- ~/FlameGraph/stackcollapse-perf.pl:将 perf script 的多行调用栈“折叠”成一行一行的汇总格式,并统计每种栈出现的次数
- 把原始性能数据 → 转成 “栈 + 计数” 的简洁汇总格式
~/FlameGraph/flamegraph.pl out.folded > mytest-flamegraph.svg
- 读取 out.folded 这种“折叠栈”文件,生成一个交互式 SVG 火焰图。
以下是得到的out.folded
和火焰图svg
mytest;<core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next 19009718
mytest;<i32 as core::iter::range::Step>::forward_unchecked 6799607
mytest;<i32 as core::iter::range::Step>::forward_unchecked;core::num::<impl i32>::checked_add_unsigned 2168609
mytest;<i32 as core::iter::range::Step>::forward_unchecked;core::num::<impl i32>::checked_add_unsigned;core::num::<impl i32>::overflowing_add_unsigned 6883545
mytest;<i32 as core::iter::range::Step>::forward_unchecked;core::num::<impl i32>::checked_add_unsigned;core::num::<impl i32>::overflowing_add_unsigned;core::num::<impl i32>::overflowing_add 825448
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_map_object_deps;_dl_catch_exception;openaux;_dl_map_object;_dl_map_object_from_fd;_dl_map_segments;__mprotect;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_mprotect;do_mprotect_pkey;mprotect_fixup;perf_event_mmap;perf_event_mmap_event;perf_iterate_sb;perf_iterate_ctx 143804
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_map_object_deps;_dl_catch_exception;openaux;_dl_map_object;open_verify;__GI___read_nocancel;entry_SYSCALL_64 135150
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_new_object;asm_exc_page_fault;exc_page_fault;do_user_addr_fault;handle_mm_fault;__handle_mm_fault;handle_pte_fault;do_fault;do_read_fault;filemap_map_pages;next_uptodate_page 125265
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_relocate_object;_dl_protect_relro;__mprotect;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_mprotect;do_mprotect_pkey;mprotect_fixup;perf_event_mmap;perf_event_mmap_event;perf_iterate_sb;perf_iterate_ctx;perf_event_mmap_output;perf_output_begin 156958
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_relocate_object;elf_dynamic_do_Rela;elf_machine_rela_relative;asm_exc_page_fault;exc_page_fault;do_user_addr_fault;find_vma;vmacache_find 150888
mytest;_start;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;putname;kmem_cache_free;slab_free_freelist_hook.constprop.0 111752
mytest;core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next 7222319
mytest;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;elf_map;vm_mmap;vm_mmap_pgoff;do_mmap;mmap_region;__mmap_region;perf_event_mmap;perf_event_mmap_event;perf_iterate_sb;perf_iterate_ctx;perf_event_mmap_output;__perf_event__output_id_sample;perf_output_copy 93497
mytest;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;setup_arg_pages;shift_arg_pages;move_page_tables;move_page_tables.part.0;alloc_new_pud.constprop.0;__p4d_alloc;get_zeroed_page;alloc_pages;__alloc_pages 69216
mytest;mytest::test1 550608
mytest;mytest::test2 2129064
mytest;std::rt::lang_start_internal;std::panic::catch_unwind;std::panicking::catch_unwind;std::panicking::catch_unwind::do_call;std::rt::lang_start_internal::{{closure}};std::rt::init;std::sys::pal::unix::init;std::sys::pal::unix::init::sanitize_standard_fds;__GI___poll;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_poll;do_sys_poll;do_poll.constprop.0;tty_poll 162159
mytest;std::rt::lang_start_internal;std::panic::catch_unwind;std::panicking::catch_unwind;std::panicking::catch_unwind::do_call;std::rt::lang_start_internal::{{closure}};std::rt::init;std::sys::pal::unix::init;std::sys::pal::unix::stack_overflow::imp::init;std::sys::pal::unix::stack_overflow::imp::install_main_guard;std::sys::pal::unix::stack_overflow::imp::install_main_guard_linux;std::sys::pal::unix::stack_overflow::imp::stack_start_aligned;std::sys::pal::unix::stack_overflow::imp::get_stack_start;__pthread_getattr_np;__GI___isoc99_sscanf 166637
perf-exec;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;begin_new_exec;perf_event_exec;asm_sysvec_apic_timer_interrupt;sysvec_apic_timer_interrupt;__sysvec_apic_timer_interrupt;hrtimer_interrupt;__hrtimer_run_queues;tick_sched_timer;tick_sched_handle;update_process_times;calc_global_load_tick 37506
perf-exec;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;begin_new_exec;perf_event_exec;ctx_resched;x86_pmu_enable;intel_pmu_enable_all;native_write_msr 19422
如何下载
# 从服务器下载文件
scp filament@your_server_ip:/mnt/hdd-ws/users/filament/mytest/mytest-flamegraph.svg D:\Documents
# 下载的通用格式
scp username@servername:file_path destination_path
# 上传文件
scp local_file_path username@server_ip:server_file_path
参考文章:
DatenLord|Rust程序性能分析 - 知乎
我的程序卡在哪里?—— 用火焰图精确定位性能瓶颈 | Mice World