当前位置: 首页 > news >正文

使用perf进行性能分析-入门级别

看一个 C 语言的例子

考虑下面这段功能类似 ls 的代码(minils.c):

#include <dirent.h> //包含使用的Opendir readdir closedir 函数
#include <error.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h> // struct stat and stat(): query the status of a file or directory

/*
 * Print one line "<size-in-bytes> <name>" for every entry in the directory
 * `path`, in the order readdir() returns them.
 *
 * The "." (current directory) and ".." (parent directory) entries are
 * skipped. Failures are reported on stderr and the affected entry (or the
 * whole directory, if it cannot be opened) is skipped; nothing is returned.
 */
void list_dir(const char *path) {
  /* Capacity of the "<path>/<name>" buffer; longer paths are reported and skipped. */
  enum { FULL_PATH_CAP = 4096 };
  char full_path[FULL_PATH_CAP];
  struct dirent *dir_entry;
  struct stat file_stat;

  DIR *dir_p = opendir(path);
  if (dir_p == NULL) {
    perror("open path failed");
    return;
  }

  while ((dir_entry = readdir(dir_p)) != NULL) {
    const char *filename = dir_entry->d_name;

    /* Skip the current-directory and parent-directory entries. */
    if (strcmp(filename, ".") == 0 || strcmp(filename, "..") == 0) {
      continue;
    }

    /*
     * d_name is relative to `path`, not to the process's working directory,
     * so stat() must be given the joined path. Passing the bare name only
     * works when `path` happens to be ".".
     */
    int len = snprintf(full_path, sizeof full_path, "%s/%s", path, filename);
    if (len < 0 || (size_t)len >= sizeof full_path) {
      fprintf(stderr, "path too long: %s/%s\n", path, filename);
      continue;
    }

    /* stat() returns -1 on failure; on success the status is in file_stat. */
    if (stat(full_path, &file_stat) == -1) {
      perror("stat failed");
      continue;
    }

    /* File size followed by the entry name. */
    printf("%lld %s\n", (long long)file_stat.st_size, filename);
  }

  /* Release the directory stream opened above. */
  if (closedir(dir_p) == -1) {
    perror("close path failed");
  }
}

/* Entry point: list the current working directory. */
int main(void) {
  list_dir(".");
  return 0;
}
#output
filament@black:~$ ./learn_perf/mini_ls 
220 .bash_logout
4096 .debug
94860 perf.data.old
40960000 output
2147483648 demo-zeros
1222 .bash_history
807 .profile
4096 .cache
148828 perf.data
183 .wget-hsts
16464 test
4096 learn_perf
4096 .local
124 test.cc
4096 .vscode-server
20 .lesshst
4096 .dotnet
3771 .bashrc

使用 perf stat 进行分析

我们自己的 ./mini_ls 的数据(perf stat ./mini_ls):

 Performance counter stats for './mini_ls':

              0.94 msec task-clock                #    0.468 CPUs utilized
                 0      context-switches          #    0.000 /sec
                 0      cpu-migrations            #    0.000 /sec
                61      page-faults               #   64.980 K/sec
            737982      cycles                    #    0.786 GHz
            712467      instructions              #    0.97  insn per cycle
            149577      branches                  #  159.337 M/sec
              5528      branch-misses             #    3.70% of all branches
           3689890      slots                     #    3.931 G/sec
            795858      topdown-retiring          #     21.6% retiring
            477515      topdown-bad-spec          #     12.9% bad speculation
           1562776      topdown-fe-bound          #     42.4% frontend bound
            853739      topdown-be-bound          #     23.1% backend bound

       0.002005034 seconds time elapsed

       0.002259000 seconds user
       0.000000000 seconds sys

系统自带 ls 的数据(perf stat ls):

 Performance counter stats for 'ls':

              1.87 msec task-clock                #    0.563 CPUs utilized
                 0      context-switches          #    0.000 /sec
                 0      cpu-migrations            #    0.000 /sec
               103      page-faults               #   55.160 K/sec
           1478451      cycles                    #    0.792 GHz
           1652087      instructions              #    1.12  insn per cycle
            346600      branches                  #  185.615 M/sec
             11215      branch-misses             #    3.24% of all branches
           7392255      slots                     #    3.959 G/sec
           1797332      topdown-retiring          #     24.3% retiring
           1072601      topdown-bad-spec          #     14.5% bad speculation
           3043869      topdown-fe-bound          #     41.2% frontend bound
           1478451      topdown-be-bound          #     20.0% backend bound

       0.003314592 seconds time elapsed

       0.000000000 seconds user
       0.003587000 seconds sys

性能对比:./mini_ls vs. ls (标准)

统计指标 ./mini_ls (自定义) ls (标准) 性能差异(mini_ls vs ls) 结论/意义
实际运行时间 (Elapsed Time) 0.002005034 s 0.003314592 s 🔻 快 39.5% mini_ls 完成任务所需总时间更短。
任务时钟 (task-clock) 0.94 msec 1.87 msec 🔻 少 49.8% 进程在 CPU 上花费的时间更少。
指令数 (instructions) 712,467 1,652,087 🔻 少 56.9% mini_ls 执行的操作步骤(指令)远少于 ls
周期数 (cycles) 737,982 1,478,451 🔻 少 50.1% 占用的 CPU 周期数更少。
IPC (insn per cycle) 0.97 1.12 🔺 低 13.4% mini_ls 每个周期执行的指令数较低,效率略低,可能是前端瓶颈(FE-Bound)所致。
分支预测错误 (branch-misses) 5,528 (3.70%) 11,215 (3.24%) 🔻 数量少一半,但百分比更高 mini_ls 的分支预测失败率略高。
前端瓶颈 (topdown-fe-bound) 42.4% 41.2% 🔺 略高 CPU 等待指令从前端获取或解码的时间占比略高。
后端瓶颈 (topdown-be-bound) 23.1% 20.0% 🔺 略高 CPU 等待后端资源(如内存、执行单元)的时间占比略高。
缺页异常 (page-faults) 61 103 🔻 少 40.8% 触发的缺页异常更少,即需要内核按需映射的内存页更少(并不等同于访问内存的次数)。

另一个使用rust的例子

/// Busy-loop workload for the profiling demo: iterates over `0..200_000`
/// with an empty body and returns nothing.
fn test2() {
    // 200,000 iterations
    for _ in 0..200_000 {
        ()
    }
}
fn test1() {//10万次for _ in 0..100_000 {()}test2();
}
/// Entry point of the profiling demo: calls `test1()` ten times in a row.
fn main() {
    for _ in 0..10 {
        test1();
    }
}

perf命令

sudo perf record --call-graph=dwarf ./target/debug/mytest

--call-graph 作用

  • 启用调用图(call graph)记录
  • 记录函数调用关系,而不仅仅是函数执行时间

dwarf 调试格式

  • DWARF:Debugging With Attributed Record Formats
  • 优势
    • 利用编译器生成的调试信息
    • 能够处理复杂的栈帧布局
sudo perf record --call-graph=dwarf ./mytest

sudo perf report --stdio

读取得到的perf.data数据然后格式化输出

filament@black:~/learn_perf$ sudo perf report --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 146  of event 'cycles'
# Event count (approx.): 34204409
#
# Children      Self  Command  Shared Object         Symbol                                                                                                   
# ........  ........  .......  ....................  .........................................................................................................
#42.01%    42.01%  mytest   mytest                [.] <core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next|---<core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next33.61%    33.61%  mytest   mytest                [.] <i32 as core::iter::range::Step>::forward_unchecked|---<i32 as core::iter::range::Step>::forward_unchecked10.38%    10.38%  mytest   mytest                [.] core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next|---core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next6.20%     6.20%  mytest   mytest                [.] mytest::test2|---mytest::test23.79%     3.79%  mytest   mytest                [.] mytest::test1|---mytest::test12.57%     0.00%  mytest   [kernel.kallsyms]     [k] entry_SYSCALL_64_after_hwframe|---entry_SYSCALL_64_after_hwframedo_syscall_64|          --2.24%--x64_sys_call|          --0.82%--__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region2.57%     0.00%  mytest   [kernel.kallsyms]     [k] do_syscall_64|---do_syscall_64|          --2.24%--x64_sys_call|          --0.82%--__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region2.24%     0.00%  mytest   [kernel.kallsyms]     [k] x64_sys_call|---x64_sys_call|          --0.82%--__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region1.96%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _start|---_start|          --1.63%--_dl_start_dl_start_final (inlined)_dl_sysdep_startdl_main|          --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63%     0.00%  mytest   ld-linux-x86-64.so.2  [.] 
_dl_start|---_dl_start_dl_start_final (inlined)_dl_sysdep_startdl_main|          --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_sysdep_start|---_dl_sysdep_startdl_main|          --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63%     0.00%  mytest   ld-linux-x86-64.so.2  [.] dl_main|---dl_main|          --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)1.63%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_start_final (inlined)|---_dl_start_final (inlined)_dl_sysdep_startdl_main|          --0.82%--_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.96%     0.00%  mytest   mytest                [.] std::rt::lang_start_internal|---std::rt::lang_start_internalstd::panic::catch_unwind (inlined)std::panicking::catch_unwind (inlined)std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] std::panic::catch_unwind (inlined)|---std::panic::catch_unwind (inlined)std::panicking::catch_unwind (inlined)std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] 
std::panicking::catch_unwind (inlined)|---std::panicking::catch_unwind (inlined)std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] std::panicking::catch_unwind::do_call (inlined)|---std::panicking::catch_unwind::do_call (inlined)std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] std::rt::lang_start_internal::{{closure}} (inlined)|---std::rt::lang_start_internal::{{closure}} (inlined)std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] std::rt::init (inlined)|---std::rt::init (inlined)std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] std::sys::pal::unix::init (inlined)|---std::sys::pal::unix::init (inlined)std::sys::pal::unix::stack_overflow::imp::init (inlined)0.96%     0.00%  mytest   mytest                [.] std::sys::pal::unix::stack_overflow::imp::init (inlined)|---std::sys::pal::unix::stack_overflow::imp::init (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_map_object_deps|---_dl_map_object_deps_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_catch_exception|---_dl_catch_exceptionopenaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] openaux|---openaux_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] 
_dl_map_object|---_dl_map_object_dl_map_object_from_fd_dl_map_segments (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_map_object_from_fd|---_dl_map_object_from_fd_dl_map_segments (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_map_segments (inlined)|---_dl_map_segments (inlined)0.82%     0.00%  mytest   ld-linux-x86-64.so.2  [.] __mmap64 (inlined)|---__mmap64 (inlined)__mmap64 (inlined)entry_SYSCALL_64_after_hwframedo_syscall_64x64_sys_call__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82%     0.00%  mytest   [kernel.kallsyms]     [k] __x64_sys_mmap|---__x64_sys_mmapksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82%     0.00%  mytest   [kernel.kallsyms]     [k] ksys_mmap_pgoff|---ksys_mmap_pgoffvm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82%     0.00%  mytest   [kernel.kallsyms]     [k] vm_mmap_pgoff|---vm_mmap_pgoffdo_mmapmmap_region__mmap_region0.82%     0.00%  mytest   [kernel.kallsyms]     [k] do_mmap|---do_mmapmmap_region__mmap_region0.82%     0.00%  mytest   [kernel.kallsyms]     [k] mmap_region|---mmap_region__mmap_region0.82%     0.00%  mytest   [kernel.kallsyms]     [k] __mmap_region|---__mmap_region0.73%     0.00%  mytest   [kernel.kallsyms]     [k] asm_exc_page_fault|---asm_exc_page_fault0.49%     0.49%  mytest   [kernel.kallsyms]     [k] sigaction_compat_abi0.49%     0.00%  mytest   libc.so.6             [.] __GI___libc_sigaction (inlined)0.49%     0.00%  mytest   [kernel.kallsyms]     [k] __x64_sys_rt_sigaction0.49%     0.00%  mytest   [kernel.kallsyms]     [k] do_sigaction0.47%     0.47%  mytest   [kernel.kallsyms]     [k] down_write_killable0.47%     0.00%  mytest   mytest                [.] std::sys::pal::unix::stack_overflow::imp::install_main_guard (inlined)0.47%     0.00%  mytest   mytest                [.] std::sys::pal::unix::stack_overflow::imp::install_main_guard_linux (inlined)0.47%     0.00%  mytest   mytest                [.] 
std::sys::pal::unix::stack_overflow::imp::stack_start_aligned (inlined)0.47%     0.00%  mytest   mytest                [.] std::sys::pal::unix::stack_overflow::imp::get_stack_start (inlined)0.47%     0.00%  mytest   libc.so.6             [.] __pthread_getattr_np (inlined)0.47%     0.00%  mytest   libc.so.6             [.] _IO_new_fopen (inlined)0.47%     0.00%  mytest   libc.so.6             [.] __fopen_internal (inlined)0.47%     0.00%  mytest   libc.so.6             [.] __GI___libc_malloc (inlined)0.47%     0.00%  mytest   libc.so.6             [.] tcache_init (inlined)0.47%     0.00%  mytest   libc.so.6             [.] tcache_init (inlined)0.47%     0.00%  mytest   libc.so.6             [.] _int_malloc0.47%     0.00%  mytest   libc.so.6             [.] sysmalloc0.47%     0.00%  mytest   libc.so.6             [.] __glibc_morecore (inlined)0.47%     0.00%  mytest   libc.so.6             [.] __GI___sbrk (inlined)0.47%     0.00%  mytest   libc.so.6             [.] __brk0.47%     0.00%  mytest   [kernel.kallsyms]     [k] __x64_sys_brk0.46%     0.00%  mytest   [kernel.kallsyms]     [k] __x64_sys_execve0.46%     0.00%  mytest   [kernel.kallsyms]     [k] do_execveat_common.isra.00.46%     0.00%  mytest   [kernel.kallsyms]     [k] bprm_execve0.46%     0.00%  mytest   [kernel.kallsyms]     [k] bprm_execve.part.00.46%     0.00%  mytest   [kernel.kallsyms]     [k] exec_binprm0.46%     0.00%  mytest   [kernel.kallsyms]     [k] search_binary_handler0.46%     0.00%  mytest   [kernel.kallsyms]     [k] load_elf_binary0.46%     0.46%  mytest   libc.so.6             [.] __libc_early_init0.44%     0.44%  mytest   ld-linux-x86-64.so.2  [.] do_lookup_x0.44%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_relocate_object0.44%     0.00%  mytest   ld-linux-x86-64.so.2  [.] elf_dynamic_do_Rela (inlined)0.44%     0.00%  mytest   ld-linux-x86-64.so.2  [.] elf_machine_rela (inlined)0.44%     0.00%  mytest   ld-linux-x86-64.so.2  [.] 
_dl_lookup_symbol_x0.42%     0.42%  mytest   [kernel.kallsyms]     [k] vma_set_page_prot0.40%     0.40%  mytest   [kernel.kallsyms]     [k] vma_interval_tree_insert0.40%     0.00%  mytest   ld-linux-x86-64.so.2  [.] _dl_map_segment (inlined)0.40%     0.00%  mytest   [kernel.kallsyms]     [k] vma_link0.40%     0.00%  mytest   [kernel.kallsyms]     [k] __vma_link_file0.37%     0.37%  mytest   [kernel.kallsyms]     [k] sync_regs0.37%     0.00%  mytest   ld-linux-x86-64.so.2  [.] elf_get_dynamic_info (inlined)0.33%     0.33%  mytest   [kernel.kallsyms]     [k] truncate_inode_pages_range0.33%     0.00%  mytest   [kernel.kallsyms]     [k] syscall_exit_to_user_mode0.33%     0.00%  mytest   [kernel.kallsyms]     [k] exit_to_user_mode_prepare0.33%     0.00%  mytest   [kernel.kallsyms]     [k] exit_to_user_mode_loop0.33%     0.00%  mytest   [kernel.kallsyms]     [k] task_work_run0.33%     0.00%  mytest   [kernel.kallsyms]     [k] ____fput0.33%     0.00%  mytest   [kernel.kallsyms]     [k] __fput0.33%     0.00%  mytest   [kernel.kallsyms]     [k] dput0.33%     0.00%  mytest   [kernel.kallsyms]     [k] dentry_kill0.33%     0.00%  mytest   [kernel.kallsyms]     [k] __dentry_kill0.33%     0.00%  mytest   [kernel.kallsyms]     [k] dentry_unlink_inode0.33%     0.00%  mytest   [kernel.kallsyms]     [k] iput0.33%     0.00%  mytest   [kernel.kallsyms]     [k] evict0.33%     0.00%  mytest   [kernel.kallsyms]     [k] truncate_inode_pages_final0.27%     0.27%  mytest   [kernel.kallsyms]     [k] __clear_user0.27%     0.00%  mytest   [kernel.kallsyms]     [k] clear_user0.19%     0.19%  mytest   [kernel.kallsyms]     [k] clear_page_erms0.19%     0.00%  mytest   [kernel.kallsyms]     [k] setup_arg_pages0.19%     0.00%  mytest   [kernel.kallsyms]     [k] shift_arg_pages0.19%     0.00%  mytest   [kernel.kallsyms]     [k] move_page_tables0.19%     0.00%  mytest   [kernel.kallsyms]     [k] move_page_tables.part.00.19%     0.00%  mytest   [kernel.kallsyms]     [k] __pte_alloc0.19%     0.00%  
mytest   [kernel.kallsyms]     [k] pte_alloc_one0.19%     0.00%  mytest   [kernel.kallsyms]     [k] alloc_pages0.19%     0.00%  mytest   [kernel.kallsyms]     [k] __alloc_pages0.19%     0.00%  mytest   [kernel.kallsyms]     [k] get_page_from_freelist0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] entry_SYSCALL_64_after_hwframe0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] do_syscall_640.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] x64_sys_call0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] __x64_sys_execve0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] do_execveat_common.isra.00.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] bprm_execve0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] bprm_execve.part.00.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] exec_binprm0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] search_binary_handler0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] load_elf_binary0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] begin_new_exec0.18%     0.00%  perf-ex  [kernel.kallsyms]     [k] perf_event_exec0.11%     0.11%  perf-ex  [kernel.kallsyms]     [k] send_call_function_single_ipi0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] asm_sysvec_apic_timer_interrupt0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] sysvec_apic_timer_interrupt0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] __sysvec_apic_timer_interrupt0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] hrtimer_interrupt0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] __hrtimer_run_queues0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] tick_sched_timer0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] tick_sched_handle0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] update_process_times0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] scheduler_tick0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] trigger_load_balance0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] nohz_balancer_kick0.11%     0.00%  perf-ex  
[kernel.kallsyms]     [k] smp_call_function_single_async0.11%     0.00%  perf-ex  [kernel.kallsyms]     [k] generic_exec_single0.06%     0.00%  perf-ex  [kernel.kallsyms]     [k] native_write_msr0.06%     0.00%  perf-ex  [kernel.kallsyms]     [k] ctx_resched0.06%     0.00%  perf-ex  [kernel.kallsyms]     [k] x86_pmu_enable0.06%     0.00%  perf-ex  [kernel.kallsyms]     [k] intel_pmu_enable_all0.04%     0.04%  perf-ex  [kernel.kallsyms]     [k] nmi_restore0.01%     0.01%  perf-ex  [kernel.kallsyms]     [k] native_flush_tlb_one_user0.00%     0.00%  perf-ex  [kernel.kallsyms]     [k] native_sched_clock0.00%     0.00%  perf-ex  [kernel.kallsyms]     [k] its_return_thunk#
# (Cannot load tips.txt file, please install perf!)
#

当前目录如下

mytest
│   ├── Cargo.lock
│   ├── Cargo.toml
│   ├── out.folded
│   ├── perf.data
│   ├── perf.data.old
│   ├── src
│   │   └── main.rs
│   └── target├── mytest├── FlameGraph
│   ├── README.md
│   ├── aix-perf.pl
│   ├── demos

进入我们的mytest文件夹

perf script | ~/FlameGraph/stackcollapse-perf.pl > out.folded

  • perf script:将 perf.data 中的二进制采样数据转换成可读的文本(每个采样及其调用栈)
  • ~/FlameGraph/stackcollapse-perf.pl: 将 perf script 的多行调用栈 “折叠”成一行一行的汇总格式,并统计每种栈出现的次数
  • 把原始性能数据 → 转成 “栈 + 计数” 的简洁汇总格式

~/FlameGraph/flamegraph.pl out.folded > mytest-flamegraph.svg

  • 读取 out.folded 这种“折叠栈”文件,生成一个交互式 SVG 火焰图

以下是得到的out.folded和火焰图svg

mytest;<core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next 19009718
mytest;<i32 as core::iter::range::Step>::forward_unchecked 6799607
mytest;<i32 as core::iter::range::Step>::forward_unchecked;core::num::<impl i32>::checked_add_unsigned 2168609
mytest;<i32 as core::iter::range::Step>::forward_unchecked;core::num::<impl i32>::checked_add_unsigned;core::num::<impl i32>::overflowing_add_unsigned 6883545
mytest;<i32 as core::iter::range::Step>::forward_unchecked;core::num::<impl i32>::checked_add_unsigned;core::num::<impl i32>::overflowing_add_unsigned;core::num::<impl i32>::overflowing_add 825448
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_map_object_deps;_dl_catch_exception;openaux;_dl_map_object;_dl_map_object_from_fd;_dl_map_segments;__mprotect;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_mprotect;do_mprotect_pkey;mprotect_fixup;perf_event_mmap;perf_event_mmap_event;perf_iterate_sb;perf_iterate_ctx 143804
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_map_object_deps;_dl_catch_exception;openaux;_dl_map_object;open_verify;__GI___read_nocancel;entry_SYSCALL_64 135150
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_new_object;asm_exc_page_fault;exc_page_fault;do_user_addr_fault;handle_mm_fault;__handle_mm_fault;handle_pte_fault;do_fault;do_read_fault;filemap_map_pages;next_uptodate_page 125265
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_relocate_object;_dl_protect_relro;__mprotect;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_mprotect;do_mprotect_pkey;mprotect_fixup;perf_event_mmap;perf_event_mmap_event;perf_iterate_sb;perf_iterate_ctx;perf_event_mmap_output;perf_output_begin 156958
mytest;_start;_dl_start;_dl_start_final;_dl_sysdep_start;dl_main;_dl_relocate_object;elf_dynamic_do_Rela;elf_machine_rela_relative;asm_exc_page_fault;exc_page_fault;do_user_addr_fault;find_vma;vmacache_find 150888
mytest;_start;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;putname;kmem_cache_free;slab_free_freelist_hook.constprop.0 111752
mytest;core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next 7222319
mytest;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;elf_map;vm_mmap;vm_mmap_pgoff;do_mmap;mmap_region;__mmap_region;perf_event_mmap;perf_event_mmap_event;perf_iterate_sb;perf_iterate_ctx;perf_event_mmap_output;__perf_event__output_id_sample;perf_output_copy 93497
mytest;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;setup_arg_pages;shift_arg_pages;move_page_tables;move_page_tables.part.0;alloc_new_pud.constprop.0;__p4d_alloc;get_zeroed_page;alloc_pages;__alloc_pages 69216
mytest;mytest::test1 550608
mytest;mytest::test2 2129064
mytest;std::rt::lang_start_internal;std::panic::catch_unwind;std::panicking::catch_unwind;std::panicking::catch_unwind::do_call;std::rt::lang_start_internal::{{closure}};std::rt::init;std::sys::pal::unix::init;std::sys::pal::unix::init::sanitize_standard_fds;__GI___poll;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_poll;do_sys_poll;do_poll.constprop.0;tty_poll 162159
mytest;std::rt::lang_start_internal;std::panic::catch_unwind;std::panicking::catch_unwind;std::panicking::catch_unwind::do_call;std::rt::lang_start_internal::{{closure}};std::rt::init;std::sys::pal::unix::init;std::sys::pal::unix::stack_overflow::imp::init;std::sys::pal::unix::stack_overflow::imp::install_main_guard;std::sys::pal::unix::stack_overflow::imp::install_main_guard_linux;std::sys::pal::unix::stack_overflow::imp::stack_start_aligned;std::sys::pal::unix::stack_overflow::imp::get_stack_start;__pthread_getattr_np;__GI___isoc99_sscanf 166637
perf-exec;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;begin_new_exec;perf_event_exec;asm_sysvec_apic_timer_interrupt;sysvec_apic_timer_interrupt;__sysvec_apic_timer_interrupt;hrtimer_interrupt;__hrtimer_run_queues;tick_sched_timer;tick_sched_handle;update_process_times;calc_global_load_tick 37506
perf-exec;entry_SYSCALL_64_after_hwframe;do_syscall_64;x64_sys_call;__x64_sys_execve;do_execveat_common.isra.0;bprm_execve;bprm_execve.part.0;exec_binprm;search_binary_handler;load_elf_binary;begin_new_exec;perf_event_exec;ctx_resched;x86_pmu_enable;intel_pmu_enable_all;native_write_msr 19422

mytest-flamegraph

如何下载

# 从服务器下载文件
scp filament@your_server_ip:/mnt/hdd-ws/users/filament/mytest/mytest-flamegraph.svg D:\Documents
# 下载的一般形式
scp username@servername:file_path destination_path
# 上传文件
scp local_file_path username@server_ip:server_file_path

参考文章:

DatenLord|Rust程序性能分析 - 知乎

我的程序卡在哪里?—— 用火焰图精确定位性能瓶颈 | Mice World

http://www.hskmm.com/?act=detail&tid=27831

相关文章:

  • PySimpleGUI 如何像VB那样精确布局?
  • 并查集
  • 2025 年干燥机厂家最新推荐排行榜:聚焦实验室 / 工业用优质设备,精选实力企业权威指南喷雾/造粒/工业喷雾/陶瓷喷雾/制粒/奶粉喷雾干燥机厂家推荐
  • aardio fsys.file创建文件,写入文件报错的替代解决办法
  • Mac OS npm报错
  • npm install 遇到的灵异问题
  • Minix3-操作系统学习
  • 有限差分法
  • 电商-订单查询优化方案 - 努力-
  • 2025 年折弯机厂家最新推荐排行榜:数控 / 电液伺服 / 液压机型权威测评,领先企业深度解析
  • 2025 年最新推荐切割机厂家排行榜:全包围 / 半包围激光切割机及金属、等离子切割机优选品牌单平台光纤激光/大功率光纤金属/全自动等离子切割机厂家推荐
  • 2025航空插头实力厂家最新推荐排行榜:技术精湛与品质卓越的
  • Springboot项目开发一览 - 吾辈当奋斗
  • 企业数字化转型必备:纷享销客CRM如何助力企业突破数据瓶颈
  • P7457 [CERC2018] The Bridge on the River Kawaii
  • 2025 年温控器厂家最新推荐排行榜:涵盖电子式/机械式/双恒温/紧凑型温控器等多类型,综合性能、创新与口碑的权威榜单
  • 2025 年 VI 设计企业最新推荐榜:优质机构深度解析,助力企业精准匹配优质品牌视觉解决方案
  • 【Linux基础知识系列:第一百三十九篇】使用Bash编写函数提升脚本功能 - 教程
  • 括号序列构造字典序最小化问题
  • C++ 性能优化:用 CRTP 搭建零开销编译期多态
  • Python 中包(Package)和模块(Module)的区别
  • Elasticsearch Enterprise 9.1.5 (macOS, Linux, Windows) - 分布式搜索和分析引擎
  • GIMP 3.0.6 发布 - 免费开源图像编辑器
  • Elasticsearch Enterprise 8.19.5 (macOS, Linux, Windows) - 分布式搜索和分析引擎
  • 2025 年国内丝杆升降机厂家最新推荐排行榜:滚珠 / 螺旋 / 蜗轮 / 同步 / 电动类型品牌核心优势深度解析
  • Linux设置分辨率(临时)
  • CAD 多个dwg文件合成一张图(无需插件)
  • 鸿蒙应用开发从入门到实战(十八):组件编程思想之代码复用
  • Gerkin+Pytest(python)建立自动化(BDD)
  • git克隆代码保留提交记录,从源仓库迁移到新仓库地址