[缺陷描述]: kernel-selftests测试ptrace.vmaccess执行失败。 [重现概率]: 必现 [重现步骤] 1. 下载kernel-6.6.25-2_rc1.an23.src.rpm 2. rpm -i kernel-6.6.25-2_rc1.an23.src.rpm 3. yum-builddep -y /root/rpmbuild/SPECS/kernel.spec rpmbuild -bp /root/rpmbuild/SPECS/kernel.spec cd /root/rpmbuild/BUILD/kernel-6.6.25-2_rc1.an23/linux-6.6.25-2_rc1.an23.aarch64/tools/testing/selftests/ptrace 4. make;./vmaccess [期望结果]: 用例执行PASS [实际结果]: [root@iZbp143ti4ccpaufkzata6Z ptrace]# ./vmaccess TAP version 13 1..2 # Starting 2 tests from 1 test cases. # RUN global.vmaccess ... # OK global.vmaccess ok 1 global.vmaccess # RUN global.attach ... # attach: Test terminated by timeout # FAIL global.attach not ok 2 global.attach # FAILED: 1 / 2 tests passed. # Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0 [root@iZbp143ti4ccpaufkzata6Z cgroup]# uname -r 6.6.25-2_rc1.an23.aarch64 [重现环境]: 环境信息:云上ecs [root@iZbp143ti4ccpaufkzata6Z breakpoints]# uname -ra Linux iZbp143ti4ccpaufkzata6Z 6.6.25-2_rc1.an23.aarch64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 15:02:38 CST 2024 aarch64 aarch64 aarch64 GNU/Linux [root@iZbp143ti4ccpaufkzata6Z breakpoints]# [root@iZbp143ti4ccpaufkzata6Z breakpoints]# cat /etc/os-release NAME="Anolis OS" VERSION="23" ID="anolis" VERSION_ID="23" PLATFORM_ID="platform:an23" PRETTY_NAME="Anolis OS 23" ANSI_COLOR="0;31" HOME_URL="https://openanolis.cn/" BUG_REPORT_URL="https://bugzilla.openanolis.cn/" [root@iZbp143ti4ccpaufkzata6Z breakpoints]# [root@iZbp143ti4ccpaufkzata6Z breakpoints]# [root@iZbp143ti4ccpaufkzata6Z breakpoints]# df -h Filesystem Size Used Avail Use% Mounted on devtmpfs 4.0M 0 4.0M 0% /dev tmpfs 16G 0 16G 0% /dev/shm tmpfs 6.1G 804K 6.1G 1% /run efivarfs 256K 18K 239K 7% /sys/firmware/efi/efivars /dev/nvme0n1p2 40G 13G 27G 33% / tmpfs 16G 3.1M 16G 1% /tmp /dev/nvme0n1p1 500M 6.5M 494M 2% /boot/efi tmpfs 3.1G 4.0K 3.1G 1% /run/user/0 [root@iZbp143ti4ccpaufkzata6Z breakpoints]# [root@iZbp143ti4ccpaufkzata6Z breakpoints]# free -g total used free shared buff/cache available Mem: 30 0 28 0 1 29 
Swap: 0 0 0 [root@iZbp143ti4ccpaufkzata6Z breakpoints]# [root@iZbp143ti4ccpaufkzata6Z breakpoints]# cat /proc/cmdline BOOT_IMAGE=(hd0,gpt2)/boot/vmlinuz-6.6.25-2_rc1.an23.aarch64 root=UUID=6424d533-3c41-4ad9-89fa-1d3bf8c49fd3 ro rhgb crashkernel=0M-2G:0M,2G-64G:256M,64G-:384M iommu.passthrough=1 iommu.strict=0 cryptomgr.notests cgroup.memory=nokmem rcupdate.rcu_cpu_stall_timeout=300 quiet selinux=1 console=tty0 biosdevname=0 net.ifnames=0 console=ttyAMA0,115200n8 noibrs nvme_core.io_timeout=4294967295 nvme_core.admin_timeout=4294967295 [root@iZbp143ti4ccpaufkzata6Z breakpoints]# [root@iZbp143ti4ccpaufkzata6Z breakpoints]# lscpu Architecture: aarch64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 8 On-line CPU(s) list: 0-7 Vendor ID: ARM BIOS Vendor ID: Alibaba Cloud Model name: Neoverse-N2 BIOS Model name: virt-rhel7.6.0 CPU @ 2.0GHz BIOS CPU family: 1 Model: 0 Thread(s) per core: 1 Core(s) per socket: 8 Socket(s): 1 Stepping: r0p0 BogoMIPS: 100.00 Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm 3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb dcpodp sve2 sveaes svepmull svebitperm svesh a3 svesm4 flagm2 frint svei8mm svebf16 i8mm bf16 dgh Caches (sum of all): L1d: 512 KiB (8 instances) L1i: 512 KiB (8 instances) L2: 8 MiB (8 instances) L3: 64 MiB (1 instance) NUMA: NUMA node(s): 1 NUMA node0 CPU(s): 0-7 Vulnerabilities: Gather data sampling: Not affected Itlb multihit: Not affected L1tf: Not affected Mds: Not affected Meltdown: Not affected Mmio stale data: Not affected Reg file data sampling: Not affected Retbleed: Not affected Spec rstack overflow: Not affected Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl Spectre v1: Mitigation; __user pointer sanitization Spectre v2: Mitigation; CSV2, BHB Srbds: Not affected Tsx async abort: Not affected
测试代码如下: TEST(attach) { int s, k, pid = fork(); if (!pid) { pthread_t pt; pthread_create(&pt, NULL, thread, NULL); pthread_join(pt, NULL); execlp("sleep", "sleep", "2", NULL); } sleep(1); k = ptrace(PTRACE_ATTACH, pid, 0L, 0L); ASSERT_EQ(errno, EAGAIN); ASSERT_EQ(k, -1); k = waitpid(-1, &s, WNOHANG); ASSERT_NE(k, -1); ASSERT_NE(k, 0); ASSERT_NE(k, pid); ASSERT_EQ(WIFEXITED(s), 1); ASSERT_EQ(WEXITSTATUS(s), 0); sleep(1); k = ptrace(PTRACE_ATTACH, pid, 0L, 0L); ASSERT_EQ(k, 0); k = waitpid(-1, &s, 0); ASSERT_EQ(k, pid); ASSERT_EQ(WIFSTOPPED(s), 1); ASSERT_EQ(WSTOPSIG(s), SIGSTOP); k = ptrace(PTRACE_DETACH, pid, 0L, 0L); ASSERT_EQ(k, 0); k = waitpid(-1, &s, 0); ASSERT_EQ(k, pid); ASSERT_EQ(WIFEXITED(s), 1); ASSERT_EQ(WEXITSTATUS(s), 0); k = waitpid(-1, NULL, 0); ASSERT_EQ(k, -1); ASSERT_EQ(errno, ECHILD); } strace可以看到,用例timeout了 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xffff903abed0) = 991697 rt_sigaction(SIGALRM, {sa_handler=0x401858, sa_mask=[], sa_flags=SA_SIGINFO}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0 setitimer(ITIMER_REAL, {it_interval={tv_sec=0, tv_usec=0}, it_value={tv_sec=30, tv_usec=0}}, Parent process after fork {it_interval={tv_sec=0, tv_usec=0}, it_value={tv_sec=0, tv_usec=0}}) = 0 wait4(991697, Trying to attach with ptrace 0xffffec642d5c, 0, NULL) = ? 
ERESTARTSYS (To be restarted if SA_RESTART is set) --- SIGALRM {si_signo=SIGALRM, si_code=SI_KERNEL} --- kill(-991697, SIGKILL) = 0 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call) setitimer(ITIMER_REAL, {it_interval={tv_sec=0, tv_usec=0}, it_value={tv_sec=0, tv_usec=0}}, {it_interval={tv_sec=0, tv_usec=0}, it_value={tv_sec=0, tv_usec=0}}) = 0 rt_sigaction(SIGALRM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, NULL, 8) = 0 write(2, "# attach: Test terminated by tim"..., 37# attach: Test terminated by timeout ) = 37 write(1, "# FAIL global.attach\n", 31# FAIL global.attach ) = 31 --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=991697, si_uid=0, si_status=SIGKILL, si_utime=0, si_stime=0} --- write(1, "not ok 2 global.attach\n", 23not ok 2 global.attach ) = 23 munmap(0xffff903aa000, 1024) = 0 write(1, "# FAILED: 1 / 2 tests passed.\n", 30# FAILED: 1 / 2 tests passed. ) = 30 write(1, "# Totals: pass:1 fail:1 xfail:0 "..., 55# Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0 ) = 55 exit_group(1) = ? +++ exited with 1 +++ 查了一下,社区已经有解决的patch。 https://lore.kernel.org/lkml/20230701145833.3604187-1-zhengyejian1@huawei.com/T/ 研发看看是不是将src包中的kernel-selftest代码更新一下。
社区 6.9 内核仍然存在这个问题,https://lore.kernel.org/lkml/20230701145833.3604187-1-zhengyejian1@huawei.com/T/ 中的补丁尚未被社区正式接受,建议等社区正式合入后再同步修复。
记录一下:Anolis 23 x86 环境、6.6.25-2.2_rc1.an23.x86_64 内核下,这个问题仍然存在。
--------------------------
[root@5f9Lab15 ptrace]# ./vmaccess
TAP version 13
1..2
# Starting 2 tests from 1 test cases.
# RUN global.vmaccess ...
# OK global.vmaccess
ok 1 global.vmaccess
# RUN global.attach ...
# attach: Test terminated by timeout
# FAIL global.attach
not ok 2 global.attach
# FAILED: 1 / 2 tests passed.
# Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0
[root@5f9Lab15 ptrace]# uname -r
6.6.25-2.2_rc1.an23.x86_64
ANCK 6.6.71-3 rc1 内核上,kernel-selftests 测试套的 ptrace.vmaccess 用例也存在这个问题:
#./vmaccess
TAP version 13
1..2
# Starting 2 tests from 1 test cases.
# RUN global.vmaccess ...
# OK global.vmaccess
ok 1 global.vmaccess
# RUN global.attach ...
# attach: Test terminated by timeout
# FAIL global.attach
not ok 2 global.attach
# FAILED: 1 / 2 tests passed.
# Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0
#uname -r
6.6.71-3_rc1.al8.aarch64
alinux3 的 6.6.71-3_rc2.al8.x86_64 内核版本上也存在同样的问题:
#./vmaccess
TAP version 13
1..2
# Starting 2 tests from 1 test cases.
# RUN global.vmaccess ...
# OK global.vmaccess
ok 1 global.vmaccess
# RUN global.attach ...
# attach: Test terminated by timeout
# FAIL global.attach
not ok 2 global.attach
# FAILED: 1 / 2 tests passed.
# Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0
6.6.88-cbp.git.5ad4b5ac3.an23.x86_64 内核上也有这个报错:
# ./vmaccess
TAP version 13
1..2
# Starting 2 tests from 1 test cases.
# RUN global.vmaccess ...
# OK global.vmaccess
ok 1 global.vmaccess
# RUN global.attach ...
# attach: Test terminated by timeout
# FAIL global.attach
not ok 2 global.attach
# FAILED: 1 / 2 tests passed.
# Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0
# uname -r
6.6.88-cbp.git.5ad4b5ac3.an23.x86_64
# cat /etc/anolis-release
Anolis OS release 23