# Code found at the memcpy@@GLIBC_2.14 symbol (objdump listing, AT&T syntax).
# Nothing here loads from a source buffer or stores to a destination buffer:
# every path only computes a code address into %rax and returns it. The
# candidate return values are bb530, bb4d0, bb590, 16c9c0, 171f30, 1926d0,
# 1930c0, 193130, 18ece0 and 18ed40.
# The choice depends on a 32-bit flag word at 0xb4(%rcx) and on single bytes
# at 0x79(%rcx) and 0x85(%rcx), where %rcx is loaded from the slot annotated
# _rtld_global_ro.
# NOTE(review): the meaning of the individual bits is not visible in this
# listing -- presumably cached CPU feature flags; confirm against the glibc source.
# NOTE(review): the <memcpy@GLIBC_2.2.5+0x..> / <__nss_group_lookup+0x..>
# annotations are presumably objdump's nearest-symbol guesses for unnamed
# targets, not the real names of the returned routines -- confirm.
180688 000000000009f150 <memcpy@@GLIBC_2.14>:
# rcx = pointer loaded from 3eae58; rax = bb530, the value returned if the
# first test below hits.
180689 ¦9f150: 48 8b 0d 01 bd 34 00 mov 0x34bd01(%rip),%rcx # 3eae58 <_rtld_global_ro@GLIBC_PRIVATE>
180690 ¦9f157: 48 8d 05 d2 c3 01 00 lea 0x1c3d2(%rip),%rax # bb530 <memcpy@GLIBC_2.2.5+0x60>
# edx = flag word at 0xb4(%rcx). Bit 19 (0x80000) set -> return bb530 via 9f1c5.
180691 ¦9f15e: 8b 91 b4 00 00 00 mov 0xb4(%rcx),%edx
180692 ¦9f164: f7 c2 00 00 08 00 test $0x80000,%edx
180693 ¦9f16a: 75 59 jne 9f1c5 <memcpy@@GLIBC_2.14+0x75>
# Bit 12 (0x1000) set AND bit 20 (0x100000) clear -> go to 9f1d0.
180694 ¦9f16c: 89 d0 mov %edx,%eax
180695 ¦9f16e: 25 00 10 10 00 and $0x101000,%eax
180696 ¦9f173: 3d 00 10 00 00 cmp $0x1000,%eax
180697 ¦9f178: 74 56 je 9f1d0 <memcpy@@GLIBC_2.14+0x80>
# %dh is bits 8-15 of %edx, so this tests bit 11 (0x800); set -> go to 9f200.
180698 ¦9f17a: f6 c6 08 test $0x8,%dh
180699 ¦9f17d: 0f 85 7d 00 00 00 jne 9f200 <memcpy@@GLIBC_2.14+0xb0>
# Byte 0x79(%rcx) bit 1 set AND edx bit 18 (0x40000) clear -> go to 9f1b0;
# in every other case continue at 9f191.
180700 ¦9f183: f6 41 79 02 testb $0x2,0x79(%rcx)
180701 ¦9f187: 74 08 je 9f191 <memcpy@@GLIBC_2.14+0x41>
180702 ¦9f189: f7 c2 00 00 04 00 test $0x40000,%edx
180703 ¦9f18f: 74 1f je 9f1b0 <memcpy@@GLIBC_2.14+0x60>
# 9f191: return bb590 if byte 0x85(%rcx) has bit 1 set, otherwise bb4d0.
# lea does not modify flags, so cmovne still sees the testb result.
180704 ¦9f191: f6 81 85 00 00 00 02 testb $0x2,0x85(%rcx)
180705 ¦9f198: 48 8d 15 f1 c3 01 00 lea 0x1c3f1(%rip),%rdx # bb590 <memcpy@GLIBC_2.2.5+0xc0>
180706 ¦9f19f: 48 8d 05 2a c3 01 00 lea 0x1c32a(%rip),%rax # bb4d0 <memcpy@GLIBC_2.2.5>
180707 ¦9f1a6: 48 0f 45 c2 cmovne %rdx,%rax
180708 ¦9f1aa: c3 retq
# Filler after the ret; no branch in this listing targets it.
180709 ¦9f1ab: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
# 9f1b0: return 171f30 if edx bit 1 (0x2) is set, otherwise 16c9c0.
# The flags come from the and; the two lea instructions leave them intact.
# Execution falls through into the shared return at 9f1c5.
180710 ¦9f1b0: 83 e2 02 and $0x2,%edx
180711 ¦9f1b3: 48 8d 05 06 d8 0c 00 lea 0xcd806(%rip),%rax # 16c9c0 <__nss_group_lookup@GLIBC_2.2.5+0x2a10>
180712 ¦9f1ba: 48 8d 15 6f 2d 0d 00 lea 0xd2d6f(%rip),%rdx # 171f30 <__nss_group_lookup@GLIBC_2.2.5+0x7f80>
180713 ¦9f1c1: 48 0f 45 c2 cmovne %rdx,%rax
# 9f1c5: shared return point, also the target of the jne at 9f16a and at 9f1dd.
180714 ¦9f1c5: f3 c3 repz retq
# Filler after the ret; the instruction bytes continue on the next listing line.
180715 ¦9f1c7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
180716 ¦9f1ce: 00 00
# 9f1d0: edx bit 17 (0x20000) set -> return 1926d0 via 9f1c5.
# Otherwise return 193130 if byte 0x85(%rcx) has bit 1 set, else 1930c0.
180717 ¦9f1d0: 81 e2 00 00 02 00 and $0x20000,%edx
180718 ¦9f1d6: 48 8d 05 f3 34 0f 00 lea 0xf34f3(%rip),%rax # 1926d0 <__nss_group_lookup@GLIBC_2.2.5+0x28720>
180719 ¦9f1dd: 75 e6 jne 9f1c5 <memcpy@@GLIBC_2.14+0x75>
180720 ¦9f1df: f6 81 85 00 00 00 02 testb $0x2,0x85(%rcx)
180721 ¦9f1e6: 48 8d 15 43 3f 0f 00 lea 0xf3f43(%rip),%rdx # 193130 <__nss_group_lookup@GLIBC_2.2.5+0x29180>
180722 ¦9f1ed: 48 8d 05 cc 3e 0f 00 lea 0xf3ecc(%rip),%rax # 1930c0 <__nss_group_lookup@GLIBC_2.2.5+0x29110>
180723 ¦9f1f4: 48 0f 45 c2 cmovne %rdx,%rax
180724 ¦9f1f8: c3 retq
# Filler after the ret; no branch in this listing targets it.
180725 ¦9f1f9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
# 9f200: return 18ed40 if byte 0x85(%rcx) has bit 1 set, otherwise 18ece0.
180726 ¦9f200: f6 81 85 00 00 00 02 testb $0x2,0x85(%rcx)
180727 ¦9f207: 48 8d 15 32 fb 0e 00 lea 0xefb32(%rip),%rdx # 18ed40 <__nss_group_lookup@GLIBC_2.2.5+0x24d90>
180728 ¦9f20e: 48 8d 05 cb fa 0e 00 lea 0xefacb(%rip),%rax # 18ece0 <__nss_group_lookup@GLIBC_2.2.5+0x24d30>
180729 ¦9f215: 48 0f 45 c2 cmovne %rdx,%rax
180730 ¦9f219: c3 retq
# Filler after the final ret.
180731 ¦9f21a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
The code above was dumped with objdump. I have the following question: I expected memcpy to contain load-then-store instructions such as:
movups xmm0, xmmword ptr [rsi]
movups xmmword ptr [rdi], xmm0
But in this function, I cannot see any such read-then-write code. How does it perform the copy operation?

This looks like glibc's dynamic-linking resolver function, which returns a function pointer (in %rax) based on CPUID, e.g. selecting the AVX version on CPUs that support it.
This resolver function runs once, during dynamic linking (e.g. on the first call if you use lazy dynamic linking).
And yes, this is the code you'll find at the symbol name memcpy.
Related: the question "perf report shows this function '__memset_avx2_unaligned_erms' has overhead. does this mean memory is unaligned?" talks about that memset implementation, which the generic dispatcher selected on that asker's CPU.
Note that glibc is open source; you can look at the hand-written, commented asm source for its memcpy / memmove implementations, e.g. https://code.woboq.org/userspace/glibc/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S.html