From df7958a9606a342e3c3ac5a40fc41f3a79669d62 Mon Sep 17 00:00:00 2001
From: Denis Vlasenko
Date: Tue, 15 Apr 2008 08:27:24 +0000
Subject: amd64 string ops: use alignment more carefully, and comment it.

By capping max padding to not be bigger than three next insns,
we avoid having ridiculously big NOPs like this one:

  53:	66 66 66 66 2e 0f 1f 	nopw   %cs:0x0(%rax,%rax,1)
  5a:	84 00 00 00 00 00

which was bigger than next three insns combined!

Size changes:

   text	   data	    bss	    dec	    hex	filename
    102	      0	      0	    102	     66	x86_64/memcpy.o
    102	      0	      0	    102	     66	x86_64.old/memcpy.o
     90	      0	      0	     90	     5a	x86_64/mempcpy.o
    102	      0	      0	    102	     66	x86_64.old/mempcpy.o
    210	      0	      0	    210	     d2	x86_64/memset.o
    242	      0	      0	    242	     f2	x86_64.old/memset.o
    213	      0	      0	    213	     d5	x86_64/stpcpy.o
    220	      0	      0	    220	     dc	x86_64.old/stpcpy.o
    428	      0	      0	    428	    1ac	x86_64/strcat.o
    444	      0	      0	    444	    1bc	x86_64.old/strcat.o
    417	      0	      0	    417	    1a1	x86_64/strchr.o
    418	      0	      0	    418	    1a2	x86_64.old/strchr.o
     33	      0	      0	     33	     21	x86_64/strcmp.o
     33	      0	      0	     33	     21	x86_64.old/strcmp.o
    213	      0	      0	    213	     d5	x86_64/strcpy.o
    220	      0	      0	    220	     dc	x86_64.old/strcpy.o
    135	      0	      0	    135	     87	x86_64/strcspn.o
    151	      0	      0	    151	     97	x86_64.old/strcspn.o
    225	      0	      0	    225	     e1	x86_64/strlen.o
    233	      0	      0	    233	     e9	x86_64.old/strlen.o
    140	      0	      0	    140	     8c	x86_64/strpbrk.o
    156	      0	      0	    156	     9c	x86_64.old/strpbrk.o
    135	      0	      0	    135	     87	x86_64/strspn.o
    151	      0	      0	    151	     97	x86_64.old/strspn.o

Also, a few files got their .text alignment relaxed from 16 to 8 bytes,
which reduces padding at link time.
---
 libc/string/x86_64/strchr.S | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'libc/string/x86_64/strchr.S')

diff --git a/libc/string/x86_64/strchr.S b/libc/string/x86_64/strchr.S
index c357bfd03..9ef46b7f2 100644
--- a/libc/string/x86_64/strchr.S
+++ b/libc/string/x86_64/strchr.S
@@ -92,7 +92,8 @@ ENTRY (BP_SYM (strchr))
 	   each of whose bytes is C. This turns each byte that is C
 	   into a zero. */
 
-	.p2align 4
+	/* Next 3 insns are 10 bytes total, make sure we decode them in one go */
+	.p2align 4,,10
 4:
 	/* Main Loop is unrolled 4 times. */
 	/* First unroll. */
@@ -230,8 +231,11 @@ ENTRY (BP_SYM (strchr))
 	   reversed. */
 
 
-	.p2align 4	/* Align, it's a jump target. */
-3:	movq %r9,%rdx	/* move to %rdx so that we can access bytes */
+	/* Align, it's a jump target. */
+	/* Next 3 insns are 9 bytes total, make sure we decode them in one go */
+	.p2align 4,,9
+3:
+	movq %r9,%rdx	/* move to %rdx so that we can access bytes */
 	subq $8,%rax	/* correct pointer increment. */
 	testb %cl, %cl	/* is first byte C? */
 	jz 6f		/* yes => return pointer */
@@ -281,7 +285,7 @@ ENTRY (BP_SYM (strchr))
 	incq %rax
 
 6:
-	nop
+	/* nop - huh?? */
 	retq
 
 END (BP_SYM (strchr))
-- 
cgit v1.2.3