diff options
Diffstat (limited to 'libc/string')
-rw-r--r-- | libc/string/bfin/memchr.S | 4 | ||||
-rw-r--r-- | libc/string/bfin/strcmp.S | 80 | ||||
-rw-r--r-- | libc/string/generic/strtok_r.c | 14 | ||||
-rw-r--r-- | libc/string/ia64/bzero.S | 136 | ||||
-rw-r--r-- | libc/string/ia64/memccpy.S | 102 | ||||
-rw-r--r-- | libc/string/ia64/memchr.S | 32 | ||||
-rw-r--r-- | libc/string/ia64/memcmp.S | 94 | ||||
-rw-r--r-- | libc/string/ia64/memcpy.S | 160 | ||||
-rw-r--r-- | libc/string/ia64/memmove.S | 170 | ||||
-rw-r--r-- | libc/string/ia64/memset.S | 178 | ||||
-rw-r--r-- | libc/string/ia64/strchr.S | 32 | ||||
-rw-r--r-- | libc/string/ia64/strcmp.S | 2 | ||||
-rw-r--r-- | libc/string/ia64/strcpy.S | 66 | ||||
-rw-r--r-- | libc/string/ia64/strlen.S | 18 | ||||
-rw-r--r-- | libc/string/ia64/strncmp.S | 10 | ||||
-rw-r--r-- | libc/string/ia64/strncpy.S | 90 | ||||
-rw-r--r-- | libc/string/sh64/memcpy.S | 2 | ||||
-rw-r--r-- | libc/string/sh64/memset.S | 29 | ||||
-rw-r--r-- | libc/string/sh64/strcpy.S | 28 | ||||
-rw-r--r-- | libc/string/xtensa/memcpy.S | 22 | ||||
-rw-r--r-- | libc/string/xtensa/memset.S | 12 | ||||
-rw-r--r-- | libc/string/xtensa/strcmp.S | 148 | ||||
-rw-r--r-- | libc/string/xtensa/strcpy.S | 72 | ||||
-rw-r--r-- | libc/string/xtensa/strlen.S | 56 | ||||
-rw-r--r-- | libc/string/xtensa/strncpy.S | 150 |
25 files changed, 854 insertions, 853 deletions
diff --git a/libc/string/bfin/memchr.S b/libc/string/bfin/memchr.S index 88e46bef6..26d419f7c 100644 --- a/libc/string/bfin/memchr.S +++ b/libc/string/bfin/memchr.S @@ -25,8 +25,8 @@ .weak _memchr ENTRY(_memchr) - P0 = R0; // P0 = address - P2 = R2; // P2 = count + P0 = R0; /* P0 = address */ + P2 = R2; /* P2 = count */ R1 = R1.B(Z); CC = R2 == 0; IF CC JUMP .Lfailed; diff --git a/libc/string/bfin/strcmp.S b/libc/string/bfin/strcmp.S index 12e8c53c6..ef23aa9ab 100644 --- a/libc/string/bfin/strcmp.S +++ b/libc/string/bfin/strcmp.S @@ -29,66 +29,66 @@ ENTRY(_strcmp) p1 = r0; p2 = r1; - p0 = -1; // (need for loop counter init) + p0 = -1; /* (need for loop counter init) */ - // check if byte aligned - r0 = r0 | r1; // check both pointers at same time - r0 <<= 30; // dump all but last 2 bits - cc = az; // are they zero? - if !cc jump .Lunaligned; // no; use unaligned code. - // fall-thru for aligned case.. + /* check if byte aligned */ + r0 = r0 | r1; /* check both pointers at same time */ + r0 <<= 30; /* dump all but last 2 bits */ + cc = az; /* are they zero? */ + if !cc jump .Lunaligned; /* no; use unaligned code. */ + /* fall-thru for aligned case.. */ - // note that r0 is zero from the previous... - // p0 set to -1 + /* note that r0 is zero from the previous... */ + /* p0 set to -1 */ LSETUP (.Lbeginloop, .Lendloop) lc0=p0; - // pick up first words + /* pick up first words */ r1 = [p1++]; r2 = [p2++]; - // make up mask: 0FF0FF + /* make up mask: 0FF0FF */ r7 = 0xFF; r7.h = 0xFF; - // loop : 9 cycles to check 4 characters + /* loop : 9 cycles to check 4 characters */ cc = r1 == r2; .Lbeginloop: - if !cc jump .Lnotequal4; // compare failure, exit loop + if !cc jump .Lnotequal4; /* compare failure, exit loop */ - // starting with 44332211 - // see if char 3 or char 1 is 0 - r3 = r1 & r7; // form 00330011 - // add to zero, and (r2 is free, reload) + /* starting with 44332211 */ + /* see if char 3 or char 1 is 0 */ + r3 = r1 & r7; /* form 00330011 */ + /* add to zero, and (r2 is free, reload) */ r6 = r3 +|+ r0 || r2 = [p2++] || nop; - cc = az; // true if either is zero - r3 = r1 ^ r3; // form 44002200 (4321^0301 => 4020) - // (trick, saves having another mask) - // add to zero, and (r1 is free, reload) + cc = az; /* true if either is zero */ + r3 = r1 ^ r3; /* form 44002200 (4321^0301 => 4020) */ + /* (trick, saves having another mask) */ + /* add to zero, and (r1 is free, reload) */ r6 = r3 +|+ r0 || r1 = [p1++] || nop; - cc |= az; // true if either is zero - if cc jump .Lzero4; // leave if a zero somewhere + cc |= az; /* true if either is zero */ + if cc jump .Lzero4; /* leave if a zero somewhere */ .Lendloop: cc = r1 == r2; - // loop exits -.Lnotequal4: // compare failure on 4-char compare - // address pointers are one word ahead; - // faster to use zero4 exit code + /* loop exits */ +.Lnotequal4: /* compare failure on 4-char compare */ + /* address pointers are one word ahead; */ + /* faster to use zero4 exit code */ p1 += 4; p2 += 4; -.Lzero4: // one of the bytes in word 1 is zero - // but we've already fetched the next word; so - // backup two to look at failing word again +.Lzero4: /* one of the bytes in word 1 is zero */ + /* but we've already fetched the next word; so */ + /* backup two to look at failing word again */ p1 += -8; p2 += -8; - // here when pointers are unaligned: checks one - // character at a time. Also use at the end of - // the word-check algorithm to figure out what happened + /* here when pointers are unaligned: checks one */ + /* character at a time. Also use at the end of */ + /* the word-check algorithm to figure out what happened */ .Lunaligned: - // R0 is non-zero from before. - // p0 set to -1 + /* R0 is non-zero from before. */ + /* p0 set to -1 */ r0 = 0 (Z); r1 = B[p1++] (Z); @@ -96,18 +96,18 @@ ENTRY(_strcmp) LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0; .Lbeginloop1: - cc = r1; // first char must be non-zero - // chars must be the same + cc = r1; /* first char must be non-zero */ + /* chars must be the same */ r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop; cc &= az; - r3 = r0 - r2; // second char must be non-zero + r3 = r0 - r2; /* second char must be non-zero */ cc &= an; if !cc jump .Lexitloop1; .Lendloop1: r2 = B[p2++] (Z); -.Lexitloop1: // here means we found a zero or a difference. - // we have r2(N), p2(N), r1(N+1), p1(N+2) +.Lexitloop1: /* here means we found a zero or a difference. */ + /* we have r2(N), p2(N), r1(N+1), p1(N+2) */ r1=B[p1+ -2] (Z); r0 = r1 - r2; (r7:4) = [sp++]; diff --git a/libc/string/generic/strtok_r.c b/libc/string/generic/strtok_r.c index d082d226e..7648212f7 100644 --- a/libc/string/generic/strtok_r.c +++ b/libc/string/generic/strtok_r.c @@ -29,17 +29,17 @@ # define __rawmemchr strchr /* Experimentally off - libc_hidden_proto(strchr) */ #endif - -/* Parse S into tokens separated by characters in DELIM. +#if 0 + Parse S into tokens separated by characters in DELIM. If S is NULL, the saved pointer in SAVE_PTR is used as the next starting point. For example: char s[] = "-abc-=-def"; char *sp; - x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" - x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL - x = strtok_r(NULL, "=", &sp); // x = NULL - // s = "abc\0-def\0" -*/ + x = strtok_r(s, "-", &sp); /* x = "abc", sp = "=-def" */ + x = strtok_r(NULL, "-=", &sp); /* x = "def", sp = NULL */ + x = strtok_r(NULL, "=", &sp); /* x = NULL */ + /* s = "abc\0-def\0" */ +#endif char *strtok_r (char *s, const char *delim, char **save_ptr) { char *token; diff --git a/libc/string/ia64/bzero.S b/libc/string/ia64/bzero.S index d390838a6..1f0f8b7ac 100644 --- a/libc/string/ia64/bzero.S +++ b/libc/string/ia64/bzero.S @@ -47,13 +47,13 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_unalgn p9 #define p_y p11 #define p_n p12 @@ -65,7 +65,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -87,49 +87,49 @@ ENTRY(bzero) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value + mov ret0 = dest /* return value */ nop.m 0 cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest nop.i 0 -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 - sub bytecnt = (MIN1+1), tmp // higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 */ + sub bytecnt = (MIN1+1), tmp /* higher than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = r0,-4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = r0,-2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */ } { .mmi nop.m 0 (p_y) st2 [ptr2] = r0,-1 @@ -138,44 +138,44 @@ ENTRY(bzero) { .mmi (p_yy) st1 [ptr2] = r0 - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib - nop.m 0 + nop.m 0 shr.u linecnt = cnt, LSIZE_SH nop.b 0 ;; } .align 32 -.l1b: // ------------------// L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -190,7 +190,7 @@ ENTRY(bzero) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -198,14 +198,14 @@ ENTRY(bzero) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -213,11 +213,11 @@ ENTRY(bzero) (p_scr) br.cond.dpnt.many .store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } .align 32 -.l2: // -----------------------------// L2A: store 32B in 2 cycles +.l2: /* ----------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -228,38 +228,38 @@ ENTRY(bzero) ;; } .store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store +(p_y) store [ptr1] = myval, 8 /* store */ (p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store +{ .mmi /* store */ (p_yy) store [ptr1] = myval, 8 -(p_yy) add cnt = -8, cnt // subtract +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = r0,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = r0,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? + tbit.nz.unc p_y, p0 = cnt, 0 /* should we terminate with a st1 ? */ ;; } { .mib @@ -281,38 +281,38 @@ ENTRY(bzero) (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = r0, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = r0, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = r0 // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = r0 // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = r0 /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = r0 /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(bzero) diff --git a/libc/string/ia64/memccpy.S b/libc/string/ia64/memccpy.S index 1afba3637..259d680bc 100644 --- a/libc/string/ia64/memccpy.S +++ b/libc/string/ia64/memccpy.S @@ -23,7 +23,7 @@ Inputs: in0: dest in1: src - in2: char + in2: char in3: byte count This implementation assumes little endian mode (UM.be = 0). @@ -69,75 +69,75 @@ ENTRY(memccpy) .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] .rotp p[MEMLAT + 6 + 1] - mov ret0 = r0 // return NULL if no match + mov ret0 = r0 /* return NULL if no match */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers - mov dest = in0 // dest + mov saved_pr = pr /* save the predicate registers */ + mov dest = in0 /* dest */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov saved_ec = ar.ec // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ + mov saved_ec = ar.ec /* save the loop counter */ .body - mov src = in1 // src - extr.u char = in2, 0, 8 // char - mov len = in3 // len - sub tmp = r0, in0 // tmp = -dest - cmp.ne p7, p0 = r0, r0 // clear p7 + mov src = in1 /* src */ + extr.u char = in2, 0, 8 /* char */ + mov len = in3 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - mov ar.ec = 0 // ec not guaranteed zero on entry -(p6) br.cond.spnt .cpyfew // copy byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + mov ar.ec = 0 /* ec not guaranteed zero on entry */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 mux1 charx8 = char, @brcst (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ cmp.eq p6, p0 = value, char (p6) br.cond.spnt .foundit br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 6 + 1 // six more passes needed - ld8 r[1] = [asrc], 8 // r[1] = w0 - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 6 + 1 /* six more passes needed */ + ld8 r[1] = [asrc], 8 /* r[1] = w0 */ + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ ALIGN(32) .l2: -(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[0]) ld8.s r[0] = [asrc], 8 /* r[0] = w1 */ +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) xor tmp3[0] = val[1], charx8 (p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1] -(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't - // valid - rollback! +(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 /* our data isn't */ + /* valid - rollback! */ (p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1] (p6) br.cond.spnt .gotit -(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+6]) st8 [dest] = val[3], 8 /* store val to dest */ +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ br.ctop.sptk .l2 br.cond.sptk .cpyfew .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 2 + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 2 + 1 ;; /* set EC */ .l3: (p[0]) ld8.s r[0] = [src], 8 (p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8 @@ -149,8 +149,8 @@ ENTRY(memccpy) (p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -163,14 +163,14 @@ ENTRY(memccpy) .foundit: (p6) mov ret0 = dest .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter - mov ar.ec = saved_ec ;; // restore the epilog counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ + mov ar.ec = saved_ec ;; /* restore the epilog counter */ br.ret.sptk.many b0 .gotit: .pred.rel "mutex" p6, p7 -(p6) mov value = val[3] // if coming from l2 -(p7) mov value = r[MEMLAT+2] // if coming from l3 +(p6) mov value = val[3] /* if coming from l2 */ +(p7) mov value = r[MEMLAT+2] /* if coming from l3 */ mov ar.lc = pos0[1] ;; .l5: extr.u tmp = value, 0, 8 ;; diff --git a/libc/string/ia64/memchr.S b/libc/string/ia64/memchr.S index 2bf078fe6..0246b5997 100644 --- a/libc/string/ia64/memchr.S +++ b/libc/string/ia64/memchr.S @@ -62,18 +62,18 @@ ENTRY(__memchr) .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2] .rotp p[MEMLAT+3] .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .save pr, saved_pr - mov saved_pr = pr // save the predicates + mov saved_pr = pr /* save the predicates */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 - cmp.ne p7, p0 = r0, r0 // clear p7 - extr.u chr = in1, 0, 8 // chr = (unsigned char) in1 + and tmp = 7, str /* tmp = str % 8 */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + extr.u chr = in1, 0, 8 /* chr = (unsigned char) in1 */ mov len = in2 - cmp.gtu p6, p0 = 16, in2 // use a simple loop for short -(p6) br.cond.spnt .srchfew ;; // searches - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + cmp.gtu p6, p0 = 16, in2 /* use a simple loop for short */ +(p6) br.cond.spnt .srchfew ;; /* searches */ + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; sub len = len, loopcnt @@ -86,12 +86,12 @@ ENTRY(__memchr) (p6) br.cond.spnt .foundit br.cloop.sptk .l1 ;; .str_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // remaining len = len & 7 + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* remaining len = len & 7 */ adds loopcnt = -1, loopcnt mov ar.ec = MEMLAT + 3 - mux1 chrx8 = chr, @brcst ;; // get a word full of chr + mux1 chrx8 = chr, @brcst ;; /* get a word full of chr */ mov ar.lc = loopcnt mov pr.rot = 1 << 16 ;; .l2: @@ -114,12 +114,12 @@ ENTRY(__memchr) (p6) br.cond.dpnt .foundit br.cloop.sptk .l3 ;; .notfound: - cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here) - mov ret0 = r0 ;; // return NULL + cmp.ne p6, p0 = r0, r0 /* clear p6 (p7 was already 0 when we got here) */ + mov ret0 = r0 ;; /* return NULL */ .foundit: .pred.rel "mutex" p6, p7 -(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3 -(p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2 +(p6) adds ret0 = -1, ret0 /* if we got here from l1 or l3 */ +(p7) add ret0 = addr[MEMLAT+2], poschr[1] /* if we got here from l2 */ mov pr = saved_pr, -1 mov ar.lc = saved_lc br.ret.sptk.many b0 diff --git a/libc/string/ia64/memcmp.S b/libc/string/ia64/memcmp.S index 8b0c096ce..adb1a20de 100644 --- a/libc/string/ia64/memcmp.S +++ b/libc/string/ia64/memcmp.S @@ -28,7 +28,7 @@ In this form, it assumes little endian mode. For big endian mode, the the two shifts in .l2 must be inverted: - shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 + shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 shr.u tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 >> sh2 and all the mux1 instructions should be replaced by plain mov's. */ @@ -36,8 +36,8 @@ #include "sysdep.h" #undef ret -#define OP_T_THRES 16 -#define OPSIZ 8 +#define OP_T_THRES 16 +#define OPSIZ 8 #define MEMLAT 2 #define start r15 @@ -56,85 +56,85 @@ ENTRY(memcmp) .prologue - alloc r2 = ar.pfs, 3, 37, 0, 40 + alloc r2 = ar.pfs, 3, 37, 0, 40 .rotr r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2] .rotp p[MEMLAT + 4 + 1] - mov ret0 = r0 // by default return value = 0 + mov ret0 = r0 /* by default return value = 0 */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp = r0, in0 // tmp = -dest + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES -(p6) br.cond.spnt .cmpfew // compare byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ +(p6) br.cond.spnt .cmpfew /* compare byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value1 = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value1 = [src], 1 /* value = *src++ */ ld1 value2 = [dest], 1 ;; cmp.ne p6, p0 = value1, value2 (p6) br.cond.spnt .done br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 4 + 1 // four more passes needed - ld8 r[1] = [asrc], 8 ;; // r[1] = w0 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 4 + 1 /* four more passes needed */ + ld8 r[1] = [asrc], 8 ;; /* r[1] = w0 */ .align 32 -// We enter this loop with p6 cleared by the above comparison +/* We enter this loop with p6 cleared by the above comparison */ .l2: -(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1 +(p[0]) ld8 r[0] = [asrc], 8 /* r[0] = w1 */ (p[0]) ld8 q[0] = [dest], 8 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) cmp.ne p6, p0 = q[MEMLAT + 4], val[1] -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ (p6) br.cond.spnt .l2exit br.ctop.sptk .l2 br.cond.sptk .cmpfew .l3exit: mux1 value1 = r[MEMLAT], @rev mux1 value2 = q[MEMLAT], @rev - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ .l2exit: (p6) mux1 value1 = val[1], @rev (p6) mux1 value2 = q[MEMLAT + 4], @rev ;; cmp.ltu p6, p7 = value2, value1 ;; (p6) mov ret0 = -1 (p7) mov ret0 = 1 - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 1 ;; /* set EC */ .l3: (p[0]) ld8 r[0] = [src], 8 (p[0]) ld8 q[0] = [dest], 8 @@ -142,8 +142,8 @@ ENTRY(memcmp) (p6) br.cond.spnt .l3exit br.ctop.dptk .l3 ;; .cmpfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -154,10 +154,10 @@ ENTRY(memcmp) (p6) br.cond.spnt .done br.cloop.dptk .l4 ;; .done: -(p6) sub ret0 = value2, value1 // don't execute it if falling thru +(p6) sub ret0 = value2, value1 /* don't execute it if falling thru */ .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 END(memcmp) libc_hidden_def (memcmp) diff --git a/libc/string/ia64/memcpy.S b/libc/string/ia64/memcpy.S index 810eb0c0e..6c48a72d9 100644 --- a/libc/string/ia64/memcpy.S +++ b/libc/string/ia64/memcpy.S @@ -42,8 +42,8 @@ #define LFETCH_DIST 500 -#define ALIGN_UNROLL_no 4 // no. of elements -#define ALIGN_UNROLL_sh 2 // (shift amount) +#define ALIGN_UNROLL_no 4 /* no. of elements */ +#define ALIGN_UNROLL_sh 2 /* (shift amount) */ #define MEMLAT 8 #define Nrot ((4*(MEMLAT+2) + 7) & ~7) @@ -168,76 +168,76 @@ ENTRY(memcpy) .rotr r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1] .rotp p[MEMLAT+2] .rotf fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1] - mov ret0 = in0 // return tmp2 = dest + mov ret0 = in0 /* return tmp2 = dest */ .save pr, saved_pr - movi0 saved_pr = pr // save the predicate registers + movi0 saved_pr = pr /* save the predicate registers */ } { .mmi - and tmp4 = 7, in0 // check if destination is aligned - mov dest = in0 // dest - mov src = in1 // src + and tmp4 = 7, in0 /* check if destination is aligned */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ ;; } { .mii - cmp.eq p_scr, p0 = in2, r0 // if (len == 0) + cmp.eq p_scr, p0 = in2, r0 /* if (len == 0) */ .save ar.lc, saved_lc - movi0 saved_lc = ar.lc // save the loop counter + movi0 saved_lc = ar.lc /* save the loop counter */ .body - cmp.ge p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRESH + cmp.ge p_few, p0 = OP_T_THRES, in2 /* is len <= OP_T_THRESH */ } { .mbb - mov len = in2 // len -(p_scr) br.cond.dpnt.few .restore_and_exit // Branch no. 1: return dest -(p_few) br.cond.dpnt.many .copy_bytes // Branch no. 2: copy byte by byte + mov len = in2 /* len */ +(p_scr) br.cond.dpnt.few .restore_and_exit /* Branch no. 1: return dest */ +(p_few) br.cond.dpnt.many .copy_bytes /* Branch no. 2: copy byte by byte */ ;; } { .mmi #if defined(USE_LFETCH) - lfetch.nt1 [dest] // - lfetch.nt1 [src] // + lfetch.nt1 [dest] /* */ + lfetch.nt1 [src] /* */ #endif - shr.u elemcnt = len, 3 // elemcnt = len / 8 + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ } { .mib - cmp.eq p_scr, p0 = tmp4, r0 // is destination aligned? - sub loopcnt = 7, tmp4 // + cmp.eq p_scr, p0 = tmp4, r0 /* is destination aligned? */ + sub loopcnt = 7, tmp4 /* */ (p_scr) br.cond.dptk.many .dest_aligned ;; } { .mmi - ld1 tmp2 = [src], 1 // - sub len = len, loopcnt, 1 // reduce len - movi0 ar.lc = loopcnt // + ld1 tmp2 = [src], 1 /* */ + sub len = len, loopcnt, 1 /* reduce len */ + movi0 ar.lc = loopcnt /* */ } { .mib - cmp.ne p_scr, p0 = 0, loopcnt // avoid loading beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid loading beyond end-point */ ;; } -.l0: // ---------------------------- // L0: Align src on 8-byte boundary +.l0: /* ---------------------------- L0: Align src on 8-byte boundary */ { .mmi - st1 [dest] = tmp2, 1 // -(p_scr) ld1 tmp2 = [src], 1 // + st1 [dest] = tmp2, 1 /* */ +(p_scr) ld1 tmp2 = [src], 1 /* */ } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt - br.cloop.dptk.few .l0 // + br.cloop.dptk.few .l0 /* */ ;; } .dest_aligned: { .mmi - and tmp4 = 7, src // ready for alignment check - shr.u elemcnt = len, 3 // elemcnt = len / 8 + and tmp4 = 7, src /* ready for alignment check */ + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ ;; } { .mib - cmp.ne p_scr, p0 = tmp4, r0 // is source also aligned - tbit.nz p_xtr, p_nxtr = src, 3 // prepare a separate move if src -} { .mib // is not 16B aligned - add ptr2 = LFETCH_DIST, dest // prefetch address + cmp.ne p_scr, p0 = tmp4, r0 /* is source also aligned */ + tbit.nz p_xtr, p_nxtr = src, 3 /* prepare a separate move if src */ +} { .mib /* is not 16B aligned */ + add ptr2 = LFETCH_DIST, dest /* prefetch address */ add ptr1 = LFETCH_DIST, src (p_scr) br.cond.dptk.many .src_not_aligned ;; } -// The optimal case, when dest, and src are aligned +/* The optimal case, when dest, and src are aligned */ .both_aligned: { .mmi .pred.rel "mutex",p_xtr,p_nxtr -(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify -(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt // Need only N to qualify - movi0 pr.rot = 1 << 16 // set rotating predicates +(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt /* Need N + 1 to qualify */ +(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt /* Need only N to qualify */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ } { .mib (p_scr) br.cond.dpnt.many .copy_full_words ;; } @@ -245,21 +245,21 @@ ENTRY(memcpy) { .mmi (p_xtr) load tempreg = [src], 8 (p_xtr) add elemcnt = -1, elemcnt - movi0 ar.ec = MEMLAT + 1 // set the epilog counter + movi0 ar.ec = MEMLAT + 1 /* set the epilog counter */ ;; } { .mmi -(p_xtr) add len = -8, len // - add asrc = 16, src // one bank apart (for USE_INT) - shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh // cater for unrolling +(p_xtr) add len = -8, len /* */ + add asrc = 16, src /* one bank apart (for USE_INT) */ + shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh /* cater for unrolling */ ;;} { .mmi add loopcnt = -1, loopcnt -(p_xtr) store [dest] = tempreg, 8 // copy the "extra" word +(p_xtr) store [dest] = tempreg, 8 /* copy the "extra" word */ nop.i 0 ;; } { .mib add adest = 16, dest - movi0 ar.lc = loopcnt // set the loop counter + movi0 ar.lc = loopcnt /* set the loop counter */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -268,7 +268,7 @@ ENTRY(memcpy) .align 32 #endif #if defined(USE_FLP) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi #if defined(USE_LFETCH) (p[0]) lfetch.nt1 [ptr2],32 @@ -290,7 +290,7 @@ ENTRY(memcpy) br.ctop.dptk.many .l1 ;; } #elif defined(USE_INT) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi (p[0]) load the_r[0] = [src], 8 (p[0]) load the_q[0] = [asrc], 8 @@ -317,58 +317,58 @@ ENTRY(memcpy) .copy_full_words: { .mib - cmp.gt p_scr, p0 = 8, len // - shr.u elemcnt = len, 3 // + cmp.gt p_scr, p0 = 8, len /* */ + shr.u elemcnt = len, 3 /* */ (p_scr) br.cond.dpnt.many .copy_bytes ;; } { .mii load tempreg = [src], 8 - add loopcnt = -1, elemcnt // + add loopcnt = -1, elemcnt /* */ ;; } { .mii - cmp.ne p_scr, p0 = 0, loopcnt // - mov ar.lc = loopcnt // + cmp.ne p_scr, p0 = 0, loopcnt /* */ + mov ar.lc = loopcnt /* */ ;; } -.l2: // ------------------------------- // L2: Max 4 words copied separately +.l2: /* ------------------------------- L2: Max 4 words copied separately */ { .mmi store [dest] = tempreg, 8 -(p_scr) load tempreg = [src], 8 // +(p_scr) load tempreg = [src], 8 /* */ add len = -8, len } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt br.cloop.dptk.few .l2 ;; } .copy_bytes: { .mib - cmp.eq p_scr, p0 = len, r0 // is len == 0 ? - add loopcnt = -1, len // len--; + cmp.eq p_scr, p0 = len, r0 /* is len == 0 ? */ + add loopcnt = -1, len /* len--; */ (p_scr) br.cond.spnt .restore_and_exit ;; } { .mii ld1 tmp2 = [src], 1 movi0 ar.lc = loopcnt - cmp.ne p_scr, p0 = 0, loopcnt // avoid load beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid load beyond end-point */ ;; } -.l3: // ------------------------------- // L3: Final byte move +.l3: /* ------------------------------- L3: Final byte move */ { .mmi st1 [dest] = tmp2, 1 (p_scr) ld1 tmp2 = [src], 1 } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt br.cloop.dptk.few .l3 ;; } .restore_and_exit: { .mmi - movi0 pr = saved_pr, -1 // restore the predicate registers + movi0 pr = saved_pr, -1 /* restore the predicate registers */ ;; } { .mib - movi0 ar.lc = saved_lc // restore the loop counter + movi0 ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 ;; } @@ -376,41 +376,41 @@ ENTRY(memcpy) .src_not_aligned: { .mmi cmp.gt p_scr, p0 = 16, len - and sh1 = 7, src // sh1 = src % 8 - shr.u loopcnt = len, 4 // element-cnt = len / 16 + and sh1 = 7, src /* sh1 = src % 8 */ + shr.u loopcnt = len, 4 /* element-cnt = len / 16 */ } { .mib add tmp4 = @ltoff(.table), gp add tmp3 = @ltoff(.loop56), gp -(p_scr) br.cond.dpnt.many .copy_bytes // do byte by byte if too few +(p_scr) br.cond.dpnt.many .copy_bytes /* do byte by byte if too few */ ;; } { .mmi - and asrc = -8, src // asrc = (-8) -- align src for loop - add loopcnt = -1, loopcnt // loopcnt-- - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) + and asrc = -8, src /* asrc = (-8) -- align src for loop */ + add loopcnt = -1, loopcnt /* loopcnt-- */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ } { .mmi - ld8 ptable = [tmp4] // ptable = &table - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - and tmp2 = -16, len // tmp2 = len & -OPSIZ + ld8 ptable = [tmp4] /* ptable = &table */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + and tmp2 = -16, len /* tmp2 = len & -OPSIZ */ ;; } { .mmi - add tmp3 = ptable, sh1 // tmp3 = &table + sh1 - add src = src, tmp2 // src += len & (-16) - movi0 ar.lc = loopcnt // set LC + add tmp3 = ptable, sh1 /* tmp3 = &table + sh1 */ + add src = src, tmp2 /* src += len & (-16) */ + movi0 ar.lc = loopcnt /* set LC */ ;; } { .mmi - ld8 tmp4 = [tmp3] // tmp4 = loop offset - sub len = len, tmp2 // len -= len & (-16) - movi0 ar.ec = MEMLAT + 2 // one more pass needed + ld8 tmp4 = [tmp3] /* tmp4 = loop offset */ + sub len = len, tmp2 /* len -= len & (-16) */ + movi0 ar.ec = MEMLAT + 2 /* one more pass needed */ ;; } { .mmi - ld8 s[1] = [asrc], 8 // preload - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - movi0 pr.rot = 1 << 16 // set rotating predicates + ld8 s[1] = [asrc], 8 /* preload */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ ;; } { .mib nop.m 0 movi0 b6 = loopaddr - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ ;; } LOOP(8) @@ -426,7 +426,7 @@ libc_hidden_def (memcpy) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 diff --git a/libc/string/ia64/memmove.S b/libc/string/ia64/memmove.S index 00342d8e0..beaada6fc 100644 --- a/libc/string/ia64/memmove.S +++ b/libc/string/ia64/memmove.S @@ -81,48 +81,48 @@ ENTRY(memmove) alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot .rotr r[MEMLAT + 2], q[MEMLAT + 1] .rotp p[MEMLAT + 2] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - or tmp3 = in0, in1 ;; // tmp3 = dest | src - or tmp3 = tmp3, in2 // tmp3 = dest | src | len - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp2 = r0, in0 // tmp2 = -dest - cmp.eq p6, p0 = in2, r0 // if (len == 0) -(p6) br.cond.spnt .restore_and_exit;;// return dest; - and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7 - cmp.le p6, p0 = dest, src // if dest <= src it's always safe -(p6) br.cond.spnt .forward // to copy forward + or tmp3 = in0, in1 ;; /* tmp3 = dest | src */ + or tmp3 = tmp3, in2 /* tmp3 = dest | src | len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp2 = r0, in0 /* tmp2 = -dest */ + cmp.eq p6, p0 = in2, r0 /* if (len == 0) */ +(p6) br.cond.spnt .restore_and_exit;;/* return dest; */ + and tmp4 = 7, tmp3 /* tmp4 = (dest | src | len) & 7 */ + cmp.le p6, p0 = dest, src /* if dest <= src it's always safe */ +(p6) br.cond.spnt .forward /* to copy forward */ add tmp3 = src, len;; - cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len -(p6) br.cond.spnt .backward // we have to copy backward + cmp.lt p6, p0 = dest, tmp3 /* if dest > src && dest < src + len */ +(p6) br.cond.spnt .backward /* we have to copy backward */ .forward: - shr.u loopcnt = len, 4 ;; // loopcnt = len / 16 - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .next // goto next; + shr.u loopcnt = len, 4 ;; /* loopcnt = len / 16 */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .next /* goto next; */ -// The optimal case, when dest, src and len are all multiples of 8 +/* The optimal case, when dest, src and len are all multiples of 8 */ and tmp3 = 0xf, len - mov pr.rot = 1 << 16 // set rotating predicates - mov ar.ec = MEMLAT + 1 ;; // set the epilog counter - cmp.ne p6, p0 = tmp3, r0 // do we have to copy an extra word? - adds loopcnt = -1, loopcnt;; // --loopcnt + mov pr.rot = 1 << 16 /* set rotating predicates */ + mov ar.ec = MEMLAT + 1 ;; /* set the epilog counter */ + cmp.ne p6, p0 = tmp3, r0 /* do we have to copy an extra word? */ + adds loopcnt = -1, loopcnt;; /* --loopcnt */ (p6) ld8 value = [src], 8;; -(p6) st8 [dest] = value, 8 // copy the "odd" word - mov ar.lc = loopcnt // set the loop counter +(p6) st8 [dest] = value, 8 /* copy the "odd" word */ + mov ar.lc = loopcnt /* set the loop counter */ cmp.eq p6, p0 = 8, len -(p6) br.cond.spnt .restore_and_exit;;// the one-word special case - adds adest = 8, dest // set adest one word ahead of dest - adds asrc = 8, src ;; // set asrc one word ahead of src - nop.b 0 // get the "golden" alignment for - nop.b 0 // the next loop +(p6) br.cond.spnt .restore_and_exit;;/* the one-word special case */ + adds adest = 8, dest /* set adest one word ahead of dest */ + adds asrc = 8, src ;; /* set asrc one word ahead of src */ + nop.b 0 /* get the "golden" alignment for */ + nop.b 0 /* the next loop */ .l0: (p[0]) ld8 r[0] = [src], 16 (p[0]) ld8 q[0] = [asrc], 16 @@ -130,50 +130,50 @@ ENTRY(memmove) (p[MEMLAT]) st8 [adest] = q[MEMLAT], 16 br.ctop.dptk .l0 ;; - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .next: - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - and loopcnt = 7, tmp2 // loopcnt = -dest % 8 -(p6) br.cond.spnt .cpyfew // copy byte by byte + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + and loopcnt = 7, tmp2 /* loopcnt = -dest % 8 */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp2 = -8, len // tmp2 = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len;; // len = len % 8 - adds loopcnt = -1, loopcnt // --loopcnt + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp2 = -8, len /* tmp2 = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len;; /* len = len % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ addl tmp4 = @ltoff(.table), gp addl tmp3 = @ltoff(.loop56), gp - mov ar.ec = MEMLAT + 1 // set EC - mov pr.rot = 1 << 16;; // set rotating predicates - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov ar.ec = MEMLAT + 1 /* set EC */ + mov pr.rot = 1 << 16;; /* set rotating predicates */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp2 // src += len & -OPSIZ - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - ld8 ptable = [tmp4];; // ptable = &table - add tmp3 = ptable, sh1;; // tmp3 = &table + sh1 - mov ar.ec = MEMLAT + 1 + 1 // one more pass needed - ld8 tmp4 = [tmp3];; // tmp4 = loop offset - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - ld8 r[1] = [asrc], 8;; // w0 + add src = src, tmp2 /* src += len & -OPSIZ */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + ld8 ptable = [tmp4];; /* ptable = &table */ + add tmp3 = ptable, sh1;; /* tmp3 = &table + sh1 */ + mov ar.ec = MEMLAT + 1 + 1 /* one more pass needed */ + ld8 tmp4 = [tmp3];; /* tmp4 = loop offset */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + ld8 r[1] = [asrc], 8;; /* w0 */ mov b6 = loopaddr;; - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ LOOP(8) LOOP(16) @@ -189,8 +189,8 @@ ENTRY(memmove) (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -199,36 +199,36 @@ ENTRY(memmove) st1 [dest] = value, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 -// In the case of a backward copy, optimise only the case when everything -// is a multiple of 8, otherwise copy byte by byte. The backward copy is -// used only when the blocks are overlapping and dest > src. - +/* In the case of a backward copy, optimise only the case when everything + is a multiple of 8, otherwise copy byte by byte. The backward copy is + used only when the blocks are overlapping and dest > src. +*/ .backward: - shr.u loopcnt = len, 3 // loopcnt = len / 8 - add src = src, len // src points one byte past the end - add dest = dest, len ;; // dest points one byte past the end - mov ar.ec = MEMLAT + 1 // set the epilog counter - mov pr.rot = 1 << 16 // set rotating predicates - adds loopcnt = -1, loopcnt // --loopcnt - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .bytecopy ;; // copy byte by byte backward - adds src = -8, src // src points to the last word - adds dest = -8, dest // dest points to the last word - mov ar.lc = loopcnt;; // set the loop counter + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + add src = src, len /* src points one byte past the end */ + add dest = dest, len ;; /* dest points one byte past the end */ + mov ar.ec = MEMLAT + 1 /* set the epilog counter */ + mov pr.rot = 1 << 16 /* set rotating predicates */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .bytecopy ;; /* copy byte by byte backward */ + adds src = -8, src /* src points to the last word */ + adds dest = -8, dest /* dest points to the last word */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l5: (p[0]) ld8 r[0] = [src], -8 (p[MEMLAT]) st8 [dest] = r[MEMLAT], -8 br.ctop.dptk .l5 br.cond.sptk .restore_and_exit .bytecopy: - adds src = -1, src // src points to the last byte - adds dest = -1, dest // dest points to the last byte - adds loopcnt = -1, len;; // loopcnt = len - 1 - mov ar.lc = loopcnt;; // set the loop counter + adds src = -1, src /* src points to the last byte */ + adds dest = -1, dest /* dest points to the last byte */ + adds loopcnt = -1, len;; /* loopcnt = len - 1 */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l6: (p[0]) ld1 r[0] = [src], -1 (p[MEMLAT]) st1 [dest] = r[MEMLAT], -1 @@ -239,7 +239,7 @@ END(memmove) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 diff --git a/libc/string/ia64/memset.S b/libc/string/ia64/memset.S index ed27f3f31..45df5838e 100644 --- a/libc/string/ia64/memset.S +++ b/libc/string/ia64/memset.S @@ -46,15 +46,15 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 #define fvalue f6 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_nz p7 #define p_zr p8 #define p_unalgn p9 @@ -68,7 +68,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -90,97 +90,97 @@ ENTRY(memset) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + mov ret0 = dest /* return value */ + cmp.ne p_nz, p_zr = value, r0 /* use stf.spill if value is zero */ cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 + mux1 value = value, @brcst /* create 8 identical bytes in word */ +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 higher - sub bytecnt = (MIN1+1), tmp // than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 higher */ + sub bytecnt = (MIN1+1), tmp /* than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = value, -4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = value, -2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */ } { .mmi - setf.sig fvalue=value // transfer value to FLP side + setf.sig fvalue=value /* transfer value to FLP side */ (p_y) st2 [ptr2] = value, -1 (p_n) add ptr2 = 1, ptr2 ;; } { .mmi (p_yy) st1 [ptr2] = value - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib nop.m 0 shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +(p_zr) br.cond.dptk.many .l1b /* Jump to use stf.spill */ ;; } #ifndef GAS_ALIGN_BREAKS_UNWIND_INFO - .align 32 // -------- // L1A: store ahead into cache lines; fill later + .align 32 /* -------- L1A: store ahead into cache lines; fill later */ #endif { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi -(p_scr) add loopcnt = -1, linecnt // start of stores - add ptr2 = 8, ptr1 // (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total -;; } // range +(p_scr) add loopcnt = -1, linecnt /* start of stores */ + add ptr2 = 8, ptr1 /* (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total */ +;; } /* range */ { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1a: { .mib - store [ptr9] = myval, 128 // Do stores one cache line apart + store [ptr9] = myval, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1a ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1ax: @@ -211,7 +211,7 @@ ENTRY(memset) { .mmi store [ptr2] = myval, 8 store [ptr0] = myval, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb store [ptr2] = myval, 24 @@ -219,9 +219,9 @@ ENTRY(memset) br.cloop.dptk.few .l1ax ;; } { .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 + cmp.le p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .fraction_of_line /* Branch no. 2 */ + br.cond.dpnt.many .move_bytes_from_alignment /* Branch no. 3 */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -229,32 +229,32 @@ ENTRY(memset) #else .align 32 #endif -.l1b: // ------------------ // L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -269,7 +269,7 @@ ENTRY(memset) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -277,14 +277,14 @@ ENTRY(memset) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -292,13 +292,13 @@ ENTRY(memset) (p_scr) br.cond.dpnt.many store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } #ifndef GAS_ALIGN_BREAKS_UNWIND_INFO .align 32 #endif -.l2: // ---------------------------- // L2A: store 32B in 2 cycles +.l2: /* ---------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -309,34 +309,34 @@ ENTRY(memset) ;; } store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt // -(p_y) add cnt = -8, cnt // subtract +(p_y) store [ptr1] = myval, 8 /* store */ +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt /* */ +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store -(p_yy) store [ptr1] = myval, 8 // -(p_yy) add cnt = -8, cnt // subtract +{ .mmi /* store */ +(p_yy) store [ptr1] = myval, 8 /* */ +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = value, 4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = value, 2 @@ -362,38 +362,38 @@ store_words: (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = value, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = value /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = value /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(memset) diff --git a/libc/string/ia64/strchr.S b/libc/string/ia64/strchr.S index 401a07941..66703f26d 100644 --- a/libc/string/ia64/strchr.S +++ b/libc/string/ia64/strchr.S @@ -49,15 +49,15 @@ ENTRY(strchr) .prologue alloc r2 = ar.pfs, 2, 0, 0, 0 .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 + and tmp = 7, str /* tmp = str % 8 */ mux1 chrx8 = chr, @brcst - extr.u chr = chr, 0, 8 // retain only the last byte - cmp.ne p8, p0 = r0, r0 // clear p8 + extr.u chr = chr, 0, 8 /* retain only the last byte */ + cmp.ne p8, p0 = r0, r0 /* clear p8 */ ;; - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; adds loopcnt = -1, loopcnt;; @@ -75,10 +75,10 @@ ENTRY(strchr) nop.b 0 nop.b 0 .l2: - ld8.s val2 = [ret0], 8 // don't bomb out here + ld8.s val2 = [ret0], 8 /* don't bomb out here */ czx1.r pos0 = val1 - xor tmp = val1, chrx8 // if val1 contains chr, tmp will - ;; // contain a zero in its position + xor tmp = val1, chrx8 /* if val1 contains chr, tmp will */ + ;; /* contain a zero in its position */ czx1.r poschr = tmp cmp.ne p6, p0 = 8, pos0 ;; @@ -90,21 +90,21 @@ ENTRY(strchr) mov val1 = val2 br.cond.dptk .l2 .foundit: -(p6) cmp.lt p8, p0 = pos0, poschr // we found chr and null in the word -(p8) br.cond.spnt .notfound // null was found before chr +(p6) cmp.lt p8, p0 = pos0, poschr /* we found chr and null in the word */ +(p8) br.cond.spnt .notfound /* null was found before chr */ add ret0 = ret0, poschr ;; - adds ret0 = -15, ret0 ;; // should be -16, but we decrement -.restore_and_exit: // ret0 in the next instruction - adds ret0 = -1, ret0 // ret0 was pointing 1 char too far - mov ar.lc = saved_lc // restore the loop counter + adds ret0 = -15, ret0 ;; /* should be -16, but we decrement */ +.restore_and_exit: /* ret0 in the next instruction */ + adds ret0 = -1, ret0 /* ret0 was pointing 1 char too far */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .notfound: - mov ret0 = r0 // return NULL if null was found + mov ret0 = r0 /* return NULL if null was found */ mov ar.lc = saved_lc br.ret.sptk.many b0 .recovery: adds ret0 = -8, ret0;; - ld8 val2 = [ret0], 8 // bomb out here + ld8 val2 = [ret0], 8 /* bomb out here */ br.cond.sptk .back END(strchr) libc_hidden_def (strchr) diff --git a/libc/string/ia64/strcmp.S b/libc/string/ia64/strcmp.S index d3b41e642..4da72fa10 100644 --- a/libc/string/ia64/strcmp.S +++ b/libc/string/ia64/strcmp.S @@ -42,7 +42,7 @@ ENTRY(strcmp) .loop: ld1 val1 = [s1], 1 ld1 val2 = [s2], 1 - cmp.eq p6, p0 = r0, r0 // set p6 + cmp.eq p6, p0 = r0, r0 /* set p6 */ ;; cmp.ne.and p6, p0 = val1, r0 cmp.ne.and p6, p0 = val2, r0 diff --git a/libc/string/ia64/strcpy.S b/libc/string/ia64/strcpy.S index e4a9915ca..7b002f661 100644 --- a/libc/string/ia64/strcpy.S +++ b/libc/string/ia64/strcpy.S @@ -27,8 +27,8 @@ In this form, it assumes little endian mode. For big endian mode, the the two shifts in .l2 must be inverted: - shl value = r[1], sh1 // value = w0 << sh1 - shr.u tmp = r[0], sh2 // tmp = w1 >> sh2 + shl value = r[1], sh1 // value = w0 << sh1 + shr.u tmp = r[0], sh2 // tmp = w1 >> sh2 */ #include "sysdep.h" @@ -53,62 +53,62 @@ ENTRY(strcpy) .prologue - alloc r2 = ar.pfs, 2, 0, 30, 32 + alloc r2 = ar.pfs, 2, 0, 30, 32 #define MEMLAT 2 .rotr r[MEMLAT + 2] .rotp p[MEMLAT + 1] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - sub tmp = r0, in0 ;; // tmp = -dest - mov dest = in0 // dest - mov src = in1 // src - and loopcnt = 7, tmp ;; // loopcnt = -dest % 8 + sub tmp = r0, in0 ;; /* tmp = -dest */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + and loopcnt = 7, tmp ;; /* loopcnt = -dest % 8 */ cmp.eq p6, p0 = loopcnt, r0 - adds loopcnt = -1, loopcnt // --loopcnt + adds loopcnt = -1, loopcnt /* --loopcnt */ (p6) br.cond.sptk .dest_aligned ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 c = [src], 1 // c = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 c = [src], 1 /* c = *src++ */ ;; - st1 [dest] = c, 1 // *dest++ = c + st1 [dest] = c, 1 /* *dest++ = c */ cmp.eq p6, p0 = c, r0 (p6) br.cond.dpnt .restore_and_exit br.cloop.dptk .l1 ;; .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - mov ar.lc = -1 // "infinite" loop - and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src + and sh1 = 7, src /* sh1 = src % 8 */ + mov ar.lc = -1 /* "infinite" loop */ + and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */ sub thresh = 8, sh1 - mov pr.rot = 1 << 16 // set rotating predicates - cmp.ne p7, p0 = r0, r0 // clear p7 - shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8) - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov pr.rot = 1 << 16 /* set rotating predicates */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; ld8 r[1] = [asrc],8 ;; .align 32 .l2: ld8.s r[0] = [asrc], 8 - shr.u value = r[1], sh1 ;; // value = w0 >> sh1 - czx1.r pos = value ;; // do we have an "early" zero - cmp.lt p7, p0 = pos, thresh // in w0 >> sh1? + shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */ + czx1.r pos = value ;; /* do we have an "early" zero */ + cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */ (p7) br.cond.dpnt .found0 - chk.s r[0], .recovery2 // it is safe to do that only -.back2: // after the previous test - shl tmp = r[0], sh2 // tmp = w1 << sh2 + chk.s r[0], .recovery2 /* it is safe to do that only */ +.back2: /* after the previous test */ + shl tmp = r[0], sh2 /* tmp = w1 << sh2 */ ;; - or value = value, tmp ;; // value |= tmp + or value = value, tmp ;; /* value |= tmp */ czx1.r pos = value ;; cmp.ne p7, p0 = 8, pos (p7) br.cond.dpnt .found0 - st8 [dest] = value, 8 // store val to dest + st8 [dest] = value, 8 /* store val to dest */ br.ctop.dptk .l2 ;; .src_aligned: .l3: @@ -124,14 +124,14 @@ ENTRY(strcpy) .found0: mov ar.lc = pos .l4: - extr.u c = value, 0, 8 // c = value & 0xff + extr.u c = value, 0, 8 /* c = value & 0xff */ shr.u value = value, 8 ;; st1 [dest] = c, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .recovery2: add tmp = -8, asrc ;; diff --git a/libc/string/ia64/strlen.S b/libc/string/ia64/strlen.S index 9b27a2d1b..edbe84359 100644 --- a/libc/string/ia64/strlen.S +++ b/libc/string/ia64/strlen.S @@ -50,13 +50,13 @@ ENTRY(strlen) .prologue alloc r2 = ar.pfs, 1, 0, 0, 0 .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body mov str = in0 - mov len = r0 // len = 0 - and tmp = 7, in0 // tmp = str % 8 + mov len = r0 /* len = 0 */ + and tmp = 7, in0 /* tmp = str % 8 */ ;; - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; adds loopcnt = -1, loopcnt;; @@ -69,11 +69,11 @@ ENTRY(strlen) adds len = 1, len br.cloop.dptk .l1 .str_aligned: - mov origadd = str // origadd = orig + mov origadd = str /* origadd = orig */ ld8 val1 = [str], 8;; nop.b 0 nop.b 0 -.l2: ld8.s val2 = [str], 8 // don't bomb out here +.l2: ld8.s val2 = [str], 8 /* don't bomb out here */ czx1.r pos0 = val1 ;; cmp.ne p6, p0 = 8, pos0 @@ -83,16 +83,16 @@ ENTRY(strlen) mov val1 = val2 br.cond.dptk .l2 .foundit: - sub tmp = str, origadd // tmp = crt address - orig + sub tmp = str, origadd /* tmp = crt address - orig */ add len = len, pos0;; add len = len, tmp;; adds len = -16, len .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .recovery: adds str = -8, str;; - ld8 val2 = [str], 8 // bomb out here + ld8 val2 = [str], 8 /* bomb out here */ br.cond.sptk .back END(strlen) libc_hidden_def (strlen) diff --git a/libc/string/ia64/strncmp.S b/libc/string/ia64/strncmp.S index 8e0373c7f..e31f8fbd9 100644 --- a/libc/string/ia64/strncmp.S +++ b/libc/string/ia64/strncmp.S @@ -23,7 +23,7 @@ Inputs: in0: s1 in1: s2 - in2: n + in2: n Unlike memcmp(), this function is optimized for mismatches within the first few characters. */ @@ -42,13 +42,13 @@ ENTRY(strncmp) alloc r2 = ar.pfs, 3, 0, 0, 0 mov ret0 = r0 - cmp.eq p6, p0 = r0, r0 // set p6 - cmp.eq p7, p0 = n, r0 // return immediately if n == 0 + cmp.eq p6, p0 = r0, r0 /* set p6 */ + cmp.eq p7, p0 = n, r0 /* return immediately if n == 0 */ (p7) br.cond.spnt .restore_and_exit ;; .loop: ld1 val1 = [s1], 1 ld1 val2 = [s2], 1 - adds n = -1, n // n-- + adds n = -1, n /* n-- */ ;; cmp.ne.and p6, p0 = val1, r0 cmp.ne.and p6, p0 = val2, r0 @@ -58,5 +58,5 @@ ENTRY(strncmp) sub ret0 = val1, val2 .restore_and_exit: br.ret.sptk.many b0 -END(strncmp) +END(strncmp) libc_hidden_weak (strncmp) diff --git a/libc/string/ia64/strncpy.S b/libc/string/ia64/strncpy.S index 4f1129350..3f29bbd52 100644 --- a/libc/string/ia64/strncpy.S +++ b/libc/string/ia64/strncpy.S @@ -58,64 +58,64 @@ ENTRY(strncpy) .rotr r[MEMLAT + 2] .rotp p[MEMLAT + 1] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov ar.ec = 0 // ec is not guaranteed to - // be zero upon function entry + mov saved_lc = ar.lc /* save the loop counter */ + mov ar.ec = 0 /* ec is not guaranteed to */ + /* be zero upon function entry */ .body cmp.geu p6, p5 = 24, in2 (p6) br.cond.spnt .short_len - sub tmp = r0, in0 ;; // tmp = -dest - mov len = in2 // len - mov dest = in0 // dest - mov src = in1 // src - and tmp = 7, tmp ;; // loopcnt = -dest % 8 + sub tmp = r0, in0 ;; /* tmp = -dest */ + mov len = in2 /* len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + and tmp = 7, tmp ;; /* loopcnt = -dest % 8 */ cmp.eq p6, p7 = tmp, r0 - adds loopcnt = -1, tmp // --loopcnt + adds loopcnt = -1, tmp /* --loopcnt */ (p6) br.cond.sptk .dest_aligned ;; - sub len = len, tmp // len -= -dest % 8 + sub len = len, tmp /* len -= -dest % 8 */ mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes -(p5) ld1 c = [src], 1 // c = *src++ +.l1: /* copy -dest % 8 bytes */ +(p5) ld1 c = [src], 1 /* c = *src++ */ ;; - st1 [dest] = c, 1 // *dest++ = c + st1 [dest] = c, 1 /* *dest++ = c */ cmp.ne p5, p7 = c, r0 br.cloop.dptk .l1 ;; (p7) br.cond.dpnt .found0_align -.dest_aligned: // p7 should be cleared here - shr.u c = len, 3 // c = len / 8 - and sh1 = 7, src // sh1 = src % 8 - and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src - adds c = (MEMLAT-1), c // c = (len / 8) + MEMLAT - 1 +.dest_aligned: /* p7 should be cleared here */ + shr.u c = len, 3 /* c = len / 8 */ + and sh1 = 7, src /* sh1 = src % 8 */ + and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */ + adds c = (MEMLAT-1), c /* c = (len / 8) + MEMLAT - 1 */ sub thresh = 8, sh1 - mov pr.rot = 1 << 16 // set rotating predicates - shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8) - mov ar.lc = c // "infinite" loop - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov pr.rot = 1 << 16 /* set rotating predicates */ + shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */ + mov ar.lc = c /* "infinite" loop */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - adds c = -(MEMLAT-1), c ;; // c = (len / 8) + adds c = -(MEMLAT-1), c ;; /* c = (len / 8) */ ld8 r[1] = [asrc],8 mov ar.lc = c ;; .align 32 .l2: -(p6) st8 [dest] = value, 8 // store val to dest +(p6) st8 [dest] = value, 8 /* store val to dest */ ld8.s r[0] = [asrc], 8 - shr.u value = r[1], sh1 ;; // value = w0 >> sh1 - czx1.r pos = value ;; // do we have an "early" zero - cmp.lt p7, p0 = pos, thresh // in w0 >> sh1? - adds len = -8, len // len -= 8 + shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */ + czx1.r pos = value ;; /* do we have an "early" zero */ + cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */ + adds len = -8, len /* len -= 8 */ (p7) br.cond.dpnt .nonalign_found0 - chk.s r[0], .recovery2 // it is safe to do that only -.back2: // after the previous test - shl tmp = r[0], sh2 // tmp = w1 << sh2 + chk.s r[0], .recovery2 /* it is safe to do that only */ +.back2: /* after the previous test */ + shl tmp = r[0], sh2 /* tmp = w1 << sh2 */ ;; - or value = value, tmp ;; // value |= tmp + or value = value, tmp ;; /* value |= tmp */ czx1.r pos = value ;; cmp.ne p7, p6 = 8, pos (p7) br.cond.dpnt .nonalign_found0 @@ -137,7 +137,7 @@ ENTRY(strncpy) (p[MEMLAT]) mov value = r[MEMLAT] (p[MEMLAT]) czx1.r pos = r[MEMLAT] ;; (p[MEMLAT]) cmp.ne p7, p0 = 8, pos -(p[MEMLAT]) adds len = -8, len // len -= 8 +(p[MEMLAT]) adds len = -8, len /* len -= 8 */ (p7) br.cond.dpnt .found0 (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 ;; @@ -152,7 +152,7 @@ ENTRY(strncpy) (p5) br.cond.dptk .restore_and_exit ;; mov ar.lc = len .l4: -(p6) extr.u c = value, 0, 8 // c = value & 0xff +(p6) extr.u c = value, 0, 8 /* c = value & 0xff */ (p6) shr.u value = value, 8 ;; st1 [dest] = c, 1 cmp.ne p6, p0 = c, r0 @@ -165,7 +165,7 @@ ENTRY(strncpy) mov value = 0 ;; .found0: shl tmp = pos, 3 - shr.u loopcnt = len, 4 // loopcnt = len / 16 + shr.u loopcnt = len, 4 /* loopcnt = len / 16 */ mov c = -1 ;; cmp.eq p6, p0 = loopcnt, r0 adds loopcnt = -1, loopcnt @@ -192,24 +192,24 @@ ENTRY(strncpy) st1 [dest] = r0, 1 br.cloop.dptk .l7 ;; .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .short_len: cmp.eq p5, p0 = in2, r0 adds loopcnt = -1, in2 (p5) br.cond.spnt .restore_and_exit ;; - mov ar.lc = loopcnt // p6 should be set when we get here + mov ar.lc = loopcnt /* p6 should be set when we get here */ .l8: -(p6) ld1 c = [in1], 1 // c = *src++ +(p6) ld1 c = [in1], 1 /* c = *src++ */ ;; - st1 [in0] = c, 1 // *dest++ = c + st1 [in0] = c, 1 /* *dest++ = c */ (p6) cmp.ne p6, p0 = c, r0 br.cloop.dptk .l8 ;; - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .recovery2: add c = 8, len diff --git a/libc/string/sh64/memcpy.S b/libc/string/sh64/memcpy.S index 3c0ea0c0d..470784ecd 100644 --- a/libc/string/sh64/memcpy.S +++ b/libc/string/sh64/memcpy.S @@ -151,7 +151,7 @@ Large: add r2, r4, r5 ldlo.q r3, 0, r0 addi r5, -16, r5 - movi 64+8, r27 // could subtract r7 from that. + movi 64+8, r27 /* could subtract r7 from that. */ stlo.q r2, 0, r0 sthi.q r2, 7, r0 ldx.q r22, r6, r0 diff --git a/libc/string/sh64/memset.S b/libc/string/sh64/memset.S index f588323f0..1b8812cd6 100644 --- a/libc/string/sh64/memset.S +++ b/libc/string/sh64/memset.S @@ -32,12 +32,12 @@ memset: ptabs r18, tr2 mshflo.b r3,r3,r3 add r4, r22, r23 - mperm.w r3, r63, r3 // Fill pattern now in every byte of r3 + mperm.w r3, r63, r3 /* Fill pattern now in every byte of r3 */ movi 8, r9 - bgtu/u r23, r9, tr0 // multiquad + bgtu/u r23, r9, tr0 /* multiquad */ - beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses + beqi/u r4, 0, tr2 /* Return with size 0 - ensures no mem accesses */ ldlo.q r2, 0, r7 shlli r4, 2, r4 movi -1, r8 @@ -52,20 +52,21 @@ multiquad: stlo.q r2, 0, r3 shlri r23, 3, r24 add r2, r4, r5 - beqi/u r24, 1, tr0 // lastquad + beqi/u r24, 1, tr0 /* lastquad */ pta/l loop, tr1 sub r2, r22, r25 - andi r5, -8, r20 // calculate end address and - addi r20, -7*8, r8 // loop end address; This might overflow, so we need - // to use a different test before we start the loop - bge/u r24, r9, tr1 // loop + andi r5, -8, r20 /* calculate end address and */ + addi r20, -7*8, r8 /* loop end address; This might overflow, so we need + to use a different test before we start the loop + */ + bge/u r24, r9, tr1 /* loop */ st.q r25, 8, r3 st.q r20, -8, r3 shlri r24, 1, r24 - beqi/u r24, 1, tr0 // lastquad + beqi/u r24, 1, tr0 /* lastquad */ st.q r25, 16, r3 st.q r20, -16, r3 - beqi/u r24, 2, tr0 // lastquad + beqi/u r24, 2, tr0 /* lastquad */ st.q r25, 24, r3 st.q r20, -24, r3 lastquad: @@ -73,15 +74,15 @@ lastquad: blink tr2,r63 loop: -!!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895. - // QQQ commenting out is locically correct, but sub-optimal - // QQQ Sean McGoogan - 4th April 2003. +!!! alloco r25, 32 /* QQQ comment out for short-term fix to SHUK #3895. + QQQ commenting out is locically correct, but sub-optimal + QQQ Sean McGoogan - 4th April 2003. */ st.q r25, 8, r3 st.q r25, 16, r3 st.q r25, 24, r3 st.q r25, 32, r3 addi r25, 32, r25 - bgeu/l r8, r25, tr1 // loop + bgeu/l r8, r25, tr1 /* loop */ st.q r20, -40, r3 st.q r20, -32, r3 diff --git a/libc/string/sh64/strcpy.S b/libc/string/sh64/strcpy.S index da79d5143..f317707b7 100644 --- a/libc/string/sh64/strcpy.S +++ b/libc/string/sh64/strcpy.S @@ -31,7 +31,7 @@ strcpy: addi r2, 8, r0 mcmpeq.b r4,r63,r6 SHHI r6,r7,r6 - bnei/u r6,0,tr1 // shortstring + bnei/u r6,0,tr1 /* shortstring */ pta/l no_lddst, tr2 ori r3,-8,r23 sub r2, r23, r0 @@ -41,28 +41,28 @@ strcpy: pta/l loop, tr0 ori r2,-8,r22 mcmpeq.b r5, r63, r6 - bgt/u r22, r23, tr2 // no_lddst + bgt/u r22, r23, tr2 /* no_lddst */ - // r22 < r23 : Need to do a load from the destination. - // r22 == r23 : Doesn't actually need to load from destination, - // but still can be handled here. + /* r22 < r23 : Need to do a load from the destination. */ + /* r22 == r23 : Doesn't actually need to load from destination, */ + /* but still can be handled here. */ ldlo.q r2, 0, r9 movi -1, r8 SHLO r8, r7, r8 mcmv r4, r8, r9 stlo.q r2, 0, r9 - beqi/l r6, 0, tr0 // loop + beqi/l r6, 0, tr0 /* loop */ add r5, r63, r4 addi r0, 8, r0 - blink tr1, r63 // shortstring + blink tr1, r63 /* shortstring */ no_lddst: - // r22 > r23: note that for r22 == r23 the sthi.q would clobber - // bytes before the destination region. + /* r22 > r23: note that for r22 == r23 the sthi.q would clobber */ + /* bytes before the destination region. */ stlo.q r2, 0, r4 SHHI r4, r7, r4 sthi.q r0, -1, r4 - beqi/l r6, 0, tr0 // loop + beqi/l r6, 0, tr0 /* loop */ add r5, r63, r4 addi r0, 8, r0 @@ -77,7 +77,7 @@ shortstring2: shlri r4,8,r4 addi r0,1,r0 bnei/l r5,0,tr1 - blink tr4,r63 // return + blink tr4,r63 /* return */ .balign 8 loop: @@ -86,16 +86,16 @@ loop: addi r0, 16, r0 sthi.q r0, -9, r5 mcmpeq.b r4, r63, r6 - bnei/u r6, 0, tr1 // shortstring + bnei/u r6, 0, tr1 /* shortstring */ ldx.q r0, r21, r5 stlo.q r0, -8, r4 sthi.q r0, -1, r4 mcmpeq.b r5, r63, r6 - beqi/l r6, 0, tr0 // loop + beqi/l r6, 0, tr0 /* loop */ add r5, r63, r4 addi r0, 8, r0 - blink tr1, r63 // shortstring + blink tr1, r63 /* shortstring */ .size strcpy,.-strcpy diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S index 19f3a6818..fc04c023e 100644 --- a/libc/string/xtensa/memcpy.S +++ b/libc/string/xtensa/memcpy.S @@ -83,7 +83,7 @@ __memcpy_aux: loopnez a4, 2f #else beqz a4, 2f - add a7, a3, a4 // a7 = end address for source + add a7, a3, a4 /* a7 = end address for source */ #endif 1: l8ui a6, a3, 0 addi a3, a3, 1 @@ -98,7 +98,7 @@ __memcpy_aux: /* Destination is unaligned. */ .align 4 -.Ldst1mod2: // dst is only byte aligned +.Ldst1mod2: /* dst is only byte aligned */ /* Do short copies byte-by-byte. */ _bltui a4, 7, .Lbytecopy @@ -113,7 +113,7 @@ __memcpy_aux: /* Return to main algorithm if dst is now aligned. */ _bbci.l a5, 1, .Ldstaligned -.Ldst2mod4: // dst has 16-bit alignment +.Ldst2mod4: /* dst has 16-bit alignment */ /* Do short copies byte-by-byte. */ _bltui a4, 6, .Lbytecopy @@ -134,7 +134,7 @@ __memcpy_aux: ENTRY (memcpy) /* a2 = dst, a3 = src, a4 = len */ - mov a5, a2 // copy dst so that a2 is return value + mov a5, a2 /* copy dst so that a2 is return value */ _bbsi.l a2, 0, .Ldst1mod2 _bbsi.l a2, 1, .Ldst2mod4 .Ldstaligned: @@ -152,7 +152,7 @@ ENTRY (memcpy) #else beqz a7, 2f slli a8, a7, 4 - add a8, a8, a3 // a8 = end of last 16B source chunk + add a8, a8, a3 /* a8 = end of last 16B source chunk */ #endif 1: l32i a6, a3, 0 l32i a7, a3, 4 @@ -218,18 +218,18 @@ ENTRY (memcpy) /* Copy 16 bytes per iteration for word-aligned dst and unaligned src. */ - ssa8 a3 // set shift amount from byte offset + ssa8 a3 /* set shift amount from byte offset */ #if UNALIGNED_ADDRESSES_CHECKED - and a11, a3, a8 // save unalignment offset for below - sub a3, a3, a11 // align a3 + and a11, a3, a8 /* save unalignment offset for below */ + sub a3, a3, a11 /* align a3 */ #endif - l32i a6, a3, 0 // load first word + l32i a6, a3, 0 /* load first word */ #if XCHAL_HAVE_LOOPS loopnez a7, 2f #else beqz a7, 2f slli a10, a7, 4 - add a10, a10, a3 // a10 = end of last 16B source chunk + add a10, a10, a3 /* a10 = end of last 16B source chunk */ #endif 1: l32i a7, a3, 4 l32i a8, a3, 8 @@ -273,7 +273,7 @@ ENTRY (memcpy) mov a6, a7 4: #if UNALIGNED_ADDRESSES_CHECKED - add a3, a3, a11 // readjust a3 with correct misalignment + add a3, a3, a11 /* readjust a3 with correct misalignment */ #endif bbsi.l a4, 1, 5f bbsi.l a4, 0, 6f diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S index c0928825d..076b8f001 100644 --- a/libc/string/xtensa/memset.S +++ b/libc/string/xtensa/memset.S @@ -29,7 +29,7 @@ The algorithm is as follows: Create a word with c in all byte positions. - + If the destination is aligned, set 16B chunks with a loop, and then finish up with 8B, 4B, 2B, and 1B stores conditional on the length. @@ -57,7 +57,7 @@ __memset_aux: loopnez a4, 2f #else beqz a4, 2f - add a6, a5, a4 // a6 = ending address + add a6, a5, a4 /* a6 = ending address */ #endif 1: s8i a3, a5, 0 addi a5, a5, 1 @@ -71,7 +71,7 @@ __memset_aux: .align 4 -.Ldst1mod2: // dst is only byte aligned +.Ldst1mod2: /* dst is only byte aligned */ /* Do short sizes byte-by-byte. */ bltui a4, 8, .Lbyteset @@ -84,7 +84,7 @@ __memset_aux: /* Now retest if dst is aligned. */ _bbci.l a5, 1, .Ldstaligned -.Ldst2mod4: // dst has 16-bit alignment +.Ldst2mod4: /* dst has 16-bit alignment */ /* Do short sizes byte-by-byte. */ bltui a4, 8, .Lbyteset @@ -108,7 +108,7 @@ ENTRY (memset) slli a7, a3, 16 or a3, a3, a7 - mov a5, a2 // copy dst so that a2 is return value + mov a5, a2 /* copy dst so that a2 is return value */ /* Check if dst is unaligned. */ _bbsi.l a2, 0, .Ldst1mod2 @@ -124,7 +124,7 @@ ENTRY (memset) #else beqz a7, 2f slli a6, a7, 4 - add a6, a6, a5 // a6 = end of last 16B chunk + add a6, a6, a5 /* a6 = end of last 16B chunk */ #endif /* Set 16 bytes per iteration. */ 1: s32i a3, a5, 0 diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S index 622bb27ed..ac058a2bf 100644 --- a/libc/string/xtensa/strcmp.S +++ b/libc/string/xtensa/strcmp.S @@ -45,35 +45,35 @@ ENTRY (strcmp) /* a2 = s1, a3 = s2 */ - l8ui a8, a2, 0 // byte 0 from s1 - l8ui a9, a3, 0 // byte 0 from s2 - movi a10, 3 // mask + l8ui a8, a2, 0 /* byte 0 from s1 */ + l8ui a9, a3, 0 /* byte 0 from s2 */ + movi a10, 3 /* mask */ bne a8, a9, .Lretdiff or a11, a2, a3 bnone a11, a10, .Laligned - xor a11, a2, a3 // compare low two bits of s1 and s2 - bany a11, a10, .Lunaligned // if they have different alignment + xor a11, a2, a3 /* compare low two bits of s1 and s2 */ + bany a11, a10, .Lunaligned /* if they have different alignment */ /* s1/s2 are not word-aligned. */ - addi a2, a2, 1 // advance s1 - beqz a8, .Leq // bytes equal, if zero, strings are equal - addi a3, a3, 1 // advance s2 - bnone a2, a10, .Laligned // if s1/s2 now aligned - l8ui a8, a2, 0 // byte 1 from s1 - l8ui a9, a3, 0 // byte 1 from s2 - addi a2, a2, 1 // advance s1 - bne a8, a9, .Lretdiff // if different, return difference - beqz a8, .Leq // bytes equal, if zero, strings are equal - addi a3, a3, 1 // advance s2 - bnone a2, a10, .Laligned // if s1/s2 now aligned - l8ui a8, a2, 0 // byte 2 from s1 - l8ui a9, a3, 0 // byte 2 from s2 - addi a2, a2, 1 // advance s1 - bne a8, a9, .Lretdiff // if different, return difference - beqz a8, .Leq // bytes equal, if zero, strings are equal - addi a3, a3, 1 // advance s2 + addi a2, a2, 1 /* advance s1 */ + beqz a8, .Leq /* bytes equal, if zero, strings are equal */ + addi a3, a3, 1 /* advance s2 */ + bnone a2, a10, .Laligned /* if s1/s2 now aligned */ + l8ui a8, a2, 0 /* byte 1 from s1 */ + l8ui a9, a3, 0 /* byte 1 from s2 */ + addi a2, a2, 1 /* advance s1 */ + bne a8, a9, .Lretdiff /* if different, return difference */ + beqz a8, .Leq /* bytes equal, if zero, strings are equal */ + addi a3, a3, 1 /* advance s2 */ + bnone a2, a10, .Laligned /* if s1/s2 now aligned */ + l8ui a8, a2, 0 /* byte 2 from s1 */ + l8ui a9, a3, 0 /* byte 2 from s2 */ + addi a2, a2, 1 /* advance s1 */ + bne a8, a9, .Lretdiff /* if different, return difference */ + beqz a8, .Leq /* bytes equal, if zero, strings are equal */ + addi a3, a3, 1 /* advance s2 */ j .Laligned /* s1 and s2 have different alignment. @@ -92,8 +92,8 @@ ENTRY (strcmp) /* (2 mod 4) alignment for loop instruction */ .Lunaligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, .Lretdiff // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, .Lretdiff /* loop forever (almost anyway) */ #endif .Lnextbyte: l8ui a8, a2, 0 @@ -131,32 +131,32 @@ ENTRY (strcmp) #if XCHAL_HAVE_LOOPS .Laligned: .begin no-transform - l32r a4, .Lmask0 // mask for byte 0 + l32r a4, .Lmask0 /* mask for byte 0 */ l32r a7, .Lmask4 /* Loop forever. (a4 is more than than the maximum number of iterations) */ loop a4, .Laligned_done /* First unrolled loop body. */ - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ slli a5, a8, 1 bne a8, a9, .Lwne2 or a9, a8, a5 bnall a9, a7, .Lprobeq /* Second unrolled loop body. */ - l32i a8, a2, 4 // get word from s1+4 - l32i a9, a3, 4 // get word from s2+4 + l32i a8, a2, 4 /* get word from s1+4 */ + l32i a9, a3, 4 /* get word from s2+4 */ slli a5, a8, 1 bne a8, a9, .Lwne2 or a9, a8, a5 bnall a9, a7, .Lprobeq2 - addi a2, a2, 8 // advance s1 pointer - addi a3, a3, 8 // advance s2 pointer + addi a2, a2, 8 /* advance s1 pointer */ + addi a3, a3, 8 /* advance s2 pointer */ .Laligned_done: - or a1, a1, a1 // nop + or a1, a1, a1 /* nop */ .Lprobeq2: /* Adjust pointers to account for the loop unrolling. */ @@ -166,15 +166,15 @@ ENTRY (strcmp) #else /* !XCHAL_HAVE_LOOPS */ .Laligned: - movi a4, MASK0 // mask for byte 0 + movi a4, MASK0 /* mask for byte 0 */ movi a7, MASK4 j .Lfirstword .Lnextword: - addi a2, a2, 4 // advance s1 pointer - addi a3, a3, 4 // advance s2 pointer + addi a2, a2, 4 /* advance s1 pointer */ + addi a3, a3, 4 /* advance s2 pointer */ .Lfirstword: - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ slli a5, a8, 1 bne a8, a9, .Lwne2 or a9, a8, a5 @@ -186,49 +186,49 @@ ENTRY (strcmp) /* Words are probably equal, but check for sure. If not, loop over the rest of string using normal algorithm. */ - bnone a8, a4, .Leq // if byte 0 is zero - l32r a5, .Lmask1 // mask for byte 1 - l32r a6, .Lmask2 // mask for byte 2 - bnone a8, a5, .Leq // if byte 1 is zero - l32r a7, .Lmask3 // mask for byte 3 - bnone a8, a6, .Leq // if byte 2 is zero - bnone a8, a7, .Leq // if byte 3 is zero - addi.n a2, a2, 4 // advance s1 pointer - addi.n a3, a3, 4 // advance s2 pointer + bnone a8, a4, .Leq /* if byte 0 is zero */ + l32r a5, .Lmask1 /* mask for byte 1 */ + l32r a6, .Lmask2 /* mask for byte 2 */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + l32r a7, .Lmask3 /* mask for byte 3 */ + bnone a8, a6, .Leq /* if byte 2 is zero */ + bnone a8, a7, .Leq /* if byte 3 is zero */ + addi.n a2, a2, 4 /* advance s1 pointer */ + addi.n a3, a3, 4 /* advance s2 pointer */ #if XCHAL_HAVE_LOOPS /* align (1 mod 4) */ - loop a4, .Leq // loop forever (a4 is bigger than max iters) + loop a4, .Leq /* loop forever (a4 is bigger than max iters) */ .end no-transform - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 - addi a2, a2, 4 // advance s1 pointer + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ + addi a2, a2, 4 /* advance s1 pointer */ bne a8, a9, .Lwne - bnone a8, a4, .Leq // if byte 0 is zero - bnone a8, a5, .Leq // if byte 1 is zero - bnone a8, a6, .Leq // if byte 2 is zero - bnone a8, a7, .Leq // if byte 3 is zero - addi a3, a3, 4 // advance s2 pointer + bnone a8, a4, .Leq /* if byte 0 is zero */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + bnone a8, a6, .Leq /* if byte 2 is zero */ + bnone a8, a7, .Leq /* if byte 3 is zero */ + addi a3, a3, 4 /* advance s2 pointer */ #else /* !XCHAL_HAVE_LOOPS */ j .Lfirstword2 .Lnextword2: - addi a3, a3, 4 // advance s2 pointer + addi a3, a3, 4 /* advance s2 pointer */ .Lfirstword2: - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 - addi a2, a2, 4 // advance s1 pointer + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ + addi a2, a2, 4 /* advance s1 pointer */ bne a8, a9, .Lwne - bnone a8, a4, .Leq // if byte 0 is zero - bnone a8, a5, .Leq // if byte 1 is zero - bnone a8, a6, .Leq // if byte 2 is zero - bany a8, a7, .Lnextword2 // if byte 3 is zero + bnone a8, a4, .Leq /* if byte 0 is zero */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + bnone a8, a6, .Leq /* if byte 2 is zero */ + bany a8, a7, .Lnextword2 /* if byte 3 is zero */ #endif /* !XCHAL_HAVE_LOOPS */ /* Words are equal; some byte is zero. */ -.Leq: movi a2, 0 // return equal +.Leq: movi a2, 0 /* return equal */ retw .Lwne2: /* Words are not equal. On big-endian processors, if none of the @@ -243,18 +243,18 @@ ENTRY (strcmp) .Lposreturn: movi a2, 1 retw -.Lsomezero: // There is probably some zero byte. +.Lsomezero: /* There is probably some zero byte. */ #endif /* __XTENSA_EB__ */ .Lwne: /* Words are not equal. */ - xor a2, a8, a9 // get word with nonzero in byte that differs - bany a2, a4, .Ldiff0 // if byte 0 differs - movi a5, MASK1 // mask for byte 1 - bnone a8, a4, .Leq // if byte 0 is zero - bany a2, a5, .Ldiff1 // if byte 1 differs - movi a6, MASK2 // mask for byte 2 - bnone a8, a5, .Leq // if byte 1 is zero - bany a2, a6, .Ldiff2 // if byte 2 differs - bnone a8, a6, .Leq // if byte 2 is zero + xor a2, a8, a9 /* get word with nonzero in byte that differs */ + bany a2, a4, .Ldiff0 /* if byte 0 differs */ + movi a5, MASK1 /* mask for byte 1 */ + bnone a8, a4, .Leq /* if byte 0 is zero */ + bany a2, a5, .Ldiff1 /* if byte 1 differs */ + movi a6, MASK2 /* mask for byte 2 */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + bany a2, a6, .Ldiff2 /* if byte 2 differs */ + bnone a8, a6, .Leq /* if byte 2 is zero */ #ifdef __XTENSA_EB__ .Ldiff3: .Ldiff2: diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S index 108070384..dc0a15175 100644 --- a/libc/string/xtensa/strcpy.S +++ b/libc/string/xtensa/strcpy.S @@ -36,7 +36,7 @@ ENTRY (strcpy) /* a2 = dst, a3 = src */ - mov a10, a2 // leave dst in return value register + mov a10, a2 /* leave dst in return value register */ movi a4, MASK0 movi a5, MASK1 movi a6, MASK2 @@ -51,23 +51,23 @@ ENTRY (strcpy) j .Ldstunaligned -.Lsrc1mod2: // src address is odd - l8ui a8, a3, 0 // get byte 0 - addi a3, a3, 1 // advance src pointer - s8i a8, a10, 0 // store byte 0 - beqz a8, 1f // if byte 0 is zero - addi a10, a10, 1 // advance dst pointer - bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned +.Lsrc1mod2: /* src address is odd */ + l8ui a8, a3, 0 /* get byte 0 */ + addi a3, a3, 1 /* advance src pointer */ + s8i a8, a10, 0 /* store byte 0 */ + beqz a8, 1f /* if byte 0 is zero */ + addi a10, a10, 1 /* advance dst pointer */ + bbci.l a3, 1, .Lsrcaligned /* if src is now word-aligned */ -.Lsrc2mod4: // src address is 2 mod 4 - l8ui a8, a3, 0 // get byte 0 +.Lsrc2mod4: /* src address is 2 mod 4 */ + l8ui a8, a3, 0 /* get byte 0 */ /* 1-cycle interlock */ - s8i a8, a10, 0 // store byte 0 - beqz a8, 1f // if byte 0 is zero - l8ui a8, a3, 1 // get byte 0 - addi a3, a3, 2 // advance src pointer - s8i a8, a10, 1 // store byte 0 - addi a10, a10, 2 // advance dst pointer + s8i a8, a10, 0 /* store byte 0 */ + beqz a8, 1f /* if byte 0 is zero */ + l8ui a8, a3, 1 /* get byte 0 */ + addi a3, a3, 2 /* advance src pointer */ + s8i a8, a10, 1 /* store byte 0 */ + addi a10, a10, 2 /* advance dst pointer */ bnez a8, .Lsrcaligned 1: retw @@ -78,28 +78,28 @@ ENTRY (strcpy) #if XCHAL_HAVE_LOOPS /* (2 mod 4) alignment for loop instruction */ .Laligned: - _movi.n a8, 0 // set up for the maximum loop count - loop a8, .Lz3 // loop forever (almost anyway) - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a4, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - bnone a8, a7, .Lz3 // if byte 3 is zero - addi a10, a10, 4 // advance dst pointer + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, .Lz3 /* loop forever (almost anyway) */ + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a4, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + bnone a8, a7, .Lz3 /* if byte 3 is zero */ + addi a10, a10, 4 /* advance dst pointer */ #else /* !XCHAL_HAVE_LOOPS */ -1: addi a10, a10, 4 // advance dst pointer +1: addi a10, a10, 4 /* advance dst pointer */ .Laligned: - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a4, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - bany a8, a7, 1b // if byte 3 is zero + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a4, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + bany a8, a7, 1b /* if byte 3 is zero */ #endif /* !XCHAL_HAVE_LOOPS */ .Lz3: /* Byte 3 is zero. */ @@ -133,8 +133,8 @@ ENTRY (strcpy) .Ldstunaligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, 2f // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, 2f /* loop forever (almost anyway) */ #endif 1: l8ui a8, a3, 0 addi a3, a3, 1 diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S index dd72c16fa..9ee4995f4 100644 --- a/libc/string/xtensa/strlen.S +++ b/libc/string/xtensa/strlen.S @@ -36,7 +36,7 @@ ENTRY (strlen) /* a2 = s */ - addi a3, a2, -4 // because we overincrement at the end + addi a3, a2, -4 /* because we overincrement at the end */ movi a4, MASK0 movi a5, MASK1 movi a6, MASK2 @@ -45,21 +45,21 @@ ENTRY (strlen) bbsi.l a2, 1, .L2mod4 j .Laligned -.L1mod2: // address is odd - l8ui a8, a3, 4 // get byte 0 - addi a3, a3, 1 // advance string pointer - beqz a8, .Lz3 // if byte 0 is zero - bbci.l a3, 1, .Laligned // if string pointer is now word-aligned +.L1mod2: /* address is odd */ + l8ui a8, a3, 4 /* get byte 0 */ + addi a3, a3, 1 /* advance string pointer */ + beqz a8, .Lz3 /* if byte 0 is zero */ + bbci.l a3, 1, .Laligned /* if string pointer is now word-aligned */ -.L2mod4: // address is 2 mod 4 - addi a3, a3, 2 // advance ptr for aligned access - l32i a8, a3, 0 // get word with first two bytes of string - bnone a8, a6, .Lz2 // if byte 2 (of word, not string) is zero - bany a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero +.L2mod4: /* address is 2 mod 4 */ + addi a3, a3, 2 /* advance ptr for aligned access */ + l32i a8, a3, 0 /* get word with first two bytes of string */ + bnone a8, a6, .Lz2 /* if byte 2 (of word, not string) is zero */ + bany a8, a7, .Laligned /* if byte 3 (of word, not string) is nonzero */ /* Byte 3 is zero. */ - addi a3, a3, 3 // point to zero byte - sub a2, a3, a2 // subtract to get length + addi a3, a3, 3 /* point to zero byte */ + sub a2, a3, a2 /* subtract to get length */ retw @@ -69,36 +69,36 @@ ENTRY (strlen) /* (2 mod 4) alignment for loop instruction */ .Laligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, .Lz3 // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, .Lz3 /* loop forever (almost anyway) */ #endif -1: l32i a8, a3, 4 // get next word of string - addi a3, a3, 4 // advance string pointer - bnone a8, a4, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero +1: l32i a8, a3, 4 /* get next word of string */ + addi a3, a3, 4 /* advance string pointer */ + bnone a8, a4, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ #if XCHAL_HAVE_LOOPS - bnone a8, a7, .Lz3 // if byte 3 is zero + bnone a8, a7, .Lz3 /* if byte 3 is zero */ #else - bany a8, a7, 1b // repeat if byte 3 is non-zero + bany a8, a7, 1b /* repeat if byte 3 is non-zero */ #endif .Lz3: /* Byte 3 is zero. */ - addi a3, a3, 3 // point to zero byte + addi a3, a3, 3 /* point to zero byte */ /* Fall through.... */ .Lz0: /* Byte 0 is zero. */ - sub a2, a3, a2 // subtract to get length + sub a2, a3, a2 /* subtract to get length */ retw .Lz1: /* Byte 1 is zero. */ - addi a3, a3, 1 // point to zero byte - sub a2, a3, a2 // subtract to get length + addi a3, a3, 1 /* point to zero byte */ + sub a2, a3, a2 /* subtract to get length */ retw .Lz2: /* Byte 2 is zero. */ - addi a3, a3, 2 // point to zero byte - sub a2, a3, a2 // subtract to get length + addi a3, a3, 2 /* point to zero byte */ + sub a2, a3, a2 /* subtract to get length */ retw libc_hidden_def (strlen) diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S index 7ba2ef77d..fe3ec894c 100644 --- a/libc/string/xtensa/strncpy.S +++ b/libc/string/xtensa/strncpy.S @@ -41,29 +41,29 @@ .literal_position __strncpy_aux: -.Lsrc1mod2: // src address is odd - l8ui a8, a3, 0 // get byte 0 - addi a3, a3, 1 // advance src pointer - s8i a8, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, .Lret // if n is zero - addi a10, a10, 1 // advance dst pointer - beqz a8, .Lfill // if byte 0 is zero - bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned - -.Lsrc2mod4: // src address is 2 mod 4 - l8ui a8, a3, 0 // get byte 0 - addi a4, a4, -1 // decrement n - s8i a8, a10, 0 // store byte 0 - beqz a4, .Lret // if n is zero - addi a10, a10, 1 // advance dst pointer - beqz a8, .Lfill // if byte 0 is zero - l8ui a8, a3, 1 // get byte 0 - addi a3, a3, 2 // advance src pointer - s8i a8, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, .Lret // if n is zero - addi a10, a10, 1 // advance dst pointer +.Lsrc1mod2: /* src address is odd */ + l8ui a8, a3, 0 /* get byte 0 */ + addi a3, a3, 1 /* advance src pointer */ + s8i a8, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, .Lret /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ + beqz a8, .Lfill /* if byte 0 is zero */ + bbci.l a3, 1, .Lsrcaligned /* if src is now word-aligned */ + +.Lsrc2mod4: /* src address is 2 mod 4 */ + l8ui a8, a3, 0 /* get byte 0 */ + addi a4, a4, -1 /* decrement n */ + s8i a8, a10, 0 /* store byte 0 */ + beqz a4, .Lret /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ + beqz a8, .Lfill /* if byte 0 is zero */ + l8ui a8, a3, 1 /* get byte 0 */ + addi a3, a3, 2 /* advance src pointer */ + s8i a8, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, .Lret /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ bnez a8, .Lsrcaligned j .Lfill @@ -74,8 +74,8 @@ __strncpy_aux: ENTRY (strncpy) /* a2 = dst, a3 = src */ - mov a10, a2 // leave dst in return value register - beqz a4, .Lret // if n is zero + mov a10, a2 /* leave dst in return value register */ + beqz a4, .Lret /* if n is zero */ movi a11, MASK0 movi a5, MASK1 @@ -125,28 +125,28 @@ ENTRY (strncpy) .Lfillcleanup: /* Fill leftover (1 to 3) bytes with zero. */ - s8i a9, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n + s8i a9, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ addi a10, a10, 1 - bnez a4, .Lfillcleanup + bnez a4, .Lfillcleanup 2: retw - -.Lfill1mod2: // dst address is odd - s8i a9, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, 2b // if n is zero - addi a10, a10, 1 // advance dst pointer - bbci.l a10, 1, .Lfillaligned // if dst is now word-aligned - -.Lfill2mod4: // dst address is 2 mod 4 - s8i a9, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, 2b // if n is zero - s8i a9, a10, 1 // store byte 1 - addi a4, a4, -1 // decrement n - beqz a4, 2b // if n is zero - addi a10, a10, 2 // advance dst pointer + +.Lfill1mod2: /* dst address is odd */ + s8i a9, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, 2b /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ + bbci.l a10, 1, .Lfillaligned /* if dst is now word-aligned */ + +.Lfill2mod4: /* dst address is 2 mod 4 */ + s8i a9, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, 2b /* if n is zero */ + s8i a9, a10, 1 /* store byte 1 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, 2b /* if n is zero */ + addi a10, a10, 2 /* advance dst pointer */ j .Lfillaligned @@ -156,32 +156,32 @@ ENTRY (strncpy) /* (2 mod 4) alignment for loop instruction */ .Laligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, 1f // loop forever (almost anyway) - blti a4, 5, .Ldstunaligned // n is near limit; do one at a time - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a11, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - addi a4, a4, -4 // decrement n - addi a10, a10, 4 // advance dst pointer - bnone a8, a7, .Lfill // if byte 3 is zero -1: + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, 1f /* loop forever (almost anyway) */ + blti a4, 5, .Ldstunaligned /* n is near limit; do one at a time */ + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a11, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + addi a4, a4, -4 /* decrement n */ + addi a10, a10, 4 /* advance dst pointer */ + bnone a8, a7, .Lfill /* if byte 3 is zero */ +1: #else /* !XCHAL_HAVE_LOOPS */ -1: blti a4, 5, .Ldstunaligned // n is near limit; do one at a time - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a11, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - addi a4, a4, -4 // decrement n - addi a10, a10, 4 // advance dst pointer - bany a8, a7, 1b // no zeroes +1: blti a4, 5, .Ldstunaligned /* n is near limit; do one at a time */ + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a11, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + addi a4, a4, -4 /* decrement n */ + addi a10, a10, 4 /* advance dst pointer */ + bany a8, a7, 1b /* no zeroes */ #endif /* !XCHAL_HAVE_LOOPS */ j .Lfill @@ -191,8 +191,8 @@ ENTRY (strncpy) movi a8, 0 #endif s8i a8, a10, 0 - addi a4, a4, -1 // decrement n - addi a10, a10, 1 // advance dst pointer + addi a4, a4, -1 /* decrement n */ + addi a10, a10, 1 /* advance dst pointer */ j .Lfill .Lz1: /* Byte 1 is zero. */ @@ -200,8 +200,8 @@ ENTRY (strncpy) extui a8, a8, 16, 16 #endif s16i a8, a10, 0 - addi a4, a4, -2 // decrement n - addi a10, a10, 2 // advance dst pointer + addi a4, a4, -2 /* decrement n */ + addi a10, a10, 2 /* advance dst pointer */ j .Lfill .Lz2: /* Byte 2 is zero. */ @@ -211,8 +211,8 @@ ENTRY (strncpy) s16i a8, a10, 0 movi a8, 0 s8i a8, a10, 2 - addi a4, a4, -3 // decrement n - addi a10, a10, 3 // advance dst pointer + addi a4, a4, -3 /* decrement n */ + addi a10, a10, 3 /* advance dst pointer */ j .Lfill .align 4 @@ -220,8 +220,8 @@ ENTRY (strncpy) .Ldstunaligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, 2f // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, 2f /* loop forever (almost anyway) */ #endif 1: l8ui a8, a3, 0 addi a3, a3, 1 |