diff options
Diffstat (limited to 'libc/string/ia64')
-rw-r--r-- | libc/string/ia64/bzero.S | 136 | ||||
-rw-r--r-- | libc/string/ia64/memccpy.S | 102 | ||||
-rw-r--r-- | libc/string/ia64/memchr.S | 32 | ||||
-rw-r--r-- | libc/string/ia64/memcmp.S | 94 | ||||
-rw-r--r-- | libc/string/ia64/memcpy.S | 160 | ||||
-rw-r--r-- | libc/string/ia64/memmove.S | 170 | ||||
-rw-r--r-- | libc/string/ia64/memset.S | 178 | ||||
-rw-r--r-- | libc/string/ia64/strchr.S | 32 | ||||
-rw-r--r-- | libc/string/ia64/strcmp.S | 2 | ||||
-rw-r--r-- | libc/string/ia64/strcpy.S | 66 | ||||
-rw-r--r-- | libc/string/ia64/strlen.S | 18 | ||||
-rw-r--r-- | libc/string/ia64/strncmp.S | 10 | ||||
-rw-r--r-- | libc/string/ia64/strncpy.S | 90 |
13 files changed, 545 insertions, 545 deletions
diff --git a/libc/string/ia64/bzero.S b/libc/string/ia64/bzero.S index d390838a6..1f0f8b7ac 100644 --- a/libc/string/ia64/bzero.S +++ b/libc/string/ia64/bzero.S @@ -47,13 +47,13 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_unalgn p9 #define p_y p11 #define p_n p12 @@ -65,7 +65,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -87,49 +87,49 @@ ENTRY(bzero) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value + mov ret0 = dest /* return value */ nop.m 0 cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest nop.i 0 -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 - sub bytecnt = (MIN1+1), tmp // higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 */ + sub bytecnt = (MIN1+1), tmp /* higher than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = r0,-4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = r0,-2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */ } { .mmi nop.m 0 (p_y) st2 [ptr2] = r0,-1 @@ -138,44 +138,44 @@ ENTRY(bzero) { .mmi (p_yy) st1 [ptr2] = r0 - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib - nop.m 0 + nop.m 0 shr.u linecnt = cnt, LSIZE_SH nop.b 0 ;; } .align 32 -.l1b: // ------------------// L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -190,7 +190,7 @@ ENTRY(bzero) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -198,14 +198,14 @@ ENTRY(bzero) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -213,11 +213,11 @@ ENTRY(bzero) (p_scr) br.cond.dpnt.many .store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } .align 32 -.l2: // -----------------------------// L2A: store 32B in 2 cycles +.l2: /* ----------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -228,38 +228,38 @@ ENTRY(bzero) ;; } .store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store +(p_y) store [ptr1] = myval, 8 /* store */ (p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store +{ .mmi /* store */ (p_yy) store [ptr1] = myval, 8 -(p_yy) add cnt = -8, cnt // subtract +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = r0,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = r0,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? + tbit.nz.unc p_y, p0 = cnt, 0 /* should we terminate with a st1 ? */ ;; } { .mib @@ -281,38 +281,38 @@ ENTRY(bzero) (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = r0, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = r0, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = r0 // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = r0 // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = r0 /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = r0 /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(bzero) diff --git a/libc/string/ia64/memccpy.S b/libc/string/ia64/memccpy.S index 1afba3637..259d680bc 100644 --- a/libc/string/ia64/memccpy.S +++ b/libc/string/ia64/memccpy.S @@ -23,7 +23,7 @@ Inputs: in0: dest in1: src - in2: char + in2: char in3: byte count This implementation assumes little endian mode (UM.be = 0). @@ -69,75 +69,75 @@ ENTRY(memccpy) .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] .rotp p[MEMLAT + 6 + 1] - mov ret0 = r0 // return NULL if no match + mov ret0 = r0 /* return NULL if no match */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers - mov dest = in0 // dest + mov saved_pr = pr /* save the predicate registers */ + mov dest = in0 /* dest */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov saved_ec = ar.ec // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ + mov saved_ec = ar.ec /* save the loop counter */ .body - mov src = in1 // src - extr.u char = in2, 0, 8 // char - mov len = in3 // len - sub tmp = r0, in0 // tmp = -dest - cmp.ne p7, p0 = r0, r0 // clear p7 + mov src = in1 /* src */ + extr.u char = in2, 0, 8 /* char */ + mov len = in3 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - mov ar.ec = 0 // ec not guaranteed zero on entry -(p6) br.cond.spnt .cpyfew // copy byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + mov ar.ec = 0 /* ec not guaranteed zero on entry */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 mux1 charx8 = char, @brcst (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ cmp.eq p6, p0 = value, char (p6) br.cond.spnt .foundit br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 6 + 1 // six more passes needed - ld8 r[1] = [asrc], 8 // r[1] = w0 - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 6 + 1 /* six more passes needed */ + ld8 r[1] = [asrc], 8 /* r[1] = w0 */ + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ ALIGN(32) .l2: -(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[0]) ld8.s r[0] = [asrc], 8 /* r[0] = w1 */ +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) xor tmp3[0] = val[1], charx8 (p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1] -(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't - // valid - rollback! +(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 /* our data isn't */ + /* valid - rollback! */ (p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1] (p6) br.cond.spnt .gotit -(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+6]) st8 [dest] = val[3], 8 /* store val to dest */ +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ br.ctop.sptk .l2 br.cond.sptk .cpyfew .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 2 + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 2 + 1 ;; /* set EC */ .l3: (p[0]) ld8.s r[0] = [src], 8 (p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8 @@ -149,8 +149,8 @@ ENTRY(memccpy) (p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -163,14 +163,14 @@ ENTRY(memccpy) .foundit: (p6) mov ret0 = dest .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter - mov ar.ec = saved_ec ;; // restore the epilog counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ + mov ar.ec = saved_ec ;; /* restore the epilog counter */ br.ret.sptk.many b0 .gotit: .pred.rel "mutex" p6, p7 -(p6) mov value = val[3] // if coming from l2 -(p7) mov value = r[MEMLAT+2] // if coming from l3 +(p6) mov value = val[3] /* if coming from l2 */ +(p7) mov value = r[MEMLAT+2] /* if coming from l3 */ mov ar.lc = pos0[1] ;; .l5: extr.u tmp = value, 0, 8 ;; diff --git a/libc/string/ia64/memchr.S b/libc/string/ia64/memchr.S index 2bf078fe6..0246b5997 100644 --- a/libc/string/ia64/memchr.S +++ b/libc/string/ia64/memchr.S @@ -62,18 +62,18 @@ ENTRY(__memchr) .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2] .rotp p[MEMLAT+3] .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .save pr, saved_pr - mov saved_pr = pr // save the predicates + mov saved_pr = pr /* save the predicates */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 - cmp.ne p7, p0 = r0, r0 // clear p7 - extr.u chr = in1, 0, 8 // chr = (unsigned char) in1 + and tmp = 7, str /* tmp = str % 8 */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + extr.u chr = in1, 0, 8 /* chr = (unsigned char) in1 */ mov len = in2 - cmp.gtu p6, p0 = 16, in2 // use a simple loop for short -(p6) br.cond.spnt .srchfew ;; // searches - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + cmp.gtu p6, p0 = 16, in2 /* use a simple loop for short */ +(p6) br.cond.spnt .srchfew ;; /* searches */ + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; sub len = len, loopcnt @@ -86,12 +86,12 @@ ENTRY(__memchr) (p6) br.cond.spnt .foundit br.cloop.sptk .l1 ;; .str_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // remaining len = len & 7 + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* remaining len = len & 7 */ adds loopcnt = -1, loopcnt mov ar.ec = MEMLAT + 3 - mux1 chrx8 = chr, @brcst ;; // get a word full of chr + mux1 chrx8 = chr, @brcst ;; /* get a word full of chr */ mov ar.lc = loopcnt mov pr.rot = 1 << 16 ;; .l2: @@ -114,12 +114,12 @@ ENTRY(__memchr) (p6) br.cond.dpnt .foundit br.cloop.sptk .l3 ;; .notfound: - cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here) - mov ret0 = r0 ;; // return NULL + cmp.ne p6, p0 = r0, r0 /* clear p6 (p7 was already 0 when we got here) */ + mov ret0 = r0 ;; /* return NULL */ .foundit: .pred.rel "mutex" p6, p7 -(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3 -(p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2 +(p6) adds ret0 = -1, ret0 /* if we got here from l1 or l3 */ +(p7) add ret0 = addr[MEMLAT+2], poschr[1] /* if we got here from l2 */ mov pr = saved_pr, -1 mov ar.lc = saved_lc br.ret.sptk.many b0 diff --git a/libc/string/ia64/memcmp.S b/libc/string/ia64/memcmp.S index 8b0c096ce..adb1a20de 100644 --- a/libc/string/ia64/memcmp.S +++ b/libc/string/ia64/memcmp.S @@ -28,7 +28,7 @@ In this form, it assumes little endian mode. For big endian mode, the the two shifts in .l2 must be inverted: - shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 + shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 shr.u tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 >> sh2 and all the mux1 instructions should be replaced by plain mov's. */ @@ -36,8 +36,8 @@ #include "sysdep.h" #undef ret -#define OP_T_THRES 16 -#define OPSIZ 8 +#define OP_T_THRES 16 +#define OPSIZ 8 #define MEMLAT 2 #define start r15 @@ -56,85 +56,85 @@ ENTRY(memcmp) .prologue - alloc r2 = ar.pfs, 3, 37, 0, 40 + alloc r2 = ar.pfs, 3, 37, 0, 40 .rotr r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2] .rotp p[MEMLAT + 4 + 1] - mov ret0 = r0 // by default return value = 0 + mov ret0 = r0 /* by default return value = 0 */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp = r0, in0 // tmp = -dest + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES -(p6) br.cond.spnt .cmpfew // compare byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ +(p6) br.cond.spnt .cmpfew /* compare byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value1 = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value1 = [src], 1 /* value = *src++ */ ld1 value2 = [dest], 1 ;; cmp.ne p6, p0 = value1, value2 (p6) br.cond.spnt .done br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 4 + 1 // four more passes needed - ld8 r[1] = [asrc], 8 ;; // r[1] = w0 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 4 + 1 /* four more passes needed */ + ld8 r[1] = [asrc], 8 ;; /* r[1] = w0 */ .align 32 -// We enter this loop with p6 cleared by the above comparison +/* We enter this loop with p6 cleared by the above comparison */ .l2: -(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1 +(p[0]) ld8 r[0] = [asrc], 8 /* r[0] = w1 */ (p[0]) ld8 q[0] = [dest], 8 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) cmp.ne p6, p0 = q[MEMLAT + 4], val[1] -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ (p6) br.cond.spnt .l2exit br.ctop.sptk .l2 br.cond.sptk .cmpfew .l3exit: mux1 value1 = r[MEMLAT], @rev mux1 value2 = q[MEMLAT], @rev - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ .l2exit: (p6) mux1 value1 = val[1], @rev (p6) mux1 value2 = q[MEMLAT + 4], @rev ;; cmp.ltu p6, p7 = value2, value1 ;; (p6) mov ret0 = -1 (p7) mov ret0 = 1 - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 1 ;; /* set EC */ .l3: (p[0]) ld8 r[0] = [src], 8 (p[0]) ld8 q[0] = [dest], 8 @@ -142,8 +142,8 @@ ENTRY(memcmp) (p6) br.cond.spnt .l3exit br.ctop.dptk .l3 ;; .cmpfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -154,10 +154,10 @@ ENTRY(memcmp) (p6) br.cond.spnt .done br.cloop.dptk .l4 ;; .done: -(p6) sub ret0 = value2, value1 // don't execute it if falling thru +(p6) sub ret0 = value2, value1 /* don't execute it if falling thru */ .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 END(memcmp) libc_hidden_def (memcmp) diff --git a/libc/string/ia64/memcpy.S b/libc/string/ia64/memcpy.S index 810eb0c0e..6c48a72d9 100644 --- a/libc/string/ia64/memcpy.S +++ b/libc/string/ia64/memcpy.S @@ -42,8 +42,8 @@ #define LFETCH_DIST 500 -#define ALIGN_UNROLL_no 4 // no. of elements -#define ALIGN_UNROLL_sh 2 // (shift amount) +#define ALIGN_UNROLL_no 4 /* no. of elements */ +#define ALIGN_UNROLL_sh 2 /* (shift amount) */ #define MEMLAT 8 #define Nrot ((4*(MEMLAT+2) + 7) & ~7) @@ -168,76 +168,76 @@ ENTRY(memcpy) .rotr r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1] .rotp p[MEMLAT+2] .rotf fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1] - mov ret0 = in0 // return tmp2 = dest + mov ret0 = in0 /* return tmp2 = dest */ .save pr, saved_pr - movi0 saved_pr = pr // save the predicate registers + movi0 saved_pr = pr /* save the predicate registers */ } { .mmi - and tmp4 = 7, in0 // check if destination is aligned - mov dest = in0 // dest - mov src = in1 // src + and tmp4 = 7, in0 /* check if destination is aligned */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ ;; } { .mii - cmp.eq p_scr, p0 = in2, r0 // if (len == 0) + cmp.eq p_scr, p0 = in2, r0 /* if (len == 0) */ .save ar.lc, saved_lc - movi0 saved_lc = ar.lc // save the loop counter + movi0 saved_lc = ar.lc /* save the loop counter */ .body - cmp.ge p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRESH + cmp.ge p_few, p0 = OP_T_THRES, in2 /* is len <= OP_T_THRESH */ } { .mbb - mov len = in2 // len -(p_scr) br.cond.dpnt.few .restore_and_exit // Branch no. 1: return dest -(p_few) br.cond.dpnt.many .copy_bytes // Branch no. 2: copy byte by byte + mov len = in2 /* len */ +(p_scr) br.cond.dpnt.few .restore_and_exit /* Branch no. 1: return dest */ +(p_few) br.cond.dpnt.many .copy_bytes /* Branch no. 2: copy byte by byte */ ;; } { .mmi #if defined(USE_LFETCH) - lfetch.nt1 [dest] // - lfetch.nt1 [src] // + lfetch.nt1 [dest] /* */ + lfetch.nt1 [src] /* */ #endif - shr.u elemcnt = len, 3 // elemcnt = len / 8 + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ } { .mib - cmp.eq p_scr, p0 = tmp4, r0 // is destination aligned? - sub loopcnt = 7, tmp4 // + cmp.eq p_scr, p0 = tmp4, r0 /* is destination aligned? */ + sub loopcnt = 7, tmp4 /* */ (p_scr) br.cond.dptk.many .dest_aligned ;; } { .mmi - ld1 tmp2 = [src], 1 // - sub len = len, loopcnt, 1 // reduce len - movi0 ar.lc = loopcnt // + ld1 tmp2 = [src], 1 /* */ + sub len = len, loopcnt, 1 /* reduce len */ + movi0 ar.lc = loopcnt /* */ } { .mib - cmp.ne p_scr, p0 = 0, loopcnt // avoid loading beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid loading beyond end-point */ ;; } -.l0: // ---------------------------- // L0: Align src on 8-byte boundary +.l0: /* ---------------------------- L0: Align src on 8-byte boundary */ { .mmi - st1 [dest] = tmp2, 1 // -(p_scr) ld1 tmp2 = [src], 1 // + st1 [dest] = tmp2, 1 /* */ +(p_scr) ld1 tmp2 = [src], 1 /* */ } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt - br.cloop.dptk.few .l0 // + br.cloop.dptk.few .l0 /* */ ;; } .dest_aligned: { .mmi - and tmp4 = 7, src // ready for alignment check - shr.u elemcnt = len, 3 // elemcnt = len / 8 + and tmp4 = 7, src /* ready for alignment check */ + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ ;; } { .mib - cmp.ne p_scr, p0 = tmp4, r0 // is source also aligned - tbit.nz p_xtr, p_nxtr = src, 3 // prepare a separate move if src -} { .mib // is not 16B aligned - add ptr2 = LFETCH_DIST, dest // prefetch address + cmp.ne p_scr, p0 = tmp4, r0 /* is source also aligned */ + tbit.nz p_xtr, p_nxtr = src, 3 /* prepare a separate move if src */ +} { .mib /* is not 16B aligned */ + add ptr2 = LFETCH_DIST, dest /* prefetch address */ add ptr1 = LFETCH_DIST, src (p_scr) br.cond.dptk.many .src_not_aligned ;; } -// The optimal case, when dest, and src are aligned +/* The optimal case, when dest, and src are aligned */ .both_aligned: { .mmi .pred.rel "mutex",p_xtr,p_nxtr -(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify -(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt // Need only N to qualify - movi0 pr.rot = 1 << 16 // set rotating predicates +(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt /* Need N + 1 to qualify */ +(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt /* Need only N to qualify */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ } { .mib (p_scr) br.cond.dpnt.many .copy_full_words ;; } @@ -245,21 +245,21 @@ ENTRY(memcpy) { .mmi (p_xtr) load tempreg = [src], 8 (p_xtr) add elemcnt = -1, elemcnt - movi0 ar.ec = MEMLAT + 1 // set the epilog counter + movi0 ar.ec = MEMLAT + 1 /* set the epilog counter */ ;; } { .mmi -(p_xtr) add len = -8, len // - add asrc = 16, src // one bank apart (for USE_INT) - shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh // cater for unrolling +(p_xtr) add len = -8, len /* */ + add asrc = 16, src /* one bank apart (for USE_INT) */ + shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh /* cater for unrolling */ ;;} { .mmi add loopcnt = -1, loopcnt -(p_xtr) store [dest] = tempreg, 8 // copy the "extra" word +(p_xtr) store [dest] = tempreg, 8 /* copy the "extra" word */ nop.i 0 ;; } { .mib add adest = 16, dest - movi0 ar.lc = loopcnt // set the loop counter + movi0 ar.lc = loopcnt /* set the loop counter */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -268,7 +268,7 @@ ENTRY(memcpy) .align 32 #endif #if defined(USE_FLP) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi #if defined(USE_LFETCH) (p[0]) lfetch.nt1 [ptr2],32 @@ -290,7 +290,7 @@ ENTRY(memcpy) br.ctop.dptk.many .l1 ;; } #elif defined(USE_INT) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi (p[0]) load the_r[0] = [src], 8 (p[0]) load the_q[0] = [asrc], 8 @@ -317,58 +317,58 @@ ENTRY(memcpy) .copy_full_words: { .mib - cmp.gt p_scr, p0 = 8, len // - shr.u elemcnt = len, 3 // + cmp.gt p_scr, p0 = 8, len /* */ + shr.u elemcnt = len, 3 /* */ (p_scr) br.cond.dpnt.many .copy_bytes ;; } { .mii load tempreg = [src], 8 - add loopcnt = -1, elemcnt // + add loopcnt = -1, elemcnt /* */ ;; } { .mii - cmp.ne p_scr, p0 = 0, loopcnt // - mov ar.lc = loopcnt // + cmp.ne p_scr, p0 = 0, loopcnt /* */ + mov ar.lc = loopcnt /* */ ;; } -.l2: // ------------------------------- // L2: Max 4 words copied separately +.l2: /* ------------------------------- L2: Max 4 words copied separately */ { .mmi store [dest] = tempreg, 8 -(p_scr) load tempreg = [src], 8 // +(p_scr) load tempreg = [src], 8 /* */ add len = -8, len } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt br.cloop.dptk.few .l2 ;; } .copy_bytes: { .mib - cmp.eq p_scr, p0 = len, r0 // is len == 0 ? - add loopcnt = -1, len // len--; + cmp.eq p_scr, p0 = len, r0 /* is len == 0 ? */ + add loopcnt = -1, len /* len--; */ (p_scr) br.cond.spnt .restore_and_exit ;; } { .mii ld1 tmp2 = [src], 1 movi0 ar.lc = loopcnt - cmp.ne p_scr, p0 = 0, loopcnt // avoid load beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid load beyond end-point */ ;; } -.l3: // ------------------------------- // L3: Final byte move +.l3: /* ------------------------------- L3: Final byte move */ { .mmi st1 [dest] = tmp2, 1 (p_scr) ld1 tmp2 = [src], 1 } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt br.cloop.dptk.few .l3 ;; } .restore_and_exit: { .mmi - movi0 pr = saved_pr, -1 // restore the predicate registers + movi0 pr = saved_pr, -1 /* restore the predicate registers */ ;; } { .mib - movi0 ar.lc = saved_lc // restore the loop counter + movi0 ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 ;; } @@ -376,41 +376,41 @@ ENTRY(memcpy) .src_not_aligned: { .mmi cmp.gt p_scr, p0 = 16, len - and sh1 = 7, src // sh1 = src % 8 - shr.u loopcnt = len, 4 // element-cnt = len / 16 + and sh1 = 7, src /* sh1 = src % 8 */ + shr.u loopcnt = len, 4 /* element-cnt = len / 16 */ } { .mib add tmp4 = @ltoff(.table), gp add tmp3 = @ltoff(.loop56), gp -(p_scr) br.cond.dpnt.many .copy_bytes // do byte by byte if too few +(p_scr) br.cond.dpnt.many .copy_bytes /* do byte by byte if too few */ ;; } { .mmi - and asrc = -8, src // asrc = (-8) -- align src for loop - add loopcnt = -1, loopcnt // loopcnt-- - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) + and asrc = -8, src /* asrc = (-8) -- align src for loop */ + add loopcnt = -1, loopcnt /* loopcnt-- */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ } { .mmi - ld8 ptable = [tmp4] // ptable = &table - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - and tmp2 = -16, len // tmp2 = len & -OPSIZ + ld8 ptable = [tmp4] /* ptable = &table */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + and tmp2 = -16, len /* tmp2 = len & -OPSIZ */ ;; } { .mmi - add tmp3 = ptable, sh1 // tmp3 = &table + sh1 - add src = src, tmp2 // src += len & (-16) - movi0 ar.lc = loopcnt // set LC + add tmp3 = ptable, sh1 /* tmp3 = &table + sh1 */ + add src = src, tmp2 /* src += len & (-16) */ + movi0 ar.lc = loopcnt /* set LC */ ;; } { .mmi - ld8 tmp4 = [tmp3] // tmp4 = loop offset - sub len = len, tmp2 // len -= len & (-16) - movi0 ar.ec = MEMLAT + 2 // one more pass needed + ld8 tmp4 = [tmp3] /* tmp4 = loop offset */ + sub len = len, tmp2 /* len -= len & (-16) */ + movi0 ar.ec = MEMLAT + 2 /* one more pass needed */ ;; } { .mmi - ld8 s[1] = [asrc], 8 // preload - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - movi0 pr.rot = 1 << 16 // set rotating predicates + ld8 s[1] = [asrc], 8 /* preload */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ ;; } { .mib nop.m 0 movi0 b6 = loopaddr - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ ;; } LOOP(8) @@ -426,7 +426,7 @@ libc_hidden_def (memcpy) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 diff --git a/libc/string/ia64/memmove.S b/libc/string/ia64/memmove.S index 00342d8e0..beaada6fc 100644 --- a/libc/string/ia64/memmove.S +++ b/libc/string/ia64/memmove.S @@ -81,48 +81,48 @@ ENTRY(memmove) alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot .rotr r[MEMLAT + 2], q[MEMLAT + 1] .rotp p[MEMLAT + 2] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - or tmp3 = in0, in1 ;; // tmp3 = dest | src - or tmp3 = tmp3, in2 // tmp3 = dest | src | len - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp2 = r0, in0 // tmp2 = -dest - cmp.eq p6, p0 = in2, r0 // if (len == 0) -(p6) br.cond.spnt .restore_and_exit;;// return dest; - and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7 - cmp.le p6, p0 = dest, src // if dest <= src it's always safe -(p6) br.cond.spnt .forward // to copy forward + or tmp3 = in0, in1 ;; /* tmp3 = dest | src */ + or tmp3 = tmp3, in2 /* tmp3 = dest | src | len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp2 = r0, in0 /* tmp2 = -dest */ + cmp.eq p6, p0 = in2, r0 /* if (len == 0) */ +(p6) br.cond.spnt .restore_and_exit;;/* return dest; */ + and tmp4 = 7, tmp3 /* tmp4 = (dest | src | len) & 7 */ + cmp.le p6, p0 = dest, src /* if dest <= src it's always safe */ +(p6) br.cond.spnt .forward /* to copy forward */ add tmp3 = src, len;; - cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len -(p6) br.cond.spnt .backward // we have to copy backward + cmp.lt p6, p0 = dest, tmp3 /* if dest > src && dest < src + len */ +(p6) br.cond.spnt .backward /* we have to copy backward */ .forward: - shr.u loopcnt = len, 4 ;; // loopcnt = len / 16 - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .next // goto next; + shr.u loopcnt = len, 4 ;; /* loopcnt = len / 16 */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .next /* goto next; */ -// The optimal case, when dest, src and len are all multiples of 8 +/* The optimal case, when dest, src and len are all multiples of 8 */ and tmp3 = 0xf, len - mov pr.rot = 1 << 16 // set rotating predicates - mov ar.ec = MEMLAT + 1 ;; // set the epilog counter - cmp.ne p6, p0 = tmp3, r0 // do we have to copy an extra word? - adds loopcnt = -1, loopcnt;; // --loopcnt + mov pr.rot = 1 << 16 /* set rotating predicates */ + mov ar.ec = MEMLAT + 1 ;; /* set the epilog counter */ + cmp.ne p6, p0 = tmp3, r0 /* do we have to copy an extra word? */ + adds loopcnt = -1, loopcnt;; /* --loopcnt */ (p6) ld8 value = [src], 8;; -(p6) st8 [dest] = value, 8 // copy the "odd" word - mov ar.lc = loopcnt // set the loop counter +(p6) st8 [dest] = value, 8 /* copy the "odd" word */ + mov ar.lc = loopcnt /* set the loop counter */ cmp.eq p6, p0 = 8, len -(p6) br.cond.spnt .restore_and_exit;;// the one-word special case - adds adest = 8, dest // set adest one word ahead of dest - adds asrc = 8, src ;; // set asrc one word ahead of src - nop.b 0 // get the "golden" alignment for - nop.b 0 // the next loop +(p6) br.cond.spnt .restore_and_exit;;/* the one-word special case */ + adds adest = 8, dest /* set adest one word ahead of dest */ + adds asrc = 8, src ;; /* set asrc one word ahead of src */ + nop.b 0 /* get the "golden" alignment for */ + nop.b 0 /* the next loop */ .l0: (p[0]) ld8 r[0] = [src], 16 (p[0]) ld8 q[0] = [asrc], 16 @@ -130,50 +130,50 @@ ENTRY(memmove) (p[MEMLAT]) st8 [adest] = q[MEMLAT], 16 br.ctop.dptk .l0 ;; - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .next: - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - and loopcnt = 7, tmp2 // loopcnt = -dest % 8 -(p6) br.cond.spnt .cpyfew // copy byte by byte + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + and loopcnt = 7, tmp2 /* loopcnt = -dest % 8 */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp2 = -8, len // tmp2 = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len;; // len = len % 8 - adds loopcnt = -1, loopcnt // --loopcnt + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp2 = -8, len /* tmp2 = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len;; /* len = len % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ addl tmp4 = @ltoff(.table), gp addl tmp3 = @ltoff(.loop56), gp - mov ar.ec = MEMLAT + 1 // set EC - mov pr.rot = 1 << 16;; // set rotating predicates - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov ar.ec = MEMLAT + 1 /* set EC */ + mov pr.rot = 1 << 16;; /* set rotating predicates */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp2 // src += len & -OPSIZ - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - ld8 ptable = [tmp4];; // ptable = &table - add tmp3 = ptable, sh1;; // tmp3 = &table + sh1 - mov ar.ec = MEMLAT + 1 + 1 // one more pass needed - ld8 tmp4 = [tmp3];; // tmp4 = loop offset - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - ld8 r[1] = [asrc], 8;; // w0 + add src = src, tmp2 /* src += len & -OPSIZ */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + ld8 ptable = [tmp4];; /* ptable = &table */ + add tmp3 = ptable, sh1;; /* tmp3 = &table + sh1 */ + mov ar.ec = MEMLAT + 1 + 1 /* one more pass needed */ + ld8 tmp4 = [tmp3];; /* tmp4 = loop offset */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + ld8 r[1] = [asrc], 8;; /* w0 */ mov b6 = loopaddr;; - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ LOOP(8) LOOP(16) @@ -189,8 +189,8 @@ ENTRY(memmove) (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -199,36 +199,36 @@ ENTRY(memmove) st1 [dest] = value, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 -// In the case of a backward copy, optimise only the case when everything -// is a multiple of 8, otherwise copy byte by byte. The backward copy is -// used only when the blocks are overlapping and dest > src. - +/* In the case of a backward copy, optimise only the case when everything + is a multiple of 8, otherwise copy byte by byte. The backward copy is + used only when the blocks are overlapping and dest > src. +*/ .backward: - shr.u loopcnt = len, 3 // loopcnt = len / 8 - add src = src, len // src points one byte past the end - add dest = dest, len ;; // dest points one byte past the end - mov ar.ec = MEMLAT + 1 // set the epilog counter - mov pr.rot = 1 << 16 // set rotating predicates - adds loopcnt = -1, loopcnt // --loopcnt - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .bytecopy ;; // copy byte by byte backward - adds src = -8, src // src points to the last word - adds dest = -8, dest // dest points to the last word - mov ar.lc = loopcnt;; // set the loop counter + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + add src = src, len /* src points one byte past the end */ + add dest = dest, len ;; /* dest points one byte past the end */ + mov ar.ec = MEMLAT + 1 /* set the epilog counter */ + mov pr.rot = 1 << 16 /* set rotating predicates */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .bytecopy ;; /* copy byte by byte backward */ + adds src = -8, src /* src points to the last word */ + adds dest = -8, dest /* dest points to the last word */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l5: (p[0]) ld8 r[0] = [src], -8 (p[MEMLAT]) st8 [dest] = r[MEMLAT], -8 br.ctop.dptk .l5 br.cond.sptk .restore_and_exit .bytecopy: - adds src = -1, src // src points to the last byte - adds dest = -1, dest // dest points to the last byte - adds loopcnt = -1, len;; // loopcnt = len - 1 - mov ar.lc = loopcnt;; // set the loop counter + adds src = -1, src /* src points to the last byte */ + adds dest = -1, dest /* dest points to the last byte */ + adds loopcnt = -1, len;; /* loopcnt = len - 1 */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l6: (p[0]) ld1 r[0] = [src], -1 (p[MEMLAT]) st1 [dest] = r[MEMLAT], -1 @@ -239,7 +239,7 @@ END(memmove) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 diff --git a/libc/string/ia64/memset.S b/libc/string/ia64/memset.S index ed27f3f31..45df5838e 100644 --- a/libc/string/ia64/memset.S +++ b/libc/string/ia64/memset.S @@ -46,15 +46,15 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 #define fvalue f6 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_nz p7 #define p_zr p8 #define p_unalgn p9 @@ -68,7 +68,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -90,97 +90,97 @@ ENTRY(memset) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + mov ret0 = dest /* return value */ + cmp.ne p_nz, p_zr = value, r0 /* use stf.spill if value is zero */ cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 + mux1 value = value, @brcst /* create 8 identical bytes in word */ +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 higher - sub bytecnt = (MIN1+1), tmp // than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 higher */ + sub bytecnt = (MIN1+1), tmp /* than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = value, -4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = value, -2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */ } { .mmi - setf.sig fvalue=value // transfer value to FLP side + setf.sig fvalue=value /* transfer value to FLP side */ (p_y) st2 [ptr2] = value, -1 (p_n) add ptr2 = 1, ptr2 ;; } { .mmi (p_yy) st1 [ptr2] = value - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib nop.m 0 shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +(p_zr) br.cond.dptk.many .l1b /* Jump to use stf.spill */ ;; } #ifndef GAS_ALIGN_BREAKS_UNWIND_INFO - .align 32 // -------- // L1A: store ahead into cache lines; fill later + .align 32 /* -------- L1A: store ahead into cache lines; fill later */ #endif { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi -(p_scr) add loopcnt = -1, linecnt // start of stores - add ptr2 = 8, ptr1 // (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total -;; } // range +(p_scr) add loopcnt = -1, linecnt /* start of stores */ + add ptr2 = 8, ptr1 /* (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total */ +;; } /* range */ { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1a: { .mib - store [ptr9] = myval, 128 // Do stores one cache line apart + store [ptr9] = myval, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1a ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1ax: @@ -211,7 +211,7 @@ ENTRY(memset) { .mmi store [ptr2] = myval, 8 store [ptr0] = myval, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb store [ptr2] = myval, 24 @@ -219,9 +219,9 @@ ENTRY(memset) br.cloop.dptk.few .l1ax ;; } { .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 + cmp.le p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .fraction_of_line /* Branch no. 2 */ + br.cond.dpnt.many .move_bytes_from_alignment /* Branch no. 3 */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -229,32 +229,32 @@ ENTRY(memset) #else .align 32 #endif -.l1b: // ------------------ // L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -269,7 +269,7 @@ ENTRY(memset) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -277,14 +277,14 @@ ENTRY(memset) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -292,13 +292,13 @@ ENTRY(memset) (p_scr) br.cond.dpnt.many store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } #ifndef GAS_ALIGN_BREAKS_UNWIND_INFO .align 32 #endif -.l2: // ---------------------------- // L2A: store 32B in 2 cycles +.l2: /* ---------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -309,34 +309,34 @@ ENTRY(memset) ;; } store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt // -(p_y) add cnt = -8, cnt // subtract +(p_y) store [ptr1] = myval, 8 /* store */ +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt /* */ +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store -(p_yy) store [ptr1] = myval, 8 // -(p_yy) add cnt = -8, cnt // subtract +{ .mmi /* store */ +(p_yy) store [ptr1] = myval, 8 /* */ +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = value, 4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = value, 2 @@ -362,38 +362,38 @@ store_words: (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = value, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = value /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = value /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(memset) diff --git a/libc/string/ia64/strchr.S b/libc/string/ia64/strchr.S index 401a07941..66703f26d 100644 --- a/libc/string/ia64/strchr.S +++ b/libc/string/ia64/strchr.S @@ -49,15 +49,15 @@ ENTRY(strchr) .prologue alloc r2 = ar.pfs, 2, 0, 0, 0 .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 + and tmp = 7, str /* tmp = str % 8 */ mux1 chrx8 = chr, @brcst - extr.u chr = chr, 0, 8 // retain only the last byte - cmp.ne p8, p0 = r0, r0 // clear p8 + extr.u chr = chr, 0, 8 /* retain only the last byte */ + cmp.ne p8, p0 = r0, r0 /* clear p8 */ ;; - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; adds loopcnt = -1, loopcnt;; @@ -75,10 +75,10 @@ ENTRY(strchr) nop.b 0 nop.b 0 .l2: - ld8.s val2 = [ret0], 8 // don't bomb out here + ld8.s val2 = [ret0], 8 /* don't bomb out here */ czx1.r pos0 = val1 - xor tmp = val1, chrx8 // if val1 contains chr, tmp will - ;; // contain a zero in its position + xor tmp = val1, chrx8 /* if val1 contains chr, tmp will */ + ;; /* contain a zero in its position */ czx1.r poschr = tmp cmp.ne p6, p0 = 8, pos0 ;; @@ -90,21 +90,21 @@ ENTRY(strchr) mov val1 = val2 br.cond.dptk .l2 .foundit: -(p6) cmp.lt p8, p0 = pos0, poschr // we found chr and null in the word -(p8) br.cond.spnt .notfound // null was found before chr +(p6) cmp.lt p8, p0 = pos0, poschr /* we found chr and null in the word */ +(p8) br.cond.spnt .notfound /* null was found before chr */ add ret0 = ret0, poschr ;; - adds ret0 = -15, ret0 ;; // should be -16, but we decrement -.restore_and_exit: // ret0 in the next instruction - adds ret0 = -1, ret0 // ret0 was pointing 1 char too far - mov ar.lc = saved_lc // restore the loop counter + adds ret0 = -15, ret0 ;; /* should be -16, but we decrement */ +.restore_and_exit: /* ret0 in the next instruction */ + adds ret0 = -1, ret0 /* ret0 was pointing 1 char too far */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .notfound: - mov ret0 = r0 // return NULL if null was found + mov ret0 = r0 /* return NULL if null was found */ mov ar.lc = saved_lc br.ret.sptk.many b0 .recovery: adds ret0 = -8, ret0;; - ld8 val2 = [ret0], 8 // bomb out here + ld8 val2 = [ret0], 8 /* bomb out here */ br.cond.sptk .back END(strchr) libc_hidden_def (strchr) diff --git a/libc/string/ia64/strcmp.S b/libc/string/ia64/strcmp.S index d3b41e642..4da72fa10 100644 --- a/libc/string/ia64/strcmp.S +++ b/libc/string/ia64/strcmp.S @@ -42,7 +42,7 @@ ENTRY(strcmp) .loop: ld1 val1 = [s1], 1 ld1 val2 = [s2], 1 - cmp.eq p6, p0 = r0, r0 // set p6 + cmp.eq p6, p0 = r0, r0 /* set p6 */ ;; cmp.ne.and p6, p0 = val1, r0 cmp.ne.and p6, p0 = val2, r0 diff --git a/libc/string/ia64/strcpy.S b/libc/string/ia64/strcpy.S index e4a9915ca..7b002f661 100644 --- a/libc/string/ia64/strcpy.S +++ b/libc/string/ia64/strcpy.S @@ -27,8 +27,8 @@ In this form, it assumes little endian mode. For big endian mode, the the two shifts in .l2 must be inverted: - shl value = r[1], sh1 // value = w0 << sh1 - shr.u tmp = r[0], sh2 // tmp = w1 >> sh2 + shl value = r[1], sh1 // value = w0 << sh1 + shr.u tmp = r[0], sh2 // tmp = w1 >> sh2 */ #include "sysdep.h" @@ -53,62 +53,62 @@ ENTRY(strcpy) .prologue - alloc r2 = ar.pfs, 2, 0, 30, 32 + alloc r2 = ar.pfs, 2, 0, 30, 32 #define MEMLAT 2 .rotr r[MEMLAT + 2] .rotp p[MEMLAT + 1] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - sub tmp = r0, in0 ;; // tmp = -dest - mov dest = in0 // dest - mov src = in1 // src - and loopcnt = 7, tmp ;; // loopcnt = -dest % 8 + sub tmp = r0, in0 ;; /* tmp = -dest */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + and loopcnt = 7, tmp ;; /* loopcnt = -dest % 8 */ cmp.eq p6, p0 = loopcnt, r0 - adds loopcnt = -1, loopcnt // --loopcnt + adds loopcnt = -1, loopcnt /* --loopcnt */ (p6) br.cond.sptk .dest_aligned ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 c = [src], 1 // c = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 c = [src], 1 /* c = *src++ */ ;; - st1 [dest] = c, 1 // *dest++ = c + st1 [dest] = c, 1 /* *dest++ = c */ cmp.eq p6, p0 = c, r0 (p6) br.cond.dpnt .restore_and_exit br.cloop.dptk .l1 ;; .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - mov ar.lc = -1 // "infinite" loop - and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src + and sh1 = 7, src /* sh1 = src % 8 */ + mov ar.lc = -1 /* "infinite" loop */ + and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */ sub thresh = 8, sh1 - mov pr.rot = 1 << 16 // set rotating predicates - cmp.ne p7, p0 = r0, r0 // clear p7 - shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8) - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov pr.rot = 1 << 16 /* set rotating predicates */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; ld8 r[1] = [asrc],8 ;; .align 32 .l2: ld8.s r[0] = [asrc], 8 - shr.u value = r[1], sh1 ;; // value = w0 >> sh1 - czx1.r pos = value ;; // do we have an "early" zero - cmp.lt p7, p0 = pos, thresh // in w0 >> sh1? + shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */ + czx1.r pos = value ;; /* do we have an "early" zero */ + cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */ (p7) br.cond.dpnt .found0 - chk.s r[0], .recovery2 // it is safe to do that only -.back2: // after the previous test - shl tmp = r[0], sh2 // tmp = w1 << sh2 + chk.s r[0], .recovery2 /* it is safe to do that only */ +.back2: /* after the previous test */ + shl tmp = r[0], sh2 /* tmp = w1 << sh2 */ ;; - or value = value, tmp ;; // value |= tmp + or value = value, tmp ;; /* value |= tmp */ czx1.r pos = value ;; cmp.ne p7, p0 = 8, pos (p7) br.cond.dpnt .found0 - st8 [dest] = value, 8 // store val to dest + st8 [dest] = value, 8 /* store val to dest */ br.ctop.dptk .l2 ;; .src_aligned: .l3: @@ -124,14 +124,14 @@ ENTRY(strcpy) .found0: mov ar.lc = pos .l4: - extr.u c = value, 0, 8 // c = value & 0xff + extr.u c = value, 0, 8 /* c = value & 0xff */ shr.u value = value, 8 ;; st1 [dest] = c, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .recovery2: add tmp = -8, asrc ;; diff --git a/libc/string/ia64/strlen.S b/libc/string/ia64/strlen.S index 9b27a2d1b..edbe84359 100644 --- a/libc/string/ia64/strlen.S +++ b/libc/string/ia64/strlen.S @@ -50,13 +50,13 @@ ENTRY(strlen) .prologue alloc r2 = ar.pfs, 1, 0, 0, 0 .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body mov str = in0 - mov len = r0 // len = 0 - and tmp = 7, in0 // tmp = str % 8 + mov len = r0 /* len = 0 */ + and tmp = 7, in0 /* tmp = str % 8 */ ;; - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; adds loopcnt = -1, loopcnt;; @@ -69,11 +69,11 @@ ENTRY(strlen) adds len = 1, len br.cloop.dptk .l1 .str_aligned: - mov origadd = str // origadd = orig + mov origadd = str /* origadd = orig */ ld8 val1 = [str], 8;; nop.b 0 nop.b 0 -.l2: ld8.s val2 = [str], 8 // don't bomb out here +.l2: ld8.s val2 = [str], 8 /* don't bomb out here */ czx1.r pos0 = val1 ;; cmp.ne p6, p0 = 8, pos0 @@ -83,16 +83,16 @@ ENTRY(strlen) mov val1 = val2 br.cond.dptk .l2 .foundit: - sub tmp = str, origadd // tmp = crt address - orig + sub tmp = str, origadd /* tmp = crt address - orig */ add len = len, pos0;; add len = len, tmp;; adds len = -16, len .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .recovery: adds str = -8, str;; - ld8 val2 = [str], 8 // bomb out here + ld8 val2 = [str], 8 /* bomb out here */ br.cond.sptk .back END(strlen) libc_hidden_def (strlen) diff --git a/libc/string/ia64/strncmp.S b/libc/string/ia64/strncmp.S index 8e0373c7f..e31f8fbd9 100644 --- a/libc/string/ia64/strncmp.S +++ b/libc/string/ia64/strncmp.S @@ -23,7 +23,7 @@ Inputs: in0: s1 in1: s2 - in2: n + in2: n Unlike memcmp(), this function is optimized for mismatches within the first few characters. */ @@ -42,13 +42,13 @@ ENTRY(strncmp) alloc r2 = ar.pfs, 3, 0, 0, 0 mov ret0 = r0 - cmp.eq p6, p0 = r0, r0 // set p6 - cmp.eq p7, p0 = n, r0 // return immediately if n == 0 + cmp.eq p6, p0 = r0, r0 /* set p6 */ + cmp.eq p7, p0 = n, r0 /* return immediately if n == 0 */ (p7) br.cond.spnt .restore_and_exit ;; .loop: ld1 val1 = [s1], 1 ld1 val2 = [s2], 1 - adds n = -1, n // n-- + adds n = -1, n /* n-- */ ;; cmp.ne.and p6, p0 = val1, r0 cmp.ne.and p6, p0 = val2, r0 @@ -58,5 +58,5 @@ ENTRY(strncmp) sub ret0 = val1, val2 .restore_and_exit: br.ret.sptk.many b0 -END(strncmp) +END(strncmp) libc_hidden_weak (strncmp) diff --git a/libc/string/ia64/strncpy.S b/libc/string/ia64/strncpy.S index 4f1129350..3f29bbd52 100644 --- a/libc/string/ia64/strncpy.S +++ b/libc/string/ia64/strncpy.S @@ -58,64 +58,64 @@ ENTRY(strncpy) .rotr r[MEMLAT + 2] .rotp p[MEMLAT + 1] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov ar.ec = 0 // ec is not guaranteed to - // be zero upon function entry + mov saved_lc = ar.lc /* save the loop counter */ + mov ar.ec = 0 /* ec is not guaranteed to */ + /* be zero upon function entry */ .body cmp.geu p6, p5 = 24, in2 (p6) br.cond.spnt .short_len - sub tmp = r0, in0 ;; // tmp = -dest - mov len = in2 // len - mov dest = in0 // dest - mov src = in1 // src - and tmp = 7, tmp ;; // loopcnt = -dest % 8 + sub tmp = r0, in0 ;; /* tmp = -dest */ + mov len = in2 /* len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + and tmp = 7, tmp ;; /* loopcnt = -dest % 8 */ cmp.eq p6, p7 = tmp, r0 - adds loopcnt = -1, tmp // --loopcnt + adds loopcnt = -1, tmp /* --loopcnt */ (p6) br.cond.sptk .dest_aligned ;; - sub len = len, tmp // len -= -dest % 8 + sub len = len, tmp /* len -= -dest % 8 */ mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes -(p5) ld1 c = [src], 1 // c = *src++ +.l1: /* copy -dest % 8 bytes */ +(p5) ld1 c = [src], 1 /* c = *src++ */ ;; - st1 [dest] = c, 1 // *dest++ = c + st1 [dest] = c, 1 /* *dest++ = c */ cmp.ne p5, p7 = c, r0 br.cloop.dptk .l1 ;; (p7) br.cond.dpnt .found0_align -.dest_aligned: // p7 should be cleared here - shr.u c = len, 3 // c = len / 8 - and sh1 = 7, src // sh1 = src % 8 - and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src - adds c = (MEMLAT-1), c // c = (len / 8) + MEMLAT - 1 +.dest_aligned: /* p7 should be cleared here */ + shr.u c = len, 3 /* c = len / 8 */ + and sh1 = 7, src /* sh1 = src % 8 */ + and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */ + adds c = (MEMLAT-1), c /* c = (len / 8) + MEMLAT - 1 */ sub thresh = 8, sh1 - mov pr.rot = 1 << 16 // set rotating predicates - shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8) - mov ar.lc = c // "infinite" loop - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov pr.rot = 1 << 16 /* set rotating predicates */ + shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */ + mov ar.lc = c /* "infinite" loop */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - adds c = -(MEMLAT-1), c ;; // c = (len / 8) + adds c = -(MEMLAT-1), c ;; /* c = (len / 8) */ ld8 r[1] = [asrc],8 mov ar.lc = c ;; .align 32 .l2: -(p6) st8 [dest] = value, 8 // store val to dest +(p6) st8 [dest] = value, 8 /* store val to dest */ ld8.s r[0] = [asrc], 8 - shr.u value = r[1], sh1 ;; // value = w0 >> sh1 - czx1.r pos = value ;; // do we have an "early" zero - cmp.lt p7, p0 = pos, thresh // in w0 >> sh1? - adds len = -8, len // len -= 8 + shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */ + czx1.r pos = value ;; /* do we have an "early" zero */ + cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */ + adds len = -8, len /* len -= 8 */ (p7) br.cond.dpnt .nonalign_found0 - chk.s r[0], .recovery2 // it is safe to do that only -.back2: // after the previous test - shl tmp = r[0], sh2 // tmp = w1 << sh2 + chk.s r[0], .recovery2 /* it is safe to do that only */ +.back2: /* after the previous test */ + shl tmp = r[0], sh2 /* tmp = w1 << sh2 */ ;; - or value = value, tmp ;; // value |= tmp + or value = value, tmp ;; /* value |= tmp */ czx1.r pos = value ;; cmp.ne p7, p6 = 8, pos (p7) br.cond.dpnt .nonalign_found0 @@ -137,7 +137,7 @@ ENTRY(strncpy) (p[MEMLAT]) mov value = r[MEMLAT] (p[MEMLAT]) czx1.r pos = r[MEMLAT] ;; (p[MEMLAT]) cmp.ne p7, p0 = 8, pos -(p[MEMLAT]) adds len = -8, len // len -= 8 +(p[MEMLAT]) adds len = -8, len /* len -= 8 */ (p7) br.cond.dpnt .found0 (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 ;; @@ -152,7 +152,7 @@ ENTRY(strncpy) (p5) br.cond.dptk .restore_and_exit ;; mov ar.lc = len .l4: -(p6) extr.u c = value, 0, 8 // c = value & 0xff +(p6) extr.u c = value, 0, 8 /* c = value & 0xff */ (p6) shr.u value = value, 8 ;; st1 [dest] = c, 1 cmp.ne p6, p0 = c, r0 @@ -165,7 +165,7 @@ ENTRY(strncpy) mov value = 0 ;; .found0: shl tmp = pos, 3 - shr.u loopcnt = len, 4 // loopcnt = len / 16 + shr.u loopcnt = len, 4 /* loopcnt = len / 16 */ mov c = -1 ;; cmp.eq p6, p0 = loopcnt, r0 adds loopcnt = -1, loopcnt @@ -192,24 +192,24 @@ ENTRY(strncpy) st1 [dest] = r0, 1 br.cloop.dptk .l7 ;; .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .short_len: cmp.eq p5, p0 = in2, r0 adds loopcnt = -1, in2 (p5) br.cond.spnt .restore_and_exit ;; - mov ar.lc = loopcnt // p6 should be set when we get here + mov ar.lc = loopcnt /* p6 should be set when we get here */ .l8: -(p6) ld1 c = [in1], 1 // c = *src++ +(p6) ld1 c = [in1], 1 /* c = *src++ */ ;; - st1 [in0] = c, 1 // *dest++ = c + st1 [in0] = c, 1 /* *dest++ = c */ (p6) cmp.ne p6, p0 = c, r0 br.cloop.dptk .l8 ;; - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .recovery2: add c = 8, len |