diff options
Diffstat (limited to 'libc/string/ia64/memmove.S')
-rw-r--r-- | libc/string/ia64/memmove.S | 170 |
1 files changed, 85 insertions, 85 deletions
diff --git a/libc/string/ia64/memmove.S b/libc/string/ia64/memmove.S index 00342d8e0..beaada6fc 100644 --- a/libc/string/ia64/memmove.S +++ b/libc/string/ia64/memmove.S @@ -81,48 +81,48 @@ ENTRY(memmove) alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot .rotr r[MEMLAT + 2], q[MEMLAT + 1] .rotp p[MEMLAT + 2] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - or tmp3 = in0, in1 ;; // tmp3 = dest | src - or tmp3 = tmp3, in2 // tmp3 = dest | src | len - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp2 = r0, in0 // tmp2 = -dest - cmp.eq p6, p0 = in2, r0 // if (len == 0) -(p6) br.cond.spnt .restore_and_exit;;// return dest; - and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7 - cmp.le p6, p0 = dest, src // if dest <= src it's always safe -(p6) br.cond.spnt .forward // to copy forward + or tmp3 = in0, in1 ;; /* tmp3 = dest | src */ + or tmp3 = tmp3, in2 /* tmp3 = dest | src | len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp2 = r0, in0 /* tmp2 = -dest */ + cmp.eq p6, p0 = in2, r0 /* if (len == 0) */ +(p6) br.cond.spnt .restore_and_exit;;/* return dest; */ + and tmp4 = 7, tmp3 /* tmp4 = (dest | src | len) & 7 */ + cmp.le p6, p0 = dest, src /* if dest <= src it's always safe */ +(p6) br.cond.spnt .forward /* to copy forward */ add tmp3 = src, len;; - cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len -(p6) br.cond.spnt .backward // we have to copy backward + cmp.lt p6, p0 = dest, tmp3 /* if dest > src && dest < src + len */ +(p6) br.cond.spnt .backward /* we have to copy backward */ .forward: - shr.u loopcnt = len, 4 ;; // loopcnt = len / 16 - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .next // goto next; + shr.u loopcnt = len, 4 ;; /* loopcnt = len / 16 */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .next /* goto next; */ -// The optimal case, when dest, src and len are all multiples of 8 +/* The optimal case, when dest, src and len are all multiples of 8 */ and tmp3 = 0xf, len - mov pr.rot = 1 << 16 // set rotating predicates - mov ar.ec = MEMLAT + 1 ;; // set the epilog counter - cmp.ne p6, p0 = tmp3, r0 // do we have to copy an extra word? - adds loopcnt = -1, loopcnt;; // --loopcnt + mov pr.rot = 1 << 16 /* set rotating predicates */ + mov ar.ec = MEMLAT + 1 ;; /* set the epilog counter */ + cmp.ne p6, p0 = tmp3, r0 /* do we have to copy an extra word? */ + adds loopcnt = -1, loopcnt;; /* --loopcnt */ (p6) ld8 value = [src], 8;; -(p6) st8 [dest] = value, 8 // copy the "odd" word - mov ar.lc = loopcnt // set the loop counter +(p6) st8 [dest] = value, 8 /* copy the "odd" word */ + mov ar.lc = loopcnt /* set the loop counter */ cmp.eq p6, p0 = 8, len -(p6) br.cond.spnt .restore_and_exit;;// the one-word special case - adds adest = 8, dest // set adest one word ahead of dest - adds asrc = 8, src ;; // set asrc one word ahead of src - nop.b 0 // get the "golden" alignment for - nop.b 0 // the next loop +(p6) br.cond.spnt .restore_and_exit;;/* the one-word special case */ + adds adest = 8, dest /* set adest one word ahead of dest */ + adds asrc = 8, src ;; /* set asrc one word ahead of src */ + nop.b 0 /* get the "golden" alignment for */ + nop.b 0 /* the next loop */ .l0: (p[0]) ld8 r[0] = [src], 16 (p[0]) ld8 q[0] = [asrc], 16 @@ -130,50 +130,50 @@ ENTRY(memmove) (p[MEMLAT]) st8 [adest] = q[MEMLAT], 16 br.ctop.dptk .l0 ;; - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .next: - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - and loopcnt = 7, tmp2 // loopcnt = -dest % 8 -(p6) br.cond.spnt .cpyfew // copy byte by byte + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + and loopcnt = 7, tmp2 /* loopcnt = -dest % 8 */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp2 = -8, len // tmp2 = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len;; // len = len % 8 - adds loopcnt = -1, loopcnt // --loopcnt + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp2 = -8, len /* tmp2 = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len;; /* len = len % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ addl tmp4 = @ltoff(.table), gp addl tmp3 = @ltoff(.loop56), gp - mov ar.ec = MEMLAT + 1 // set EC - mov pr.rot = 1 << 16;; // set rotating predicates - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov ar.ec = MEMLAT + 1 /* set EC */ + mov pr.rot = 1 << 16;; /* set rotating predicates */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp2 // src += len & -OPSIZ - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - ld8 ptable = [tmp4];; // ptable = &table - add tmp3 = ptable, sh1;; // tmp3 = &table + sh1 - mov ar.ec = MEMLAT + 1 + 1 // one more pass needed - ld8 tmp4 = [tmp3];; // tmp4 = loop offset - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - ld8 r[1] = [asrc], 8;; // w0 + add src = src, tmp2 /* src += len & -OPSIZ */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + ld8 ptable = [tmp4];; /* ptable = &table */ + add tmp3 = ptable, sh1;; /* tmp3 = &table + sh1 */ + mov ar.ec = MEMLAT + 1 + 1 /* one more pass needed */ + ld8 tmp4 = [tmp3];; /* tmp4 = loop offset */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + ld8 r[1] = [asrc], 8;; /* w0 */ mov b6 = loopaddr;; - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ LOOP(8) LOOP(16) @@ -189,8 +189,8 @@ ENTRY(memmove) (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -199,36 +199,36 @@ ENTRY(memmove) st1 [dest] = value, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 -// In the case of a backward copy, optimise only the case when everything -// is a multiple of 8, otherwise copy byte by byte. The backward copy is -// used only when the blocks are overlapping and dest > src. - +/* In the case of a backward copy, optimise only the case when everything + is a multiple of 8, otherwise copy byte by byte. The backward copy is + used only when the blocks are overlapping and dest > src. +*/ .backward: - shr.u loopcnt = len, 3 // loopcnt = len / 8 - add src = src, len // src points one byte past the end - add dest = dest, len ;; // dest points one byte past the end - mov ar.ec = MEMLAT + 1 // set the epilog counter - mov pr.rot = 1 << 16 // set rotating predicates - adds loopcnt = -1, loopcnt // --loopcnt - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .bytecopy ;; // copy byte by byte backward - adds src = -8, src // src points to the last word - adds dest = -8, dest // dest points to the last word - mov ar.lc = loopcnt;; // set the loop counter + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + add src = src, len /* src points one byte past the end */ + add dest = dest, len ;; /* dest points one byte past the end */ + mov ar.ec = MEMLAT + 1 /* set the epilog counter */ + mov pr.rot = 1 << 16 /* set rotating predicates */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .bytecopy ;; /* copy byte by byte backward */ + adds src = -8, src /* src points to the last word */ + adds dest = -8, dest /* dest points to the last word */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l5: (p[0]) ld8 r[0] = [src], -8 (p[MEMLAT]) st8 [dest] = r[MEMLAT], -8 br.ctop.dptk .l5 br.cond.sptk .restore_and_exit .bytecopy: - adds src = -1, src // src points to the last byte - adds dest = -1, dest // dest points to the last byte - adds loopcnt = -1, len;; // loopcnt = len - 1 - mov ar.lc = loopcnt;; // set the loop counter + adds src = -1, src /* src points to the last byte */ + adds dest = -1, dest /* dest points to the last byte */ + adds loopcnt = -1, len;; /* loopcnt = len - 1 */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l6: (p[0]) ld1 r[0] = [src], -1 (p[MEMLAT]) st1 [dest] = r[MEMLAT], -1 @@ -239,7 +239,7 @@ END(memmove) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 |