diff options
author | Eric Andersen <andersen@codepoet.org> | 2003-11-08 07:42:34 +0000 |
---|---|---|
committer | Eric Andersen <andersen@codepoet.org> | 2003-11-08 07:42:34 +0000 |
commit | 2a01fa1548e5671880d6dd18c7d216ddf958ea96 (patch) | |
tree | db51930aad916a898e321d50638f20ddeabdcddf /ldso | |
parent | b36d61d33ece9afa32733bbcc7def4cb4afc4856 (diff) |
Joakim Tjernlund writes:
> Very interesting. Do you have any suggestions for how
> we could fix our powerpc shared library loader
Removing those instructions comes with a very big performance
penalty. To flush the dcache you will have to read up to 8KB of
dummy data, and to invalidate the icache you will have to
execute up to 16KB of nops. I don't know of any other way from
user space.
Hmm, actually I think it will work reliably to perform a
store to the same page(s) that the dcbst/icbi will act on. That
way you will make the DTLB Error happen (if any) prior to the
dcbst/icbi. The worst thing that can happen then is a regular
DTLB Miss, and that works for dcbst/icbi.
You will have to look out for a dcbst/icbi that crosses a page
boundary. Then you will have to perform a store to both
pages.
Jocke
# And again later writes:
Hi again
I think I know what the problem is. The
PPC_DCBST;PPC_SYNC;PPC_ICBI;PPC_ISYNC sequence is executed
even if no modification has been done in some cases:
_dl_linux_resolver(), the last else has no store for insns[0].
there is an insns[1] = OPCODE_B(delta - 4) that
does not have a PPC_DCBST.
_dl_do_lazy_reloc(), for R_PPC_NONE there is no store.
for R_PPC_JMP_SLOT there is an
insns[1] = OPCODE_B(delta) that does not
have a PPC_DCBST.
_dl_do_reloc(), for R_PPC_COPY there is no store.
for R_PPC_JMP_SLOT there is a
reloc_addr[1] = OPCODE_B(delta) that does not
have a PPC_DCBST.
_dl_init_got(), I THINK that the
PPC_DCBST(plt);
PPC_DCBST(plt+4);
PPC_DCBST(plt+8);
PPC_SYNC;
PPC_ICBI(plt);
PPC_ICBI(plt+4);
PPC_ICBI(plt+8);
PPC_ISYNC;
is off a bit. The address range does not match the sum
of the plt[] and tramp[] address range.
Jocke
# And then later added the comment:
I think that the tramp[] part should be included in the
PPC_DCBST/PPC_ICBI sequence. Then you have to add entries for
plt+12 and plt+16. If the tramp[] part should be excluded,
then all is well.
Jocke
Diffstat (limited to 'ldso')
-rw-r--r-- | ldso/ldso/powerpc/elfinterp.c | 27 |
1 files changed, 25 insertions, 2 deletions
diff --git a/ldso/ldso/powerpc/elfinterp.c b/ldso/ldso/powerpc/elfinterp.c index e3b39de16..93468c4ec 100644 --- a/ldso/ldso/powerpc/elfinterp.c +++ b/ldso/ldso/powerpc/elfinterp.c @@ -152,10 +152,14 @@ void _dl_init_got(unsigned long *plt,struct elf_resolve *tpnt) PPC_DCBST(plt); PPC_DCBST(plt+4); PPC_DCBST(plt+8); + PPC_DCBST(plt+12); + PPC_DCBST(plt+16-1); PPC_SYNC; PPC_ICBI(plt); - PPC_ICBI(plt+4); - PPC_ICBI(plt+8); + PPC_ICBI(plt+4); /* glibc thinks this is not needed */ + PPC_ICBI(plt+8); /* glibc thinks this is not needed */ + PPC_ICBI(plt+12); /* glibc thinks this is not needed */ + PPC_ICBI(plt+16-1); PPC_ISYNC; } @@ -245,7 +249,15 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) //PPC_SYNC; //PPC_ICBI(ptr+index); //PPC_ISYNC; + + /* instructions were modified */ insns[1] = OPCODE_B(delta - 4); + PPC_DCBST(insn_addr+1); + PPC_SYNC; + PPC_ICBI(insn_addr+1); + PPC_ISYNC; + + return new_addr; } /* instructions were modified */ @@ -344,6 +356,7 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope, switch (reloc_type) { case R_PPC_NONE: + return 0; break; case R_PPC_JMP_SLOT: { @@ -380,8 +393,11 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope, /* instructions were modified */ PPC_DCBST(reloc_addr); + PPC_DCBST(reloc_addr+1); PPC_SYNC; PPC_ICBI(reloc_addr); + PPC_ICBI(reloc_addr+1); + PPC_ISYNC; #if defined (__SUPPORT_LD_DEBUG__) if(_dl_debug_reloc && _dl_debug_detail) @@ -435,6 +451,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, #endif switch (reloc_type) { case R_PPC_NONE: + return 0; break; case R_PPC_REL24: #if 0 @@ -494,6 +511,10 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, //DPRINTF(" index %x delta %x\n",index,delta); reloc_addr[0] = OPCODE_LI(11,index*4); reloc_addr[1] = OPCODE_B(delta); + + /* instructions were modified */ + PPC_DCBST(reloc_addr+1); + PPC_ICBI(reloc_addr+1); } } break; @@ -503,6 +524,7 @@ _dl_do_reloc (struct elf_resolve 
*tpnt,struct dyn_elf *scope, break; case R_PPC_COPY: // handled later + return 0; break; default: #if 0 @@ -521,6 +543,7 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, PPC_DCBST(reloc_addr); PPC_SYNC; PPC_ICBI(reloc_addr); + PPC_ISYNC; #if defined (__SUPPORT_LD_DEBUG__) if(_dl_debug_reloc && _dl_debug_detail) |