1 /* Machine-dependent ELF dynamic relocation functions. PowerPC version.
2 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
23 #include <sys/param.h>
26 #include <elf/dynamic-link.h>
27 #include <dl-machine.h>
28 #include <stdio-common/_itoa.h>
30 /* Because ld.so is now versioned, these functions can be in their own file;
31 no relocations need to be done to call them.
32 Of course, if ld.so is not versioned... */
/* Build-time guard: stop the compilation unless DO_VERSIONING expands to a
   non-zero value.  The "- 0" keeps the expression well-formed when the
   macro is defined but empty.  */
33 #if !(DO_VERSIONING - 0)
34 #error This will not work with versioning turned off, sorry.
38 /* Stuff for the PLT. */
/* Number of 32-bit words at the start of the PLT that precede the first
   per-symbol entry.  */
39 #define PLT_INITIAL_ENTRY_WORDS 18
/* Word index, inside the PLT, of the stub that loads an address from the
   data table and jumps to it.  */
40 #define PLT_LONGBRANCH_ENTRY_WORDS 0
/* Word index, inside the PLT, of the lazy-resolution trampoline.  */
41 #define PLT_TRAMPOLINE_ENTRY_WORDS 6
/* Number of leading entries (8192) that use the short two-word form; the
   setup code writes four words for every entry after these.  */
42 #define PLT_DOUBLE_SIZE (1<<13)
/* Word offset from the start of the PLT to entry ENTRY_NUMBER: the initial
   words, two words per entry, plus two more words for every entry past
   PLT_DOUBLE_SIZE.  PLT_DATA_START_WORDS applies the same formula to the
   total entry count, giving the offset of the first word after the last
   entry, which is where the data table begins.  */
43 #define PLT_ENTRY_START_WORDS(entry_number) \
44 (PLT_INITIAL_ENTRY_WORDS + (entry_number)*2 \
45 + ((entry_number) > PLT_DOUBLE_SIZE \
46 ? ((entry_number) - PLT_DOUBLE_SIZE)*2 \
48 #define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries)
/* Macros to build PowerPC opcode words.

   Each macro ORs its operands into a fixed 32-bit instruction template.
   Register operands (rd, ra, rb, rs) are shifted into their fields
   unmasked, so callers must pass valid register numbers.  Immediate and
   displacement operands are masked to the width of their field.  Every
   argument is parenthesised so that expressions may be passed safely.  */

/* addi rd,ra,simm -- only the low 16 bits of SIMM are encoded.  */
#define OPCODE_ADDI(rd,ra,simm) \
  (0x38000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
/* addis rd,ra,simm -- only the low 16 bits of SIMM are encoded.  */
#define OPCODE_ADDIS(rd,ra,simm) \
  (0x3c000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
/* add rd,ra,rb.  */
#define OPCODE_ADD(rd,ra,rb) \
  (0x7c000214 | (rd) << 21 | (ra) << 16 | (rb) << 11)
/* b target -- TARGET is a byte displacement; bits outside the 24-bit
   word-aligned branch field are discarded.  */
#define OPCODE_B(target) (0x48000000 | ((target) & 0x03fffffc))
/* ba target -- as OPCODE_B but with the absolute-address bit set.  */
#define OPCODE_BA(target) (0x48000002 | ((target) & 0x03fffffc))
/* bctr.  */
#define OPCODE_BCTR() 0x4e800420
/* lwz rd,d(ra) -- only the low 16 bits of D are encoded.  */
#define OPCODE_LWZ(rd,d,ra) \
  (0x80000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
/* lwzu rd,d(ra) -- only the low 16 bits of D are encoded.  */
#define OPCODE_LWZU(rd,d,ra) \
  (0x84000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
/* mtctr rd.  */
#define OPCODE_MTCTR(rd) (0x7C0903A6 | (rd) << 21)
/* rlwinm ra,rs,sh,mb,me.  */
#define OPCODE_RLWINM(ra,rs,sh,mb,me) \
  (0x54000000 | (rs) << 21 | (ra) << 16 | (sh) << 11 | (mb) << 6 | (me) << 1)

/* li rd,simm, expressed as addi with register field 0.  */
#define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm)
/* addis with the high half of VALUE, rounded so that a following
   instruction which adds the sign-extended low 16 bits of VALUE (addi,
   lwz, lwzu) arrives at exactly VALUE.  */
#define OPCODE_ADDIS_HI(rd,ra,value) \
  OPCODE_ADDIS(rd,ra,((value) + 0x8000) >> 16)
/* lis form of OPCODE_ADDIS_HI.  */
#define OPCODE_LIS_HI(rd,value) OPCODE_ADDIS_HI(rd,0,value)
/* slwi ra,rs,sh, i.e. rlwinm ra,rs,sh,0,31-sh.  SH is parenthesised in
   the subtraction so that an expression such as `n + 1' yields the
   intended mask end.  */
#define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-(sh))
/* One-instruction wrappers around the PowerPC cache-management and
   barrier instructions.  All but PPC_DIE list "memory" as clobbered, so
   the compiler treats them as reading and writing arbitrary memory.  */

/* dcbst applied to the address ADDR.  */
#define PPC_DCBST(addr) \
  __asm__ __volatile__ ("dcbst 0,%0" : : "r"(addr) : "memory")
/* sync.  */
#define PPC_SYNC \
  __asm__ __volatile__ ("sync" : : : "memory")
/* Despite the name, this emits sync followed by isync.  */
#define PPC_ISYNC \
  __asm__ __volatile__ ("sync; isync" : : : "memory")
/* icbi applied to the address ADDR.  */
#define PPC_ICBI(addr) \
  __asm__ __volatile__ ("icbi 0,%0" : : "r"(addr) : "memory")
/* tweq 0,0: trap-if-equal comparing register 0 with itself.  */
#define PPC_DIE \
  __asm__ __volatile__ ("tweq 0,0")
/* Publish a modified instruction word at WHERE: dcbst, then sync, then
   icbi.  Sufficient when the old contents cannot be sitting in the
   instruction fetch queue, or when it does not matter if they are.
   WHERE is evaluated twice.  */
#define MODIFIED_CODE_NOQUEUE(where) \
  do \
    { \
      PPC_DCBST (where); \
      PPC_SYNC; \
      PPC_ICBI (where); \
    } \
  while (0)

/* The same sequence followed by PPC_ISYNC, for code that might already
   be in the instruction queue.  */
#define MODIFIED_CODE(where) \
  do \
    { \
      MODIFIED_CODE_NOQUEUE (where); \
      PPC_ISYNC; \
    } \
  while (0)
90 /* The idea here is that to conform to the ABI, we are supposed to try
91 to load dynamic objects between 0x10000 (we actually use 0x40000 as
92 the lower bound, to increase the chance of a memory reference from
93 a null pointer giving a segfault) and the program's load address;
94 this may allow us to use a branch instruction in the PLT rather
95 than a computed jump. The address is only used as a preference for
96 mmap, so if we get it wrong the worst that happens is that it gets
97 mapped somewhere else. */
/* MAPLENGTH is the size the new mapping must fit into; MAPSTARTPREF is
   the address the object itself asks for (0 means no preference).
   LOADER is not referenced in the lines shown here.  On the normal path
   the result is the start address of a MAPLENGTH-sized region ending at
   the top of the chosen gap.  */
100 __elf_preferred_address(struct link_map *loader, size_t maplength,
101 ElfW(Addr) mapstartpref)
103 ElfW(Addr) low, high;
106 /* If the object has a preference, load it there! */
107 if (mapstartpref != 0)
110 /* Otherwise, quickly look for a suitable gap between 0x3FFFF and
111 0x70000000. 0x3FFFF is so that references off NULL pointers will
112 cause a segfault, 0x70000000 is just paranoia (it should always
113 be superseded by the program's load address). */
/* Walk every object already on the _dl_loaded list and narrow the
   candidate window [low, high] around it.  */
116 for (l = _dl_loaded; l; l = l->l_next)
118 ElfW(Addr) mapstart, mapend;
/* Widen the object's extent to whole pages: round the start down and
   take the last byte of the page holding the end (assumes _dl_pagesize
   is a power of two -- TODO confirm).  */
119 mapstart = l->l_map_start & ~(_dl_pagesize - 1);
120 mapend = l->l_map_end | (_dl_pagesize - 1);
121 assert (mapend > mapstart);
/* Three ways the object can intersect the window: it covers `high', it
   covers `low', or it lies entirely inside.  */
123 if (mapend >= high && high >= mapstart)
125 else if (mapend >= low && low >= mapstart)
127 else if (high >= mapend && mapstart >= low)
/* Entirely inside: compare the free space above the object with the
   free space below it.  */
129 if (high - mapend >= mapstart - low)
136 high -= 0x10000; /* Allow some room between objects. */
/* Round the length up to the next page boundary; a length that is
   already a page multiple grows by one whole page.  */
137 maplength = (maplength | (_dl_pagesize-1)) + 1;
/* The window is empty or too small to hold the object.  */
138 if (high <= low || high - low < maplength )
140 return high - maplength; /* Both high and maplength are page-aligned. */
143 /* Set up the loaded object described by L so its unrelocated PLT
144 entries will jump to the on-demand fixup code in dl-runtime.c.
145 Also install a small trampoline to be used by entries that have
146 been relocated to an address too far away for a single branch. */
148 /* There are many kinds of PLT entries:
150 (1) A direct jump to the actual routine, either a relative or
151 absolute branch. These are set up in __elf_machine_fixup_plt.
153 (2) Short lazy entries. These cover the first 8192 slots in
154 the PLT, and look like (where 'index' goes from 0 to 8191):
157 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS+2]
159 (3) Short indirect jumps. These replace (2) when a direct jump
160 wouldn't reach. They look the same except that the branch
161 is 'b &plt[PLT_LONGBRANCH_ENTRY_WORDS]'.
163 (4) Long lazy entries. These cover the slots when a short entry
164 won't fit ('index*4' overflows its field), and look like:
166 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
167 lwzu %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
168 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS]
171 (5) Long indirect jumps. These replace (4) when a direct jump
172 wouldn't reach. They look like:
174 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
175 lwz %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
179 (6) Long direct jumps. These are used when thread-safety is not
180 required. They look like:
182 lis %r12, %hi(finaladdr)
183 addi %r12, %r12, %lo(finaladdr)
188 The lazy entries, (2) and (4), are set up here in
189 __elf_machine_runtime_setup. (1), (3), and (5) are set up in
190 __elf_machine_fixup_plt. (1), (3), and (6) can also be constructed
191 in __process_machine_rela.
193 The reason for the somewhat strange construction of the long
194 entries, (4) and (5), is that we need to ensure thread-safety. For
195 (1) and (3), this is obvious because only one instruction is
196 changed and the PPC architecture guarantees that aligned stores are
197 atomic. For (5), this is more tricky. When changing (4) to (5),
198 the `b' instruction is first changed to `mtctr'; this is safe
199 and is why the `lwzu' instruction is not just a simple `addi'.
200 Once this is done, and is visible to all processors, the `lwzu' can
201 safely be changed to a `lwz'. */
/* MAP is the object whose PLT is initialised; nothing is written unless
   MAP has a DT_JMPREL entry.  PROFILE chooses the resolver whose address
   the trampoline transfers to (_dl_runtime_resolve is the non-profiling
   choice) and, when MAP matches _dl_profile, records MAP in
   _dl_profile_map.  LAZY decides how many PLT words are flushed from the
   caches at the end.  */
203 __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
205 if (map->l_info[DT_JMPREL])
208 Elf32_Word *plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]);
209 Elf32_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
210 / sizeof (Elf32_Rela));
/* Word offset of the data table that follows the last PLT entry, and the
   address of that table as a 32-bit integer.  */
211 Elf32_Word rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries);
212 Elf32_Word data_words = (Elf32_Word) (plt + rel_offset_words);
213 Elf32_Word size_modified;
215 extern void _dl_runtime_resolve (void);
216 extern void _dl_prof_resolve (void);
218 /* Convert the index in r11 into an actual address, and get the
219 word at that address. */
/* addis adds the rounded high half of data_words and lwz supplies the
   sign-extended low half as its displacement, so together they read the
   word at data_words + r11.  */
220 plt[PLT_LONGBRANCH_ENTRY_WORDS] = OPCODE_ADDIS_HI (11, 11, data_words);
221 plt[PLT_LONGBRANCH_ENTRY_WORDS + 1] = OPCODE_LWZ (11, data_words, 11);
223 /* Call the procedure at that address. */
224 plt[PLT_LONGBRANCH_ENTRY_WORDS + 2] = OPCODE_MTCTR (11);
225 plt[PLT_LONGBRANCH_ENTRY_WORDS + 3] = OPCODE_BCTR ();
/* The trampoline starts PLT_TRAMPOLINE_ENTRY_WORDS words into the PLT;
   dlrr is the address of the resolver it will transfer to.  */
229 Elf32_Word *tramp = plt + PLT_TRAMPOLINE_ENTRY_WORDS;
230 Elf32_Word dlrr = (Elf32_Word)(profile
232 : _dl_runtime_resolve);
235 if (profile && _dl_name_match_p (_dl_profile, map))
236 /* This is the object we are looking for. Say that we really
237 want profiling and the timers are started. */
238 _dl_profile_map = map;
240 /* For the long entries, subtract off data_words. */
241 tramp[0] = OPCODE_ADDIS_HI (11, 11, -data_words);
242 tramp[1] = OPCODE_ADDI (11, 11, -data_words);
244 /* Multiply index of entry by 3 (in r11). */
/* r12 = r11 << 1, then r11 = r12 + r11.  */
245 tramp[2] = OPCODE_SLWI (12, 11, 1);
246 tramp[3] = OPCODE_ADD (11, 12, 11);
/* The resolver address fits the address field of an absolute branch, so
   the trampoline can end in a single `ba'.  */
247 if (dlrr <= 0x01fffffc || dlrr >= 0xfe000000)
249 /* Load address of link map in r12. */
250 tramp[4] = OPCODE_LI (12, (Elf32_Word) map);
251 tramp[5] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map);
253 /* Call _dl_runtime_resolve. */
254 tramp[6] = OPCODE_BA (dlrr);
258 /* Get address of _dl_runtime_resolve in CTR. */
259 tramp[4] = OPCODE_LI (12, dlrr);
260 tramp[5] = OPCODE_ADDIS_HI (12, 12, dlrr);
261 tramp[6] = OPCODE_MTCTR (12);
263 /* Load address of link map in r12. */
264 tramp[7] = OPCODE_LI (12, (Elf32_Word) map);
265 tramp[8] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map);
267 /* Call _dl_runtime_resolve. */
268 tramp[9] = OPCODE_BCTR ();
271 /* Set up the lazy PLT entries. */
272 offset = PLT_INITIAL_ENTRY_WORDS;
/* Short form for slots below PLT_DOUBLE_SIZE: load index*4 into r11 and
   branch into the trampoline.  */
274 while (i < num_plt_entries && i < PLT_DOUBLE_SIZE)
276 plt[offset ] = OPCODE_LI (11, i * 4);
277 plt[offset+1] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + 2
/* Long form for the remaining slots: four words each, addressing the
   slot's word in the data table through r11.  */
283 while (i < num_plt_entries)
285 plt[offset ] = OPCODE_LIS_HI (11, i * 4 + data_words);
286 plt[offset+1] = OPCODE_LWZU (12, i * 4 + data_words, 11);
287 plt[offset+2] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS
290 plt[offset+3] = OPCODE_BCTR ();
296 /* Now, we've modified code. We need to write the changes from
297 the data cache to a second-level unified cache, then make
298 sure that stale data in the instruction cache is removed.
299 (In a multiprocessor system, the effect is more complex.)
300 Most of the PLT shouldn't be in the instruction cache, but
301 there may be a little overlap at the start and the end.
303 Assumes that dcbst and icbi apply to lines of 16 bytes or
304 more. Current known line sizes are 16, 32, and 128 bytes. */
/* With LAZY set everything up to the data table was rewritten; otherwise
   only the first six words count as modified.  The loops step four words
   (16 bytes) at a time, matching the smallest line size assumed above.  */
306 size_modified = lazy ? rel_offset_words : 6;
307 for (i = 0; i < size_modified; i += 4)
309 PPC_DCBST (plt + size_modified - 1);
312 PPC_ICBI (plt + size_modified - 1);
/* Rewrite the PLT slot at RELOC_ADDR, which belongs to MAP, so that it
   transfers control to FINALADDR.  A single relative or absolute branch
   is used when FINALADDR is reachable that way; otherwise FINALADDR is
   stored in the data table behind the PLT and the slot is turned into an
   indirect jump through it.  RELOC is not referenced in the lines shown
   here.  */
320 __elf_machine_fixup_plt(struct link_map *map, const Elf32_Rela *reloc,
321 Elf32_Addr *reloc_addr, Elf32_Addr finaladdr)
323 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
/* delta survives being squeezed into 26 signed bits, so a relative
   branch reaches the target.  */
324 if (delta << 6 >> 6 == delta)
325 *reloc_addr = OPCODE_B (delta);
/* The target address itself fits the field of an absolute branch.  */
326 else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000)
327 *reloc_addr = OPCODE_BA (finaladdr);
330 Elf32_Word *plt, *data_words;
331 Elf32_Word index, offset, num_plt_entries;
333 num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
334 / sizeof(Elf32_Rela));
335 plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]);
/* Word offset of the slot within the PLT.  Halving the distance past the
   initial words gives the slot number as long as every preceding slot is
   two words long.  */
336 offset = reloc_addr - plt;
337 index = (offset - PLT_INITIAL_ENTRY_WORDS)/2;
338 data_words = plt + PLT_DATA_START_WORDS (num_plt_entries);
342 if (index < PLT_DOUBLE_SIZE)
344 data_words[index] = finaladdr;
346 *reloc_addr = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS - (offset+1))
/* Slots past PLT_DOUBLE_SIZE are four words long, so the two-word
   estimate above counts each of them twice; remove half of the excess to
   recover the real slot number.  */
351 index -= (index - PLT_DOUBLE_SIZE)/2;
353 data_words[index] = finaladdr;
/* The second word of the slot is replaced and flushed before the first
   word is touched; see the thread-safety discussion in the comment
   preceding __elf_machine_runtime_setup.  */
356 reloc_addr[1] = OPCODE_MTCTR (12);
357 MODIFIED_CODE_NOQUEUE (reloc_addr + 1);
360 reloc_addr[0] = OPCODE_LWZ (12,
361 (Elf32_Word) (data_words + index), 11);
364 MODIFIED_CODE (reloc_addr);
/* Report that a relocation in MAP did not fit its field.  The message is
   assembled in `buffer' as "<name> relocation at 0x<address> out of
   range" and passed to _dl_signal_error together with MAP's l_name.  */
369 dl_reloc_overflow (struct link_map *map,
371 Elf32_Addr *const reloc_addr)
375 t = stpcpy (buffer, name);
/* The eight zeros are a placeholder for the address.  _itoa_word
   presumably writes the hex digits backwards from `t', overwriting the
   tail of the placeholder -- TODO confirm against _itoa.h.  */
376 t = stpcpy (t, " relocation at 0x00000000");
377 _itoa_word ((unsigned) reloc_addr, t, 16, 0);
378 t = stpcpy (t, " out of range");
379 _dl_signal_error (0, map->l_name, buffer);
/* Apply one relocation that needs more than a plain word store.
   RELOC_ADDR is the location being patched in MAP and FINALADDR is the
   value computed for it; SYM and REFSYM are compared by size before
   symbol contents are copied.  `rinfo' (declared in the part of the
   parameter list following FINALADDR) selects the relocation type.
   Values that do not fit the target field are reported through
   dl_reloc_overflow.  The patched word is flushed from the caches with
   MODIFIED_CODE_NOQUEUE.  */
383 __process_machine_rela (struct link_map *map,
384 const Elf32_Rela *reloc,
385 const Elf32_Sym *sym,
386 const Elf32_Sym *refsym,
387 Elf32_Addr *const reloc_addr,
388 Elf32_Addr const finaladdr,
400 *reloc_addr = finaladdr;
/* Absolute branch field: the address must fit 26 signed bits.  */
404 if (finaladdr > 0x01fffffc && finaladdr < 0xfe000000)
405 dl_reloc_overflow (map, "R_PPC_ADDR24", reloc_addr);
406 *reloc_addr = (*reloc_addr & 0xfc000003) | (finaladdr & 0x3fffffc);
/* A 16-bit field holds a sign-extended value, i.e. 0..0x7fff or
   0xffff8000..0xffffffff as an unsigned 32-bit number.  The upper bound
   used to be 0x8000, which made the test unsatisfiable and disabled the
   overflow report.  */
411 if (finaladdr > 0x7fff && finaladdr < 0xffff8000)
412 dl_reloc_overflow (map, "R_PPC_ADDR16", reloc_addr);
413 *(Elf32_Half*) reloc_addr = finaladdr;
416 case R_PPC_ADDR16_LO:
417 *(Elf32_Half*) reloc_addr = finaladdr;
420 case R_PPC_ADDR16_HI:
421 *(Elf32_Half*) reloc_addr = finaladdr >> 16;
/* High half, rounded up when bit 15 of the low half is set so that
   adding the sign-extended low half gives back FINALADDR.  */
424 case R_PPC_ADDR16_HA:
425 *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16;
429 case R_PPC_ADDR14_BRTAKEN:
430 case R_PPC_ADDR14_BRNTAKEN:
/* Same sign-extended 16-bit range as R_PPC_ADDR16; the upper bound had
   the same unsatisfiable 0x8000 here.  */
431 if (finaladdr > 0x7fff && finaladdr < 0xffff8000)
432 dl_reloc_overflow (map, "R_PPC_ADDR14", reloc_addr);
433 *reloc_addr = (*reloc_addr & 0xffff0003) | (finaladdr & 0xfffc);
/* For the two hinted variants, bit 21 is replaced by the requested hint
   XORed with the sign bit of the target address.  */
434 if (rinfo != R_PPC_ADDR14)
435 *reloc_addr = ((*reloc_addr & 0xffdfffff)
436 | ((rinfo == R_PPC_ADDR14_BRTAKEN)
437 ^ (finaladdr >> 31)) << 21);
/* NOTE(review): the range test and the 0x3fffffc mask below describe a
   24-bit branch field, yet the message names R_PPC_REL14 -- confirm
   which relocation this case handles and correct the string if
   needed.  */
442 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
443 if (delta << 6 >> 6 != delta)
444 dl_reloc_overflow (map, "R_PPC_REL14", reloc_addr);
445 *reloc_addr = (*reloc_addr & 0xfc000003) | (delta & 0x3fffffc);
451 /* This can happen in trace mode when an object could not be
454 if (sym->st_size > refsym->st_size
455 || (_dl_verbose && sym->st_size < refsym->st_size))
459 strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
460 _dl_sysdep_error (_dl_argv[0] ?: "<program name unknown>",
461 ": Symbol `", strtab + refsym->st_name,
462 "' has different size in shared object, "
463 "consider re-linking\n", NULL);
465 memcpy (reloc_addr, (char *) finaladdr, MIN (sym->st_size,
470 *reloc_addr = finaladdr - (Elf32_Word) reloc_addr;
474 /* It used to be that elf_machine_fixup_plt was used here,
475 but that doesn't work when ld.so relocates itself
476 for the second time. On the bright side, there's
477 no need to worry about thread-safety here. */
479 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
/* Prefer a single relative branch, then a single absolute branch.  */
480 if (delta << 6 >> 6 == delta)
481 *reloc_addr = OPCODE_B (delta);
482 else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000)
483 *reloc_addr = OPCODE_BA (finaladdr);
486 Elf32_Word *plt, *data_words;
487 Elf32_Word index, offset, num_plt_entries;
489 plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]);
490 offset = reloc_addr - plt;
/* The slot lies in the two-word region of the PLT: store the target in
   the data table and jump through the long-branch stub.  */
492 if (offset < PLT_DOUBLE_SIZE*2 + PLT_INITIAL_ENTRY_WORDS)
494 index = (offset - PLT_INITIAL_ENTRY_WORDS)/2;
495 num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
496 / sizeof(Elf32_Rela));
497 data_words = plt + PLT_DATA_START_WORDS (num_plt_entries);
498 data_words[index] = finaladdr;
499 reloc_addr[0] = OPCODE_LI (11, index * 4);
500 reloc_addr[1] = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS
503 MODIFIED_CODE_NOQUEUE (reloc_addr + 1);
/* Four-word slot: build the target address in r12 and jump via CTR.  */
507 reloc_addr[0] = OPCODE_LIS_HI (12, finaladdr);
508 reloc_addr[1] = OPCODE_ADDI (12, 12, finaladdr);
509 reloc_addr[2] = OPCODE_MTCTR (12);
510 reloc_addr[3] = OPCODE_BCTR ();
511 MODIFIED_CODE_NOQUEUE (reloc_addr + 3);
518 _dl_reloc_bad_type (map, rinfo, 0);
522 MODIFIED_CODE_NOQUEUE (reloc_addr);