[CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
[safe/jmp/linux-2.6] / arch / powerpc / oprofile / cell / vma_map.c
1 /*
2  * Cell Broadband Engine OProfile Support
3  *
4  * (C) Copyright IBM Corporation 2006
5  *
6  * Author: Maynard Johnson <maynardj@us.ibm.com>
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version
11  * 2 of the License, or (at your option) any later version.
12  */
13
14 /* The code in this source file is responsible for generating
15  * vma-to-fileOffset maps for both overlay and non-overlay SPU
16  * applications.
17  */
18
19 #include <linux/mm.h>
20 #include <linux/string.h>
21 #include <linux/uaccess.h>
22 #include <linux/elf.h>
23 #include "pr_util.h"
24
25
26 void vma_map_free(struct vma_to_fileoffset_map *map)
27 {
28         while (map) {
29                 struct vma_to_fileoffset_map *next = map->next;
30                 kfree(map);
31                 map = next;
32         }
33 }
34
35 unsigned int
36 vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
37                const struct spu *aSpu, int *grd_val)
38 {
39         /*
40          * Default the offset to the physical address + a flag value.
41          * Addresses of dynamically generated code can't be found in the vma
42          * map.  For those addresses the flagged value will be sent on to
43          * the user space tools so they can be reported rather than just
44          * thrown away.
45          */
46         u32 offset = 0x10000000 + vma;
47         u32 ovly_grd;
48
49         for (; map; map = map->next) {
50                 if (vma < map->vma || vma >= map->vma + map->size)
51                         continue;
52
53                 if (map->guard_ptr) {
54                         ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
55                         if (ovly_grd != map->guard_val)
56                                 continue;
57                         *grd_val = ovly_grd;
58                 }
59                 offset = vma - map->vma + map->offset;
60                 break;
61         }
62
63         return offset;
64 }
65
66 static struct vma_to_fileoffset_map *
67 vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
68             unsigned int size, unsigned int offset, unsigned int guard_ptr,
69             unsigned int guard_val)
70 {
71         struct vma_to_fileoffset_map *new =
72                 kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
73         if (!new) {
74                 printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
75                        __FUNCTION__, __LINE__);
76                 vma_map_free(map);
77                 return NULL;
78         }
79
80         new->next = map;
81         new->vma = vma;
82         new->size = size;
83         new->offset = offset;
84         new->guard_ptr = guard_ptr;
85         new->guard_val = guard_val;
86
87         return new;
88 }
89
90
91 /* Parse SPE ELF header and generate a list of vma_maps.
92  * A pointer to the first vma_map in the generated list
93  * of vma_maps is returned.  */
94 struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
95                                              unsigned long spu_elf_start)
96 {
97         static const unsigned char expected[EI_PAD] = {
98                 [EI_MAG0] = ELFMAG0,
99                 [EI_MAG1] = ELFMAG1,
100                 [EI_MAG2] = ELFMAG2,
101                 [EI_MAG3] = ELFMAG3,
102                 [EI_CLASS] = ELFCLASS32,
103                 [EI_DATA] = ELFDATA2MSB,
104                 [EI_VERSION] = EV_CURRENT,
105                 [EI_OSABI] = ELFOSABI_NONE
106         };
107
108         int grd_val;
109         struct vma_to_fileoffset_map *map = NULL;
110         struct spu_overlay_info ovly;
111         unsigned int overlay_tbl_offset = -1;
112         unsigned long phdr_start, shdr_start;
113         Elf32_Ehdr ehdr;
114         Elf32_Phdr phdr;
115         Elf32_Shdr shdr, shdr_str;
116         Elf32_Sym sym;
117         int i, j;
118         char name[32];
119
120         unsigned int ovly_table_sym = 0;
121         unsigned int ovly_buf_table_sym = 0;
122         unsigned int ovly_table_end_sym = 0;
123         unsigned int ovly_buf_table_end_sym = 0;
124         unsigned long ovly_table;
125         unsigned int n_ovlys;
126
127         /* Get and validate ELF header.  */
128
129         if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
130                 goto fail;
131
132         if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
133                 printk(KERN_ERR "SPU_PROF: "
134                        "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
135                        __FUNCTION__, __LINE__);
136                 goto fail;
137         }
138         if (ehdr.e_machine != EM_SPU) {
139                 printk(KERN_ERR "SPU_PROF: "
140                        "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
141                        __FUNCTION__,  __LINE__);
142                 goto fail;
143         }
144         if (ehdr.e_type != ET_EXEC) {
145                 printk(KERN_ERR "SPU_PROF: "
146                        "%s, line %d: Unexpected e_type parsing SPU ELF\n",
147                        __FUNCTION__, __LINE__);
148                 goto fail;
149         }
150         phdr_start = spu_elf_start + ehdr.e_phoff;
151         shdr_start = spu_elf_start + ehdr.e_shoff;
152
153         /* Traverse program headers.  */
154         for (i = 0; i < ehdr.e_phnum; i++) {
155                 if (copy_from_user(&phdr,
156                                    (void *) (phdr_start + i * sizeof(phdr)),
157                                    sizeof(phdr)))
158                         goto fail;
159
160                 if (phdr.p_type != PT_LOAD)
161                         continue;
162                 if (phdr.p_flags & (1 << 27))
163                         continue;
164
165                 map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
166                                   phdr.p_offset, 0, 0);
167                 if (!map)
168                         goto fail;
169         }
170
171         pr_debug("SPU_PROF: Created non-overlay maps\n");
172         /* Traverse section table and search for overlay-related symbols.  */
173         for (i = 0; i < ehdr.e_shnum; i++) {
174                 if (copy_from_user(&shdr,
175                                    (void *) (shdr_start + i * sizeof(shdr)),
176                                    sizeof(shdr)))
177                         goto fail;
178
179                 if (shdr.sh_type != SHT_SYMTAB)
180                         continue;
181                 if (shdr.sh_entsize != sizeof (sym))
182                         continue;
183
184                 if (copy_from_user(&shdr_str,
185                                    (void *) (shdr_start + shdr.sh_link *
186                                              sizeof(shdr)),
187                                    sizeof(shdr)))
188                         goto fail;
189
190                 if (shdr_str.sh_type != SHT_STRTAB)
191                         goto fail;;
192
193                 for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
194                         if (copy_from_user(&sym, (void *) (spu_elf_start +
195                                                        shdr.sh_offset + j *
196                                                            sizeof (sym)),
197                                            sizeof (sym)))
198                                 goto fail;
199
200                         if (copy_from_user(name, (void *)
201                                            (spu_elf_start + shdr_str.sh_offset +
202                                             sym.st_name),
203                                            20))
204                                 goto fail;
205
206                         if (memcmp(name, "_ovly_table", 12) == 0)
207                                 ovly_table_sym = sym.st_value;
208                         if (memcmp(name, "_ovly_buf_table", 16) == 0)
209                                 ovly_buf_table_sym = sym.st_value;
210                         if (memcmp(name, "_ovly_table_end", 16) == 0)
211                                 ovly_table_end_sym = sym.st_value;
212                         if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
213                                 ovly_buf_table_end_sym = sym.st_value;
214                 }
215         }
216
217         /* If we don't have overlays, we're done.  */
218         if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
219             || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
220                 pr_debug("SPU_PROF: No overlay table found\n");
221                 goto out;
222         } else {
223                 pr_debug("SPU_PROF: Overlay table found\n");
224         }
225
226         /* The _ovly_table symbol represents a table with one entry
227          * per overlay section.  The _ovly_buf_table symbol represents
228          * a table with one entry per overlay region.
229          * The struct spu_overlay_info gives the structure of the _ovly_table
230          * entries.  The structure of _ovly_table_buf is simply one
231          * u32 word per entry.
232          */
233         overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
234                                             aSpu, &grd_val);
235         if (overlay_tbl_offset < 0) {
236                 printk(KERN_ERR "SPU_PROF: "
237                        "%s, line %d: Error finding SPU overlay table\n",
238                        __FUNCTION__, __LINE__);
239                 goto fail;
240         }
241         ovly_table = spu_elf_start + overlay_tbl_offset;
242
243         n_ovlys = (ovly_table_end_sym -
244                    ovly_table_sym) / sizeof (ovly);
245
246         /* Traverse overlay table.  */
247         for (i = 0; i < n_ovlys; i++) {
248                 if (copy_from_user(&ovly, (void *)
249                                    (ovly_table + i * sizeof (ovly)),
250                                    sizeof (ovly)))
251                         goto fail;
252
253                 /* The ovly.vma/size/offset arguments are analogous to the same
254                  * arguments used above for non-overlay maps.  The final two
255                  * args are referred to as the guard pointer and the guard
256                  * value.
257                  * The guard pointer is an entry in the _ovly_buf_table,
258                  * computed using ovly.buf as the index into the table.  Since
259                  * ovly.buf values begin at '1' to reference the first (or 0th)
260                  * entry in the _ovly_buf_table, the computation subtracts 1
261                  * from ovly.buf.
262                  * The guard value is stored in the _ovly_buf_table entry and
263                  * is an index (starting at 1) back to the _ovly_table entry
264                  * that is pointing at this _ovly_buf_table entry.  So, for
265                  * example, for an overlay scenario with one overlay segment
266                  * and two overlay sections:
267                  *      - Section 1 points to the first entry of the
268                  *        _ovly_buf_table, which contains a guard value
269                  *        of '1', referencing the first (index=0) entry of
270                  *        _ovly_table.
271                  *      - Section 2 points to the second entry of the
272                  *        _ovly_buf_table, which contains a guard value
273                  *        of '2', referencing the second (index=1) entry of
274                  *        _ovly_table.
275                  */
276                 map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
277                                   ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
278                 if (!map)
279                         goto fail;
280         }
281         goto out;
282
283  fail:
284         map = NULL;
285  out:
286         return map;
287 }