/* Generate assembler source containing symbol information
*
* Copyright 2002 by Kai Germaschewski
*
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
*
* Usage: kallsyms [--all-symbols] [--absolute-percpu]
* [--base-relative] [--lto-clang] in.map > out.S
*
* Table compression uses all the unused char codes on the symbols and
* maps these to the most used substrings (tokens). For instance, it might
* map char code 0xF7 to represent "write_" and then in every symbol where
* "write_" appears it can be replaced by 0xF7, saving 5 bytes.
* The used codes themselves are also placed in the table so that the
* decompression can work without "special cases".
* Applied to kernel symbols, this usually produces a compression ratio
* of about 50%.
*
*/
#include <errno.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
/*
 * Number of elements in a true array object (not a pointer).
 * The argument is parenthesized inside the expansion so that any
 * array-valued expression, e.g. a dereferenced cast, is subscripted as a
 * whole rather than having the [0] bind to its last operand.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/*
 * Upper bound on symbol name length: read_symbol() rejects any name whose
 * strlen() is >= this value. Per the diagnostic printed there, the kernel's
 * own KSYM_NAME_LEN has to be increased together with this one.
 */
#define KSYM_NAME_LEN 512
/*
 * One symbol record. Records are reached through the global `table`
 * array of struct sym_entry pointers. The structure ends in a flexible
 * array member, so each record must be allocated with room for sym[].
 */
struct sym_entry {
/* Symbol address. NOTE(review): presumably the value parsed from the map line; confirm. */
unsigned long long addr;
/* NOTE(review): presumably the number of bytes in use in sym[]; confirm at the allocation site. */
unsigned int len;
/* NOTE(review): use is not visible in this part of the file; confirm. */
unsigned int seq;
/* NOTE(review): use is not visible in this part of the file; confirm. */
unsigned int start_pos;
/* NOTE(review): looks like a flag tied to --absolute-percpu handling; confirm. */
unsigned int percpu_absolute;
/*
 * Symbol bytes. sym_name() returns sym + 1, so the name starts at the
 * second byte. NOTE(review): the first byte presumably holds the symbol
 * type character; confirm where the entry is filled in.
 */
unsigned char sym[];
};
/*
 * An address range delimited by two marker symbols. start/end are filled
 * in by check_symbol_range() when a symbol named start_sym/end_sym is seen.
 */
struct addr_range {
	const char *start_sym;
	const char *end_sym;
	unsigned long long start;
	unsigned long long end;
};

/* Address of the "_text" symbol, recorded by read_symbol(). */
static unsigned long long _text;
/* NOTE(review): presumably the base used for --base-relative output; confirm. */
static unsigned long long relative_base;

/*
 * Ranges bounded by the text marker symbols. start and end are left
 * zero-initialized here.
 */
static struct addr_range text_ranges[] = {
	{ .start_sym = "_stext",     .end_sym = "_etext" },
	{ .start_sym = "_sinittext", .end_sym = "_einittext" },
};
#define text_range_text     (&text_ranges[0])
#define text_range_inittext (&text_ranges[1])

/*
 * Range bounded by the per-cpu marker symbols. start is initialized to
 * the largest unsigned long long value and end to 0.
 * NOTE(review): presumably so the range is empty until both markers have
 * been seen; confirm at the use site.
 */
static struct addr_range percpu_range = {
	.start_sym = "__per_cpu_start",
	.end_sym   = "__per_cpu_end",
	.start     = -1ULL,
	.end       = 0,
};
/*
 * Array of pointers to symbol records.
 * NOTE(review): table_size/table_cnt look like allocated capacity and
 * number of entries in use; the code maintaining them is not in view.
 */
static struct sym_entry **table;
static unsigned int table_size, table_cnt;
/*
 * Option flags; the names mirror the switches listed by usage().
 * NOTE(review): option parsing is not in view; confirm the mapping.
 */
static int all_symbols;
static int absolute_percpu;
static int base_relative;
static int lto_clang;
/*
 * 0x10000 counters, i.e. one slot per possible pair of byte values
 * (256 * 256). NOTE(review): presumably the estimated gain of replacing
 * each two-byte token during compression; confirm.
 */
static int token_profit[0x10000];
/*
 * The table that holds the result of the compression: 256 entries of
 * two bytes each. NOTE(review): best_table_len presumably gives the
 * number of valid bytes per entry; confirm.
 */
static unsigned char best_table[256][2];
static unsigned char best_table_len[256];
/*
 * Print the command-line synopsis to stderr and terminate the program
 * with exit status 1. Does not return.
 */
static void usage(void)
{
	static const char synopsis[] =
		"Usage: kallsyms [--all-symbols] [--absolute-percpu] "
		"[--base-relative] [--lto-clang] in.map > out.S\n";

	fputs(synopsis, stderr);
	exit(1);
}
/*
 * Return a pointer to the symbol's name. The name starts at the second
 * byte of s->sym; the first byte is skipped. The returned pointer aliases
 * the entry's own storage.
 */
static char *sym_name(const struct sym_entry *s)
{
	const unsigned char *name = &s->sym[1];

	return (char *)name;
}
/*
 * Decide whether a symbol should be left out.
 *
 * name: NUL-terminated symbol name.
 * type: single-character symbol type taken from the input line.
 *
 * Returns true for type 'u' or 'n', and for absolute symbols ('A' or 'a')
 * whose name is not on the small keep-list below; false otherwise.
 */
static bool is_ignored_symbol(const char *name, char type)
{
	/* Absolute symbols that are useful enough to keep. */
	static const char *const kept_absolute[] = {
		"__kernel_syscall_via_break",
		"__kernel_syscall_via_epc",
		"__kernel_sigtramp",
		"__gp",
	};
	size_t i;

	if (type == 'u' || type == 'n')
		return true;

	/*
	 * <ctype.h> functions are only defined for EOF and values
	 * representable as unsigned char, and plain char may be negative.
	 */
	if (toupper((unsigned char)type) != 'A')
		return false;

	for (i = 0; i < sizeof(kept_absolute) / sizeof(kept_absolute[0]); i++) {
		if (strcmp(name, kept_absolute[i]) == 0)
			return false;
	}

	return true;
}
/*
 * Record addr as a boundary of the first range whose start or end marker
 * symbol is named sym.
 *
 * sym:     symbol name to look up among the range markers.
 * addr:    address to store in the matching range.
 * ranges:  array of ranges to search and update.
 * entries: number of elements in ranges; zero or a negative value means
 *          nothing is examined.
 *
 * Stops at the first match, so at most one field of one range is written.
 * Does nothing when sym matches no marker.
 */
static void check_symbol_range(const char *sym, unsigned long long addr,
			       struct addr_range *ranges, int entries)
{
	/*
	 * Signed index on purpose: with an unsigned index a negative
	 * `entries` would be converted to a huge bound in the comparison
	 * and the loop would run off the end of the array.
	 */
	int i;

	for (i = 0; i < entries; ++i) {
		struct addr_range *ar = &ranges[i];

		if (strcmp(sym, ar->start_sym) == 0) {
			ar->start = addr;
			return;
		}
		if (strcmp(sym, ar->end_sym) == 0) {
			ar->end = addr;
			return;
		}
	}
}
static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
{
char *name, type, *p;
unsigned long long addr;
size_t len;
ssize_t readlen;
struct sym_entry *sym;
errno = 0;
readlen = getline(buf, buf_len, in);
if (readlen < 0) {
if (errno) {
perror("read_symbol");
exit(EXIT_FAILURE);
}
return NULL;
}
if ((*buf)[readlen - 1] == '\n')
(*buf)[readlen - 1] = 0;
addr = strtoull(*buf, &p, 16);
if (*buf == p || *p++ != ' ' || !isascii((type = *p++)) || *p++ != ' ') {
fprintf(stderr, "line format error\n");
exit(EXIT_FAILURE);
}
name = p;
len = strlen(name);
if (len >= KSYM_NAME_LEN) {
fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n"
"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
name, len, KSYM_NAME_LEN);
return NULL;
}
if (strcmp(name, "_text") == 0)
_text = addr;
/* Ignore most absolute/undefined (?) symbols. */
if (is_ignored_symbol(name, type))
return NULL;
check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
check_symbol_range(name, addr,