kallsyms.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904
  1. /* Generate assembler source containing symbol information
  2. *
  3. * Copyright 2002 by Kai Germaschewski
  4. *
  5. * This software may be used and distributed according to the terms
  6. * of the GNU General Public License, incorporated herein by reference.
  7. *
  8. * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S
  9. *
  10. * Table compression uses all the unused char codes on the symbols and
  11. * maps these to the most used substrings (tokens). For instance, it might
  12. * map char code 0xF7 to represent "write_" and then in every symbol where
  13. * "write_" appears it can be replaced by 0xF7, saving 5 bytes.
  14. * The used codes themselves are also placed in the table so that the
  15. * decompression can work without "special cases".
  16. * Applied to kernel symbols, this usually produces a compression ratio
  17. * of about 50%.
  18. *
  19. */
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include <ctype.h>
  /*
   * Number of elements in a true array.  Only meaningful for array objects;
   * passing a pointer silently yields a wrong count.  The argument is
   * parenthesized so that any array-valued expression is measured as a whole.
   */
  #ifndef ARRAY_SIZE
  #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
  #endif

  /* Symbol name length limit; sizes the token expansion buffer in write_src(). */
  #define KSYM_NAME_LEN 64
  /* One symbol taken from the nm listing. */
  struct sym_entry {
      /* address column of the listing */
      unsigned long long addr;
      /* number of bytes in use in sym: the type byte plus the name */
      unsigned int len;
      /* table index at read time; last tie-breaker when sorting */
      unsigned int start_pos;
      /* heap buffer holding the type byte followed by the name */
      unsigned char *sym;
  };
  /* An address interval delimited by a pair of named marker symbols. */
  struct text_range {
      const char *stext;          /* name of the symbol that opens the range */
      const char *etext;          /* name of the symbol that closes the range */
      unsigned long long start;   /* first address of the range */
      unsigned long long end;     /* last address of the range (inclusive) */
  };
  38. static unsigned long long _text;
  39. static struct text_range text_ranges[] = {
  40. { "_image_text_start", "_image_text_end" },
  41. { "_ramfunc_ram_start", "_ramfunc_ram_end" },
  42. };
  43. #define text_range_text (&text_ranges[0])
  44. #define text_range_text_ramfunc (&text_ranges[1])
  45. static struct sym_entry *table;
  46. static unsigned int table_size, table_cnt;
  47. static int all_symbols = 0;
  48. static char symbol_prefix_char = '\0';
  49. static unsigned long long kernel_start_addr = 0;
  50. static int no_symbols_name = 0;
  51. static FILE * fbin;
  52. int token_profit[0x10000];
  53. /* the table that holds the result of the compression */
  54. unsigned char best_table[256][2];
  55. unsigned char best_table_len[256];
  /*
   * Print the command-line synopsis to stderr and terminate with status 1.
   * The text lists every option main() accepts.
   */
  static void usage(void)
  {
      fputs("Usage: kallsyms [--all-symbols] "
            "[--symbol-prefix=<prefix char>] "
            "[--page-offset=<CONFIG_PAGE_OFFSET>] "
            "[--no_symbols_name] "
            "[--fbin=file] "
            "< in.map > out.S\n", stderr);
      exit(1);
  }
  65. #define SRAM_ADDR 0x1000000
  66. #define check_in_sram(addr) (addr < (SRAM_ADDR+0x100000))
  67. static void check_text_se(void)
  68. {
  69. int i;
  70. struct sym_entry *tbl;
  71. if(text_range_text->start == 0){
  72. for (i = 0; i < table_cnt; i++) {
  73. tbl = &table[i];
  74. if(check_in_sram(tbl->addr))
  75. continue;
  76. if(text_range_text->start == 0){
  77. text_range_text->start = tbl->addr;
  78. }
  79. }
  80. tbl = &table[i-1];
  81. text_range_text->end = tbl->addr;
  82. fprintf(stderr, "text start=0x%llx, end=0x%llx\n", text_range_text->start, text_range_text->end);
  83. }
  84. if(text_range_text_ramfunc->start == 0){
  85. for (i = 0; i < table_cnt; i++) {
  86. tbl = &table[i];
  87. if(check_in_sram(tbl->addr)){
  88. if(text_range_text_ramfunc->start == 0)
  89. text_range_text_ramfunc->start = tbl->addr;
  90. }else{
  91. if(text_range_text_ramfunc->start != 0 ){
  92. break;
  93. }
  94. }
  95. }
  96. tbl = &table[i-1];
  97. text_range_text_ramfunc->end = tbl->addr;
  98. fprintf(stderr, "text ramfunc start=0x%llx, end=0x%llx\n", text_range_text_ramfunc->start, text_range_text_ramfunc->end);
  99. }
  100. }
  /*
   * This ignores the intensely annoying "mapping symbols" found
   * in ARM ELF files: $a, $t and $d (optionally followed by ".<suffix>").
   *
   * Returns 1 for a mapping symbol, 0 otherwise.  The second character is
   * checked for NUL explicitly: strchr() also matches the terminator of its
   * search set, so without the check a lone "$" would make the function read
   * str[2], one byte past the end of the string.
   */
  static inline int is_arm_mapping_symbol(const char *str)
  {
      if (str[0] != '$' || str[1] == '\0')
          return 0;
      return strchr("atd", str[1]) != NULL
             && (str[2] == '\0' || str[2] == '.');
  }
  110. static int read_symbol_tr(const char *sym, unsigned long long addr)
  111. {
  112. size_t i;
  113. struct text_range *tr;
  114. for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) {
  115. tr = &text_ranges[i];
  116. if (strcmp(sym, tr->stext) == 0) {
  117. tr->start = addr;
  118. return 0;
  119. } else if (strcmp(sym, tr->etext) == 0) {
  120. tr->end = addr;
  121. return 0;
  122. }
  123. }
  124. return 1;
  125. }
  126. static int read_symbol(FILE *in, struct sym_entry *s)
  127. {
  128. char str[500];
  129. char *sym, stype;
  130. int rc;
  131. rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
  132. if (rc != 3) {
  133. if (rc != EOF && fgets(str, 500, in) == NULL)
  134. fprintf(stderr, "Read error or end of file.\n");
  135. return -1;
  136. }
  137. sym = str;
  138. /* skip prefix char */
  139. if (symbol_prefix_char && str[0] == symbol_prefix_char)
  140. sym++;
  141. /* Ignore most absolute/undefined (?) symbols. */
  142. if (read_symbol_tr(sym, s->addr) == 0)
  143. /* nothing to do */;
  144. else if (toupper(stype) == 'A')
  145. {
  146. #if 0
  147. /* Keep these useful absolute symbols */
  148. if (strcmp(sym, "__kernel_syscall_via_break") &&
  149. strcmp(sym, "__kernel_syscall_via_epc") &&
  150. strcmp(sym, "__kernel_sigtramp") &&
  151. strcmp(sym, "__gp"))
  152. return -1;
  153. #endif
  154. return -1;
  155. }
  156. else if (toupper(stype) == 'U' ||
  157. is_arm_mapping_symbol(sym))
  158. return -1;
  159. /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
  160. else if (str[0] == '$')
  161. return -1;
  162. /* exclude debugging symbols */
  163. else if (stype == 'N')
  164. return -1;
  165. else if (toupper(stype) == 'D')
  166. return -1;
  167. else if (strlen(str) > 64)
  168. return -1;
  169. /* include the type field in the symbol name, so that it gets
  170. * compressed together */
  171. s->len = strlen(str) + 1;
  172. s->sym = malloc(s->len + 1);
  173. if (!s->sym) {
  174. fprintf(stderr, "kallsyms failure: "
  175. "unable to allocate required amount of memory\n");
  176. exit(EXIT_FAILURE);
  177. }
  178. strcpy((char *)s->sym + 1, str);
  179. s->sym[0] = stype;
  180. return 0;
  181. }
  182. static int symbol_valid_tr(struct sym_entry *s)
  183. {
  184. size_t i;
  185. struct text_range *tr;
  186. for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) {
  187. tr = &text_ranges[i];
  188. if (s->addr >= tr->start && s->addr <= tr->end)
  189. return 1;
  190. }
  191. return 0;
  192. }
  193. static int symbol_valid(struct sym_entry *s)
  194. {
  195. /* Symbols which vary between passes. Passes 1 and 2 must have
  196. * identical symbol lists. The kallsyms_* symbols below are only added
  197. * after pass 1, they would be included in pass 2 when --all-symbols is
  198. * specified so exclude them to get a stable symbol list.
  199. */
  200. static char *special_symbols[] = {
  201. "kallsyms_addresses",
  202. "kallsyms_num_syms",
  203. "kallsyms_names",
  204. "kallsyms_markers",
  205. "kallsyms_token_table",
  206. "kallsyms_token_index",
  207. /* Exclude linker generated symbols which vary between passes */
  208. "_SDA_BASE_", /* ppc */
  209. "_SDA2_BASE_", /* ppc */
  210. NULL };
  211. int i;
  212. int offset = 1;
  213. if (s->addr < kernel_start_addr)
  214. return 0;
  215. /* skip prefix char */
  216. if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
  217. offset++;
  218. /* if --all-symbols is not specified, then symbols outside the text
  219. * and inittext sections are discarded */
  220. if (!all_symbols) {
  221. if (symbol_valid_tr(s) == 0)
  222. return 0;
  223. /* Corner case. Discard any symbols with the same value as
  224. * _etext _einittext; they can move between pass 1 and 2 when
  225. * the kallsyms data are added. If these symbols move then
  226. * they may get dropped in pass 2, which breaks the kallsyms
  227. * rules.
  228. */
  229. if ((s->addr == text_range_text->end &&
  230. strcmp((char *)s->sym + offset, text_range_text->etext)) ||
  231. (s->addr == text_range_text_ramfunc->end &&
  232. strcmp((char *)s->sym + offset, text_range_text_ramfunc->etext)))
  233. return 0;
  234. }
  235. /* Exclude symbols which vary between passes. */
  236. if (strstr((char *)s->sym + offset, "_compiled."))
  237. return 0;
  238. for (i = 0; special_symbols[i]; i++)
  239. if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
  240. return 0;
  241. return 1;
  242. }
  243. static void read_map(FILE *in)
  244. {
  245. while (!feof(in)) {
  246. if (table_cnt >= table_size) {
  247. table_size += 10000;
  248. table = realloc(table, sizeof(*table) * table_size);
  249. if (!table) {
  250. fprintf(stderr, "out of memory\n");
  251. exit (1);
  252. }
  253. }
  254. if (read_symbol(in, &table[table_cnt]) == 0) {
  255. table[table_cnt].start_pos = table_cnt;
  256. table_cnt++;
  257. }
  258. }
  259. _text = text_range_text->start;
  260. }
  261. static void output_label(char *label)
  262. {
  263. if (symbol_prefix_char)
  264. printf(".globl %c%s\n", symbol_prefix_char, label);
  265. else
  266. printf(".globl %s\n", label);
  267. printf("\tALGN\n");
  268. if (symbol_prefix_char)
  269. printf("%c%s:\n", symbol_prefix_char, label);
  270. else
  271. printf("%s:\n", label);
  272. }
  273. /* uncompress a compressed symbol. When this function is called, the best table
  274. * might still be compressed itself, so the function needs to be recursive */
  275. static int expand_symbol(unsigned char *data, int len, char *result)
  276. {
  277. int c, rlen, total=0;
  278. while (len) {
  279. c = *data;
  280. /* if the table holds a single char that is the same as the one
  281. * we are looking for, then end the search */
  282. if (best_table[c][0]==c && best_table_len[c]==1) {
  283. *result++ = c;
  284. total++;
  285. } else {
  286. /* if not, recurse and expand */
  287. rlen = expand_symbol(best_table[c], best_table_len[c], result);
  288. total += rlen;
  289. result += rlen;
  290. }
  291. data++;
  292. len--;
  293. }
  294. *result=0;
  295. return total;
  296. }
  /*
   * Layout of the optional binary (--fbin) output, a TLV container:
   *
   *   0x00-0x03  magic
   *   0x04-0x07  length
   *   0x08-0x0b  checksum
   *   0x0c-0x0f  reserved
   *   0x10-...   TLV records
   */
  #define TLV_MAGIC             0x59355935

  /* Record types, one per generated table. */
  #define TYPE_KYMS_ADDR        0x01 /* kallsyms_addresses */
  #define TYPE_KYMS_NUM         0x02 /* kallsyms_num_syms */
  #define TYPE_KYMS_NAME        0x03 /* kallsyms_names */
  #define TYPE_KYMS_MARKERS     0x04 /* kallsyms_markers */
  #define TYPE_KYMS_TOKEN_TABEL 0x05 /* kallsyms_token_table */
  #define TYPE_KYMS_TOKEN_INDEX 0x06 /* kallsyms_token_index */
  312. int fbin_write(const void *buf, int len)
  313. {
  314. if(fbin != NULL){
  315. if(fwrite(buf,len, 1, fbin) <=0)
  316. fprintf(stderr,"fbin write err\n");
  317. }
  318. return 0;
  319. }
  /* Write addr to the binary output as 4 bytes in host byte order. */
  int fbin_addr(unsigned int addr)
  {
      const unsigned int word = addr;

      return fbin_write(&word, 4);
  }
  /* Write a single byte to the binary output. */
  int fbin_byte(unsigned char ch)
  {
      const unsigned char octet = ch;

      return fbin_write(&octet, 1);
  }
  /* Write ch to the binary output as 2 bytes in host byte order. */
  int fbin_short(unsigned short ch)
  {
      const unsigned short half = ch;

      return fbin_write(&half, 2);
  }
  332. int fbin_seek(int offset)
  333. {
  334. if(fbin != NULL)
  335. return fseek(fbin, offset, SEEK_SET);
  336. return 0;
  337. }
  338. int fbin_tell(void)
  339. {
  340. if(fbin != NULL)
  341. return ftell(fbin);
  342. return 0;
  343. }
  344. int fbin_type_len(unsigned int type, unsigned int len, int offset)
  345. {
  346. unsigned int buf[2],tlen;
  347. char cbuf[4];
  348. if(fbin == NULL)
  349. return 0;
  350. if(len&0x3){//algin 4
  351. tlen = 4- (len&0x3);
  352. memset(cbuf,0,4);
  353. fbin_write(cbuf, tlen);
  354. fprintf(stderr,"algin =0x%x, panding=0x%x\n", len, tlen);
  355. len += tlen;
  356. }
  357. buf[0] = type;
  358. buf[1] = len;
  359. fbin_seek(offset);
  360. fbin_write(buf, 8);
  361. fprintf(stderr,"offset=0x%x,type=%d,len=0x%x\n", offset, type, len);
  362. fseek(fbin, 0, SEEK_END);
  363. return 0;
  364. }
  365. void fbin_init(void)
  366. {
  367. if(fbin == NULL)
  368. return ;
  369. fbin_addr(TLV_MAGIC);
  370. fbin_addr(0);
  371. fbin_addr(0);
  372. fbin_addr(0);
  373. }
  374. void fbin_end(void)
  375. {
  376. unsigned int *pbuf;
  377. unsigned int len ,ck, ret, i;
  378. if(fbin == NULL)
  379. return ;
  380. fseek(fbin, 0, SEEK_END);
  381. len = ftell(fbin);
  382. pbuf = (unsigned int *)malloc(len+4);
  383. if(pbuf == NULL){
  384. fprintf(stderr,"malloc %d fail\n", len);
  385. return;
  386. }
  387. fseek(fbin, 0, SEEK_SET);
  388. ret = fread(pbuf,len, 1, fbin);
  389. if((ret != 1) || (len < 16)) {
  390. fprintf(stderr,"read fail, len=0x%x, ret=%d\n", len, ret);
  391. free(pbuf);
  392. return;
  393. }
  394. ck = 0;
  395. for(i = 4; i < len/4; i++)
  396. ck += pbuf[i];
  397. fseek(fbin, 4, SEEK_SET);
  398. fbin_addr(len-16);
  399. fbin_addr(ck);
  400. fprintf(stderr,"ok, len=0x%x, ck=0x%x\n", len, ck);
  401. free(pbuf);
  402. fclose(fbin);
  403. }
  404. static FILE * ftbl;
  405. static void ftbl_init(void)
  406. {
  407. int i, fline=0;
  408. struct sym_entry *tbl;
  409. ftbl = fopen("ktbl.map","w");
  410. if(ftbl != NULL)
  411. fprintf(stderr, "create table file ok\n");
  412. else
  413. fprintf(stderr, "create table file error\n");
  414. for (i = 0; i < table_cnt; i++) {
  415. tbl = &table[i];
  416. fprintf(ftbl, "0x%08x 0x%08llx %s\n", fline++, tbl->addr, (char*)(tbl->sym+1));
  417. }
  418. fclose(ftbl);
  419. }
  420. static void write_src(void)
  421. {
  422. unsigned int i, k, off,foffset, f_start;
  423. unsigned int best_idx[256];
  424. unsigned int *markers;
  425. char buf[KSYM_NAME_LEN];
  426. // printf("#include <asm/types.h>\n");
  427. printf("#if BITS_PER_LONG == 64\n");
  428. printf("#define PTR .quad\n");
  429. printf("#define ALGN .align 8\n");
  430. printf("#else\n");
  431. printf("#define PTR .long\n");
  432. printf("#define ALGN .align 4\n");
  433. printf("#endif\n");
  434. printf("\t.section .rodata, \"a\"\n");
  435. /* Provide proper symbols relocatability by their '_text'
  436. * relativeness. The symbol names cannot be used to construct
  437. * normal symbol references as the list of symbols contains
  438. * symbols that are declared static and are private to their
  439. * .o files. This prevents .tmp_kallsyms.o or any other
  440. * object from referencing them.
  441. */
  442. fbin_init();
  443. f_start = 0x10;
  444. fbin_type_len(TYPE_KYMS_ADDR, 0, f_start);
  445. output_label("kallsyms_addresses");
  446. for (i = 0; i < table_cnt; i++) {
  447. if (toupper(table[i].sym[0]) != 'A') {
  448. if (_text <= table[i].addr)
  449. printf("\tPTR\t_image_text_start + %#llx\n",
  450. table[i].addr - _text);
  451. else
  452. printf("\tPTR\t_image_text_start - %#llx\n",
  453. _text - table[i].addr);
  454. } else {
  455. printf("\tPTR\t%#llx\n", table[i].addr);
  456. }
  457. fbin_addr(table[i].addr);
  458. }
  459. printf("\n");
  460. foffset = fbin_tell();
  461. fbin_type_len(TYPE_KYMS_ADDR, foffset-f_start-8, f_start);
  462. f_start = fbin_tell();
  463. fbin_type_len(TYPE_KYMS_NUM, 0, f_start);
  464. output_label("kallsyms_num_syms");
  465. printf("\tPTR\t%d\n", table_cnt);
  466. printf("\n");
  467. fbin_addr(table_cnt);
  468. foffset = fbin_tell();
  469. fbin_type_len(TYPE_KYMS_NUM, foffset-f_start-8, f_start);
  470. f_start = fbin_tell();
  471. /* table of offset markers, that give the offset in the compressed stream
  472. * every 256 symbols */
  473. markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
  474. if (!markers) {
  475. fprintf(stderr, "kallsyms failure: "
  476. "unable to allocate required memory\n");
  477. exit(EXIT_FAILURE);
  478. }
  479. if (!no_symbols_name) {
  480. fbin_type_len(TYPE_KYMS_NAME, 0, f_start);
  481. output_label("kallsyms_names");
  482. off = 0;
  483. for (i = 0; i < table_cnt; i++) {
  484. if ((i & 0xFF) == 0)
  485. markers[i >> 8] = off;
  486. printf("\t.byte 0x%02x", table[i].len);
  487. fbin_byte(table[i].len);
  488. for (k = 0; k < table[i].len; k++)
  489. printf(", 0x%02x", table[i].sym[k]);
  490. printf("\n");
  491. fbin_write(table[i].sym, table[i].len);
  492. off += table[i].len + 1;
  493. }
  494. printf("\n");
  495. foffset = fbin_tell();
  496. fbin_type_len(TYPE_KYMS_NAME, foffset-f_start-8, f_start);
  497. f_start = fbin_tell();
  498. }
  499. fbin_type_len(TYPE_KYMS_MARKERS, 0, f_start);
  500. output_label("kallsyms_markers");
  501. for (i = 0; i < ((table_cnt + 255) >> 8); i++){
  502. printf("\tPTR\t%d\n", markers[i]);
  503. fbin_addr(markers[i]);
  504. }
  505. printf("\n");
  506. free(markers);
  507. foffset = fbin_tell();
  508. fbin_type_len(TYPE_KYMS_MARKERS, foffset-f_start-8, f_start);
  509. f_start = fbin_tell();
  510. fbin_type_len(TYPE_KYMS_TOKEN_TABEL, 0, f_start);
  511. output_label("kallsyms_token_table");
  512. off = 0;
  513. for (i = 0; i < 256; i++) {
  514. best_idx[i] = off;
  515. expand_symbol(best_table[i], best_table_len[i], buf);
  516. printf("\t.asciz\t\"%s\"\n", buf);
  517. fbin_write(buf, strlen(buf) + 1);
  518. off += strlen(buf) + 1;
  519. }
  520. printf("\n");
  521. foffset = fbin_tell();
  522. fbin_type_len(TYPE_KYMS_TOKEN_TABEL, foffset-f_start-8, f_start);
  523. f_start = fbin_tell();
  524. fbin_type_len(TYPE_KYMS_TOKEN_INDEX, 0, f_start);
  525. output_label("kallsyms_token_index");
  526. for (i = 0; i < 256; i++){
  527. printf("\t.short\t%d\n", best_idx[i]);
  528. fbin_short(best_idx[i]);
  529. }
  530. printf("\n");
  531. foffset = fbin_tell();
  532. fbin_type_len(TYPE_KYMS_TOKEN_INDEX, foffset-f_start-8, f_start);
  533. f_start = fbin_tell();
  534. fbin_end();
  535. }
  536. /* table lookup compression functions */
  537. /* count all the possible tokens in a symbol */
  538. static void learn_symbol(unsigned char *symbol, int len)
  539. {
  540. int i;
  541. for (i = 0; i < len - 1; i++)
  542. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
  543. }
  544. /* decrease the count for all the possible tokens in a symbol */
  545. static void forget_symbol(unsigned char *symbol, int len)
  546. {
  547. int i;
  548. for (i = 0; i < len - 1; i++)
  549. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
  550. }
  551. /* remove all the invalid symbols from the table and do the initial token count */
  552. static void build_initial_tok_table(void)
  553. {
  554. unsigned int i, pos;
  555. pos = 0;
  556. for (i = 0; i < table_cnt; i++) {
  557. if ( symbol_valid(&table[i]) ) {
  558. if (pos != i)
  559. table[pos] = table[i];
  560. learn_symbol(table[pos].sym, table[pos].len);
  561. pos++;
  562. }
  563. }
  564. table_cnt = pos;
  565. }
  /*
   * Locate the first occurrence of the two-byte token inside the first len
   * bytes of str.  Returns a pointer to the match, or NULL when there is
   * none (including when len is smaller than 2).
   */
  static void *find_token(unsigned char *str, int len, unsigned char *token)
  {
      int pos = 0;

      while (pos + 1 < len) {
          if (str[pos] == token[0] && str[pos + 1] == token[1])
              return str + pos;
          pos++;
      }
      return NULL;
  }
  575. /* replace a given token in all the valid symbols. Use the sampled symbols
  576. * to update the counts */
  577. static void compress_symbols(unsigned char *str, int idx)
  578. {
  579. unsigned int i, len, size;
  580. unsigned char *p1, *p2;
  581. for (i = 0; i < table_cnt; i++) {
  582. len = table[i].len;
  583. p1 = table[i].sym;
  584. /* find the token on the symbol */
  585. p2 = find_token(p1, len, str);
  586. if (!p2) continue;
  587. /* decrease the counts for this symbol's tokens */
  588. forget_symbol(table[i].sym, len);
  589. size = len;
  590. do {
  591. *p2 = idx;
  592. p2++;
  593. size -= (p2 - p1);
  594. memmove(p2, p2 + 1, size);
  595. p1 = p2;
  596. len--;
  597. if (size < 2) break;
  598. /* find the token on the symbol */
  599. p2 = find_token(p1, size, str);
  600. } while (p2);
  601. table[i].len = len;
  602. /* increase the counts for this symbol's new tokens */
  603. learn_symbol(table[i].sym, len);
  604. }
  605. }
  606. /* search the token with the maximum profit */
  607. static int find_best_token(void)
  608. {
  609. int i, best, bestprofit;
  610. bestprofit=-10000;
  611. best = 0;
  612. for (i = 0; i < 0x10000; i++) {
  613. if (token_profit[i] > bestprofit) {
  614. best = i;
  615. bestprofit = token_profit[i];
  616. }
  617. }
  618. return best;
  619. }
  620. /* this is the core of the algorithm: calculate the "best" table */
  621. static void optimize_result(void)
  622. {
  623. int i, best;
  624. /* using the '\0' symbol last allows compress_symbols to use standard
  625. * fast string functions */
  626. for (i = 255; i >= 0; i--) {
  627. /* if this table slot is empty (it is not used by an actual
  628. * original char code */
  629. if (!best_table_len[i]) {
  630. /* find the token with the breates profit value */
  631. best = find_best_token();
  632. if (token_profit[best] == 0)
  633. break;
  634. /* place it in the "best" table */
  635. best_table_len[i] = 2;
  636. best_table[i][0] = best & 0xFF;
  637. best_table[i][1] = (best >> 8) & 0xFF;
  638. /* replace this token in all the valid symbols */
  639. compress_symbols(best_table[i], i);
  640. }
  641. }
  642. }
  643. /* start by placing the symbols that are actually used on the table */
  644. static void insert_real_symbols_in_table(void)
  645. {
  646. unsigned int i, j, c;
  647. memset(best_table, 0, sizeof(best_table));
  648. memset(best_table_len, 0, sizeof(best_table_len));
  649. for (i = 0; i < table_cnt; i++) {
  650. for (j = 0; j < table[i].len; j++) {
  651. c = table[i].sym[j];
  652. best_table[c][0]=c;
  653. best_table_len[c]=1;
  654. }
  655. }
  656. }
  657. static void optimize_token_table(void)
  658. {
  659. build_initial_tok_table();
  660. insert_real_symbols_in_table();
  661. /* When valid symbol is not registered, exit to error */
  662. if (!table_cnt) {
  663. fprintf(stderr, "No valid symbol.\n");
  664. exit(1);
  665. }
  666. optimize_result();
  667. }
  668. /* guess for "linker script provide" symbol */
  669. static int may_be_linker_script_provide_symbol(const struct sym_entry *se)
  670. {
  671. const char *symbol = (char *)se->sym + 1;
  672. int len = se->len - 1;
  673. if (len < 8)
  674. return 0;
  675. if (symbol[0] != '_' || symbol[1] != '_')
  676. return 0;
  677. /* __start_XXXXX */
  678. if (!memcmp(symbol + 2, "start_", 6))
  679. return 1;
  680. /* __stop_XXXXX */
  681. if (!memcmp(symbol + 2, "stop_", 5))
  682. return 1;
  683. /* __end_XXXXX */
  684. if (!memcmp(symbol + 2, "end_", 4))
  685. return 1;
  686. /* __XXXXX_start */
  687. if (!memcmp(symbol + len - 6, "_start", 6))
  688. return 1;
  689. /* __XXXXX_end */
  690. if (!memcmp(symbol + len - 4, "_end", 4))
  691. return 1;
  692. return 0;
  693. }
  /* Number of consecutive '_' characters at the beginning of str. */
  static int prefix_underscores_count(const char *str)
  {
      int count = 0;

      while (str[count] == '_')
          count++;
      return count;
  }
  701. static int compare_symbols(const void *a, const void *b)
  702. {
  703. const struct sym_entry *sa;
  704. const struct sym_entry *sb;
  705. int wa, wb;
  706. sa = a;
  707. sb = b;
  708. /* sort by address first */
  709. if (sa->addr > sb->addr)
  710. return 1;
  711. if (sa->addr < sb->addr)
  712. return -1;
  713. /* sort by "weakness" type */
  714. wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W');
  715. wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W');
  716. if (wa != wb)
  717. return wa - wb;
  718. /* sort by "linker script provide" type */
  719. wa = may_be_linker_script_provide_symbol(sa);
  720. wb = may_be_linker_script_provide_symbol(sb);
  721. if (wa != wb)
  722. return wa - wb;
  723. /* sort by the number of prefix underscores */
  724. wa = prefix_underscores_count((const char *)sa->sym + 1);
  725. wb = prefix_underscores_count((const char *)sb->sym + 1);
  726. if (wa != wb)
  727. return wa - wb;
  728. /* sort by initial order, so that other symbols are left undisturbed */
  729. return sa->start_pos - sb->start_pos;
  730. }
  731. static void sort_symbols(void)
  732. {
  733. qsort(table, table_cnt, sizeof(struct sym_entry), compare_symbols);
  734. check_text_se();
  735. }
  736. int main(int argc, char **argv)
  737. {
  738. fbin = NULL;
  739. if (argc >= 2) {
  740. int i;
  741. for (i = 1; i < argc; i++) {
  742. if(strcmp(argv[i], "--all-symbols") == 0)
  743. all_symbols = 1;
  744. else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
  745. char *p = &argv[i][16];
  746. /* skip quote */
  747. if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
  748. p++;
  749. symbol_prefix_char = *p;
  750. } else if (strncmp(argv[i], "--page-offset=", 14) == 0) {
  751. const char *p = &argv[i][14];
  752. kernel_start_addr = strtoull(p, NULL, 16);
  753. } else if(strcmp(argv[i], "--no_symbols_name") == 0) {
  754. no_symbols_name = 1;
  755. }else if(strncmp(argv[i], "--fbin=", 7) == 0) {
  756. fbin = fopen(&argv[i][7],"wb+");
  757. if(fbin != NULL)
  758. fprintf(stderr, "create file =%s ok\n", &argv[i][7]);
  759. else
  760. fprintf(stderr, "create file =%s error\n", &argv[i][7]);
  761. }
  762. else
  763. usage();
  764. }
  765. } else if (argc != 1)
  766. usage();
  767. read_map(stdin);
  768. sort_symbols();
  769. ftbl_init();
  770. optimize_token_table();
  771. write_src();
  772. return 0;
  773. }