xml_cleaner.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235
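  /*
   * xml_cleaner.c
   * XML file cleaner. At least one keyword (kw) together with its kwstr must be given.
   * Keywords:
   * -d  Delete the whole tag if its "name" contains the keyword string.
   * -k  Keep the tag if its "name" contains the keyword string, otherwise delete the whole tag.
   * -a  Append data at the end of the file; kwstr is the file holding the data to append.
   *     "-a null" is reserved for appending the custom sound-effect (DAE) config.
   * -h  Keep tags of type enum, struct or config; delete everything else.
   * -x  Delete a symbol and its value. Only affects the header of config tags, not their items.
   * -i  Delete a symbol and its value. Only affects the items of struct and config tags.
   *
   * xml_cleaner.exe xml_file[ out_xmlfile] kw1 kwstr1 [kw2 kwstr2 ...]
   */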
  14. #include <stdio.h>
  15. #include <stdlib.h>
  16. #include <string.h>
  17. #include <ctype.h>
  /* Boolean results returned by the check_* helpers. */
  #define YES 1
  #define NO 0
  /* Size in bytes of the buffer first allocated for each keyword's kwstr. */
  #define KWSTR_LEN (0x10)
  /* Every keyword letter (the character following '-') the tool accepts.
   * Register new keywords here. */
  static const char KeyWords[] = {
      'd', /* delete tags by name */
      'h', /* keep only enum/struct/config tags */
      'x', /* delete a symbol from config tag headers */
      'i', /* delete a symbol from items */
      'k', /* keep tags by name */
      'a'  /* append data */
  };
  /* State of one cleaning run.
   * The structure is allocated with extra trailing bytes so that kw[] can hold
   * one keyword letter per keyword given on the command line. */
  typedef struct
  {
      char *xmlfile;  /* path of the input file */
      char *outfile;  /* path of the output file */
      char *xmldata;  /* working copy of the XML text */
      int xmlsize;    /* number of valid bytes in xmldata */
      int kwnums;     /* number of keywords, i.e. entries in kw[] and kwstr[] */
      char **kwstr;   /* kwstr[i] is the comma separated argument list of kw[i] */
      char kw[];      /* keyword letters; C99 flexible array member (was the
                       * non-standard zero-length array kw[0]) */
  } xml_clean_t;
  /* Must be allocated before use and freed afterwards. */
  xml_clean_t *xmlc;
  /* Measure the pattern element that starts at p.
   * An element is a single character, a two character escape ("\x") or a
   * bracket class ("[...]"), optionally followed by '*', '+' or '?'.
   * When both m and n are non-NULL they receive the minimum and maximum
   * repeat count of the element (-1 meaning unbounded) and the quantifier is
   * included in the returned length; otherwise the quantifier is not consumed.
   * Returns the element length in characters, or 0 for an empty pattern.
   * The scan never moves past the terminating NUL, even for an unterminated
   * class or a trailing backslash. */
  static inline int get_metachr(char* p, int* m, int* n)
  {
      int i = 1, depth = 1;
      if (*p == '\0') return 0;
      /* an escape takes two characters unless the backslash ends the pattern */
      if (*p == '\\' && p[1] != '\0') i = 2;
      if (*p == '[')
      {
          /* walk to the matching ']' (nested '[' are counted), stopping at NUL */
          while (depth > 0 && p[i] != '\0')
          {
              if (p[i] == '[') depth++;
              if (p[i] == ']') depth--;
              i += (p[i] == '\\' && p[i + 1] != '\0') ? 2 : 1;
          }
      }
      if (m != NULL && n != NULL)
      {
          *m = 1; *n = 1;
          if (p[i] == '*') { *m = 0; *n = -1; i += 1; }
          if (p[i] == '+') { *m = 1; *n = -1; i += 1; }
          if (p[i] == '?') { *m = 0; *n = 1; i += 1; }
      }
      return i;
  }
  /* Returns 1 when p starts a complete "lo-hi" range item, 0 otherwise. */
  static int chr_is_range(char *p)
  {
      return p[0] != '\0' && p[0] != '.' && p[0] != '\\'
          && p[1] == '-' && p[2] != '\0' && p[2] != ']';
  }
  /* Test a single character c against the pattern element starting at p.
   * Supported elements: '.', "\d", "\s", "\w", "\x" (literal x), "lo-hi"
   * ranges, '$' (matches only the NUL character), "[...]" and "[^...]"
   * classes, and plain literals.
   * Returns 1 on a match, 0 otherwise.
   * Inside a class a range is consumed as one three character item, so its
   * '-' is no longer tested again as a literal (which made "\d" and "\w"
   * accept '-'), and an unterminated class stops at the end of the pattern. */
  static int chr_match(char c, char* p)
  {
      if (p[0] == '.')
      {
          return (c != '\n') ? 1 : 0;
      }
      if (p[0] == '\\' && p[1] == 'd')
      {
          return chr_match(c, "[0-9]") ? 1 : 0;
      }
      if (p[0] == '\\' && p[1] == 's')
      {
          return chr_match(c, "[ \f\n\r\t\v]") ? 1 : 0;
      }
      if (p[0] == '\\' && p[1] == 'w')
      {
          return chr_match(c, "[A-Za-z0-9_]") ? 1 : 0;
      }
      if (p[0] == '\\')
      {
          return (c == p[1]) ? 1 : 0;
      }
      if (chr_is_range(p))
      {
          return (c >= p[0] && c <= p[2]) ? 1 : 0;
      }
      if (p[0] == '$')
      {
          return (c == '\0') ? 1 : 0;
      }
      if (p[0] == '[')
      {
          int negate = (p[1] == '^') ? 1 : 0;
          int i = negate ? 2 : 1;
          while (p[i] != ']' && p[i] != '\0')
          {
              if (chr_match(c, &p[i]))
                  return negate ? 0 : 1;
              /* step over the whole item that was just tested */
              if (chr_is_range(&p[i]))
                  i += 3;
              else
                  i += get_metachr(&p[i], NULL, NULL);
          }
          return negate ? 1 : 0;
      }
      return (c == p[0]) ? 1 : 0;
  }
  /* Match one pattern element (with its quantifier) at the start of str.
   * len is the number of characters available in str; the position just past
   * them is presented to the element as a NUL character.
   * Returns the number of characters consumed (as many repeats as the
   * quantifier allows), 0 when the pattern is empty, or -1 when the minimum
   * repeat count cannot be satisfied. */
  static inline int str_match_1(char* str, int len, char* p)
  {
      int min_rep, max_rep;
      int count = 0;
      if (get_metachr(p, &min_rep, &max_rep) == 0)
          return 0;
      /* mandatory repeats */
      while (count < min_rep && count <= len)
      {
          char c = '\0';
          if (count < len)
              c = str[count];
          if (!chr_match(c, p))
              return -1;
          count++;
      }
      /* optional repeats, taken greedily */
      count = min_rep;
      while (count != max_rep && count < len && chr_match(str[count], p))
          count++;
      if (count > len)
          return len;
      return count;
  }
  /* Match pattern against the beginning of str.
   * len limits how much of str is examined; a value <= 0 means "use
   * strlen(str)". Elements are matched greedily; when an element fails the
   * match position is moved back, giving characters of the previous element
   * back down to that element's minimum repeat count.
   * Returns the number of characters of str that were matched, or -1 when
   * the pattern does not match. */
  static int str_match(char* str, int len, char* pattern)
  {
      int pos = 0;      /* current position in str */
      int pat = 0;      /* current position in pattern */
      int prev = 0;     /* position where the previous element started */
      int min_rep = 0;  /* minimum repeat count of the previous element */
      int max_rep = 0;
      int got = 0;
      int pat_len;
      if (len <= 0)
          len = (int)strlen(str);
      pat_len = (int)strlen(pattern);
      while (pat < pat_len)
      {
          got = str_match_1(str + pos, len - pos, pattern + pat);
          while (got < 0)
          {
              /* the decrement happens even on the final, failing test */
              if (pos-- <= prev + min_rep)
                  break;
              got = str_match_1(str + pos, len - pos, pattern + pat);
          }
          if (got < 0)
              return -1;
          pat += get_metachr(pattern + pat, &min_rep, &max_rep);
          prev = pos;
          pos += got;
      }
      return pos;
  }
  /* XML text of the built-in "custom sound effect" config tag, written by
   * "-a null". Built from adjacent string literals; the resulting string is
   * unchanged. */
  #define CUSTOM_DAE \
      ("<config name=\"BTMusic_Multi_Dae_Custom\" title=\"自定义音效\" " \
       "cfg_id=\"0xF0\" size=\"142\" num_items=\"2\" category=\"ASET\" attr=\"adjust_online\">\r\n" \
       "\t<item type=\"Type_DAE_Settings\" name=\"Dae_Settings\" title=\"音效参数\" offs=\"0\" " \
       "size=\"126\" refer=\"Type_DAE_Settings\" attr=\"click_popup\" />\r\n" \
       "\t<item type=\"uint8\" name=\"Name\" title=\"音效名称\" " \
       "offs=\"126\" size=\"16\" range=\"\" array=\"16\" attr=\"string\" />\r\n" \
       "</config>\r\n\r\n")
  /* How several keyword strings are combined when a tag name is tested. */
  typedef enum
  {
      KWS_LOGIC_OR = 0,  /* one matching keyword string is enough */
      KWS_LOGIC_AND = 1  /* every keyword string has to match */
  } kws_logic_e;
  /* Return the size of file fname in bytes.
   * Returns 0 when the file cannot be opened, when its size cannot be
   * determined (fseek/ftell failure) or when the size does not fit in an
   * int, so callers never see a negative or truncated size. */
  static int get_file_size(char *fname)
  {
      FILE *file = NULL;
      long pos = -1;
      if (fname == NULL)
          return 0;
      if ((file = fopen(fname, "rb")) == NULL)
          return 0;
      if (fseek(file, 0, SEEK_END) == 0)
          pos = ftell(file);
      fclose(file);
      /* ftell reports failure with -1; also reject sizes an int cannot hold */
      if (pos < 0 || (long)(int)pos != pos)
          return 0;
      return (int)pos;
  }
  /* Read the whole file fname into buf.
   * buf must be large enough for the entire file; the function has no way to
   * check this, so the caller has to size it (see get_file_size()).
   * Returns the number of bytes actually stored in buf, or 0 when the file
   * cannot be opened, is empty or its size cannot be determined. */
  static int load_file(char *fname, char *buf)
  {
      FILE *file = NULL;
      long fsize = -1;
      size_t got;
      if (fname == NULL || buf == NULL)
          return 0;
      if ((file = fopen(fname, "rb")) == NULL)
      {
          printf("load %s fail\n", fname);
          return 0;
      }
      if (fseek(file, 0, SEEK_END) == 0)
          fsize = ftell(file);
      /* never hand a failed (-1) or oversized length to fread */
      if (fsize <= 0 || (long)(int)fsize != fsize || fseek(file, 0, SEEK_SET) != 0)
      {
          fclose(file);
          return 0;
      }
      got = fread(buf, 1, (size_t)fsize, file);
      fclose(file);
      /* report what was really read, which may be less than fsize */
      return (int)got;
  }
  /* realloc() wrapper that never leaks the old block.
   * On success the resized block is returned. On failure, or when size is
   * not positive, ptr is freed and NULL is returned, so the caller must not
   * use ptr afterwards in either case.
   * A non-positive size is handled without calling realloc(): realloc(ptr, 0)
   * may itself free ptr and return NULL, and freeing ptr again would then be
   * a double free. */
  static inline char *realloc_safe(char *ptr, int size)
  {
      char *grown;
      if (size <= 0)
      {
          free(ptr);
          return NULL;
      }
      grown = realloc(ptr, (size_t)size);
      if (grown == NULL)
          free(ptr);
      return grown;
  }
  /* Number of characters in str before the first occurrence of c, or before
   * the terminating NUL when c does not occur. */
  static inline int count_to_end(char *str, char c)
  {
      const char *hit = strchr(str, c);
      if (hit == NULL)
          return (int)strlen(str);
      return (int)(hit - str);
  }
  /* Replace, in place, every occurrence of the character find in str with
   * the character replace. */
  static inline void str_replace_c(char *str, char find, char replace)
  {
      char *cur;
      for (cur = str; *cur != '\0'; cur++)
      {
          if (*cur == find)
              *cur = replace;
      }
  }
  /* Count how many times the character find occurs in str. */
  static inline int str_count_c(char *str, char find)
  {
      int hits = 0;
      const char *cur;
      for (cur = str; *cur != '\0'; cur++)
          hits += (*cur == find) ? 1 : 0;
      return hits;
  }
  /* Split str in place at each occurrence of find into at most num pieces.
   * Every separator that is used is overwritten with NUL and ptr[0..] receive
   * the start of each piece. Once num pieces exist the rest of the string is
   * left untouched and belongs to the last piece.
   * At most num entries of ptr are written. The bound is tested before each
   * separator is handled, so num == 1 with a leading separator no longer
   * writes ptr[1]. */
  static void str_split_c(char *str, char find, char *ptr[], int num)
  {
      int filled = 0;
      char *cur;
      if (num <= 0)
          return ;
      ptr[filled++] = str;
      for (cur = str; *cur != '\0' && filled < num; cur++)
      {
          if (*cur == find)
          {
              *cur = '\0';
              ptr[filled++] = cur + 1;
          }
      }
  }
  250. static int check_kw_valid(char c)
  251. {
  252. int i, n;
  253. n = sizeof(KeyWords) / sizeof(KeyWords[0]);
  254. for (i = 0; i < n; i++)
  255. {
  256. if (c == KeyWords[i])
  257. return YES;
  258. }
  259. return NO;
  260. }
  261. static int check_kwstr_logic(char *name, char *kws[], int num, kws_logic_e mode)
  262. {
  263. int i;
  264. if (num == 0 || name == NULL)
  265. return NO;
  266. for (i = 0; i < num; i++)
  267. {
  268. if (strstr(name, kws[i]))
  269. {
  270. if (mode == KWS_LOGIC_OR)
  271. return YES;
  272. }
  273. else
  274. {
  275. if (mode == KWS_LOGIC_AND)
  276. return NO;
  277. }
  278. }
  279. if (mode == KWS_LOGIC_AND)
  280. return YES;
  281. return NO;
  282. }
  283. static int check_is_xml_file(char *name)
  284. {
  285. int len = strlen(name);
  286. if (strcmp(&name[len - 4], ".xml") == 0)
  287. return YES;
  288. return NO;
  289. }
  /* Recognise a tag that begins exactly at text[0].
   * The tag has to start with the string start followed by at least one white
   * space character. When name is not NULL the first attribute must be
   * name="..." and its value is copied to name as a NUL terminated string.
   * The tag extends to the first occurrence of end; white space following end
   * is included so that removing the tag leaves no blank lines behind.
   * Returns the length of the tag in characters, or 0 when text does not start
   * with such a tag. When end is never found the length reaches to len.
   * At most TAG_NAME_CAP characters are copied into name because the callers
   * in this file provide a 64 byte buffer; longer names are truncated instead
   * of overflowing it. */
  static int get_tag(char *text, int len, char *name, char *start, char *end)
  {
      enum { TAG_NAME_CAP = 63 };
      int pos, adv;
      int start_len = (int)strlen(start);
      int end_len = (int)strlen(end);
      if (strncmp(&text[0], start, start_len) != 0)
          return 0;
      pos = start_len;
      /* at least one white space character has to follow */
      if ((adv = str_match(&text[pos], len - pos, "\\s+")) < 0)
          return 0;
      pos += adv;
      if (name)
      {
          int copy;
          if (strncmp(&text[pos], "name=", 5) != 0)
              return 0;
          pos += 5;
          if (text[pos] != '\"')
              return 0;
          pos += 1;
          if ((adv = str_match(&text[pos], len - pos, "\\w+")) < 0)
              return 0;
          copy = (adv < TAG_NAME_CAP) ? adv : TAG_NAME_CAP;
          memcpy(name, &text[pos], copy);
          name[copy] = '\0';
          pos += adv;
          if ((adv = str_match(&text[pos], len - pos, "\\s*\"")) < 0)
              return 0;
          pos += adv;
      }
      while (pos < len)
      {
          if (strncmp(&text[pos], end, end_len) == 0)
          {
              pos += end_len;
              /* swallow trailing blanks, otherwise deleting the tag would leave blank lines */
              if ((adv = str_match(&text[pos], len - pos, "\\s*")) > 0)
                  pos += adv;
              break;
          }
          pos++;
      }
      return pos;
  }
  /* Find the first symbol="value" attribute in text[0..len).
   * On success text[*s_pos] is the first character of the symbol and *s_len
   * is the length of the attribute including its value and the white space
   * that follows it. s_pos and s_len may each be NULL.
   * Returns the number of characters examined as soon as one symbol is
   * found, or 0 when there is none. The first occurrence of the symbol text
   * decides: if it is not followed by ="...", 0 is returned (*s_pos has
   * already been written in that case).
   * The length is computed from a local start position, so passing only
   * s_len no longer dereferences a NULL s_pos, and the trailing white space
   * scan is skipped when the value ends the range, because str_match() treats
   * a remaining length of 0 as "scan up to the NUL". */
  static int get_symbol(char *text, int len, char *symbol, int *s_pos, int *s_len)
  {
      int pos = 0, adv = 0, start, sym_len;
      if (!symbol)
          return 0;
      sym_len = (int)strlen(symbol);
      /* 3 = strlen(="") */
      if (len < sym_len + 3)
          return 0;
      /* locate the start of the symbol */
      while (pos + sym_len + 3 < len)
      {
          if (strncmp(&text[pos], symbol, sym_len) == 0)
              break;
          pos++;
      }
      if (pos + sym_len + 3 >= len)
          return 0;
      start = pos;
      if (s_pos)
          *s_pos = start;
      pos += sym_len;
      /* white space is allowed around '=' */
      if ((adv = str_match(&text[pos], len - pos, "\\s*=\\s*\"")) < 0)
          return 0;
      pos += adv;
      /* advance past the closing quote */
      while (pos < len)
      {
          if (text[pos] == '\"')
          {
              pos += 1;
              break;
          }
          pos++;
      }
      /* swallow following spaces, otherwise deleting would leave extra spaces */
      if (pos < len && (adv = str_match(&text[pos], len - pos, "\\s*")) > 0)
          pos += adv;
      if (s_len)
          *s_len = pos - start;
      return pos;
  }
  /* Copy text[0..len) to ndata while dropping every symbol="value" attribute.
   * condition is optional: when given, the attributes are only removed if the
   * string condition occurs somewhere in text; otherwise text is copied
   * unchanged. All occurrences in the range are removed, so pass a small
   * range of text.
   * Returns the number of bytes written to ndata (0 when symbol is NULL). */
  static int delete_symbol(char *text, int len, char *ndata, char *symbol, char *condition)
  {
      int rd = 0;  /* read position in text */
      int wr = 0;  /* write position in ndata */
      if (symbol == NULL)
          return 0;
      if (condition != NULL)
      {
          size_t cond_len = strlen(condition);
          int found = 0;
          int k;
          for (k = 0; k < len && !found; k++)
          {
              if (strncmp(&text[k], condition, cond_len) == 0)
                  found = 1;
          }
          if (!found)
          {
              memcpy(ndata, text, len);
              return len;
          }
      }
      while (rd < len)
      {
          int at = 0;
          int span = 0;
          if (get_symbol(&text[rd], len - rd, symbol, &at, &span) <= 0)
          {
              /* no further symbol: copy the remainder and stop */
              memcpy(&ndata[wr], &text[rd], len - rd);
              wr += len - rd;
              break;
          }
          /* keep what precedes the symbol, then skip the symbol itself */
          memcpy(&ndata[wr], &text[rd], at);
          wr += at;
          rd += at + span;
      }
      return wr;
  }
  /* get_tag() for an <enum ...> ... </enum> tag starting at text[0]. */
  static int xml_get_enum(char *text, int len, char *name)
  {
      static char open_tag[] = "<enum";
      static char close_tag[] = "</enum>";
      return get_tag(text, len, name, open_tag, close_tag);
  }
  /* get_tag() for a <struct ...> ... </struct> tag starting at text[0]. */
  static int xml_get_struct(char *text, int len, char *name)
  {
      static char open_tag[] = "<struct";
      static char close_tag[] = "</struct>";
      return get_tag(text, len, name, open_tag, close_tag);
  }
  /* get_tag() for a <config ...> ... </config> tag starting at text[0]. */
  static int xml_get_config(char *text, int len, char *name)
  {
      static char open_tag[] = "<config";
      static char close_tag[] = "</config>";
      return get_tag(text, len, name, open_tag, close_tag);
  }
  439. /* -d kwstr
  440. * delete the tags that contain kwstr
  441. */
  442. static int xmlc_delete_tag(char *ndata, char *kwstr, int ks_nums)
  443. {
  444. int i, j, nsize, istag;
  445. char tagname[64];
  446. char *kws[ks_nums];
  447. str_split_c(kwstr, ',', kws, ks_nums);
  448. i = nsize = 0;
  449. while(i < xmlc->xmlsize)
  450. {
  451. istag = 0;
  452. if ((j = xml_get_enum(&xmlc->xmldata[i], xmlc->xmlsize - i, tagname)) > 0)
  453. istag = 1;
  454. else if ((j = xml_get_struct(&xmlc->xmldata[i], xmlc->xmlsize - i, tagname)) > 0)
  455. istag = 1;
  456. else if ((j = xml_get_config(&xmlc->xmldata[i], xmlc->xmlsize - i, tagname)) > 0)
  457. istag = 1;
  458. if (istag)
  459. {
  460. if (!check_kwstr_logic(tagname, kws, ks_nums, KWS_LOGIC_AND))
  461. {
  462. memcpy(&ndata[nsize], &xmlc->xmldata[i], j);
  463. nsize += j;
  464. }
  465. i += j;
  466. continue;
  467. }
  468. ndata[nsize++] = xmlc->xmldata[i++];
  469. }
  470. return nsize;
  471. }
  472. /* -h
  473. * Delete the tags that are NOT one of enum, struct or config.
  474. */
  475. static int xmlc_delete_misc(char *ndata)
  476. {
  477. int i, j, nsize, istag;
  478. i = nsize = 0;
  479. while(i < xmlc->xmlsize)
  480. {
  481. istag = 0;
  482. if ((j = xml_get_enum(&xmlc->xmldata[i], xmlc->xmlsize - i, NULL)) > 0)
  483. istag = 1;
  484. else if ((j = xml_get_struct(&xmlc->xmldata[i], xmlc->xmlsize - i, NULL)) > 0)
  485. istag = 1;
  486. else if ((j = xml_get_config(&xmlc->xmldata[i], xmlc->xmlsize - i, NULL)) > 0)
  487. istag = 1;
  488. if (istag)
  489. {
  490. memcpy(&ndata[nsize], &xmlc->xmldata[i], j);
  491. nsize += j;
  492. i += j;
  493. continue;
  494. }
  495. i++;
  496. }
  497. return nsize;
  498. }
  499. /* -x kwstr
  500. * <config ...kwstr="*"... >
  501. * Delete the string (kwstr="*") as above, without doing effect to its item.
  502. * It only works for "config" tag and would NOT delete same symbol in "enum" or "struct".
  503. */
  504. static int xmlc_delete_config_symbol(char *ndata, char *kwstr, int ks_nums)
  505. {
  506. int i, j, nsize;
  507. char *kws[ks_nums];
  508. // only 1 symbol could be deleted once?
  509. if (ks_nums != 1){
  510. printf("[E] -x only receive 1 kwstr\n");
  511. return -1;
  512. }
  513. str_split_c(kwstr, ',', kws, ks_nums);
  514. i = nsize = 0;
  515. while(i < xmlc->xmlsize)
  516. {
  517. if ((j = get_tag(&xmlc->xmldata[i], xmlc->xmlsize - i, NULL, "<config", ">")) > 0)
  518. {
  519. nsize += delete_symbol(&xmlc->xmldata[i], j, &ndata[nsize], kwstr, NULL);
  520. i += j;
  521. continue;
  522. }
  523. ndata[nsize++] = xmlc->xmldata[i++];
  524. }
  525. return nsize;
  526. }
  527. /* -i kwstr
  528. * <item ...symbol="*"...cdt="*"... />
  529. * Delete the string (kwstr="*") as above in item of "struct" and "config".
  530. * cdt(condition) is just the symbol name, not caring its value and it is optional.
  531. * cdt is optional, symbol(and its valude) will be deleted directly without cdt.
  532. */
  533. static int xmlc_delete_item_symbol(char *ndata, char *kwstr, int ks_nums)
  534. {
  535. int i, j, n;
  536. int nsize, istag;
  537. char *data = xmlc->xmldata;
  538. char *cdt = NULL;
  539. char *kws[ks_nums];
  540. if (ks_nums != 1 && ks_nums != 2)
  541. return -1;
  542. str_split_c(kwstr, ',', kws, ks_nums);
  543. if (ks_nums == 2)
  544. cdt = kws[1];
  545. i = nsize = 0;
  546. while(i < xmlc->xmlsize)
  547. {
  548. istag = 0;
  549. if ((j = xml_get_struct(&data[i], xmlc->xmlsize - i, NULL)) > 0)
  550. istag = 1;
  551. else if ((j = xml_get_config(&data[i], xmlc->xmlsize - i, NULL)) > 0)
  552. istag = 1;
  553. if (istag)
  554. {
  555. n = i;
  556. while(n < xmlc->xmlsize && j > 0)
  557. {
  558. if(!strncmp(&data[n], "<item", strlen("<item")))
  559. break;
  560. n++;
  561. j--;
  562. }
  563. memcpy(&ndata[nsize], &data[i], n - i);
  564. nsize += n - i;
  565. i = n;
  566. while(j > 0)
  567. {
  568. // it's supposed to get item every time
  569. if ((n = get_tag(&data[i], j, NULL, "<item", "/>")) > 0)
  570. {
  571. nsize += delete_symbol(&data[i], n, &ndata[nsize], kws[0], cdt);
  572. i += n;
  573. j -= n;
  574. }
  575. else
  576. {
  577. memcpy(&ndata[nsize], &data[i], j);
  578. nsize += j;
  579. i += j;
  580. break;
  581. }
  582. }
  583. continue;
  584. }
  585. ndata[nsize++] = xmlc->xmldata[i++];
  586. }
  587. return nsize;
  588. }
  589. /* -k kwstr
  590. * Only remain the tags that contain kwstr, and who don't would be deleted.
  591. */
  592. static int xmlc_keep_tag(char *ndata, char *kwstr, int ks_nums)
  593. {
  594. int i, j, nsize, istag;
  595. char tagname[64];
  596. char *kws[ks_nums];
  597. str_split_c(kwstr, ',', kws, ks_nums);
  598. i = nsize = 0;
  599. while(i < xmlc->xmlsize)
  600. {
  601. istag = 0;
  602. if ((j = xml_get_enum(&xmlc->xmldata[i], xmlc->xmlsize - i, tagname)) > 0)
  603. istag = 1;
  604. else if ((j = xml_get_struct(&xmlc->xmldata[i], xmlc->xmlsize - i, tagname)) > 0)
  605. istag = 1;
  606. else if ((j = xml_get_config(&xmlc->xmldata[i], xmlc->xmlsize - i, tagname)) > 0)
  607. istag = 1;
  608. if (istag)
  609. {
  610. if (check_kwstr_logic(tagname, kws, ks_nums, KWS_LOGIC_OR))
  611. {
  612. memcpy(&ndata[nsize], &xmlc->xmldata[i], j);
  613. nsize += j;
  614. }
  615. i += j;
  616. continue;
  617. }
  618. ndata[nsize++] = xmlc->xmldata[i++];
  619. }
  620. return nsize;
  621. }
  622. /* -a kwstr
  623. * Append rawdata to xmlfile, expect a file input, but "-a null" is reserved for custom dae.
  624. * it would finish other append even though some append fail, and return new-size appended successfully,
  625. * while return -1 if all append fail.
  626. */
  627. static int xmlc_append_tag(char *ndata, char *kwfiles, int ks_nums)
  628. {
  629. int i, asize;
  630. int hastail = 0, nsize = 0;
  631. char *endtag = NULL;
  632. char *kws[ks_nums];
  633. str_split_c(kwfiles, ',', kws, ks_nums);
  634. memcpy(ndata, xmlc->xmldata, xmlc->xmlsize);
  635. nsize = xmlc->xmlsize;
  636. i = nsize;
  637. while (i >= 0)
  638. {
  639. /* Append in the front if there is "</config_file>" at the tail.
  640. * Break as soon as '<' is checked.
  641. */
  642. if (ndata[i] == '<')
  643. {
  644. if (strncmp(&ndata[i], "</config_file>", strlen("</config_file>")) == 0)
  645. {
  646. endtag = malloc(nsize - i + 1);
  647. if (!endtag)
  648. break;
  649. memset(endtag, 0, nsize - i);
  650. memcpy(endtag, &ndata[i], nsize - i);
  651. endtag[nsize - i] = '\0';
  652. memset(&ndata[i], 0, nsize - i);
  653. nsize = i;
  654. hastail = 1;
  655. }
  656. break;
  657. }
  658. i--;
  659. }
  660. for (i = 0; i < ks_nums; i++)
  661. {
  662. if (strcmp(kws[i], "null") == 0
  663. || strcmp(kws[i], "NULL") == 0)
  664. {
  665. //append custom dae
  666. nsize += sprintf(&ndata[nsize], CUSTOM_DAE);
  667. }
  668. else
  669. {
  670. // only xml file could be appended?
  671. if (!check_is_xml_file(kws[i]))
  672. continue;
  673. asize = load_file(kws[i], &ndata[nsize]);
  674. if (asize > 0)
  675. nsize += asize;
  676. }
  677. }
  678. if (hastail)
  679. {
  680. nsize += sprintf(&ndata[nsize], endtag);
  681. free(endtag);
  682. endtag = NULL;
  683. }
  684. // nsize = xmlc->xmlsize if all append fail
  685. return (nsize == xmlc->xmlsize) ? -1 : nsize;
  686. }
  687. /* calc the extra size to be appended
  688. */
  689. static int calc_append_size()
  690. {
  691. int i, j, n = 0;
  692. int anum = 0, asize = 0;
  693. char *akwstr = NULL;
  694. char aname[0x20 + 1];
  695. for(i = 0; i < xmlc->kwnums; i++)
  696. if (xmlc->kw[i] == 'a')
  697. break;
  698. if (i == xmlc->kwnums)
  699. return 0;
  700. // "-a" without input, default custom dae
  701. if (xmlc->kwstr[i][0] == '\0')
  702. strcpy(xmlc->kwstr[i], "null");
  703. akwstr = xmlc->kwstr[i];
  704. anum = str_count_c(akwstr, ',') + 1;
  705. for (i = 0; i < anum; i++)
  706. {
  707. memset(aname, 0, 0x20 + 1);
  708. if ((j = count_to_end(&akwstr[n], ',')) <= 0)
  709. break;
  710. // file name is too long
  711. if (j > 0x20)
  712. {
  713. printf("append fail: file name longer than 0x20\n");
  714. return -1;
  715. }
  716. memcpy(aname, &akwstr[n], j);
  717. aname[j] = '\0';
  718. // +1: to skip separator ','
  719. n += j + 1;
  720. // Reserved for custom dae
  721. if (strcmp(aname, "null") == 0
  722. || strcmp(aname, "NULL") == 0)
  723. {
  724. // +1: for EOF
  725. asize += strlen(CUSTOM_DAE) + 1;
  726. continue;
  727. }
  728. // only xml file could be append?
  729. if (!check_is_xml_file(aname))
  730. {
  731. printf("append fail: %s is not xml file\n", aname);
  732. return -1;
  733. }
  734. asize += get_file_size(aname);
  735. }
  736. return asize;
  737. }
  738. static void do_clean(void)
  739. {
  740. int i = 0, kwstr_nums;
  741. int nsize, asize;
  742. char *ndata = NULL;
  743. FILE *outfile = NULL;
  744. if (xmlc->xmlsize <= 0)
  745. return ;
  746. asize = calc_append_size();
  747. // +2: 1byte to avoid problem caused by asize = -1
  748. // : 1byte to place a guard '\0'(0x00)
  749. nsize = xmlc->xmlsize + asize + 2;
  750. ndata = malloc(nsize);
  751. xmlc->xmldata = malloc(nsize);
  752. if (!ndata || !xmlc->xmldata)
  753. goto end;
  754. memset(xmlc->xmldata, 0, nsize);
  755. if (load_file(xmlc->xmlfile, xmlc->xmldata) <= 0)
  756. goto end;
  757. while(i < xmlc->kwnums)
  758. {
  759. // pre-clean the data for next process
  760. memset(ndata, 0, nsize);
  761. nsize = 0;
  762. // count the kwstr nums
  763. kwstr_nums = str_count_c(xmlc->kwstr[i], ',') + 1;
  764. if (xmlc->kwstr[i][0] == '\0')
  765. kwstr_nums = 0;
  766. switch (xmlc->kw[i])
  767. {
  768. case 'd':
  769. nsize = xmlc_delete_tag(ndata, xmlc->kwstr[i], kwstr_nums);
  770. break;
  771. case 'h':
  772. nsize = xmlc_delete_misc(ndata);
  773. break;
  774. case 'x':
  775. nsize = xmlc_delete_config_symbol(ndata, xmlc->kwstr[i], kwstr_nums);
  776. break;
  777. case 'i':
  778. nsize = xmlc_delete_item_symbol(ndata, xmlc->kwstr[i], kwstr_nums);
  779. break;
  780. case 'k':
  781. nsize = xmlc_keep_tag(ndata, xmlc->kwstr[i], kwstr_nums);
  782. break;
  783. case 'a':
  784. // append file error: not xml file or file name too long?
  785. if (asize < 0)
  786. {
  787. nsize = -1;
  788. break;
  789. }
  790. nsize = xmlc_append_tag(ndata, xmlc->kwstr[i], kwstr_nums);
  791. break;
  792. default :
  793. nsize = -1;
  794. break;
  795. }
  796. // updata new data for next process
  797. if (nsize >= 0 && nsize != xmlc->xmlsize)
  798. {
  799. memset(xmlc->xmldata, 0, xmlc->xmlsize);
  800. memcpy(xmlc->xmldata, ndata, nsize);
  801. xmlc->xmlsize = nsize;
  802. }
  803. i++;
  804. }
  805. if ((outfile = fopen(xmlc->outfile, "wb")) == NULL)
  806. goto end;
  807. fwrite(xmlc->xmldata, xmlc->xmlsize, 1, outfile);
  808. fclose(outfile);
  809. outfile = NULL;
  810. end:
  811. if (ndata)
  812. {
  813. free(ndata);
  814. ndata = NULL;
  815. }
  816. if (xmlc->xmldata)
  817. {
  818. free(xmlc->xmldata);
  819. xmlc->xmldata = NULL;
  820. }
  821. }
  822. static void memory_release(void)
  823. {
  824. int i;
  825. for (i = 0; i < xmlc->kwnums; i++)
  826. {
  827. if (xmlc->kwstr[i])
  828. free(xmlc->kwstr[i]);
  829. xmlc->kwstr[i] = NULL;
  830. }
  831. if (xmlc->kwstr)
  832. {
  833. free(xmlc->kwstr);
  834. xmlc->kwstr = NULL;
  835. }
  836. if (xmlc->xmldata)
  837. {
  838. free(xmlc->xmldata);
  839. xmlc->xmldata = NULL;
  840. }
  841. free(xmlc);
  842. xmlc = NULL;
  843. }
  844. /* eg: xml_cleaner.exe src.xml[ dst.xml] -h -i sym[ cdt] -k "TM_"
  845. */
  846. static int read_args(int argc, char *argv[])
  847. {
  848. int i, iskw, nums = 0;
  849. // need 3 args at least, eg: xml_cleaner.exe src.xml -a
  850. if ((argc < 3) || (argc == 3 && argv[2][0] != '-'))
  851. goto err_args2;
  852. // argv[2] or argv[3] must be kw who is with '-'
  853. if ((argv[2][0] != '-')
  854. && (argc >= 4 && argv[3][0] != '-'))
  855. goto err_args2;
  856. for (i = 2; i < argc; i++)
  857. if (argv[i][0] == '-')
  858. nums++;
  859. // nums * 1 = sizeof(xmlc->kw)
  860. if (!(xmlc = malloc(sizeof(xml_clean_t) + nums * 1)))
  861. goto err_args2;
  862. memset(xmlc, 0, sizeof(xml_clean_t) + nums * 1);
  863. xmlc->kwstr = malloc(sizeof(xmlc->kwstr[0]) * nums);
  864. xmlc->xmlfile = argv[1];
  865. xmlc->outfile = argv[1];
  866. // output file specified?
  867. if (argv[2][0] != '-')
  868. xmlc->outfile = argv[2];
  869. xmlc->kwnums = nums;
  870. if (!check_is_xml_file(xmlc->xmlfile))
  871. goto err_args1;
  872. if (!check_is_xml_file(xmlc->outfile))
  873. goto err_args1;
  874. nums = 0;
  875. iskw = 0;
  876. for (i = 2; i < argc; i++)
  877. {
  878. if (!strcmp(argv[i], xmlc->outfile))
  879. continue;
  880. if (argv[i][0] == '-')
  881. {
  882. if (nums >= xmlc->kwnums)
  883. goto err_args1;
  884. xmlc->kw[nums] = argv[i][1];
  885. if (!check_kw_valid(xmlc->kw[nums]))
  886. goto err_args1;
  887. //pre-malloc for kwstr to avoid null pointer
  888. xmlc->kwstr[nums] = malloc(KWSTR_LEN);
  889. memset(xmlc->kwstr[nums], 0, KWSTR_LEN);
  890. iskw = 1;
  891. nums++;
  892. }
  893. else
  894. {
  895. // nums - 1: make sure kwstr is the same index as xmlc->kw
  896. char *temp = xmlc->kwstr[nums - 1];
  897. int len = strlen(temp);
  898. // another 1byte for EOF
  899. len += strlen(argv[i]) + 1;
  900. // 2nd or more kwstr for a kw, another 1byte for ','
  901. if (!iskw)
  902. len += 1;
  903. if (len > KWSTR_LEN)
  904. {
  905. temp = realloc_safe(temp, len);
  906. xmlc->kwstr[nums - 1] = temp;
  907. }
  908. if (temp)
  909. {
  910. /* replace blank to KWSTR_SEP */
  911. str_replace_c(argv[i], ' ', ',');
  912. if(!iskw)
  913. strcat(temp, ",");
  914. strcat(temp, argv[i]);
  915. }
  916. iskw = 0;
  917. }
  918. }
  919. xmlc->kwnums = nums;
  920. return 0;
  921. err_args1:
  922. memory_release();
  923. err_args2:
  924. printf("\nusage: xml_cleaner.exe xml_file[ out_xmlfile] kw1 kws1 ... [kw2 kws2 ...]\n");
  925. printf("Content in [] is optional. Each kw can receive mulitple input unless stated itself\n");
  926. printf("\t-d kws ... : delete the whole tag whose \"name\" does contain kws\n");
  927. printf("\t-k kws ... : only keep tags whose \"name\" does contain kws\n");
  928. printf("\t-a kws ... : append all raw data to xml_file, \"-a null\" is reserved for custom dae\n");
  929. printf("\t-x kws : only 1 symbol, delete symbol in config tag header and do no effect to its items\n");
  930. printf("\t-i kws[ cdt] : no more than 2 inputs, delete symbol in items of struct and config, cdt is optional\n");
  931. printf("\t-h : no kws! delete the tags that are NOT one of enum, struct or config\n");
  932. return -1;
  933. }
  934. int main(int argc, char *argv[])
  935. {
  936. if (read_args(argc, argv) < 0)
  937. return -1;
  938. if ((xmlc->xmlsize = get_file_size(xmlc->xmlfile)) <= 0)
  939. goto err;
  940. do_clean();
  941. memory_release();
  942. return 0;
  943. err:
  944. printf("fail\n");
  945. memory_release();
  946. return -1;
  947. }