nls_base.c 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. /*
  2. * Copyright (c) 2020 Actions Corporation.
  3. * Copy from Linux
  4. *
  5. * SPDX-License-Identifier: Apache-2.0
  6. */
  7. #include <errno.h>
  8. #include <nls.h>
  9. /*
  10. * Sample implementation from Unicode home page.
  11. * http://www.stonehand.com/unicode/standard/fss-utf.html
  12. */
  13. struct utf8_table {
  14. int cmask;
  15. int cval;
  16. int shift;
  17. long lmask;
  18. long lval;
  19. };
  20. static const struct utf8_table utf8_table[] =
  21. {
  22. {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
  23. {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
  24. {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
  25. {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
  26. {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
  27. {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
  28. {0, /* end of table */}
  29. };
/* Largest code point the converters in this file accept; anything above is rejected. */
#define UNICODE_MAX 0x0010ffff
/* NOTE(review): not referenced in the visible code; presumably the size of one Unicode plane - confirm. */
#define PLANE_SIZE 0x00010000
/*
 * A value v is rejected as a surrogate when
 * (v & SURROGATE_MASK) == SURROGATE_PAIR, i.e. for 0xd800..0xdfff.
 */
#define SURROGATE_MASK 0xfffff800
#define SURROGATE_PAIR 0x0000d800
/*
 * NOTE(review): the two constants below are not referenced in the visible
 * code; presumably the low-surrogate flag bit and the 10-bit payload mask
 * used by UTF-16 helpers elsewhere - confirm.
 */
#define SURROGATE_LOW 0x00000400
#define SURROGATE_BITS 0x000003ff
  36. int utf8_to_utf32(const u8_t *s, int inlen, unicode_t *pu)
  37. {
  38. unsigned long l;
  39. int c0, c, nc;
  40. const struct utf8_table *t;
  41. nc = 0;
  42. c0 = *s;
  43. l = c0;
  44. for (t = utf8_table; t->cmask; t++) {
  45. nc++;
  46. if ((c0 & t->cmask) == t->cval) {
  47. l &= t->lmask;
  48. if (l < t->lval || l > UNICODE_MAX ||
  49. (l & SURROGATE_MASK) == SURROGATE_PAIR)
  50. return -1;
  51. *pu = (unicode_t) l;
  52. return nc;
  53. }
  54. if (inlen <= nc)
  55. return -1;
  56. s++;
  57. c = (*s ^ 0x80) & 0xFF;
  58. if (c & 0xC0)
  59. return -1;
  60. l = (l << 6) | c;
  61. }
  62. return -1;
  63. }
  64. int utf32_to_utf8(unicode_t u, u8_t *s, int maxout)
  65. {
  66. unsigned long l;
  67. int c, nc;
  68. const struct utf8_table *t;
  69. if (!s)
  70. return 0;
  71. l = u;
  72. if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
  73. return -1;
  74. nc = 0;
  75. for (t = utf8_table; t->cmask && maxout; t++, maxout--) {
  76. nc++;
  77. if (l <= t->lmask) {
  78. c = t->shift;
  79. *s = (u8_t) (t->cval | (l >> c));
  80. while (c > 0) {
  81. c -= 6;
  82. s++;
  83. *s = (u8_t) (0x80 | ((l >> c) & 0x3F));
  84. }
  85. return nc;
  86. }
  87. }
  88. return -1;
  89. }