utf8.h 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. /*
  2. (c) Copyright 2001-2009 The world wide DirectFB Open Source Community (directfb.org)
  3. (c) Copyright 2000-2004 Convergence (integrated media) GmbH
  4. All rights reserved.
  5. Written by Denis Oliver Kropp <dok@directfb.org>,
  6. Andreas Hundt <andi@fischlustig.de>,
  7. Sven Neumann <neo@directfb.org>,
  8. Ville Syrjälä <syrjala@sci.fi> and
  9. Claudio Ciccani <klan@users.sf.net>.
  10. UTF8 routines ported from glib-2.0 and optimized
  11. This library is free software; you can redistribute it and/or
  12. modify it under the terms of the GNU Lesser General Public
  13. License as published by the Free Software Foundation; either
  14. version 2 of the License, or (at your option) any later version.
  15. This library is distributed in the hope that it will be useful,
  16. but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. Lesser General Public License for more details.
  19. You should have received a copy of the GNU Lesser General Public
  20. License along with this library; if not, write to the
  21. Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  22. Boston, MA 02111-1307, USA.
  23. */
  24. #ifndef __DIRECT__UTF8_H__
  25. #define __DIRECT__UTF8_H__
  26. #include <direct/types.h>
  27. #define DIRECT_UTF8_SKIP(c) (((u8)(c) < 0xc0) ? 1 : __direct_utf8_skip[(u8)(c)&0x3f])
  28. #define DIRECT_UTF8_GET_CHAR(p) (*(const u8*)(p) < 0xc0 ? \
  29. *(const u8*)(p) : __direct_utf8_get_char((const u8*)(p)))
  30. /*
  31. * Actually the last two fields used to be zero since they indicate an
  32. * invalid UTF-8 string. Changed it to 1 to avoid endless looping on
  33. * invalid input.
  34. */
  35. static const char __direct_utf8_skip[64] = {
  36. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  37. 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
  38. };
  39. static __inline__ unichar __direct_utf8_get_char( const u8 *p )
  40. {
  41. int len;
  42. register unichar result = p[0];
  43. if (result < 0xc0)
  44. return result;
  45. if (result > 0xfd)
  46. return (unichar) -1;
  47. len = __direct_utf8_skip[result & 0x3f];
  48. result &= 0x7c >> len;
  49. while (--len) {
  50. int c = *(++p);
  51. if ((c & 0xc0) != 0x80)
  52. return (unichar) -1;
  53. result = (result << 6) | (c & 0x3f);
  54. }
  55. return result;
  56. }
  57. #endif