// Format names that convert() accepts for both its $from and $to arguments.
19 private static $mechs = array(
'ucs4',
'ucs4array',
'utf8',
'utf7',
'utf7imap');
// Off by default. convert() switches it on in safe mode, and the UTF-8 decoder
// skips its out-of-range check while it is set.
20 private static $allow_overlong =
false;
// Assigned on every convert() call; when true the converters substitute
// self::$safe_char for bad input in places where they would otherwise throw.
21 private static $safe_mode;
// Replacement value written in safe mode; convert() falls back to 0xFFFC.
22 private static $safe_char;
/**
 * Converts data between the supported Unicode encodings.
 *
 * The input is first decoded into the 'ucs4array' form and then encoded into
 * the requested target format; either step is skipped when the corresponding
 * format already is 'ucs4array'.
 *
 * @param mixed  $data      Data to convert, encoded as described by $from.
 * @param string $from      Source format; must be one of self::$mechs.
 * @param string $to        Target format; must be one of self::$mechs.
 * @param bool   $safe_mode When truthy, converters write $safe_char for bad
 *                          input in places where they would otherwise throw.
 * @param int    $safe_char Replacement value for safe mode; a falsy value
 *                          falls back to 0xFFFC.
 * @return mixed The converted data.
 * @throws Exception If $from or $to is not a supported format name, or if a
 *                   converter rejects the input.
 */
public static function convert($data, $from, $to, $safe_mode = false, $safe_char = 0xFFFC)
{
    self::$safe_mode = (bool) $safe_mode;
    self::$safe_char = $safe_char ? $safe_char : 0xFFFC;

    // NOTE(review): this flag is static and is only ever switched on here, so
    // one safe-mode call leaves overlong sequences allowed for later strict
    // calls in the same process. Confirm whether it should be reset.
    if (self::$safe_mode) {
        self::$allow_overlong = true;
    }

    // Strict membership tests: with loose comparison, values such as true or 0
    // could pass the whitelist and end up inside a method name.
    if (!in_array($from, self::$mechs, true)) {
        throw new Exception('Invalid input format specified');
    }
    if (!in_array($to, self::$mechs, true)) {
        throw new Exception('Invalid output format specified');
    }

    // Dispatch through a variable method name rather than eval(); both names
    // are built from whitelisted format strings only.
    if ($from !== 'ucs4array') {
        $decoder = $from.'_ucs4array';
        $data = self::$decoder($data);
    }
    if ($to !== 'ucs4array') {
        $encoder = 'ucs4array_'.$to;
        $data = self::$encoder($data);
    }

    return $data;
}
/**
 * Decodes a UTF-8 byte string, walking it byte by byte and writing decoded
 * values into $output.
 *
 * Bad input either raises an exception or, when self::$safe_mode is set, is
 * replaced by self::$safe_char.
 *
 * @param string $input UTF-8 encoded data; its length is measured in bytes.
 * @throws Exception When safe mode is off and the input is malformed, starts
 *                   with an unsupported lead byte, or is out of legal range.
 */
54 private static function utf8_ucs4array($input)
58 $inp_len = strlen($input);
// $k is the current byte offset; the exception messages below report it.
61 for ($k = 0; $k < $inp_len; ++$k) {
// Safe mode overwrites an earlier output slot with the replacement value
// instead of failing.
68 if (self::$safe_mode) {
69 $output[$out_len-2] = self::$safe_char;
72 throw new Exception(
'Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
77 if (
'next' == $mode) {
// Top four bits 1110: lead byte of a three-byte sequence. Drop the 0xE0
// marker and move the payload up to bit 12.
84 } elseif ($v >> 4 == 14) {
86 $v = ($v - 224) << 12;
// Top five bits 11110: lead byte of a four-byte sequence. Drop the 0xF0
// marker and move the payload up to bit 18.
87 } elseif ($v >> 3 == 30) {
89 $v = ($v - 240) << 18;
// Any other lead byte is not understood: substitute in safe mode, else throw.
90 } elseif (self::$safe_mode) {
92 $output[$out_len] = self::$safe_char;
96 throw new Exception(
'This might be UTF-8, but I don\'t understand it at byte '.$k);
// Presumably a truncated sequence: too few bytes remain for the announced
// length, so the replacement value is stored.
98 if ($inp_len-$k-$next_byte < 2) {
99 $output[$out_len] = self::$safe_char;
104 if (
'add' == $mode) {
110 if (
'add' == $mode) {
// The range check runs only while overlong forms are disallowed.
111 if (!self::$allow_overlong && $test ==
'range') {
// Rejects a byte below 0xA0 after start byte 0xE0 or below 0x90 after 0xF0
// (overlong encodings), and a byte above 0x8F after 0xF4 (beyond U+10FFFF).
113 if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
114 throw new Exception(
'Bogus UTF-8 character detected (out of legal range) at byte '.$k);
// Strip the 0x80 continuation marker and shift the six payload bits into place.
118 $v = ($v-128) << ($next_byte*6);
122 if (self::$safe_mode) {
// NOTE(review): ord() is applied to self::$safe_char only here; the other
// safe-mode assignments in this method store the value directly. With the
// integer default 0xFFFC, ord() would return the byte value of the first
// character of its string form, not the code point. Likely a bug; confirm.
123 $output[$out_len-1] = ord(self::$safe_char);
128 throw new Exception(
'Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
131 if ($next_byte < 0) {
/**
 * Encodes a sequence of integer code point values as UTF-8, appending the
 * bytes for each value to the string $output.
 *
 * @param array $input Values to encode; iterated with foreach.
 * @throws Exception Presumably for values of 1 << 21 or above when safe mode
 *                   is off; confirm against the full method body.
 */
144 private static function ucs4array_utf8($input)
147 foreach ($input as $v) {
// Below 0x800: two bytes, 110xxxxx 10xxxxxx.
150 } elseif ($v < (1 << 11)) {
151 $output .= chr(192+($v >> 6)).chr(128+($v & 63));
// Below 0x10000: three bytes, 1110xxxx followed by two continuation bytes.
152 } elseif ($v < (1 << 16)) {
153 $output .= chr(224+($v >> 12)).chr(128+(($v >> 6) & 63)).chr(128+($v & 63));
// Below 0x200000: four bytes, 11110xxx followed by three continuation bytes.
154 } elseif ($v < (1 << 21)) {
155 $output .= chr(240+($v >> 18)).chr(128+(($v >> 12) & 63)).chr(128+(($v >> 6) & 63)).chr(128+($v & 63));
// Larger values get separate handling in safe mode.
156 } elseif (self::$safe_mode) {
// NOTE(review): $k is not assigned in the visible part of this method (the
// foreach exposes only $v), so the offset in this message may be undefined.
159 throw new Exception(
'Conversion from UCS-4 to UTF-8 failed: malformed input at byte '.$k);
/**
 * Decodes the IMAP flavour of UTF-7 by delegating to the generic UTF-7 decoder.
 *
 * @param string $input Text in the IMAP UTF-7 variant.
 * @return mixed Whatever utf7_ucs4array() returns for the normalised input.
 */
private static function utf7imap_ucs4array($input)
{
    // The IMAP variant writes ',' where base64 uses '/', and shifts with '&'
    // rather than '+'.
    $normalized = str_replace(',', '/', $input);

    return self::utf7_ucs4array($normalized, '&');
}
/**
 * Decodes UTF-7 text, writing decoded values into $output.
 *
 * @param string $input UTF-7 encoded data; processed byte by byte.
 * @param string $sc    Shift character, '+' by default; the IMAP wrapper
 *                      passes '&'. Presumably it introduces base64 runs.
 */
170 private static function utf7_ucs4array($input, $sc =
'+')
174 $inp_len = strlen($input);
178 for ($k = 0; $k < $inp_len; ++$k) {
// NUL bytes are skipped.
180 if (0 == ord($c))
continue;
// Taken when $c is neither a letter, a digit, '/' nor the shift character.
// preg_quote() escapes $sc for use with the '!' pattern delimiter.
183 if (!preg_match(
'![A-Za-z0-9/'.preg_quote($sc,
'!').
']!', $c)) {
// Turn the collected base64 run into raw bytes.
192 $tmp = base64_decode($b64);
// NOTE(review): for an even length the offset is 0 and the whole string is
// kept, but for an odd length the offset is -1 and only the final byte
// survives. Confirm this is the intended handling of odd-length data.
193 $tmp = substr($tmp, -1 * (strlen($tmp) % 2));
// Bytes are folded into 16-bit units: one statement stores a byte shifted
// into the high half, the other adds a byte as the low half (presumably
// alternating per byte).
// NOTE(review): the $tmp{$i} offset syntax was deprecated in PHP 7.4 and
// removed in PHP 8.0; $tmp[$i] is the supported spelling.
194 for ($i = 0; $i < strlen($tmp); $i++) {
196 $output[$out_len] += ord($tmp{$i});
199 $output[$out_len] = ord($tmp{$i}) << 8;
/**
 * Encodes code points in the IMAP flavour of UTF-7 by delegating to the
 * generic UTF-7 encoder.
 *
 * @param mixed $input Code point data, passed through to ucs4array_utf7().
 * @return mixed Result of str_replace() on the generic encoder's output.
 */
private static function ucs4array_utf7imap($input)
{
    // Encode with '&' as the shift character, then swap '/' for ',' as the
    // IMAP variant requires.
    $encoded = self::ucs4array_utf7($input, '&');

    return str_replace('/', ',', $encoded);
}
/**
 * Encodes code point values as UTF-7, appending to the string $output.
 *
 * Values are consumed from the front of $input one at a time. Runs that
 * cannot be written directly are buffered in $b64 and emitted as base64.
 *
 * @param array  $input Values to encode; consumed with array_shift().
 * @param string $sc    Shift character that opens a base64 run, '+' by
 *                      default; the IMAP wrapper passes '&'.
 */
226 private static function ucs4array_utf7($input, $sc =
'+')
// Take the next value; false marks that the input is exhausted.
232 $v = (!empty($input)) ? array_shift($input) :
false;
// "Direct" means printable ASCII (0x20-0x7e) other than the shift character.
// End of input also counts as direct.
233 $is_direct = (
false !== $v) ? (0x20 <= $v && $v <= 0x7e && $v != ord($sc)) :
true;
// The buffer holds exactly the shift character as one 16-bit unit
// (a zero high byte followed by $sc).
236 if ($b64 == chr(0).$sc) {
// Emit the buffered run: shift character, base64 without '=' padding, then
// a closing '-'.
240 $output .= $sc.str_replace(
'=',
'', base64_encode($b64)).
'-';
244 } elseif (
false !== $v) {
// Append the value as two bytes, high byte first.
// NOTE(review): only the low 16 bits are kept here, so values above 0xFFFC
// would be truncated unless they are split elsewhere; confirm.
245 $b64 .= chr(($v >> 8) & 255). chr($v & 255);
// In mode 'd' with a value available, the buffer is restarted with that value.
248 if ($mode ==
'd' &&
false !== $v) {
252 $b64 = chr(($v >> 8) & 255). chr($v & 255);
// Stop once the input is exhausted and nothing is left in the buffer.
256 if (
false === $v && $b64 ==
'')
break;
/**
 * Serialises code point values as a UCS-4 byte string: four bytes per value,
 * most significant byte first, appended to $output.
 *
 * @param array $input Integer values to serialise; iterated with foreach.
 */
265 private static function ucs4array_ucs4($input)
268 foreach ($input as $v) {
// Bits 31-24, 23-16, 15-8 and 7-0 of the value, in that order.
269 $output .= chr(($v >> 24) & 255).chr(($v >> 16) & 255).chr(($v >> 8) & 255).chr($v & 255);
/**
 * Parses a UCS-4 byte string into integer values stored in $output.
 *
 * @param string $input UCS-4 data; its length is measured in bytes.
 * @throws Exception With 'Input UCS4 string is broken'; presumably when the
 *                   byte length is not a multiple of four. Confirm.
 */
278 private static function ucs4_ucs4array($input)
282 $inp_len = strlen($input);
285 throw new Exception(
'Input UCS4 string is broken');
// $out_len starts at -1; presumably it is advanced at the start of each
// four-byte group before the slot is written.
290 for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
// Byte $i is shifted left by 24, 16, 8 or 0 bits according to $i % 4, so the
// first byte of each group is the most significant.
// NOTE(review): the $input{$i} offset syntax was deprecated in PHP 7.4 and
// removed in PHP 8.0; $input[$i] is the supported spelling.
295 $output[$out_len] += ord($input{$i}) << (8 * (3 - ($i % 4) ) );
static convert($data, $from, $to, $safe_mode=false, $safe_char=0xFFFC)