diff options
| author | Tristan Zur <tzur@web.web.ccwn.org> | 2014-03-27 22:27:47 +0100 |
|---|---|---|
| committer | Tristan Zur <tzur@web.web.ccwn.org> | 2014-03-27 22:27:47 +0100 |
| commit | b62676ca5d3d6f6ba3f019ea3f99722e165a98d8 (patch) | |
| tree | 86722cb80f07d4569f90088eeaea2fc2f6e2ef94 /webmail/program/lib/utf8.class.php | |
Diffstat (limited to 'webmail/program/lib/utf8.class.php')
| -rw-r--r-- | webmail/program/lib/utf8.class.php | 171 |
1 files changed, 171 insertions, 0 deletions
<?php
/*
utf8 1.0
Copyright: Left
---------------------------------------------------------------------------------
Version: 1.0
Date: 23 November 2004
---------------------------------------------------------------------------------
Author: Alexander Minkovsky (a_minkovsky@hotmail.com)
---------------------------------------------------------------------------------
License: Choose the more appropriated for You - I don't care.
---------------------------------------------------------------------------------
Description:
  Class provides functionality to convert single byte strings, such as CP1251
  to UTF-8 multibyte format and vice versa.
  Class loads a concrete charset map, for example CP1251.
  (Refer to ftp://ftp.unicode.org/Public/MAPPINGS/ for map files)
  Directory containing MAP files is predefined as constant (UTF8_MAP_DIR).
  The map of a charset is read from "<UTF8_MAP_DIR>/<charset>.map".
---------------------------------------------------------------------------------
Example usage:
  Pass the desired charset in the class constructor:
    $utfConverter = new utf8('CP1251'); //defaults to ISO-8859-1.
  or load the charset MAP using loadCharset method like this:
    $utfConverter->loadCharset('CP1252');
  Then call
    $res = $utfConverter->strToUtf8($str);
  or
    $res = $utfConverter->utf8ToStr($utf);
  to get the needed encoding.
---------------------------------------------------------------------------------
Note:
  Rewrite or Override the onError method if needed. It's the error handler used from everywhere and takes 2 parameters:
  err_code and err_text. By default it just stores the error text in the $error property.
*/

// Charset maps
// Adapted to fit Roundcube
define("UTF8_MAP_DIR", "program/lib/encoding");

//Error constants
define("ERR_OPEN_MAP_FILE", "ERR_OPEN_MAP_FILE");

/**
 * Converter between a single-byte charset and UTF-8, driven by ".map" files.
 *
 * Parsed maps are cached per charset name in $ascMap; $utfMap always holds
 * the reverse table of the currently selected charset.
 */
class utf8
{
    /** Name of the currently selected charset (after alias normalisation). */
    public $charset = "ISO-8859-1";

    /** Cache of parsed maps: charset name => array(byte value => code point). */
    public $ascMap = array();

    /** Reverse table of the current charset: code point => byte value. */
    public $utfMap = array();

    /** Charset names whose map file is stored under a different name. */
    public $aliases = array(
        'KOI8-R' => 'KOI8R'
    );

    /** Text of the last error reported through onError(), or null. */
    public $error = null;

    /**
     * @param string $charset Charset to load initially.
     */
    public function __construct($charset = "ISO-8859-1")
    {
        $this->loadCharset($charset);
    }

    /**
     * Select a charset, reading and caching its map file on first use.
     *
     * On failure the error is reported through onError() and the charset stays
     * selected but unloaded, so strToUtf8()/utf8ToStr() return null.
     *
     * @param string $charset Charset name, e.g. "CP1251", "WINDOWS-1252", "KOI8-R".
     */
    public function loadCharset($charset)
    {
        $charset = (string) preg_replace(
            array('/^WINDOWS-*125([0-8])$/', '/^CP-/'),
            array('CP125\\1', 'CP'),
            $charset
        );
        if (isset($this->aliases[$charset])) {
            $charset = $this->aliases[$charset];
        }

        $this->charset = $charset;
        // Do not let an error from an earlier attempt leak into this one.
        $this->error = null;

        if (empty($this->ascMap[$charset])) {
            // The name becomes part of a file path and may originate from
            // message headers: refuse anything that could leave UTF8_MAP_DIR.
            if ($charset === '' || strpbrk($charset, "/\\\0") !== false) {
                $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
                return;
            }

            $file = UTF8_MAP_DIR.'/'.$charset.'.map';

            if (!is_file($file)) {
                $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
                return;
            }

            $contents = file_get_contents($file);
            if ($contents === false) {
                $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
                return;
            }

            $map = $this->_parseMap($contents);
            if (empty($map)) {
                $this->onError(ERR_OPEN_MAP_FILE, "No mappings found in map file for $charset");
                return;
            }

            $this->ascMap[$charset] = $map;
        }

        // Rebuilt on every call: the forward map may come from the cache while
        // $utfMap still belongs to a previously selected charset.
        $this->utfMap = array_flip($this->ascMap[$charset]);
    }

    /**
     * Parse the text of a map file into array(byte value => code point).
     *
     * A line contributes a mapping only if, after removing its "#" comment,
     * it contains exactly two "0x" prefixed numbers; the first two hex digits
     * of the first number are the byte, the first four of the second are the
     * code point. All other lines are ignored.
     *
     * @param string $contents Raw map file contents.
     * @return array
     */
    private function _parseMap($contents)
    {
        $map = array();

        foreach (preg_split('/\r\n|\r|\n/', $contents) as $line) {
            $hash = strpos($line, '#');
            if ($hash !== false) {
                $line = substr($line, 0, $hash);
            }

            $parts = explode('0x', $line);
            if (count($parts) == 3) {
                $asc = hexdec(substr($parts[1], 0, 2));
                $utf = hexdec(substr($parts[2], 0, 4));
                $map[$asc] = $utf;
            }
        }

        return $map;
    }

    /**
     * Error handler; override to change how errors are reported.
     *
     * @param string $err_code Error constant, e.g. ERR_OPEN_MAP_FILE.
     * @param string $err_text Human readable message, stored in $error.
     * @return null
     */
    public function onError($err_code, $err_text)
    {
        $this->error = $err_text;
        return null;
    }

    /**
     * Translate a string from the current charset to UTF-8.
     *
     * @param string $str Single-byte encoded input.
     * @return string|null UTF-8 string, or null if no map is loaded.
     */
    public function strToUtf8($str)
    {
        if (empty($this->ascMap[$this->charset])) {
            return null;
        }

        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // unpack() yields a 1-based array of byte values.
        $chars = unpack('C*', $str);
        $cnt = count($chars);
        for ($i = 1; $i <= $cnt; $i++) {
            $this->_charToUtf8($chars[$i]);
        }

        return implode("", $chars);
    }

    /**
     * Translate a UTF-8 string to the current single-byte charset.
     *
     * Code points that have no equivalent in the charset become "?".
     *
     * @param string $utf UTF-8 input.
     * @return string|null Single-byte string, or null if no map is loaded.
     */
    public function utf8ToStr($utf)
    {
        if (empty($this->ascMap[$this->charset])) {
            return null;
        }

        $utf = (string) $utf;
        if ($utf === '') {
            return '';
        }

        $chars = unpack('C*', $utf);
        $cnt = count($chars);
        $res = ""; //No simple way to do it in place... concatenate char by char

        // _utf8ToChar() advances $i past the continuation bytes it consumes.
        for ($i = 1; $i <= $cnt; $i++) {
            $res .= $this->_utf8ToChar($chars, $i);
        }

        return $res;
    }

    /**
     * Replace a byte value (in place) by the UTF-8 sequence of its code point.
     *
     * Bytes that are not defined in the current map, and code points that do
     * not fit in four UTF-8 bytes, are replaced by "?".
     *
     * @param int|string $char In: byte value (0-255). Out: UTF-8 byte string.
     */
    public function _charToUtf8(&$char)
    {
        if (!isset($this->ascMap[$this->charset][$char])) {
            $char = "?";
            return;
        }

        $c = (int) $this->ascMap[$this->charset][$char];

        if ($c < 0x80) {
            $char = chr($c);
        } elseif ($c < 0x800) { // 2 bytes
            $char = chr(0xC0 | ($c >> 6))
                . chr(0x80 | ($c & 0x3F));
        } elseif ($c < 0x10000) { // 3 bytes
            $char = chr(0xE0 | ($c >> 12))
                . chr(0x80 | (($c >> 6) & 0x3F))
                . chr(0x80 | ($c & 0x3F));
        } elseif ($c < 0x200000) { // 4 bytes
            $char = chr(0xF0 | ($c >> 18))
                . chr(0x80 | (($c >> 12) & 0x3F))
                . chr(0x80 | (($c >> 6) & 0x3F))
                . chr(0x80 | ($c & 0x3F));
        } else {
            $char = "?";
        }
    }

    /**
     * Decode the UTF-8 sequence starting at $chars[$idx] into one byte of the
     * current charset.
     *
     * @param array $chars Byte values of the UTF-8 string (as from unpack('C*')).
     * @param int   $idx   In: index of the lead byte. Out: index of the last
     *                     byte consumed by this call.
     * @return string One character, or "?" if the code point is not in the
     *                charset or the sequence is cut off by the end of input.
     */
    public function _utf8ToChar(&$chars, &$idx)
    {
        $lead = $chars[$idx];

        if ($lead >= 240) {        // 4 bytes
            $extra = 3;
            $utf = $lead - 240;
        } elseif ($lead >= 224) {  // 3 bytes
            $extra = 2;
            $utf = $lead - 224;
        } elseif ($lead >= 192) {  // 2 bytes
            $extra = 1;
            $utf = $lead - 192;
        } else {                   // 1 byte
            $extra = 0;
            $utf = $lead;
        }

        // Sequence cut off by the end of the input: swallow what is left and
        // emit a single replacement character instead of reading past the end.
        if ($extra > 0 && !isset($chars[$idx + $extra])) {
            while (isset($chars[$idx + 1])) {
                $idx++;
            }
            return "?";
        }

        // Append the low six bits of every continuation byte.
        for ($k = 0; $k < $extra; $k++) {
            $utf = ($utf << 6) + ($chars[++$idx] - 128);
        }

        if (array_key_exists($utf, $this->utfMap)) {
            return chr($this->utfMap[$utf]);
        }

        return "?";
    }
}
