Initial commit of intern.ccwn.org contentsHEAD master

author: Tristan Zur <tzur@web.web.ccwn.org> 2014-03-27 22:27:47 +0100
committer: Tristan Zur <tzur@web.web.ccwn.org> 2014-03-27 22:27:47 +0100
commit: b62676ca5d3d6f6ba3f019ea3f99722e165a98d8 (patch)
tree: 86722cb80f07d4569f90088eeaea2fc2f6e2ef94 /webmail/program/lib/utf8.class.php
1 files changed, 171 insertions, 0 deletions
diff --git a/webmail/program/lib/utf8.class.php b/webmail/program/lib/utf8.class.php
new file mode 100644
index 0000000..0446159
--- /dev/null
+++ b/webmail/program/lib/utf8.class.php
@@ -0,0 +1,171 @@
+<?php
+/*
+utf8 1.0
+Copyright: Left
+---------------------------------------------------------------------------------
+Version:        1.0
+Date:           23 November 2004
+---------------------------------------------------------------------------------
+Author:         Alexander Minkovsky (a_minkovsky@hotmail.com)
+---------------------------------------------------------------------------------
+License:        Choose the more appropriated for You - I don't care.
+---------------------------------------------------------------------------------
+Description:
+    Class provides functionality to convert single byte strings, such as CP1251
+    ti UTF-8 multibyte format and vice versa.
+    Class loads a concrete charset map, for example CP1251.
+    (Refer to ftp://ftp.unicode.org/Public/MAPPINGS/ for map files)
+    Directory containing MAP files is predefined as constant.
+    Each charset is also predefined as constant pointing to the MAP file.
+---------------------------------------------------------------------------------
+Example usage:
+    Pass the desired charset in the class constructor:
+    $utfConverter = new utf8(CP1251); //defaults to CP1250.
+    or load the charset MAP using loadCharset method like this:
+    $utfConverter->loadCharset(CP1252);
+    Then call
+    $res = $utfConverter->strToUtf8($str);
+    or
+    $res = $utfConverter->utf8ToStr($utf);
+    to get the needed encoding.
+---------------------------------------------------------------------------------
+Note:
+    Rewrite or Override the onError method if needed. It's the error handler used from everywhere and takes 2 parameters:
+    err_code and err_text. By default it just prints out a message about the error.
+*/
+
+// Charset maps
+// Adapted to fit Roundcube
+define("UTF8_MAP_DIR", "program/lib/encoding");
+
+//Error constants
+define("ERR_OPEN_MAP_FILE", "ERR_OPEN_MAP_FILE");
+
+//Class definition
+Class utf8 {
+
+  var $charset = "ISO-8859-1";
+  var $ascMap = array();
+  var $utfMap = array();
+  var $aliases = array(
+    'KOI8-R' => 'KOI8R'
+  );
+  var $error = null;
+
+  function __construct($charset="ISO-8859-1") {
+    $this->loadCharset($charset);
+  }
+  
+  //Load charset
+  function loadCharset($charset) {
+    
+    $charset = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $charset);
+    if (isset($this->aliases[$charset]))
+      $charset = $this->aliases[$charset];
+    
+    $this->charset = $charset;
+
+    if (empty($this->ascMap[$charset]))
+      {
+      $file = UTF8_MAP_DIR.'/'.$charset.'.map';
+    
+      if (!is_file($file)) {
+        $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
+        return;
+      }
+    
+      $lines = file_get_contents($file);
+      $lines = preg_replace("/#.*$/m","",$lines);
+      $lines = preg_replace("/\n\n/","",$lines);
+      $lines = explode("\n",$lines);
+
+      foreach($lines as $line){
+        $parts = explode('0x',$line);
+        if(count($parts)==3){
+          $asc=hexdec(substr($parts[1],0,2));
+          $utf=hexdec(substr($parts[2],0,4));
+          $this->ascMap[$charset][$asc]=$utf;
+        }
+      }
+      
+      $this->utfMap = array_flip($this->ascMap[$charset]);
+    }
+  }
+
+  //Error handler
+  function onError($err_code,$err_text){
+    $this->error = $err_text;
+    return null;
+  }
+
+  //Translate string ($str) to UTF-8 from given charset
+  function strToUtf8($str){
+    if (empty($this->ascMap[$this->charset]))
+      return null;
+
+    $chars = unpack('C*', $str);
+    $cnt = count($chars);
+    for($i=1; $i<=$cnt; $i++)
+      $this->_charToUtf8($chars[$i]);
+
+    return implode("",$chars);
+  }
+
+  //Translate UTF-8 string to single byte string in the given charset
+  function utf8ToStr($utf){
+    if (empty($this->ascMap[$this->charset]))
+      return null;
+
+    $chars = unpack('C*', $utf);
+    $cnt = count($chars);
+    $res = ""; //No simple way to do it in place... concatenate char by char
+
+    for ($i=1; $i<=$cnt; $i++)
+      $res .= $this->_utf8ToChar($chars, $i);
+
+    return $res;
+  }
+
+  //Char to UTF-8 sequence
+  function _charToUtf8(&$char){
+    $c = (int)$this->ascMap[$this->charset][$char];
+    if ($c < 0x80){
+      $char = chr($c);
+    }
+    else if($c<0x800) // 2 bytes
+      $char = (chr(0xC0 | $c>>6) . chr(0x80 | $c & 0x3F));
+    else if($c<0x10000) // 3 bytes
+      $char = (chr(0xE0 | $c>>12) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F));
+    else if($c<0x200000) // 4 bytes
+      $char = (chr(0xF0 | $c>>18) . chr(0x80 | $c>>12 & 0x3F) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F));
+  }
+
+  //UTF-8 sequence to single byte character
+  function _utf8ToChar(&$chars, &$idx){
+    if(($chars[$idx] >= 240) && ($chars[$idx] <= 255)){ // 4 bytes
+      $utf =    (intval($chars[$idx]-240)   << 18) +
+                (intval($chars[++$idx]-128) << 12) +
+                (intval($chars[++$idx]-128) << 6) +
+                (intval($chars[++$idx]-128) << 0);
+    }
+    else if (($chars[$idx] >= 224) && ($chars[$idx] <= 239)){ // 3 bytes
+      $utf =    (intval($chars[$idx]-224)   << 12) +
+                (intval($chars[++$idx]-128) << 6) +
+                (intval($chars[++$idx]-128) << 0);
+    }
+    else if (($chars[$idx] >= 192) && ($chars[$idx] <= 223)){ // 2 bytes
+      $utf =    (intval($chars[$idx]-192)   << 6) +
+                (intval($chars[++$idx]-128) << 0);
+    }
+    else{ // 1 byte
+      $utf = $chars[$idx];
+    }
+    if(array_key_exists($utf,$this->utfMap))
+      return chr($this->utfMap[$utf]);
+    else
+      return "?";
+  }
+
+}
+
+?>
author	Tristan Zur <tzur@web.web.ccwn.org>	2014-03-27 22:27:47 +0100
committer	Tristan Zur <tzur@web.web.ccwn.org>	2014-03-27 22:27:47 +0100
commit	b62676ca5d3d6f6ba3f019ea3f99722e165a98d8 (patch)
tree	86722cb80f07d4569f90088eeaea2fc2f6e2ef94 /webmail/program/lib/utf8.class.php