diff options
Diffstat (limited to 'js/dojo/dojox/string')
| -rw-r--r-- | js/dojo/dojox/string/BidiComplex.js | 316 | ||||
| -rw-r--r-- | js/dojo/dojox/string/BidiEngine.js | 1507 | ||||
| -rw-r--r-- | js/dojo/dojox/string/Builder.js | 134 | ||||
| -rw-r--r-- | js/dojo/dojox/string/README | 39 | ||||
| -rw-r--r-- | js/dojo/dojox/string/sprintf.js | 410 | ||||
| -rw-r--r-- | js/dojo/dojox/string/tokenize.js | 45 |
6 files changed, 2451 insertions, 0 deletions
diff --git a/js/dojo/dojox/string/BidiComplex.js b/js/dojo/dojox/string/BidiComplex.js new file mode 100644 index 0000000..4ec5131 --- /dev/null +++ b/js/dojo/dojox/string/BidiComplex.js @@ -0,0 +1,316 @@ +//>>built +// summary: +// BiDiComplex module handles complex expression issues known when using BiDi characters +// in File Paths, URLs, E-mail Address, XPATH, etc. +// this module adds property listeners to the text fields to correct the text representation +// in both static text and dynamic text during user input. + +define("dojox/string/BidiComplex", ["dojo/_base/kernel", "dojo/_base/lang", "dojo/_base/array", "dojo/_base/connect", "dojo/_base/sniff", + "dojo/keys"], + function(dojo, lang, arr, hub, has, keys){ + dojo.experimental("dojox.string.BidiComplex"); + var bdc = lang.getObject("string.BidiComplex", true, dojox); + + var _str0 = []; //FIXME: shared reference here among various functions means the functions can't be reused + + bdc.attachInput = function(/*DOMNode*/field, /*String*/pattern){ + // summary: + // Attach key listeners to the INPUT field to accomodate dynamic complex BiDi expressions + // field: INPUT DOM node + // pattern: Complex Expression Pattern type. One of "FILE_PATH", "URL", "EMAIL", "XPATH" + + field.alt = pattern; + + hub.connect(field, "onkeydown", this, "_ceKeyDown"); + hub.connect(field, "onkeyup", this, "_ceKeyUp"); + + hub.connect(field, "oncut", this, "_ceCutText"); + hub.connect(field, "oncopy", this, "_ceCopyText"); + + field.value = bdc.createDisplayString(field.value, field.alt); + }; + + bdc.createDisplayString = function(/*String*/str, /*String*/pattern){ + // summary: + // Create the display string by adding the Unicode direction Markers + // pattern: Complex Expression Pattern type. 
One of "FILE_PATH", "URL", "EMAIL", "XPATH" + + str = bdc.stripSpecialCharacters(str); + var segmentsPointers = bdc._parse(str, pattern); + + var buf = '\u202A'/*LRE*/ + str; + var shift = 1; + arr.forEach(segmentsPointers, function(n){ + if(n != null){ + var preStr = buf.substring(0, n + shift); + var postStr = buf.substring(n + shift, buf.length); + buf = preStr + '\u200E'/*LRM*/ + postStr; + shift++; + } + }); + return buf; + }; + + bdc.stripSpecialCharacters = function(str){ + // summary: + // removes all Unicode directional markers from the string + + return str.replace(/[\u200E\u200F\u202A-\u202E]/g, ""); // String + }; + + bdc._ceKeyDown = function(event){ + var elem = has("ie") ? event.srcElement : event.target; + _str0 = elem.value; + }; + + bdc._ceKeyUp = function(event){ + var LRM = '\u200E'; + var elem = has("ie") ? event.srcElement : event.target; + + var str1 = elem.value; + var ieKey = event.keyCode; + + if((ieKey == keys.HOME) + || (ieKey == keys.END) + || (ieKey == keys.SHIFT)){ + return; + } + + var cursorStart, cursorEnd; + var selection = bdc._getCaretPos(event, elem); + if(selection){ + cursorStart = selection[0]; + cursorEnd = selection[1]; + } + + //Jump over a cursor processing + if(has("ie")){ + var cursorStart1 = cursorStart, cursorEnd1 = cursorEnd; + + if(ieKey == keys.LEFT_ARROW){ + if((str1.charAt(cursorEnd-1) == LRM) + && (cursorStart == cursorEnd)){ + bdc._setSelectedRange(elem,cursorStart - 1, cursorEnd - 1); + } + return; + } + + if(ieKey == keys.RIGHT_ARROW){ + if(str1.charAt(cursorEnd-1) == LRM){ + cursorEnd1 = cursorEnd + 1; + if(cursorStart == cursorEnd){ + cursorStart1 = cursorStart + 1; + } + } + + bdc._setSelectedRange(elem, cursorStart1, cursorEnd1); + return; + } + }else{ //Firefox + if(ieKey == keys.LEFT_ARROW){ + if(str1.charAt(cursorEnd-1) == LRM){ + bdc._setSelectedRange(elem, cursorStart - 1, cursorEnd - 1); + } + return; + } + if(ieKey == keys.RIGHT_ARROW){ + if(str1.charAt(cursorEnd-1) == LRM){ + 
bdc._setSelectedRange(elem, cursorStart + 1, cursorEnd + 1); + } + return; + } + } + + var str2 = bdc.createDisplayString(str1, elem.alt); + + if(str1 != str2) + { + window.status = str1 + " c=" + cursorEnd; + elem.value = str2; + + if((ieKey == keys.DELETE) && (str2.charAt(cursorEnd)==LRM)){ + elem.value = str2.substring(0, cursorEnd) + str2.substring(cursorEnd+2, str2.length); + } + + if(ieKey == keys.DELETE){ + bdc._setSelectedRange(elem,cursorStart,cursorEnd); + }else if(ieKey == keys.BACKSPACE){ + if((_str0.length >= cursorEnd) && (_str0.charAt(cursorEnd-1)==LRM)){ + bdc._setSelectedRange(elem, cursorStart - 1, cursorEnd - 1); + }else{ + bdc._setSelectedRange(elem, cursorStart, cursorEnd); + } + }else if(elem.value.charAt(cursorEnd) != LRM){ + bdc._setSelectedRange(elem, cursorStart + 1, cursorEnd + 1); + } + } + }; + + bdc._processCopy = function(elem, text, isReverse){ + // summary: + // This function strips the unicode directional controls when the text copied to the Clipboard + + if(text == null){ + if(has("ie")){ + var range = document.selection.createRange(); + text = range.text; + }else{ + text = elem.value.substring(elem.selectionStart, elem.selectionEnd); + } + } + + var textToClipboard = bdc.stripSpecialCharacters(text); + + if(has("ie")){ + window.clipboardData.setData("Text", textToClipboard); + } + return true; + }; + + bdc._ceCopyText = function(elem){ + if(has("ie")){ + elem.returnValue = false; + } + return bdc._processCopy(elem, null, false); + }; + + bdc._ceCutText = function(elem){ + + var ret = bdc._processCopy(elem, null, false); + if(!ret){ + return false; + } + + if(has("ie")){ + // curPos = elem.selectionStart; + document.selection.clear(); + }else{ + var curPos = elem.selectionStart; + elem.value = elem.value.substring(0, curPos) + elem.value.substring(elem.selectionEnd); + elem.setSelectionRange(curPos, curPos); + } + + return true; + }; + + // is there dijit code to do this? 
+ bdc._getCaretPos = function(event, elem){ + if(has("ie")){ + var position = 0, + range = document.selection.createRange().duplicate(), + range2 = range.duplicate(), + rangeLength = range.text.length; + + if(elem.type == "textarea"){ + range2.moveToElementText(elem); + }else{ + range2.expand('textedit'); + } + while(range.compareEndPoints('StartToStart', range2) > 0){ + range.moveStart('character', -1); + ++position; + } + + return [position, position + rangeLength]; + } + + return [event.target.selectionStart, event.target.selectionEnd]; + }; + + // is there dijit code to do this? + bdc._setSelectedRange = function(elem,selectionStart,selectionEnd){ + if(has("ie")){ + var range = elem.createTextRange(); + if(range){ + if(elem.type == "textarea"){ + range.moveToElementText(elem); + }else{ + range.expand('textedit'); + } + + range.collapse(); + range.moveEnd('character', selectionEnd); + range.moveStart('character', selectionStart); + range.select(); + } + }else{ + elem.selectionStart = selectionStart; + elem.selectionEnd = selectionEnd; + } + }; + + var _isBidiChar = function(c){ + return (c >= '\u0030' && c <= '\u0039') || (c > '\u00ff'); + }; + + var _isLatinChar = function(c){ + return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A'); + }; + + var _isCharBeforeBiDiChar = function(buffer, i, previous){ + while(i > 0){ + if(i == previous){ + return false; + } + i--; + if(_isBidiChar(buffer.charAt(i))){ + return true; + } + if(_isLatinChar(buffer.charAt(i))){ + return false; + } + } + return false; + }; + + + bdc._parse = function(/*String*/str, /*String*/pattern){ + var previous = -1, segmentsPointers = []; + var delimiters = { + FILE_PATH: "/\\:.", + URL: "/:.?=&#", + XPATH: "/\\:.<>=[]", + EMAIL: "<>@.,;" + }[pattern]; + + switch(pattern){ + case "FILE_PATH": + case "URL": + case "XPATH": + arr.forEach(str, function(ch, i){ + if(delimiters.indexOf(ch) >= 0 && _isCharBeforeBiDiChar(str, i, previous)){ + previous = i; + 
segmentsPointers.push(i); + } + }); + break; + case "EMAIL": + var inQuotes = false; // FIXME: unused? + + arr.forEach(str, function(ch, i){ + if(ch== '\"'){ + if(_isCharBeforeBiDiChar(str, i, previous)){ + previous = i; + segmentsPointers.push(i); + } + i++; + var i1 = str.indexOf('\"', i); + if(i1 >= i){ + i = i1; + } + if(_isCharBeforeBiDiChar(str, i, previous)){ + previous = i; + segmentsPointers.push(i); + } + } + + if(delimiters.indexOf(ch) >= 0 && _isCharBeforeBiDiChar(str, i, previous)){ + previous = i; + segmentsPointers.push(i); + } + }); + } + return segmentsPointers; + }; + return dojox.string.BidiComplex; +}); diff --git a/js/dojo/dojox/string/BidiEngine.js b/js/dojo/dojox/string/BidiEngine.js new file mode 100644 index 0000000..13571ba --- /dev/null +++ b/js/dojo/dojox/string/BidiEngine.js @@ -0,0 +1,1507 @@ +//>>built +define("dojox/string/BidiEngine", ["dojo/_base/lang", "dojo/_base/declare"], + function(lang,declare){ +lang.getObject("string", true, dojox); + +declare("dojox.string.BidiEngine", null, { + // summary: + // This class provides a bidi transformation engine, i.e. + // functions for reordering and shaping bidi text. + // description: + // Bidi stands for support for languages with a bidirectional script. + // + // Usually Unicode Bidi Algorithm used by OS platform (and web browsers) is capable of properly transforming + // Bidi text and as a result it is adequately displayed on the screen. However, in some situations, + // Unicode Bidi Algorithm is not invoked or is not properly applied. This may occur in situation in which software + // responsible for rendering the text is not leveraging Unicode Bidi Algorithm implemented by OS (e.g. dojox.GFX renderers). + // + // Bidi engine provided in this class implements Unicode Bidi Algorithm as specified at: + // http://www.unicode.org/reports/tr9/. 
+ // + // For more information on basic Bidi concepts please read following article: + // "Bidirectional script support - A primer" available from: + // http://www.ibm.com/developerworks/websphere/library/techarticles/bidi/bidigen.html + // + // As of February 2011, Bidi engine has following limitations: + // 1. No support for following numeric shaping options: + // H - Hindi, + // C - Contextual, + // N - Nominal. + // 2. No support for following shaping options: + // I - Initial shaping, + // M - Middle shaping, + // F - Final shaping, + // B - Isolated shaping. + // 3. No support for source-to-target or/and target-to-source maps. + // 4. No support for LRE/RLE/LRO/RLO/PDF (they are handled like neutrals). + // 5. No support for Windows compatibility. + // 6. No support for insert/remove marks. + // 7. No support for code pages (currently only UTF-8 is supported. Ideally we should convert from any code page to UTF-8). + + bidiTransform: function (/*String*/text, /*String*/formatIn, /*String*/formatOut){ + // summary: + // Central public API for Bidi engine. Transforms the text according to formatIn, formatOut parameters. + // If formatIn or formatOut parametrs are not valid throws an exception. + // inputText: + // Input text subject to application of Bidi transformation. + // formatIn: + // Input Bidi layout in which inputText is passed to the function. + // formatOut: + // Output Bidi layout to which inputText should be transformed. + // description: + // Both formatIn and formatOut parameters are 5 letters long strings. + // For example - "ILYNN". Each letter is associated with specific attribute of Bidi layout. + // Possible and default values for each one of the letters are provided below: + // + // First letter: + // Letter position/index: + // 1 + // Letter meaning: + // Ordering Schema. + // Possible values: + // I - Implicit (Logical). + // V - Visual. 
+ // Default value: + // I + // + // Second letter: + // Letter position/index: + // 2 + // Letter meaning: + // Orientation. + // Possible values: + // L - Left To Right. + // R - Right To Left. + // C - Contextual Left to Right. + // D - Contextual Right to Left. + // Default value: + // L + // + // Third letter: + // Letter position/index: + // 3 + // Letter meaning: + // Symmetric Swapping. + // Possible values: + // Y - Symmetric swapping is on. + // N - Symmetric swapping is off. + // Default value: + // Y + // + // Fourth letter: + // Letter position/index: + // 4 + // Letter meaning: + // Shaping. + // Possible values: + // S - Text is shaped. + // N - Text is not shaped. + // Default value: + // N + // + // Fifth letter: + // Letter position/index: + // 5 + // Letter meaning: + // Numeric Shaping. + // Possible values: + // N - Nominal. + // Default value: + // N + // + // The output of this function is original text (passed via first argument) transformed from input Bidi layout (second argument) + // to output Bidi layout (last argument). + // + // Sample call: + // | mytext = bidiTransform("HELLO WORLD", "ILYNN", "VLYNN"); + // In this case, "HELLO WORLD" text is transformed from Logical - LTR to Visual - LTR Bidi layout with + // default values for symmetric swapping (Yes), shaping (Not shaped) and numeric shaping (Nominal). + // returns: /*String*/ or throws an exception. + // Original text transformed from input Bidi layout (second argument) + // to output Bidi layout (last argument). + // Throws an exception if the bidi layout strings are not valid. 
+ // tags: + // public + + if(!text){ + return ''; + } + if(!formatIn && !formatOut){ + return text; + } + + // regex for format validation + // Allowed values for format string are: + // 1st letter- I, V + // 2nd letter- L, R, C, D + // 3rd letter- Y, N + // 4th letter- S, N + // 5th letter- N + var validFormat = /^[(I|V)][(L|R|C|D)][(Y|N)][(S|N)][N]$/; + if(!validFormat.test(formatIn) || !validFormat.test(formatOut)){ + throw new Error("dojox.string.BidiEngine: the bidi layout string is wrong!"); + } + + if(formatIn == formatOut){ + return text; + } + + var orientIn = getOrientation(formatIn.charAt(1)) + , orientOut = getOrientation(formatOut.charAt(1)) + , os_in = (formatIn.charAt(0) == 'I') ? 'L' : formatIn.charAt(0) + , os_out = (formatOut.charAt(0) == 'I') ? 'L' : formatOut.charAt(0) + , inFormat = os_in + orientIn + , outFormat = os_out + orientOut + , swap = formatIn.charAt(2) + formatOut.charAt(2) + ; + + if(inFormat){ + bdx.defInFormat = inFormat; + } + if(outFormat){ + bdx.defOutFormat = outFormat; + } + if(swap){ + bdx.defSwap = swap; + } + + var stage1_text = doBidiReorder(text, os_in + orientIn, os_out + orientOut, formatIn.charAt(2) + formatOut.charAt(2)) + , isRtl = false; + + if(formatOut.charAt(1) == 'R'){ + isRtl = true; + }else if(formatOut.charAt(1) == 'C' || formatOut.charAt(1) == 'D'){ + isRtl = this.checkContextual(stage1_text); + } + if(formatIn.charAt(3) == formatOut.charAt(3)){ + return stage1_text; + }else if(formatOut.charAt(3) == 'S'){ + return shape(isRtl, stage1_text, true); + } + if(formatOut.charAt(3) == 'N'){ + return deshape(stage1_text, isRtl, true); + } + }, + checkContextual: function(/*String*/text){ + // summary: + // Determine the base direction of a bidi text according + // to its first strong directional character. + // text: + // The text to check. + // returns: /*String*/ + // "ltr" or "rtl" according to the first strong character. 
+ // If there is no strong character, returns the value of the + // document dir property. + // tags: + // public + var dir = firstStrongDir(text); + if(dir != "ltr" && dir != "rtl"){ + dir = document.dir.toLowerCase(); + if(dir != "ltr" && dir != "rtl"){dir = "ltr";} + } + return dir; + }, + hasBidiChar: function(/*String*/text){ + // summary: + // Return true if text contains RTL directed character. + // text: + // The source string. + // description: + // Iterates over the text string, letter by letter starting from its beginning, + // searching for RTL directed character. + // Return true if found else false. Needed for vml transformation. + // returns: /*Boolean*/ + // true - if text has a RTL directed character. + // false - otherwise. + // tags: + // public + + var type = null, uc = null, hi = null; + for(var i = 0; i < text.length; i++){ + uc = text.charAt(i).charCodeAt(0); + hi = MasterTable[uc >> 8]; + type = hi < TBBASE ? hi : UnicodeTable[hi - TBBASE][uc & 0xFF]; + if(type == UBAT_R || type == UBAT_AL){ + return true; + } + if(type == UBAT_B){ + break; + } + } + return false; + } + +}); + + +function doBidiReorder(/*String*/text, /*String*/inFormat, + /*String*/outFormat, /*String*/swap){ + // summary: + // Reorder the source text according to the bidi attributes + // of source and result. + // text: + // The text to reorder. + // inFormat: + // Ordering scheme and base direction of the source text. + // Can be "LLTR", "LRTL", "LCLR", "LCRL", "VLTR", "VRTL", + // "VCLR", "VCRL". + // The first letter is "L" for logical ordering scheme, + // "V" for visual ordering scheme. + // The other letters specify the base direction. + // "CLR" means contextual direction defaulting to LTR if + // there is no strong letter. + // "CRL" means contextual direction defaulting to RTL if + // there is no strong letter. + // The initial value is "LLTR", if none, the initial value is used. + // outFormat: + // Required ordering scheme and base direction of the + // result. 
Has the same format as inFormat. + // If none, the initial value "VLTR" is used. + // swap: + // Symmetric swapping attributes of source and result. + // The allowed values can be "YN", "NY", "YY" and "NN". + // The first letter reflects the symmetric swapping attribute + // of the source, the second letter that of the result. + // returns: + // Text reordered according to source and result attributes. + + if(inFormat == undefined){ + inFormat = bdx.defInFormat; + } + if(outFormat == undefined){ + outFormat = bdx.defOutFormat; + } + if(swap == undefined){ + swap = bdx.defSwap; + } + if(inFormat == outFormat){ + return text; + } + var dir, inOrdering = inFormat.substring(0,1) + , inOrientation = inFormat.substring(1,4) + , outOrdering = outFormat.substring(0,1) + , outOrientation = outFormat.substring(1,4) + ; + if(inOrientation.charAt(0) == "C"){ + dir = firstStrongDir(text); + if(dir == "ltr" || dir == "rtl"){ + inOrientation = dir.toUpperCase(); + }else{ + inOrientation = inFormat.charAt(2) == "L" ? "LTR" : "RTL"; + } + inFormat = inOrdering + inOrientation; + } + if(outOrientation.charAt(0) == "C"){ + dir = firstStrongDir(text); + if(dir == "rtl"){ + outOrientation = "RTL"; + }else if(dir == "ltr"){ + dir = lastStrongDir(text); + outOrientation = dir.toUpperCase(); + }else{ + outOrientation = outFormat.charAt(2) == "L" ? 
"LTR" : "RTL"; + } + outFormat = outOrdering + outOrientation; + } + if(inFormat == outFormat){ + return text; + } + bdx.inFormat = inFormat; + bdx.outFormat = outFormat; + bdx.swap = swap; + if((inOrdering == "L") && (outFormat == "VLTR")){ //core cases + //cases: LLTR->VLTR, LRTL->VLTR + if(inOrientation == "LTR"){ + bdx.dir = LTR; + return doReorder(text); + } + if(inOrientation == "RTL"){ + bdx.dir = RTL; + return doReorder(text); + } + } + if((inOrdering == "V") && (outOrdering == "V")){ + //inOrientation != outOrientation + //cases: VRTL->VLTR, VLTR->VRTL + return invertStr(text); + } + if((inOrdering == "L") && (outFormat == "VRTL")){ + //cases: LLTR->VRTL, LRTL->VRTL + if(inOrientation == "LTR"){ + bdx.dir = LTR; + text = doReorder(text); + }else{ + //inOrientation == RTL + bdx.dir = RTL; + text = doReorder(text); + } + return invertStr(text); + } + if((inFormat == "VLTR") && (outFormat == "LLTR")){ + //case: VLTR->LLTR + bdx.dir = LTR; + return doReorder(text); + } + if((inOrdering == "V") && (outOrdering == "L") && (inOrientation != outOrientation)){ + //cases: VLTR->LRTL, VRTL->LLTR + text = invertStr(text); + + return (inOrientation == "RTL") ? 
doBidiReorder(text, "LLTR","VLTR", swap) : doBidiReorder(text, "LRTL","VRTL", swap); + } + if((inFormat == "VRTL") && (outFormat == "LRTL")){ + //case VRTL->LRTL + return doBidiReorder(text, "LRTL","VRTL", swap); + } + if((inOrdering == "L") && (outOrdering == "L")){ + //inOrientation != outOrientation + //cases: LRTL->LLTR, LLTR->LRTL + var saveSwap = bdx.swap; + bdx.swap = saveSwap.substr(0, 1) + "N"; + if(inOrientation == "RTL"){ + //LRTL->LLTR + bdx.dir = RTL; + text = doReorder(text); + bdx.swap = "N" + saveSwap.substr(1, 2); + bdx.dir = LTR; + text = doReorder(text); + }else{ //LLTR->LRTL + bdx.dir = LTR; + text = doReorder(text); + bdx.swap = "N" + saveSwap.substr(1, 2); + text = doBidiReorder(text, "VLTR","LRTL", bdx.swap); + } + return text; + } + +}; + +function shape(/*boolean*/rtl, /*String*/text, /*boolean*/compress){ + // summary: + // Shape the source text. + // rtl: + // Flag indicating if the text is in RTL direction (logical + // direction for Arabic words). + // text: + // The text to shape. + // compress: + // A flag indicates to insert extra space after the lam alef compression + // to preserve the buffer size or not insert an extra space which will lead + // to decrease the buffer size. this option can be: + // - true (default) to not insert extra space after compressing Lam+Alef into one character Lamalef + // - false to insert an extra space after compressed Lamalef to preserve the buffer size + // returns: + // text shaped. + // tags: + // private. 
+ + if(text.length == 0){ + return; + } + if(rtl == undefined){ + rtl = true; + } + if(compress == undefined){ + compress = true; + } + text = new String(text); + + var str06 = text.split("") + , Ix = 0 + , step = +1 + , nIEnd = str06.length + ; + if(!rtl){ + Ix = str06.length - 1; + step = -1; + nIEnd = 1; + } + var previousCursive = 0, compressArray = [], compressArrayIndx = 0; + for(var index = Ix; index * step < nIEnd; index = index + step){ + if(isArabicAlefbet(str06[index]) || isArabicDiacritics(str06[index])){ + // Arabic letter Lam + if(str06[index] == '\u0644'){ + if(isNextAlef(str06, (index + step), step, nIEnd)){ + str06[index] = (previousCursive == 0) ? getLamAlefFE(str06[index + step], LamAlefInialTableFE) : getLamAlefFE(str06[index + step], LamAlefMedialTableFE); + index += step; + setAlefToSpace(str06, index, step, nIEnd); + if(compress){ + compressArray[compressArrayIndx] = index; + compressArrayIndx++; + } + previousCursive = 0; + continue; + } + } + var currentChr = str06[index]; + if(previousCursive == 1){ + // if next is Arabic + //Character is in medial form + // else character is in final form + str06[index] = (isNextArabic(str06, (index + step), step, nIEnd)) ? 
+ getMedialFormCharacterFE(str06[index]) : getFormCharacterFE(str06[index], FinalForm); + }else{ + if(isNextArabic(str06, (index + step), step, nIEnd) == true){ + //character is in Initial form + str06[index] = getFormCharacterFE(str06[index],InitialForm); + }else{ + str06[index] = getFormCharacterFE(str06[index], IsolatedForm); + } + } + //exam if the current character is cursive + if(!isArabicDiacritics(currentChr)){ + previousCursive = 1; + } + if(isStandAlonCharacter(currentChr) == true){ + previousCursive = 0; + } + }else{ + previousCursive = 0; + } + } + var outBuf = ""; + for(idx = 0; idx < str06.length; idx++){ + if(!(compress && indexOf(compressArray, compressArray.length, idx) > -1)){ + outBuf += str06[idx]; + } + } + return outBuf; +}; +function firstStrongDir(/*String*/text){ + // summary: + // Return the first strong character direction + // text: + // The source string. + // description: + // Iterates over the text string, letter by letter starting from its beginning, + // searching for first "strong" character. + // Returns if strong character was found with the direction defined by this + // character, if no strong character was found returns an empty string. + // returns: /*String*/ + // "ltr" - if the first strong character is Latin. + // "rtl" - if the first strong character is RTL directed character. + // "" - if the strong character wasn't found. + // tags: + // private + + var type = null, uc = null, hi = null; + for(var i = 0; i < text.length; i++){ + uc = text.charAt(i).charCodeAt(0); + hi = MasterTable[uc >> 8]; + type = hi < TBBASE ? hi : UnicodeTable[hi - TBBASE][uc & 0xFF]; + if(type == UBAT_R || type == UBAT_AL){ + return "rtl"; + } + if(type == UBAT_L){ + return "ltr"; + } + if(type == UBAT_B){ + break; + } + } + return ""; +}; +function lastStrongDir(text){ + // summary: + // Return the last strong character direction + // text: + // The source string. 
+ // description: + // Iterates over the text string, letter by letter starting from its end, + // searching for first (from the end) "strong" character. + // Returns if strong character was found with the direction defined by this + // character, if no strong character was found returns an empty string. + // tags: + // private + var type = null; + for(var i = text.length - 1; i >= 0; i--){ + type = getCharacterType(text.charAt(i)); + if(type == UBAT_R || type == UBAT_AL){ + return "rtl"; + } + if(type == UBAT_L){ + return "ltr"; + } + if(type == UBAT_B){ + break; + } + } + return ""; +}; +function deshape(/*String*/text, /*boolean*/rtl, /*boolean*/consume_next_space){ + // summary: + // deshape the source text. + // text: + // the text to be deshape. + // rtl: + // flag indicating if the text is in RTL direction (logical + // direction for Arabic words). + // consume_next_space: + // flag indicating whether to consume the space next to the + // the lam alef if there is a space followed the Lamalef character to preserve the buffer size. + // In case there is no space next to the lam alef the buffer size will be increased due to the + // expansion of the lam alef one character into lam+alef two characters + // returns: text deshaped. 
+ if(text.length == 0){ + return; + } + if(consume_next_space == undefined){ + consume_next_space = true; + } + if(rtl == undefined){ + rtl = true; + } + text = new String(text); + + var outBuf = "", strFE = [], textBuff = ""; + if(consume_next_space){ + for(var j = 0; j < text.length; j++){ + if(text.charAt(j) == ' '){ + if(rtl){ + if(j > 0){ + if(text.charAt(j - 1) >= '\uFEF5' && text.charAt(j - 1) <= '\uFEFC'){ + continue; + } + } + }else{ + if(j+1 < text.length){ + if(text.charAt(j + 1) >= '\uFEF5' && text.charAt(j + 1) <= '\uFEFC'){ + continue; + } + } + } + } + textBuff += text.charAt(j); + } + }else{ + textBuff = new String(text); + } + strFE = textBuff.split(""); + for(var i = 0; i < textBuff.length; i++){ + if(strFE[i] >= '\uFE70' && strFE[i] < '\uFEFF'){ + var chNum = textBuff.charCodeAt(i); + if(strFE[i] >= '\uFEF5' && strFE[i] <= '\uFEFC'){ + //expand the LamAlef + if(rtl){ + //Lam + Alef + outBuf += '\u0644'; + outBuf += AlefTable[parseInt((chNum - 65269) / 2)]; + }else{ + outBuf += AlefTable[parseInt((chNum - 65269) / 2)]; + outBuf += '\u0644'; + } + }else{ + outBuf += FETo06Table[chNum - 65136]; + } + }else{ + outBuf += strFE[i]; + } + } + return outBuf; +}; +function doReorder(str){ + // summary: + // Helper to the doBidiReorder. Manages the UBA. + // str: + // the string to reorder. + // returns: + // text reordered according to source and result attributes. + // tags: + // private + var chars = str.split(""), levels = []; + + computeLevels(chars, levels); + swapChars(chars, levels); + invertLevel(2, chars, levels); + invertLevel(1, chars, levels); + return chars.join(""); +}; +function computeLevels(chars, levels){ + var len = chars.length + , impTab = bdx.dir ? 
impTab_RTL : impTab_LTR + , prevState = null, newClass = null, newLevel = null, newState = 0 + , action = null, cond = null, condPos = -1, i = null, ix = null + , types = [] + , classes = [] + ; + bdx.hiLevel = bdx.dir; + bdx.lastArabic = false; + bdx.hasUBAT_AL = false, + bdx.hasUBAT_B = false; + bdx.hasUBAT_S = false; + for(i = 0; i < len; i++){ + types[i] = getCharacterType(chars[i]); + } + for(ix = 0; ix < len; ix++){ + prevState = newState; + classes[ix] = newClass = getCharClass(chars, types, classes, ix); + newState = impTab[prevState][newClass]; + action = newState & 0xF0; + newState &= 0x0F; + levels[ix] = newLevel = impTab[newState][ITIL]; + if(action > 0){ + if(action == 0x10){ // set conditional run to level 1 + for(i = condPos; i < ix; i++){ + levels[i] = 1; + } + condPos = -1; + }else{ // 0x20 confirm the conditional run + condPos = -1; + } + } + cond = impTab[newState][ITCOND]; + if(cond){ + if(condPos == -1){ + condPos = ix; + } + }else{ // unconditional level + if(condPos > -1){ + for(i = condPos; i < ix; i++){ + levels[i] = newLevel; + } + condPos = -1; + } + } + if(types[ix] == UBAT_B){ + levels[ix] = 0; + } + bdx.hiLevel |= newLevel; + } + if(bdx.hasUBAT_S){ + for(i = 0; i < len; i++){ + if(types[i] == UBAT_S){ + levels[i] = bdx.dir; + for(var j = i - 1; j >= 0; j--){ + if(types[j] == UBAT_WS){ + levels[j] = bdx.dir; + }else{ + break; + } + } + } + } + } +}; +function swapChars(chars, levels){ + // summary: + // Swap characters with symmetrical mirroring as all kinds of parenthesis. + // (When needed). + // chars: + // The source string as Array of characters. + // levels: + // An array (like hash) of flags for each character in the source string, + // that defines if swapping should be applied on the following character. + // description: + // First checks if the swapping should be applied, if not returns, else + // uses the levels "hash" to find what characters should be swapped. 
+ // tags: + // private + + if(bdx.hiLevel == 0 || bdx.swap.substr(0, 1) == bdx.swap.substr(1, 2)){ + return; + }; + + //console.log("bdx.hiLevel == 0: " + bdx.hiLevel + "bdx.swap[0]: "+ bdx.swap[0] +" bdx.swap[1]: " +bdx.swap[1]); + for(var i = 0; i < chars.length; i++){ + if(levels[i] == 1){chars[i] = getMirror(chars[i]);} + } +}; +function getCharacterType(ch){ + // summary: + // Return the type of the character. + // ch: + // The character to be checked. + + // description: + // Check the type of the character according to MasterTable, + // type = LTR, RTL, neutral,Arabic-Indic digit etc. + // tags: + // private + var uc = ch.charCodeAt(0) + , hi = MasterTable[uc >> 8]; + return (hi < TBBASE) ? hi : UnicodeTable[hi - TBBASE][uc & 0xFF]; +}; +function invertStr(str){ + // summary: + // Return the reversed string. + // str: + // The string to be reversed. + // description: + // Reverse the string str. + // tags: + // private + var chars = str.split(""); + chars.reverse(); + return chars.join(""); +}; +function indexOf(cArray, cLength, idx){ + var counter = -1; + for(var i = 0; i < cLength; i++){ + if(cArray[i] == idx){ + return i; + } + } + return -1; +}; +function isArabicAlefbet(c){ + for(var i = 0; i < ArabicAlefBetIntervalsBegine.length; i++){ + if(c >= ArabicAlefBetIntervalsBegine[i] && c <= ArabicAlefBetIntervalsEnd[i]){ + return true; + } + } + return false; +}; +function isNextArabic(str06, index, step, nIEnd){ + while(((index) * step) < nIEnd && isArabicDiacritics(str06[index])){ + index += step; + } + if(((index) * step) < nIEnd && isArabicAlefbet(str06[index])){ + return true; + } + return false; +}; +function isNextAlef(str06, index, step, nIEnd){ + while(((index) * step) < nIEnd && isArabicDiacritics(str06[index])){ + index += step; + } + var c = ' '; + if(((index) * step) < nIEnd){ + c = str06[index]; + }else{ + return false; + } + for(var i = 0; i < AlefTable.length; i++){ + if(AlefTable[i] == c){ + return true; + } + } + return false; +}; 
+function invertLevel(lev, chars, levels){ + if(bdx.hiLevel < lev){ + return; + } + if(lev == 1 && bdx.dir == RTL && !bdx.hasUBAT_B){ + chars.reverse(); + return; + } + var len = chars.length, start = 0, end, lo, hi, tmp; + while(start < len){ + if(levels[start] >= lev){ + end = start + 1; + while(end < len && levels[end] >= lev){ + end++; + } + for(lo = start, hi = end - 1 ; lo < hi; lo++, hi--){ + tmp = chars[lo]; + chars[lo] = chars[hi]; + chars[hi] = tmp; + } + start = end; + } + start++; + } +}; +function getCharClass(chars, types, classes, ix){ + // summary: + // Return the class if ix character in chars. + // chars: + // The source string as Array of characters. + // types: + // Array of types, for each character in chars. + // classes: + // Array of classes that already been solved. + // ix: + // the index of checked character. + // tags: + // private + var cType = types[ix], wType, nType, len, i; + switch(cType){ + case UBAT_L: + case UBAT_R: + bdx.lastArabic = false; + case UBAT_ON: + case UBAT_AN: + return cType; + case UBAT_EN: + return bdx.lastArabic ? UBAT_AN : UBAT_EN; + case UBAT_AL: + bdx.lastArabic = true; + bdx.hasUBAT_AL = true; + return UBAT_R; + case UBAT_WS: + return UBAT_ON; + case UBAT_CS: + if(ix < 1 || (ix + 1) >= types.length || + ((wType = classes[ix - 1]) != UBAT_EN && wType != UBAT_AN) || + ((nType = types[ix + 1]) != UBAT_EN && nType != UBAT_AN)){ + return UBAT_ON; + } + if(bdx.lastArabic){nType = UBAT_AN;} + return nType == wType ? nType : UBAT_ON; + case UBAT_ES: + wType = ix > 0 ? 
classes[ix - 1] : UBAT_B; + if(wType == UBAT_EN && (ix + 1) < types.length && types[ix + 1] == UBAT_EN){ + return UBAT_EN; + } + return UBAT_ON; + case UBAT_ET: + if(ix > 0 && classes[ix - 1] == UBAT_EN){ + return UBAT_EN; + } + if(bdx.lastArabic){ + return UBAT_ON; + } + i = ix + 1; + len = types.length; + while(i < len && types[i] == UBAT_ET){ + i++; + } + if(i < len && types[i] == UBAT_EN){ + return UBAT_EN; + } + return UBAT_ON; + case UBAT_NSM: + if(bdx.inFormat == "VLTR"){ // visual to implicit transformation + len = types.length; + i = ix + 1; + while(i < len && types[i] == UBAT_NSM){ + i++; + } + if(i < len){ + var c = chars[ix] + , rtlCandidate = (c >= 0x0591 && c <= 0x08FF) || c == 0xFB1E + ; + wType = types[i]; + if(rtlCandidate && (wType == UBAT_R || wType == UBAT_AL)){ + return UBAT_R; + } + } + } + if(ix < 1 || (wType = types[ix - 1]) == UBAT_B){ + return UBAT_ON; + } + return classes[ix - 1]; + case UBAT_B: + lastArabic = false; + bdx.hasUBAT_B = true; + return bdx.dir; + case UBAT_S: + bdx.hasUBAT_S = true; + return UBAT_ON; + case UBAT_LRE: + case UBAT_RLE: + case UBAT_LRO: + case UBAT_RLO: + case UBAT_PDF: + lastArabic = false; + case UBAT_BN: + return UBAT_ON; + } +}; +function getMirror(c){ + // summary: + // Calculates the mirrored character of c + // c: + // The character to be mirrored. 
+ // tags: + // private + var mid, low = 0, high = SwapTable.length - 1; + + while(low <= high){ + mid = Math.floor((low + high) / 2); + if(c < SwapTable[mid][0]){ + high = mid - 1; + }else if(c > SwapTable[mid][0]){ + low = mid + 1; + }else{ + return SwapTable[mid][1]; + } + } + return c; +}; +function isStandAlonCharacter(c){ + for(var i = 0; i < StandAlonForm.length; i++){ + if(StandAlonForm[i] == c){ + return true; + } + } + return false; +}; +function getMedialFormCharacterFE(c){ + for(var i = 0; i < BaseForm.length; i++){ + if(c == BaseForm[i]){ + return MedialForm[i]; + } + } + return c; +}; +function getFormCharacterFE(/*char*/ c, /*char[]*/formArr){ + for(var i = 0; i < BaseForm.length; i++){ + if(c == BaseForm[i]){ + return formArr[i]; + } + } + return c; +}; +function isArabicDiacritics(c){ + return (c >= '\u064b' && c <= '\u0655') ? true : false; +}; +function getOrientation(/*Char*/ oc){ + if(oc == 'L'){ + return "LTR"; + } + if(oc == 'R'){ + return "RTL"; + } + if(oc == 'C'){ + return "CLR"; + } + if(oc == 'D'){ + return "CRL"; + } +}; +function setAlefToSpace(str06, index, step, nIEnd){ + while(((index) * step) < nIEnd && isArabicDiacritics(str06[index])){ + index += step; + } + if(((index) * step) < nIEnd){ + str06[index] = ' '; + return true; + } + return false; +}; +function getLamAlefFE(alef06, LamAlefForm){ + for(var i = 0; i < AlefTable.length; i++){ + if(alef06 == AlefTable[i]){ + return LamAlefForm[i]; + } + } + return alef06; +}; +function LamAlef(alef){ + // summary: + // If the alef variable is an ARABIC ALEF letter, + // return the LamAlef code associated with the specific + // alef character. + // alef: + // The alef code type. + // description: + // If "alef" is an ARABIC ALEF letter, identify which alef is it, + // using AlefTable, then return the LamAlef associated with it. 
+ // tags: + // private + for(var i = 0; i < AlefTable.length; i++){ + if(AlefTable[i] == alef){ + return AlefTable[i]; + } + } + return 0; +}; + +var bdx = { + dir: 0, + defInFormat: "LLTR", + defoutFormat: "VLTR", + defSwap: "YN", + inFormat: "LLTR", + outFormat: "VLTR", + swap: "YN", + hiLevel: 0, + lastArabic: false, + hasUBAT_AL: false, + hasBlockSep: false, + hasSegSep: false +}; + +var ITIL = 5; + +var ITCOND = 6; + +var LTR = 0; + +var RTL = 1; + +/****************************************************************************/ +/* Array in which directional characters are replaced by their symmetric. */ +/****************************************************************************/ +var SwapTable = [ + [ "\u0028", "\u0029" ], /* Round brackets */ + [ "\u0029", "\u0028" ], + [ "\u003C", "\u003E" ], /* Less than/greater than */ + [ "\u003E", "\u003C" ], + [ "\u005B", "\u005D" ], /* Square brackets */ + [ "\u005D", "\u005B" ], + [ "\u007B", "\u007D" ], /* Curly brackets */ + [ "\u007D", "\u007B" ], + [ "\u00AB", "\u00BB" ], /* Double angle quotation marks */ + [ "\u00BB", "\u00AB" ], + [ "\u2039", "\u203A" ], /* single angle quotation mark */ + [ "\u203A", "\u2039" ], + [ "\u207D", "\u207E" ], /* Superscript parentheses */ + [ "\u207E", "\u207D" ], + [ "\u208D", "\u208E" ], /* Subscript parentheses */ + [ "\u208E", "\u208D" ], + [ "\u2264", "\u2265" ], /* Less/greater than or equal */ + [ "\u2265", "\u2264" ], + [ "\u2329", "\u232A" ], /* Angle brackets */ + [ "\u232A", "\u2329" ], + [ "\uFE59", "\uFE5A" ], /* Small round brackets */ + [ "\uFE5A", "\uFE59" ], + [ "\uFE5B", "\uFE5C" ], /* Small curly brackets */ + [ "\uFE5C", "\uFE5B" ], + [ "\uFE5D", "\uFE5E" ], /* Small tortoise shell brackets */ + [ "\uFE5E", "\uFE5D" ], + [ "\uFE64", "\uFE65" ], /* Small less than/greater than */ + [ "\uFE65", "\uFE64" ] +]; +var AlefTable = ['\u0622', '\u0623', '\u0625', '\u0627']; + +var AlefTableFE = [0xFE81, 0xFE82, 0xFE83, 0xFE84, 0xFE87, 0xFE88, 0xFE8D, 0xFE8E]; + +var 
LamTableFE = [0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0]; + +var LamAlefInialTableFE = ['\ufef5', '\ufef7', '\ufef9', '\ufefb']; + +var LamAlefMedialTableFE = ['\ufef6', '\ufef8', '\ufefa', '\ufefc']; +/** + * Arabic Characters in the base form + */ +var BaseForm = ['\u0627', '\u0628', '\u062A', '\u062B', '\u062C', '\u062D', '\u062E', '\u062F', '\u0630', '\u0631', '\u0632', '\u0633', '\u0634', '\u0635', '\u0636', '\u0637', '\u0638', '\u0639', '\u063A', '\u0641', '\u0642', '\u0643', '\u0644', '\u0645', '\u0646', '\u0647', '\u0648', '\u064A', '\u0625', '\u0623', '\u0622', '\u0629', '\u0649', '\u06CC', '\u0626', '\u0624', '\u064B', '\u064C', '\u064D', '\u064E', '\u064F', '\u0650', '\u0651', '\u0652', '\u0621']; + +/** + * Arabic shaped characters in Isolated form + */ +var IsolatedForm = ['\uFE8D', '\uFE8F', '\uFE95', '\uFE99', '\uFE9D', '\uFEA1', '\uFEA5', '\uFEA9', '\uFEAB', '\uFEAD', '\uFEAF', '\uFEB1', '\uFEB5', '\uFEB9', '\uFEBD', '\uFEC1', '\uFEC5', '\uFEC9', '\uFECD', '\uFED1', '\uFED5', '\uFED9', '\uFEDD', '\uFEE1', '\uFEE5', '\uFEE9', '\uFEED', '\uFEF1', '\uFE87', '\uFE83', '\uFE81', '\uFE93', '\uFEEF', '\uFBFC', '\uFE89', '\uFE85', '\uFE70', '\uFE72', '\uFE74', '\uFE76', '\uFE78', '\uFE7A', '\uFE7C', '\uFE7E', '\uFE80']; + +/** + * Arabic shaped characters in Final form + */ +var FinalForm = ['\uFE8E', '\uFE90', '\uFE96', '\uFE9A', '\uFE9E', '\uFEA2', '\uFEA6', '\uFEAA', '\uFEAC', '\uFEAE', '\uFEB0', '\uFEB2', '\uFEB6', '\uFEBA', '\uFEBE', '\uFEC2', '\uFEC6', '\uFECA', '\uFECE', '\uFED2', '\uFED6', '\uFEDA', '\uFEDE', '\uFEE2', '\uFEE6', '\uFEEA', '\uFEEE', '\uFEF2', '\uFE88', '\uFE84', '\uFE82', '\uFE94', '\uFEF0', '\uFBFD', '\uFE8A', '\uFE86', '\uFE70', '\uFE72', '\uFE74', '\uFE76', '\uFE78', '\uFE7A', '\uFE7C', '\uFE7E', '\uFE80']; + +/** + * Arabic shaped characters in Media form + */ +var MedialForm = ['\uFE8E', '\uFE92', '\uFE98', '\uFE9C', '\uFEA0', '\uFEA4', '\uFEA8', '\uFEAA', '\uFEAC', '\uFEAE', '\uFEB0', '\uFEB4', '\uFEB8', '\uFEBC', '\uFEC0', '\uFEC4', 
'\uFEC8', '\uFECC', '\uFED0', '\uFED4', '\uFED8', '\uFEDC', '\uFEE0', '\uFEE4', '\uFEE8', '\uFEEC', '\uFEEE', '\uFEF4', '\uFE88', '\uFE84', '\uFE82', '\uFE94', '\uFEF0', '\uFBFF', '\uFE8C', '\uFE86', '\uFE71', '\uFE72', '\uFE74', '\uFE77', '\uFE79', '\uFE7B', '\uFE7D', '\uFE7F', '\uFE80']; + +/** + * Arabic shaped characters in Initial form + */ +var InitialForm = ['\uFE8D', '\uFE91', '\uFE97', '\uFE9B', '\uFE9F', '\uFEA3', '\uFEA7', '\uFEA9', '\uFEAB', '\uFEAD', '\uFEAF', '\uFEB3', '\uFEB7', '\uFEBB', '\uFEBF', '\uFEC3', '\uFEC7', '\uFECB', '\uFECF', '\uFED3', '\uFED7', '\uFEDB', '\uFEDF', '\uFEE3', '\uFEE7', '\uFEEB', '\uFEED', '\uFEF3', '\uFE87', '\uFE83', '\uFE81', '\uFE93', '\uFEEF', '\uFBFE', '\uFE8B', '\uFE85', '\uFE70', '\uFE72', '\uFE74', '\uFE76', '\uFE78', '\uFE7A', '\uFE7C', '\uFE7E', '\uFE80']; + +/** + * Arabic characters that couldn't join to the next character + */ +var StandAlonForm = ['\u0621', '\u0627', '\u062F', '\u0630', '\u0631', '\u0632', '\u0648', '\u0622', '\u0629', '\u0626', '\u0624', '\u0625', '\u0675', '\u0623']; + +var FETo06Table = ['\u064B', '\u064B', '\u064C', '\u061F', '\u064D', '\u061F', '\u064E', '\u064E', '\u064F', '\u064F', '\u0650', '\u0650', '\u0651', '\u0651', '\u0652', '\u0652', '\u0621', '\u0622', '\u0622', '\u0623', '\u0623', '\u0624', '\u0624', '\u0625', '\u0625', '\u0626', '\u0626', '\u0626', '\u0626', '\u0627', '\u0627', '\u0628', '\u0628', '\u0628', '\u0628', '\u0629', '\u0629', '\u062A', '\u062A', '\u062A', '\u062A', '\u062B', '\u062B', '\u062B', '\u062B', '\u062C', '\u062C', '\u062C', '\u062c', '\u062D', '\u062D', '\u062D', '\u062D', '\u062E', '\u062E', '\u062E', '\u062E', '\u062F', '\u062F', '\u0630', '\u0630', '\u0631', '\u0631', '\u0632', '\u0632', '\u0633', '\u0633', '\u0633', '\u0633', '\u0634', '\u0634', '\u0634', '\u0634', '\u0635', '\u0635', '\u0635', '\u0635', '\u0636', '\u0636', '\u0636', '\u0636', '\u0637', '\u0637', '\u0637', '\u0637', '\u0638', '\u0638', '\u0638', '\u0638', '\u0639', '\u0639', '\u0639', 
'\u0639', '\u063A', '\u063A', '\u063A', '\u063A', '\u0641', '\u0641', '\u0641', '\u0641', '\u0642', '\u0642', '\u0642', '\u0642', '\u0643', '\u0643', '\u0643', '\u0643', '\u0644', '\u0644', '\u0644', '\u0644', '\u0645', '\u0645', '\u0645', '\u0645', '\u0646', '\u0646', '\u0646', '\u0646', '\u0647', '\u0647', '\u0647', '\u0647', '\u0648', '\u0648', '\u0649', '\u0649', '\u064A', '\u064A', '\u064A', '\u064A', '\uFEF5', '\uFEF6', '\uFEF7', '\uFEF8', '\uFEF9', '\uFEFA', '\uFEFB', '\uFEFC', '\u061F', '\u061F', '\u061F']; + +var ArabicAlefBetIntervalsBegine = ['\u0621', '\u0641']; + +var ArabicAlefBetIntervalsEnd = ['\u063A', '\u064a']; + +var Link06 = [ + 1 + 32 + 256 * 0x11, + 1 + 32 + 256 * 0x13, + 1 + 256 * 0x15, + 1 + 32 + 256 * 0x17, + 1 + 2 + 256 * 0x19, + 1 + 32 + 256 * 0x1D, + 1 + 2 + 256 * 0x1F, + 1 + 256 * 0x23, + 1 + 2 + 256 * 0x25, + 1 + 2 + 256 * 0x29, + 1 + 2 + 256 * 0x2D, + 1 + 2 + 256 * 0x31, + 1 + 2 + 256 * 0x35, + 1 + 256 * 0x39, + 1 + 256 * 0x3B, + 1 + 256 * 0x3D, + 1 + 256 * 0x3F, + 1 + 2 + 256 * 0x41, + 1 + 2 + 256 * 0x45, + 1 + 2 + 256 * 0x49, + 1 + 2 + 256 * 0x4D, + 1 + 2 + 256 * 0x51, + 1 + 2 + 256 * 0x55, + 1 + 2 + 256 * 0x59, + 1 + 2 + 256 * 0x5D, + 0, 0, 0, 0, 0, /* 0x63B - 0x63F */ + 1 + 2, + 1 + 2 + 256 * 0x61, + 1 + 2 + 256 * 0x65, + 1 + 2 + 256 * 0x69, + 1 + 2 + 16 + 256 * 0x6D, + 1 + 2 + 256 * 0x71, + 1 + 2 + 256 * 0x75, + 1 + 2 + 256 * 0x79, + 1 + 256 * 0x7D, + 1 + 256 * 0x7F, + 1 + 2 + 256 * 0x81, + 4, 4, 4, 4, + 4, 4, 4, 4, /* 0x64B - 0x652 */ + 0, 0, 0, 0, 0, + 0, 0, 0, 0, /* 0x653 - 0x65B */ + 1 + 256 * 0x85, + 1 + 256 * 0x87, + 1 + 256 * 0x89, + 1 + 256 * 0x8B, + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,/* 0x660 - 0x66F */ + 4, + 0, + 1 + 32, + 1 + 32, + 0, + 1 + 32, + 1, 1, + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, + 1+2, 1+2, 1+2, 1+2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, + 
1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, + 1, + 1+2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1+2, + 1, + 1+2, 1+2, 1+2, 1+2, + 1, 1 +]; + +var LinkFE = [ + 1 + 2, + 1 + 2, + 1 + 2, 0, 1+ 2, 0, 1+ 2, + 1 + 2, + 1+ 2, 1 + 2, 1+2, 1 + 2, + 1+ 2, 1 + 2, 1+2, 1 + 2, + 0, 0 + 32, 1 + 32, 0 + 32, + 1 + 32, 0, 1, 0 + 32, + 1 + 32, 0, 2, 1 + 2, + 1, 0 + 32, 1 + 32, 0, + 2, 1 + 2, 1, 0, + 1, 0, 2, 1 + 2, + 1, 0, 2, 1 + 2, + 1, 0, 2, 1 + 2, + 1, 0, 2, 1 + 2, + 1, 0, 2, 1 + 2, + 1, 0, 1, 0, + 1, 0, 1, 0, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0 + 16, 2 + 16, 1 + 2 +16, + 1 + 16, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 2, 1+2, + 1, 0, 1, 0, + 1, 0, 2, 1+2, + 1, 0, 1, 0, + 1, 0, 1, 0, + 1 +]; +var impTab_LTR = [ + /* L, R, EN, AN, N, IL, Cond */ + /* 0 LTR text */ [ 0, 3, 0, 1, 0, 0, 0 ], + /* 1 LTR+AN */ [ 0, 3, 0, 1, 2, 2, 0 ], + /* 2 LTR+AN+N */ [ 0, 3, 0, 0x11, 2, 0, 1 ], + /* 3 RTL text */ [ 0, 3, 5, 5, 4, 1, 0 ], + /* 4 RTL cont */ [ 0, 3, 0x15, 0x15, 4, 0, 1 ], + /* 5 RTL+EN/AN */ [ 0, 3, 5, 5, 4, 2, 0 ] +]; +var impTab_RTL = [ + /* L, R, EN, AN, N, IL, Cond */ + /* 0 RTL text */ [ 2, 0, 1, 1, 0, 1, 0 ], + /* 1 RTL+EN/AN */ [ 2, 0, 1, 1, 0, 2, 0 ], + /* 2 LTR text */ [ 2, 0, 2, 1, 3, 2, 0 ], + /* 3 LTR+cont */ [ 2, 0, 2, 0x21, 3, 1, 1 ] +]; + +var UBAT_L = 0; /* left to right */ +var UBAT_R = 1; /* right to left */ +var UBAT_EN = 2; /* European digit */ +var UBAT_AN = 3; /* Arabic-Indic digit */ +var UBAT_ON = 4; /* neutral */ +var UBAT_B = 5; /* block separator */ +var UBAT_S = 6; /* segment separator */ +var UBAT_AL = 7; /* Arabic Letter */ +var UBAT_WS = 8; /* white space */ +var UBAT_CS = 9; /* common digit separator */ +var UBAT_ES = 10; /* European digit separator */ +var UBAT_ET = 11; /* European digit terminator */ +var UBAT_NSM = 12; /* Non Spacing Mark */ +var UBAT_LRE = 13; /* LRE 
*/ +var UBAT_RLE = 14; /* RLE */ +var UBAT_PDF = 15; /* PDF */ +var UBAT_LRO = 16; /* LRO */ +var UBAT_RLO = 17; /* RLO */ +var UBAT_BN = 18; /* Boundary Neutral */ + +var TBBASE = 100; + +var TB00 = TBBASE + 0; +var TB05 = TBBASE + 1; +var TB06 = TBBASE + 2; +var TB07 = TBBASE + 3; +var TB20 = TBBASE + 4; +var TBFB = TBBASE + 5; +var TBFE = TBBASE + 6; +var TBFF = TBBASE + 7; + +var L = UBAT_L; +var R = UBAT_R; +var EN = UBAT_EN; +var AN = UBAT_AN; +var ON = UBAT_ON; +var B = UBAT_B; +var S = UBAT_S; +var AL = UBAT_AL; +var WS = UBAT_WS; +var CS = UBAT_CS; +var ES = UBAT_ES; +var ET = UBAT_ET; +var NSM = UBAT_NSM; +var LRE = UBAT_LRE; +var RLE = UBAT_RLE; +var PDF = UBAT_PDF; +var LRO = UBAT_LRO; +var RLO = UBAT_RLO; +var BN = UBAT_BN; + +var MasterTable = [ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ TB00, L , L , L , L , TB05, TB06, TB07, R , L , L , L , L , L , L , L , + /*1-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*2-*/ TB20, ON , ON , ON , L , ON , L , ON , L , ON , ON , ON , L , L , ON , ON , + /*3-*/ L , L , L , L , L , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*4-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , L , L , ON , + /*5-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*6-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*7-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*8-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*9-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , L , + /*A-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , 
ON , ON , ON , + /*B-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*C-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*D-*/ ON , ON , ON , ON , ON , ON , ON , L , L , ON , ON , L , L , ON , ON , L , + /*E-*/ L , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*F-*/ ON , ON , ON , ON , ON , ON , ON , ON , L , L , L , TBFB, AL , AL , TBFE, TBFF +]; + +delete TB00; +delete TB05; +delete TB06; +delete TB07; +delete TB20; +delete TBFB; +delete TBFE; +delete TBFF; + +var UnicodeTable = [ + [ /* Table 00: Unicode 00xx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ BN , BN , BN , BN , BN , BN , BN , BN , BN , S , B , S , WS , B , BN , BN , + /*1-*/ BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , B , B , B , S , + /*2-*/ WS , ON , ON , ET , ET , ET , ON , ON , ON , ON , ON , ES , CS , ES , CS , CS , + /*3-*/ EN , EN , EN , EN , EN , EN , EN , EN , EN , EN , CS , ON , ON , ON , ON , ON , + /*4-*/ ON , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*5-*/ L , L , L , L , L , L , L , L , L , L , L , ON , ON , ON , ON , ON , + /*6-*/ ON , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*7-*/ L , L , L , L , L , L , L , L , L , L , L , ON , ON , ON , ON , BN , + /*8-*/ BN , BN , BN , BN , BN , B , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , + /*9-*/ BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , BN , + /*A-*/ CS , ON , ET , ET , ET , ET , ON , ON , ON , ON , L , ON , ON , BN , ON , ON , + /*B-*/ ET , ET , EN , EN , ON , L , ON , ON , ON , EN , L , ON , ON , ON , ON , ON , + /*C-*/ L , L , L , L , L , L , L , L , L , L , L , 
L , L , L , L , L , + /*D-*/ L , L , L , L , L , L , L , ON , L , L , L , L , L , L , L , L , + /*E-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*F-*/ L , L , L , L , L , L , L , ON , L , L , L , L , L , L , L , L + ], + [ /* Table 01: Unicode 05xx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*1-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*2-*/ L , L , L , L , L , L , L , L , ON , ON , ON , ON , ON , ON , ON , ON , + /*3-*/ ON , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*4-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*5-*/ L , L , L , L , L , L , L , ON , ON , L , L , L , L , L , L , L , + /*6-*/ ON , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*7-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*8-*/ L , L , L , L , L , L , L , L , ON , L , ON , ON , ON , ON , ON , ON , + /*9-*/ ON , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*A-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*B-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , R , NSM , + /*C-*/ R , NSM , NSM , R , NSM , NSM , R , NSM , ON , ON , ON , ON , ON , ON , ON , ON , + /*D-*/ R , R , R , R , R , R , R , R , R , R , R , R , R , R , R , R , + /*E-*/ R , R , R , R , R , R , R , R , R , R , R , ON , ON , ON , ON , ON , + /*F-*/ R , R , R , R , R , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON + ], + [ /* Table 02: Unicode 06xx */ + 
/************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ AN , AN , AN , AN , ON , ON , ON , ON , AL , ET , ET , AL , CS , AL , ON , ON , + /*1-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , AL , ON , ON , AL , AL , + /*2-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*3-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*4-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , NSM , NSM , NSM , NSM , NSM , + /*5-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*6-*/ AN , AN , AN , AN , AN , AN , AN , AN , AN , AN , ET , AN , AN , AL , AL , AL , + /*7-*/ NSM , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*8-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*9-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*A-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*B-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*C-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*D-*/ AL , AL , AL , AL , AL , AL , NSM , NSM , NSM , NSM , NSM , NSM , NSM , AN , ON , NSM , + /*E-*/ NSM , NSM , NSM , NSM , NSM , AL , AL , NSM , NSM , ON , NSM , NSM , NSM , NSM , AL , AL , + /*F-*/ EN , EN , EN , EN , EN , EN , EN , EN , EN , EN , AL , AL , AL , AL , AL , AL + ], + [ /* Table 03: Unicode 07xx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F 
*/ + /************************************************************************************************************************************/ + /*0-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , ON , AL , + /*1-*/ AL , NSM , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*2-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*3-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*4-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , ON , ON , AL , AL , AL , + /*5-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*6-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*7-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*8-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*9-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*A-*/ AL , AL , AL , AL , AL , AL , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*B-*/ NSM , AL , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*C-*/ R , R , R , R , R , R , R , R , R , R , R , R , R , R , R , R , + /*D-*/ R , R , R , R , R , R , R , R , R , R , R , R , R , R , R , R , + /*E-*/ R , R , R , R , R , R , R , R , R , R , R , NSM , NSM , NSM , NSM , NSM , + /*F-*/ NSM , NSM , NSM , NSM , R , R , ON , ON , ON , ON , R , ON , ON , ON , ON , ON + ], + [ /* Table 04: Unicode 20xx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ WS , WS , WS , WS , WS , WS , WS , WS , WS , WS , WS , BN , BN , BN , L , 
R , + /*1-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*2-*/ ON , ON , ON , ON , ON , ON , ON , ON , WS , B , LRE , RLE , PDF , LRO , RLO , CS , + /*3-*/ ET , ET , ET , ET , ET , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*4-*/ ON , ON , ON , ON , CS , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*5-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , WS , + /*6-*/ BN , BN , BN , BN , BN , ON , ON , ON , ON , ON , BN , BN , BN , BN , BN , BN , + /*7-*/ EN , L , ON , ON , EN , EN , EN , EN , EN , EN , ES , ES , ON , ON , ON , L , + /*8-*/ EN , EN , EN , EN , EN , EN , EN , EN , EN , EN , ES , ES , ON , ON , ON , ON , + /*9-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , ON , ON , ON , + /*A-*/ ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , + /*B-*/ ET , ET , ET , ET , ET , ET , ET , ET , ET , ET , ON , ON , ON , ON , ON , ON , + /*C-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*D-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*E-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*F-*/ NSM , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON + ], + [ /* Table 05: Unicode FBxx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ L , L , L , L , L , L , L , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*1-*/ ON , ON , ON , L , L , L , L , L , ON , ON , ON , ON , ON , R , NSM , R , + /*2-*/ R , R , R , R , R , R , R , R , R , ES , R , R , R , R , R , R , + /*3-*/ R , R , R , R , R , R , R , ON , R , R , 
R , R , R , ON , R , ON , + /*4-*/ R , R , ON , R , R , ON , R , R , R , R , R , R , R , R , R , R , + /*5-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*6-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*7-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*8-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*9-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*A-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*B-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*C-*/ AL , AL , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*D-*/ ON , ON , ON , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*E-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*F-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL + ], + [ /* Table 06: Unicode FExx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , NSM , + /*1-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*2-*/ NSM , NSM , NSM , NSM , NSM , NSM , NSM , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*3-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*4-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*5-*/ CS , ON , CS , ON , ON , CS , ON , ON , ON , ON , ON , ON , ON , ON , ON , ET , + /*6-*/ ON 
, ON , ES , ES , ON , ON , ON , ON , ON , ET , ET , ON , ON , ON , ON , ON , + /*7-*/ AL , AL , AL , AL , AL , ON , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*8-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*9-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*A-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*B-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*C-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*D-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*E-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , + /*F-*/ AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , AL , ON , ON , BN + ], + [ /* Table 07: Unicode FFxx */ + /************************************************************************************************************************************/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /************************************************************************************************************************************/ + /*0-*/ ON , ON , ON , ET , ET , ET , ON , ON , ON , ON , ON , ES , CS , ES , CS , CS , + /*1-*/ EN , EN , EN , EN , EN , EN , EN , EN , EN , EN , CS , ON , ON , ON , ON , ON , + /*2-*/ ON , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*3-*/ L , L , L , L , L , L , L , L , L , L , L , ON , ON , ON , ON , ON , + /*4-*/ ON , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*5-*/ L , L , L , L , L , L , L , L , L , L , L , ON , ON , ON , ON , ON , + /*6-*/ ON , ON , ON , ON , ON , ON , L , L , L , L , L , L , L , L , L , L , + /*7-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*8-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*9-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , 
L , L , + /*A-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , + /*B-*/ L , L , L , L , L , L , L , L , L , L , L , L , L , L , L , ON , + /*C-*/ ON , ON , L , L , L , L , L , L , ON , ON , L , L , L , L , L , L , + /*D-*/ ON , ON , L , L , L , L , L , L , ON , ON , L , L , L , ON , ON , ON , + /*E-*/ ET , ET , ON , ON , ON , ET , ET , ON , ON , ON , ON , ON , ON , ON , ON , ON , + /*F-*/ ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON , ON + ] +]; + +delete L; +delete R; +delete EN; +delete AN; +delete ON; +delete B; +delete S; +delete AL; +delete WS; +delete CS; +delete ES; +delete ET; +delete NSM; +delete LRE; +delete RLE; +delete PDF; +delete LRO; +delete RLO; +delete BN; + +return dojox.string.BidiEngine; +}); diff --git a/js/dojo/dojox/string/Builder.js b/js/dojo/dojox/string/Builder.js new file mode 100644 index 0000000..7b00f68 --- /dev/null +++ b/js/dojo/dojox/string/Builder.js @@ -0,0 +1,134 @@ +//>>built +define("dojox/string/Builder", ["dojo/_base/lang"], + function(lang){ + lang.getObject("string", true, dojox).Builder = + function(/*String?*/str){ + // summary: + // A fast buffer for creating large strings. + // + // length: Number + // The current length of the internal string. + + // N.B. the public nature of the internal buffer is no longer + // needed because the IE-specific fork is no longer needed--TRT. + var b = ""; + this.length = 0; + + this.append = function(/* String... */s){ + // summary: Append all arguments to the end of the buffer + if(arguments.length>1){ + /* + This is a loop unroll was designed specifically for Firefox; + it would seem that static index access on an Arguments + object is a LOT faster than doing dynamic index access. + Therefore, we create a buffer string and take advantage + of JS's switch fallthrough. The peformance of this method + comes very close to straight up string concatenation (+=). 
+ + If the arguments object length is greater than 9, we fall + back to standard dynamic access. + + This optimization seems to have no real effect on either + Safari or Opera, so we just use it for all. + + It turns out also that this loop unroll can increase performance + significantly with Internet Explorer, particularly when + as many arguments are provided as possible. + + Loop unroll per suggestion from Kris Zyp, implemented by + Tom Trenka. + + Note: added empty string to force a string cast if needed. + */ + var tmp="", l=arguments.length; + switch(l){ + case 9: tmp=""+arguments[8]+tmp; + case 8: tmp=""+arguments[7]+tmp; + case 7: tmp=""+arguments[6]+tmp; + case 6: tmp=""+arguments[5]+tmp; + case 5: tmp=""+arguments[4]+tmp; + case 4: tmp=""+arguments[3]+tmp; + case 3: tmp=""+arguments[2]+tmp; + case 2: { + b+=""+arguments[0]+arguments[1]+tmp; + break; + } + default: { + var i=0; + while(i<arguments.length){ + tmp += arguments[i++]; + } + b += tmp; + } + } + } else { + b += s; + } + this.length = b.length; + return this; // dojox.string.Builder + }; + + this.concat = function(/*String...*/s){ + // summary: + // Alias for append. + return this.append.apply(this, arguments); // dojox.string.Builder + }; + + this.appendArray = function(/*Array*/strings) { + // summary: + // Append an array of items to the internal buffer. + + // Changed from String.prototype.concat.apply because of IE. + return this.append.apply(this, strings); // dojox.string.Builder + }; + + this.clear = function(){ + // summary: + // Remove all characters from the buffer. + b = ""; + this.length = 0; + return this; // dojox.string.Builder + }; + + this.replace = function(/* String */oldStr, /* String */ newStr){ + // summary: + // Replace instances of one string with another in the buffer. + b = b.replace(oldStr,newStr); + this.length = b.length; + return this; // dojox.string.Builder + }; + + this.remove = function(/* Number */start, /* Number? 
*/len){ + // summary: + // Remove len characters starting at index start. If len + // is not provided, the end of the string is assumed. + if(len===undefined){ len = b.length; } + if(len == 0){ return this; } + b = b.substr(0, start) + b.substr(start+len); + this.length = b.length; + return this; // dojox.string.Builder + }; + + this.insert = function(/* Number */index, /* String */str){ + // summary: + // Insert string str starting at index. + if(index == 0){ + b = str + b; + }else{ + b = b.slice(0, index) + str + b.slice(index); + } + this.length = b.length; + return this; // dojox.string.Builder + }; + + this.toString = function(){ + // summary: + // Return the string representation of the internal buffer. + return b; // String + }; + + // initialize the buffer. + if(str){ this.append(str); } + }; + return dojox.string.Builder; +}); diff --git a/js/dojo/dojox/string/README b/js/dojo/dojox/string/README new file mode 100644 index 0000000..c09d59e --- /dev/null +++ b/js/dojo/dojox/string/README @@ -0,0 +1,39 @@ +------------------------------------------------------------------------------- +DojoX String Utilities +------------------------------------------------------------------------------- +Version 0.9 +Release date: 05/08/2007 +------------------------------------------------------------------------------- +Project state: +dojox.string.Builder: production +dojox.string.sprintf: beta +dojox.string.tokenize: beta +------------------------------------------------------------------------------- +Project authors + Ben Lowery + Tom Trenka (ttrenka@gmail.com) + Neil Roberts +------------------------------------------------------------------------------- +Project description + +The DojoX String utilities project is a placeholder for miscellaneous string +utility functions. At the time of writing, only the Builder object has been +added; but we anticipate other string utilities may end up living here as well. 
+------------------------------------------------------------------------------- +Dependencies: + +Dojo Core (package loader). +------------------------------------------------------------------------------- +Documentation + +See the Dojo Toolkit API docs (http://dojotoolkit.org/api), dojox.string.Builder. +------------------------------------------------------------------------------- +Installation instructions + +Grab the following from the Dojo SVN Repository: +http://svn.dojotoolkit.org/var/src/dojo/dojox/trunk/string/* + +Install into the following directory structure: +/dojox/string/ + +...which should be at the same level as your Dojo checkout. diff --git a/js/dojo/dojox/string/sprintf.js b/js/dojo/dojox/string/sprintf.js new file mode 100644 index 0000000..51dc6f2 --- /dev/null +++ b/js/dojo/dojox/string/sprintf.js @@ -0,0 +1,410 @@ +//>>built +define("dojox/string/sprintf", [ + "dojo/_base/kernel", // dojo.getObject, dojo.mixin + "dojo/_base/lang", // dojo.extend + "dojo/_base/sniff", // dojo.isOpera + "./tokenize" +], function(dojo, lang, has, tokenize){ + var strLib = lang.getObject("string", true, dojox); + + strLib.sprintf = function(/*String*/ format, /*mixed...*/ filler){ + for(var args = [], i = 1; i < arguments.length; i++){ + args.push(arguments[i]); + } + var formatter = new strLib.sprintf.Formatter(format); + return formatter.format.apply(formatter, args); + }; + + strLib.sprintf.Formatter = function(/*String*/ format){ + var tokens = []; + this._mapped = false; + this._format = format; + this._tokens = tokenize(format, this._re, this._parseDelim, this); + }; + + lang.extend(strLib.sprintf.Formatter, { + _re: /\%(?:\(([\w_]+)\)|([1-9]\d*)\$)?([0 +\-\#]*)(\*|\d+)?(\.)?(\*|\d+)?[hlL]?([\%scdeEfFgGiouxX])/g, + _parseDelim: function(mapping, intmapping, flags, minWidth, period, precision, specifier){ + if(mapping){ + this._mapped = true; + } + return { + mapping: mapping, + intmapping: intmapping, + flags: flags, + _minWidth: minWidth, // May be 
dependent on parameters + period: period, + _precision: precision, // May be dependent on parameters + specifier: specifier + }; + }, + _specifiers: { + b: { + base: 2, + isInt: true + }, + o: { + base: 8, + isInt: true + }, + x: { + base: 16, + isInt: true + }, + X: { + extend: ["x"], + toUpper: true + }, + d: { + base: 10, + isInt: true + }, + i: { + extend: ["d"] + }, + u: { + extend: ["d"], + isUnsigned: true + }, + c: { + setArg: function(token){ + if(!isNaN(token.arg)){ + var num = parseInt(token.arg); + if(num < 0 || num > 127){ + throw new Error("invalid character code passed to %c in sprintf"); + } + token.arg = isNaN(num) ? "" + num : String.fromCharCode(num); + } + } + }, + s: { + setMaxWidth: function(token){ + token.maxWidth = (token.period == ".") ? token.precision : -1; + } + }, + e: { + isDouble: true, + doubleNotation: "e" + }, + E: { + extend: ["e"], + toUpper: true + }, + f: { + isDouble: true, + doubleNotation: "f" + }, + F: { + extend: ["f"] + }, + g: { + isDouble: true, + doubleNotation: "g" + }, + G: { + extend: ["g"], + toUpper: true + } + }, + format: function(/*mixed...*/ filler){ + if(this._mapped && typeof filler != "object"){ + throw new Error("format requires a mapping"); + } + + var str = ""; + var position = 0; + for(var i = 0, token; i < this._tokens.length; i++){ + token = this._tokens[i]; + if(typeof token == "string"){ + str += token; + }else{ + if(this._mapped){ + if(typeof filler[token.mapping] == "undefined"){ + throw new Error("missing key " + token.mapping); + } + token.arg = filler[token.mapping]; + }else{ + if(token.intmapping){ + var position = parseInt(token.intmapping) - 1; + } + if(position >= arguments.length){ + throw new Error("got " + arguments.length + " printf arguments, insufficient for '" + this._format + "'"); + } + token.arg = arguments[position++]; + } + + if(!token.compiled){ + token.compiled = true; + token.sign = ""; + token.zeroPad = false; + token.rightJustify = false; + token.alternative = false; + + 
var flags = {}; + for(var fi = token.flags.length; fi--;){ + var flag = token.flags.charAt(fi); + flags[flag] = true; + switch(flag){ + case " ": + token.sign = " "; + break; + case "+": + token.sign = "+"; + break; + case "0": + token.zeroPad = (flags["-"]) ? false : true; + break; + case "-": + token.rightJustify = true; + token.zeroPad = false; + break; + case "\#": + token.alternative = true; + break; + default: + throw Error("bad formatting flag '" + token.flags.charAt(fi) + "'"); + } + } + + token.minWidth = (token._minWidth) ? parseInt(token._minWidth) : 0; + token.maxWidth = -1; + token.toUpper = false; + token.isUnsigned = false; + token.isInt = false; + token.isDouble = false; + token.precision = 1; + if(token.period == '.'){ + if(token._precision){ + token.precision = parseInt(token._precision); + }else{ + token.precision = 0; + } + } + + var mixins = this._specifiers[token.specifier]; + if(typeof mixins == "undefined"){ + throw new Error("unexpected specifier '" + token.specifier + "'"); + } + if(mixins.extend){ + lang.mixin(mixins, this._specifiers[mixins.extend]); + delete mixins.extend; + } + lang.mixin(token, mixins); + } + + if(typeof token.setArg == "function"){ + token.setArg(token); + } + + if(typeof token.setMaxWidth == "function"){ + token.setMaxWidth(token); + } + + if(token._minWidth == "*"){ + if(this._mapped){ + throw new Error("* width not supported in mapped formats"); + } + token.minWidth = parseInt(arguments[position++]); + if(isNaN(token.minWidth)){ + throw new Error("the argument for * width at position " + position + " is not a number in " + this._format); + } + // negative width means rightJustify + if (token.minWidth < 0) { + token.rightJustify = true; + token.minWidth = -token.minWidth; + } + } + + if(token._precision == "*" && token.period == "."){ + if(this._mapped){ + throw new Error("* precision not supported in mapped formats"); + } + token.precision = parseInt(arguments[position++]); + if(isNaN(token.precision)){ + throw 
Error("the argument for * precision at position " + position + " is not a number in " + this._format); + } + // negative precision means unspecified + if (token.precision < 0) { + token.precision = 1; + token.period = ''; + } + } + + if(token.isInt){ + // a specified precision means no zero padding + if(token.period == '.'){ + token.zeroPad = false; + } + this.formatInt(token); + }else if(token.isDouble){ + if(token.period != '.'){ + token.precision = 6; + } + this.formatDouble(token); + } + this.fitField(token); + + str += "" + token.arg; + } + } + + return str; + }, + _zeros10: '0000000000', + _spaces10: ' ', + formatInt: function(token) { + var i = parseInt(token.arg); + if(!isFinite(i)){ // isNaN(f) || f == Number.POSITIVE_INFINITY || f == Number.NEGATIVE_INFINITY) + // allow this only if arg is number + if(typeof token.arg != "number"){ + throw new Error("format argument '" + token.arg + "' not an integer; parseInt returned " + i); + } + //return '' + i; + i = 0; + } + + // if not base 10, make negatives be positive + // otherwise, (-10).toString(16) is '-a' instead of 'fffffff6' + if(i < 0 && (token.isUnsigned || token.base != 10)){ + i = 0xffffffff + i + 1; + } + + if(i < 0){ + token.arg = (- i).toString(token.base); + this.zeroPad(token); + token.arg = "-" + token.arg; + }else{ + token.arg = i.toString(token.base); + // need to make sure that argument 0 with precision==0 is formatted as '' + if(!i && !token.precision){ + token.arg = ""; + }else{ + this.zeroPad(token); + } + if(token.sign){ + token.arg = token.sign + token.arg; + } + } + if(token.base == 16){ + if(token.alternative){ + token.arg = '0x' + token.arg; + } + token.arg = token.toUpper ? 
token.arg.toUpperCase() : token.arg.toLowerCase(); + } + if(token.base == 8){ + if(token.alternative && token.arg.charAt(0) != '0'){ + token.arg = '0' + token.arg; + } + } + }, + formatDouble: function(token) { + var f = parseFloat(token.arg); + if(!isFinite(f)){ // isNaN(f) || f == Number.POSITIVE_INFINITY || f == Number.NEGATIVE_INFINITY) + // allow this only if arg is number + if(typeof token.arg != "number"){ + throw new Error("format argument '" + token.arg + "' not a float; parseFloat returned " + f); + } + // C99 says that for 'f': + // infinity -> '[-]inf' or '[-]infinity' ('[-]INF' or '[-]INFINITY' for 'F') + // NaN -> a string starting with 'nan' ('NAN' for 'F') + // this is not commonly implemented though. + //return '' + f; + f = 0; + } + + switch(token.doubleNotation) { + case 'e': { + token.arg = f.toExponential(token.precision); + break; + } + case 'f': { + token.arg = f.toFixed(token.precision); + break; + } + case 'g': { + // C says use 'e' notation if exponent is < -4 or is >= prec + // ECMAScript for toPrecision says use exponential notation if exponent is >= prec, + // though step 17 of toPrecision indicates a test for < -6 to force exponential. + if(Math.abs(f) < 0.0001){ + //print("forcing exponential notation for f=" + f); + token.arg = f.toExponential(token.precision > 0 ? token.precision - 1 : token.precision); + }else{ + token.arg = f.toPrecision(token.precision); + } + + // In C, unlike 'f', 'gG' removes trailing 0s from fractional part, unless alternative format flag ("#"). + // But ECMAScript formats toPrecision as 0.00100000. So remove trailing 0s. 
+ if(!token.alternative){ + //print("replacing trailing 0 in '" + s + "'"); + token.arg = token.arg.replace(/(\..*[^0])0*/, "$1"); + // if fractional part is entirely 0, remove it and decimal point + token.arg = token.arg.replace(/\.0*e/, 'e').replace(/\.0$/,''); + } + break; + } + default: throw new Error("unexpected double notation '" + token.doubleNotation + "'"); + } + + // C says that exponent must have at least two digits. + // But ECMAScript does not; toExponential results in things like "1.000000e-8" and "1.000000e+8". + // Note that s.replace(/e([\+\-])(\d)/, "e$10$2") won't work because of the "$10" instead of "$1". + // And replace(re, func) isn't supported on IE50 or Safari1. + token.arg = token.arg.replace(/e\+(\d)$/, "e+0$1").replace(/e\-(\d)$/, "e-0$1"); + + // Ensure a '0' before the period. + // Opera implements (0.001).toString() as '0.001', but (0.001).toFixed(1) is '.001' + if(has("opera")){ + token.arg = token.arg.replace(/^\./, '0.'); + } + + // if alt, ensure a decimal point + if(token.alternative){ + token.arg = token.arg.replace(/^(\d+)$/,"$1."); + token.arg = token.arg.replace(/^(\d+)e/,"$1.e"); + } + + if(f >= 0 && token.sign){ + token.arg = token.sign + token.arg; + } + + token.arg = token.toUpper ? token.arg.toUpperCase() : token.arg.toLowerCase(); + }, + zeroPad: function(token, /*Int*/ length) { + length = (arguments.length == 2) ? length : token.precision; + if(typeof token.arg != "string"){ + token.arg = "" + token.arg; + } + + var tenless = length - 10; + while(token.arg.length < tenless){ + token.arg = (token.rightJustify) ? token.arg + this._zeros10 : this._zeros10 + token.arg; + } + var pad = length - token.arg.length; + token.arg = (token.rightJustify) ? 
token.arg + this._zeros10.substring(0, pad) : this._zeros10.substring(0, pad) + token.arg; + }, + fitField: function(token) { + if(token.maxWidth >= 0 && token.arg.length > token.maxWidth){ + return token.arg.substring(0, token.maxWidth); + } + if(token.zeroPad){ + this.zeroPad(token, token.minWidth); + return; + } + this.spacePad(token); + }, + spacePad: function(token, /*Int*/ length) { + length = (arguments.length == 2) ? length : token.minWidth; + if(typeof token.arg != 'string'){ + token.arg = '' + token.arg; + } + + var tenless = length - 10; + while(token.arg.length < tenless){ + token.arg = (token.rightJustify) ? token.arg + this._spaces10 : this._spaces10 + token.arg; + } + var pad = length - token.arg.length; + token.arg = (token.rightJustify) ? token.arg + this._spaces10.substring(0, pad) : this._spaces10.substring(0, pad) + token.arg; + } + }); + return strLib.sprintf; +}); diff --git a/js/dojo/dojox/string/tokenize.js b/js/dojo/dojox/string/tokenize.js new file mode 100644 index 0000000..558fb57 --- /dev/null +++ b/js/dojo/dojox/string/tokenize.js @@ -0,0 +1,45 @@ +//>>built +define("dojox/string/tokenize", [ + "dojo/_base/lang", + "dojo/_base/sniff" +], function(lang, has){ + var tokenize = lang.getObject("dojox.string", true).tokenize; + + tokenize = function(/*String*/ str, /*RegExp*/ re, /*Function?*/ parseDelim, /*Object?*/ instance){ + // summary: + // Split a string by a regular expression with the ability to capture the delimeters + // parseDelim: + // Each group (excluding the 0 group) is passed as a parameter. If the function returns + // a value, it's added to the list of tokens. 
+ // instance: + // Used as the "this" instance when calling parseDelim + var tokens = []; + var match, content, lastIndex = 0; + while(match = re.exec(str)){ + content = str.slice(lastIndex, re.lastIndex - match[0].length); + if(content.length){ + tokens.push(content); + } + if(parseDelim){ + if(has("opera")){ + var copy = match.slice(0); + while(copy.length < match.length){ + copy.push(null); + } + match = copy; + } + var parsed = parseDelim.apply(instance, match.slice(1).concat(tokens.length)); + if(typeof parsed != "undefined"){ + tokens.push(parsed); + } + } + lastIndex = re.lastIndex; + } + content = str.slice(lastIndex); + if(content.length){ + tokens.push(content); + } + return tokens; + }; + return tokenize; +}); |
