232 lines
7.4 KiB
JavaScript
232 lines
7.4 KiB
JavaScript
// Matches every space and hyphen. Used with String#replace to strip those
// characters from encoding names before they are lower-cased and looked up.
var RE_SPACEDASH = /[- ]/g;
|
||
// Module exports
|
||
// Public API object. Converts strings to Buffers (and back) through codecs
// that are looked up by normalized encoding name in `iconv.encodings`.
var iconv = module.exports = {
    // Encode `str` into a Buffer using the codec resolved for `encoding`.
    // Throws if the encoding cannot be resolved (see getCodec).
    toEncoding: function(str, encoding) {
        return iconv.getCodec(encoding).toEncoding(str);
    },

    // Decode `buf` into a string using the codec resolved for `encoding`.
    // Throws if the encoding cannot be resolved (see getCodec).
    fromEncoding: function(buf, encoding) {
        return iconv.getCodec(encoding).fromEncoding(buf);
    },

    // Return true when `enc` (spaces/hyphens stripped, lower-cased) is a key
    // registered in `iconv.encodings`. Non-string input yields false.
    encodingExists: function(enc) {
        loadEncodings();
        if (getType(enc) !== "String")
            return false;
        enc = enc.replace(RE_SPACEDASH, "").toLowerCase();
        // Own-property check: names such as "constructor" or "__proto__" must
        // not be reported as encodings just because Object.prototype has them.
        return Object.prototype.hasOwnProperty.call(iconv.encodings, enc) &&
            iconv.encodings[enc] !== undefined;
    },

    // Replacement character (U+FFFD) used where a character cannot be mapped.
    // Written as an escape so it survives re-encoding of this source file.
    defaultCharUnicode: '\uFFFD',
    // Byte substituted by single-byte codecs for unmappable characters.
    defaultCharSingleByte: '?',

    // Set to true by loadEncodings() once the extra encoding files are merged.
    encodingsLoaded: false,

    // Get correct codec for given encoding.
    //
    // `encoding` defaults to "utf8". Entries of `iconv.encodings` are followed
    // until a codec factory is reached:
    //   - a string is an alias for another entry; the factory then receives
    //     {originalEncoding: <alias name>};
    //   - an object with a `type` field is an options object for the entry
    //     named by `type`;
    //   - a function is the codec factory itself and is called with the
    //     options collected so far.
    // Throws Error("Encoding not recognized: ...") for anything else.
    getCodec: function(encoding) {
        loadEncodings();
        var hasOwn = Object.prototype.hasOwnProperty;
        var enc = encoding || "utf8";
        var codecOptions = undefined;
        while (true) {
            if (getType(enc) === "String")
                enc = enc.replace(RE_SPACEDASH, "").toLowerCase();

            // Only own entries count; inherited Object.prototype members
            // (e.g. "constructor") are not encodings.
            var codec = hasOwn.call(iconv.encodings, enc) ? iconv.encodings[enc] : undefined;
            var type = getType(codec);
            if (type === "String") {
                // Link to other encoding.
                codecOptions = {originalEncoding: enc};
                enc = codec;
            }
            else if (type === "Object" && codec.type != null) {
                // Options for other encoding.
                codecOptions = codec;
                enc = codec.type;
            }
            else if (type === "Function")
                // Codec itself.
                return codec(codecOptions);
            else
                throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
        }
    },

    // Define basic encodings
    encodings: {
        // Codec backed by Buffer's own encodings; `options.originalEncoding`
        // is the encoding name handed to Buffer.
        internal: function(options) {
            return {
                toEncoding: toInternalEncoding,
                fromEncoding: fromInternalEncoding,
                options: options
            };
        },
        utf8: "internal",
        ucs2: "internal",
        binary: "internal",
        ascii: "internal",
        base64: "internal",

        // Codepage single-byte encodings.
        //
        // `options.chars` must hold 128 or 256 characters; a 128-character
        // table is prefixed with `asciiString`. The derived lookup tables are
        // cached on `options` itself, so they are built once per definition.
        singlebyte: function(options) {
            // Prepare chars if needed
            if (!options.charsBuf) {
                if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
                    throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");

                if (options.chars.length === 128)
                    options.chars = asciiString + options.chars;

                // UCS-2 bytes of the table: byte value b maps to bytes [2b, 2b+1].
                options.charsBuf = new Buffer(options.chars, 'ucs2');
            }

            if (!options.revCharsBuf) {
                // Reverse map: UTF-16 code unit -> byte, pre-filled with the
                // default single-byte character.
                options.revCharsBuf = new Buffer(65536);
                var defChar = iconv.defaultCharSingleByte.charCodeAt(0);
                for (var unit = 0; unit < options.revCharsBuf.length; unit++)
                    options.revCharsBuf[unit] = defChar;
                for (var byteValue = 0; byteValue < options.chars.length; byteValue++)
                    options.revCharsBuf[options.chars.charCodeAt(byteValue)] = byteValue;
            }

            return {
                toEncoding: toSingleByteEncoding,
                fromEncoding: fromSingleByteEncoding,
                options: options,
            };
        },

        // Codepage double-byte encodings.
        //
        // `options.table` maps an encoded code to a UTF-16 code unit. The
        // reverse table (code unit -> encoded code, 0 when unmapped) is built
        // on first use and cached on `options`.
        table: function(options) {
            if (!options.table) {
                throw new Error("Encoding '" + options.type + "' has incorect 'table' option");
            }
            if (!options.revCharsTable) {
                var revCharsTable = options.revCharsTable = {};
                for (var unit = 0; unit <= 0xFFFF; unit++) {
                    revCharsTable[unit] = 0;
                }

                var table = options.table;
                for (var key in table) {
                    // Keys of `table` are strings; unary plus turns them back into numbers.
                    revCharsTable[table[key]] = +key;
                }
            }

            return {
                toEncoding: toTableEncoding,
                fromEncoding: fromTableEncoding,
                options: options,
            };
        }
    }
};
|
||
|
||
// Codec method for the "internal" codec: encodes a string into a Buffer using
// the Buffer encoding named by this.options.originalEncoding.
// Must be invoked with `this` bound to a codec object that has `options`.
function toInternalEncoding(str) {
    var text = ensureString(str);
    var bufferEncoding = this.options.originalEncoding;
    return new Buffer(text, bufferEncoding);
}
|
||
|
||
// Codec method for the "internal" codec: decodes a Buffer into a string using
// the Buffer encoding named by this.options.originalEncoding.
// Must be invoked with `this` bound to a codec object that has `options`.
function fromInternalEncoding(buf) {
    var bytes = ensureBuffer(buf);
    return bytes.toString(this.options.originalEncoding);
}
|
||
|
||
// Codec method for table-based (double-byte) encodings: string -> Buffer.
//
// Code units below 0x80 are written as a single byte. Every other code unit
// is looked up in this.options.revCharsTable and written as two bytes, high
// byte first. When the lookup yields a falsy value (missing or 0), the table
// entry for iconv.defaultCharUnicode is written instead.
function toTableEncoding(str) {
    var text = ensureString(str);
    var total = text.length;
    var reverseTable = this.options.revCharsTable;
    // Worst case is two bytes per code unit; trimmed to the real size on return.
    var out = new Buffer(total * 2);
    var fallbackCode = reverseTable[iconv.defaultCharUnicode.charCodeAt(0)];
    var written = 0;

    for (var pos = 0; pos < total; pos++) {
        var codeUnit = text.charCodeAt(pos);
        if (codeUnit < 0x80) {
            // ASCII passes through unchanged.
            out[written++] = codeUnit;
            continue;
        }
        var encoded = reverseTable[codeUnit] || fallbackCode;
        out[written++] = encoded >> 8;   // high byte
        out[written++] = encoded & 0xFF; // low byte
    }
    return out.slice(0, written);
}
|
||
|
||
// Codec method for table-based (double-byte) encodings: Buffer -> string.
//
// A byte with the high bit clear is taken as the character code itself. A byte
// with the high bit set is combined with the following byte into a 16-bit code
// (lead byte high) and looked up in this.options.table; a falsy lookup result
// is replaced by the code of iconv.defaultCharUnicode. The output is assembled
// as little-endian UCS-2 bytes and converted to a string at the end.
function fromTableEncoding(buf) {
    var bytes = ensureBuffer(buf);
    var total = bytes.length;
    var table = this.options.table;
    // Each input byte yields at most one 2-byte code unit.
    var out = new Buffer(total * 2);
    var fallback = iconv.defaultCharUnicode.charCodeAt(0);
    var readPos = 0;
    var writePos = 0;

    while (readPos < total) {
        var code = bytes[readPos++];
        if (code & 0x80) {
            // Lead byte: consume the trail byte as well. A missing trail byte
            // produces NaN here, which misses the table and takes the fallback.
            var pair = (code << 8) + bytes[readPos++];
            code = table[pair] || fallback;
        }
        out[writePos++] = code & 0xFF; // low byte
        out[writePos++] = code >> 8;   // high byte
    }
    return out.slice(0, writePos).toString('ucs2');
}
|
||
|
||
// Codec method for single-byte encodings: string -> Buffer.
// Each UTF-16 code unit is mapped to one byte through
// this.options.revCharsBuf, which is indexed by code unit.
function toSingleByteEncoding(str) {
    var text = ensureString(str);
    var total = text.length;
    var out = new Buffer(total);
    var reverseMap = this.options.revCharsBuf;

    var pos = 0;
    while (pos < total) {
        out[pos] = reverseMap[text.charCodeAt(pos)];
        pos++;
    }
    return out;
}
|
||
|
||
// Codec method for single-byte encodings: Buffer -> string.
// this.options.charsBuf holds the character table as UCS-2 bytes, so input
// byte b maps to the two bytes at offsets 2*b and 2*b+1.
function fromSingleByteEncoding(buf) {
    var bytes = ensureBuffer(buf);
    var total = bytes.length;
    var table = this.options.charsBuf;

    // JS strings are immutable, so the result is assembled as UCS-2 bytes in a
    // Buffer and converted to a string once at the end.
    var out = new Buffer(total * 2);
    for (var pos = 0; pos < total; pos++) {
        var src = bytes[pos] * 2;
        var dst = pos * 2;
        out[dst] = table[src];
        out[dst + 1] = table[src + 1];
    }
    return out.toString('ucs2');
}
|
||
|
||
// Add aliases to convert functions
|
||
// Alias: iconv.encode refers to the same function object as iconv.toEncoding.
iconv.encode = iconv.toEncoding;
|
||
// Alias: iconv.decode refers to the same function object as iconv.fromEncoding.
iconv.decode = iconv.fromEncoding;
|
||
|
||
// Load other encodings manually from files in /encodings dir.
|
||
// Copies every encoding definition exported by the files in ./encodings into
// iconv.encodings, then sets iconv.encodingsLoaded so later calls return
// immediately.
function loadEncodings() {
    if (iconv.encodingsLoaded)
        return;

    var definitionSets = [
        require('./encodings/singlebyte'),
        require('./encodings/gbk'),
        require('./encodings/big5')
    ];
    for (var idx = 0; idx < definitionSets.length; idx++) {
        var definitions = definitionSets[idx];
        // Later files overwrite earlier entries that share a name.
        for (var name in definitions)
            iconv.encodings[name] = definitions[name];
    }
    iconv.encodingsLoaded = true;
}
|
||
|
||
|
||
|
||
// Utilities
|
||
// The 128 ASCII characters U+0000..U+007F in code order. Prepended to
// 128-character single-byte tables to form a full 256-entry table.
var asciiString = (function() {
    var chars = '';
    for (var code = 0; code < 128; code++)
        chars += String.fromCharCode(code);
    return chars;
})();
|
||
|
||
// Coerce the argument to a Buffer.
//   - falsy value       -> empty Buffer
//   - Buffer            -> returned as is (same object)
//   - anything else     -> stringified and encoded as "binary"
var ensureBuffer = function(buf) {
    if (!buf)
        return new Buffer(0);
    if (buf instanceof Buffer)
        return buf;
    return new Buffer("" + buf, "binary");
};
|
||
|
||
// Coerce the argument to a string.
//   - falsy value       -> ""
//   - Buffer            -> decoded as UTF-8
//   - anything else     -> stringified by concatenation with ""
var ensureString = function(str) {
    if (!str)
        return "";
    if (str instanceof Buffer)
        return str.toString('utf8');
    return "" + str;
};
|
||
|
||
// Return the internal class name of a value, i.e. the "Xxx" part of the
// "[object Xxx]" string produced by Object.prototype.toString
// (e.g. "String", "Object", "Function", "Undefined").
var getType = function(obj) {
    var tag = Object.prototype.toString.call(obj);
    // Drop the leading "[object " (8 characters) and the trailing "]".
    return tag.substring(8, tag.length - 1);
};
|
||
|