Wednesday, November 12, 2008

Encoding JavaScript UTF-16 characters for URLs

Dug this up from the bowels of the ddbdp webapp code. I wrote it a long time ago, but didn't end up needing it. Seems like it might be useful down the line...



var firstByteMark = [ 0x00,0x00,0xC0,0xE0,0xF0,0xF8,0xFC ];
var byteMask = 0xBF;
var byteMark = 0x80;

function UTF16toUTF8Bytes(u16){
var bytes = new Array();
if (u16 < 128){
bytes.length = 1;
} else if (u16 < 2048){
bytes.length = 2;
} else { // presuming max js charCode of 65535
bytes.length = 3;
}
switch (bytes.length){
case 3:
bytes[2] = ((u16 | byteMark) & byteMask);
u16 >>= 6;
case 2:
bytes[1] = ((u16 | byteMark) & byteMask);
u16 >>= 6;
case 1:
bytes[0] = (u16 | firstByteMark[bytes.length]);
}
return bytes;
}
function encode(input){
var output = new Array();
var inputArray = input.split(/\s+/);
for(var i=0;i<inputArray.length;i++){
var term = '';
for(var j=0;j<inputArray[i].length;j++){
var u16 = inputArray[i].charCodeAt(j);
if (u16 < 128){
term += inputArray[i].charAt(j);
continue;
}
var utf8bytes = UTF16toUTF8Bytes(u16);
for(var k=0;k<utf8bytes.length;k++){
if(utf8bytes[k] < 16){
term += "%0";
} else {
term += "%";
}
term += utf8bytes[k].toString(16);
}
}
output[i] = term;
}
return output;
}

No comments: