假设有字符串“朱元璋”,截取限定的长度为7字节
截取编码为utf-8时,结果为“朱元”(共6字节,再加一个汉字就会超过7字节)
截取编码为gb2312时,结果为“朱元璋”(共6字节,未超过7字节)
字符串为“hello朱元璋”,截取限定的长度为13字节
截取编码为utf-8时,结果为“hello朱元”(共11字节)
截取编码为gb2312时,结果为“hello朱元璋”(共11字节,未超过13字节)
这里的关键在于:一个汉字在gbk(gb2312)编码下占2个字节,而在utf-8编码下占3个字节,因此同样的字节限制在两种编码下能容纳的汉字个数不同。完整的示例代码:
1:
>
3: <html>
4: <head>
new document </title>
/>
/>
/>
/>
/>
11: </head>
12:
13: <body>
>
/**
 * Returns the number of bytes `str` would occupy in the target encoding.
 *
 * @param {string} str - Text to measure. Any non-string value measures as 0.
 * @param {boolean} isUTF8 - Truthy to measure as UTF-8; falsy to measure as a
 *     double-byte encoding (GBK/GB2312 style).
 * @returns {number} Byte length of `str` in the chosen encoding.
 */
function getRealLen(str, isUTF8) {
    if (typeof str !== 'string') {
        return 0;
    }

    if (!isUTF8) {
        // Double-byte estimate: ASCII takes one byte, every other UTF-16 code
        // unit is counted as two bytes.
        return str.replace(/[^\x00-\x7F]/g, 'xx').length;
    }

    // Count UTF-8 bytes directly from the UTF-16 code units. This avoids
    // encodeURI(), which throws URIError on an unpaired surrogate.
    var bytes = 0;
    var i;
    var code;
    var next;
    for (i = 0; i < str.length; i++) {
        code = str.charCodeAt(i);
        if (code < 0x80) {
            bytes += 1;
        } else if (code < 0x800) {
            bytes += 2;
        } else if (code >= 0xD800 && code <= 0xDBFF) {
            // High surrogate: a valid pair encodes one 4-byte code point.
            next = str.charCodeAt(i + 1); // NaN when i is the last index
            if (next >= 0xDC00 && next <= 0xDFFF) {
                bytes += 4;
                i++; // the low surrogate has been accounted for
            } else {
                // Unpaired surrogate: encoders emit U+FFFD, which is 3 bytes.
                bytes += 3;
            }
        } else {
            bytes += 3;
        }
    }
    return bytes;
}
27:
/**
 * Returns the longest leading part of `str` whose encoded size, as reported
 * by getRealLen(), does not exceed `len` bytes.
 *
 * @param {*} str - Value to truncate; converted to a string first
 *     (null/undefined are treated as an empty string).
 * @param {boolean} isUTF8 - Passed through to getRealLen(): truthy measures
 *     UTF-8 bytes, falsy measures double-byte (GBK/GB2312 style) bytes.
 * @param {number|string} len - Maximum number of bytes; converted with
 *     Number(). If it is not a number the string is returned unchanged.
 * @param {string} [suffix] - Accepted for interface compatibility; it is not
 *     used by this function.
 * @returns {string} The whole string when it fits, otherwise the longest
 *     prefix that fits. A surrogate pair is never split.
 */
function uniLeft(str, isUTF8, len, suffix) {
    var text = str == null ? '' : String(str);
    var limit = Number(len);

    if (isNaN(limit)) {
        return text;
    }

    if (getRealLen(text, isUTF8) <= limit) {
        return text;
    }

    // A UTF-16 code unit costs at most 3 bytes in UTF-8 and 2 bytes in the
    // double-byte mode, so this many leading units always fit. Starting at
    // len / 2 in UTF-8 mode could already be past the limit.
    var maxBytesPerUnit = isUTF8 ? 3 : 2;
    var end = Math.max(0, Math.floor(limit / maxBytesPerUnit));
    var code;
    var next;
    var low;

    // Do not start in the middle of a surrogate pair.
    if (end > 0) {
        code = text.charCodeAt(end);
        low = text.charCodeAt(end - 1);
        if (code >= 0xDC00 && code <= 0xDFFF && low >= 0xD800 && low <= 0xDBFF) {
            end -= 1;
        }
    }

    // Grow the prefix one character at a time until the next one would
    // push it over the limit.
    while (end < text.length) {
        next = end + 1;
        code = text.charCodeAt(end);
        if (code >= 0xD800 && code <= 0xDBFF && next < text.length) {
            low = text.charCodeAt(next);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                next += 1; // keep both halves of the pair together
            }
        }
        if (getRealLen(text.substr(0, next), isUTF8) > limit) {
            break;
        }
        end = next;
    }

    return text.substr(0, end);
}
51:
// Demo: truncate the two sample strings from the article under both encodings.
var str1 = '朱元璋';
var str2 = 'hello朱元璋';

console.log(uniLeft(str1, false, 7));  // gb2312 -> "朱元璋"
console.log(uniLeft(str1, true, 7));   // utf-8  -> "朱元"
console.log(uniLeft(str2, false, 13)); // gb2312 -> "hello朱元璋"
console.log(uniLeft(str2, true, 13));  // utf-8  -> "hello朱元"
57: </script>
58: </body>
59: </html>