如何使用 Javascript 修改节点的内容？答案

【解决方案1】：

不需要 jQuery：

UE_replacer = function (node) {

   // just for performance, skip attribute and
   // comment nodes (types 2 and 8, respectively)
   if (node.nodeType == 2) return;
   if (node.nodeType == 8) return;

   // for text nodes (type 3), wrap words of the
   // form *xyzzy with a span that has class xyzzy
   if (node.nodeType == 3) {

      // in the actual text, the nodeValue, change
      // all strings ('g'=global) that start and end
      // on a word boundary ('\b') where the first
      // character is '*' and is followed by one or
      // more ('+'=one or more) 'word' characters
      // ('\w'=word character). save all the word
      // characters (that's what parens do) so that
      // they can be used in the replacement string
      // ('$1'=re-use saved characters).
      var text = node.nodeValue.replace(
            /\b\*(\w+)\b/g,
            '<span class="$1">*$1</span>'   // <== Wrong!
      );

      // set the new text back into the nodeValue
      node.nodeValue = text;
      return;
   }

   // for all other node types, call this function
   // recursively on all its child nodes
   for (var i=0; i<node.childNodes.length; ++i) {
      UE_replacer( node.childNodes[i] );
   }
}

// Start the replacement at 'document', the root node, so the walk
// begins at the very top of the page's node tree.
UE_replacer( document );

更新：为了与 strager 的答案所采用的思路形成对比，我去掉了之前写得很拙劣的 jQuery 代码，并让正则表达式尽可能简单。这种“原生” JavaScript 的做法比我预期的要容易得多。

虽然 jQuery 显然很适合操作 DOM 结构，但实际上要弄清楚如何操作 text 元素并不容易。

【讨论】：

不完全确定，但这段代码在遇到下面这样的内容时可能会出问题：
<span>*asdf</span>

@system：您能解释一下您的答案与 strager 的答案有何不同吗？我很难看出两者之间的区别。

@Unknown Entity，system PAUSE 的方法是遍历 DOM 节点，并在文本节点上执行替换（并根据需要添加 span）。我的方法只对单个节点内包含的 HTML 进行操作。理论上这两种方法应该同样有效。

@Unknown Entity，strager 是对的，只是我的版本仍然存在错误......请参阅此答案的第一条评论。

如果你把“<span>”放进 nodeValue，你得到的只是文本“<span>”，而不是一个元素。\b\*\w 也不起作用：* 不是单词字符，因此除非它前面恰好是单词字符，否则它之前不会有单词边界。

【解决方案2】：

不要尝试处理元素的 innerHTML/html()。这永远不会起作用，因为正则表达式不足以解析 HTML。只需遍历 Text 节点即可查找所需内容：

// Walk the children of `element` from last to first: element children are
// descended into recursively, text children are handed to wrapWordsInText,
// and every other node type is ignored.
//
function wrapWordsInDescendants(element, tagName, className) {
    var kids = element.childNodes;
    var position = kids.length;
    while (position-- > 0) {
        var current = kids[position];
        switch (current.nodeType) {
            case 1: // Node.ELEMENT_NODE
                wrapWordsInDescendants(current, tagName, className);
                break;
            case 3: // Node.TEXT_NODE
                wrapWordsInText(current, tagName, className);
                break;
        }
    }
}

// Matches a *word that starts the text or follows a non-word character.
// Group 1 is that leading character (empty at the start of the text) and
// group 2 is the word without its asterisk.
//
var starword= /(^|\W)\*(\w+)\b/g;

// Replace words in a single text node
//
// node:      the Text node to process; it must have a parent.
// tagName:   tag name of the wrapper element to create, e.g. 'span'.
// className: class given to every wrapper. When omitted (null/undefined),
//            each wrapper gets the matched word itself as its class, so
//            *abc ends up inside an element with class "abc".
//
function wrapWordsInText(node, tagName, className) {

    // Get list of *word indexes
    //
    // The regex is shared and global, so make sure a previous, interrupted
    // scan cannot make this one start in the middle of the text.
    //
    var ixs= [];
    var match;
    starword.lastIndex= 0;
    while ((match= starword.exec(node.data)) !== null) {
        // The match begins at the separator in front of the '*'; skip it so
        // that only "*word" ends up inside the wrapper.
        var start= match.index+match[1].length;
        ixs.push([start, match.index+match[0].length, match[2]]);
    }

    // Wrap each in the given element
    //
    // Go from the last match to the first so the offsets of the earlier
    // matches remain valid while the node is being split.
    //
    for (var i= ixs.length; i-->0;) {
        // Create the wrapper in the node's own document so nodes living in
        // another document (e.g. an iframe) are handled correctly.
        var element= node.ownerDocument.createElement(tagName);
        element.className= className == null ? ixs[i][2] : className;
        node.splitText(ixs[i][1]);
        element.appendChild(node.splitText(ixs[i][0]));
        node.parentNode.insertBefore(element, node.nextSibling);
    }
}

// Run the word wrapper over every element that carries the class 'foo'.
// jQuery passes each matched element as the callback's second argument
// (it is the same object the callback receives as `this`).
//
$('.foo').each(function(position, fooElement) {
    wrapWordsInDescendants(fooElement, 'span', 'xyz');
});


// If you're not using jQuery, you'll need the below bits instead of $...

// Fix missing indexOf method on IE
//
// Stand-in for Array.prototype.indexOf that follows the native behaviour:
// strict equality (so 1 does not match '1'), an optional fromIndex that
// may be negative (counted from the end), and holes in sparse arrays are
// never reported as matches.
//
// item:      value to look for.
// fromIndex: optional index to start searching at; defaults to 0.
// Returns the first index at which item is found, or -1.
//
function arrayIndexOfFallback(item, fromIndex) {
    var length= this.length >>> 0;
    var start= Number(fromIndex) || 0;   // undefined / NaN -> 0
    if (start<0)
        start= Math.max(length+Math.ceil(start), 0);
    else
        start= Math.floor(start);
    for (var i= start; i<length; i++)
        if (i in this && this[i]===item)
            return i;
    return -1;
}

// Only install the stand-in when the engine has no indexOf of its own.
if (![].indexOf) Array.prototype.indexOf= arrayIndexOfFallback;

// Iterating over '*' (all elements) is not fast; if possible, reduce to
// all elements called 'li', or all element inside a certain element etc.
//
// The list is walked from the end: wrapping inserts new elements into the
// live collection, but only at positions after the element being handled.
//
var elements= document.getElementsByTagName('*');
for (var i= elements.length; i-->0;) {
    // className is not a string on SVG elements, so fall back to the raw
    // attribute there instead of failing on a missing split().
    var classNames= elements[i].className;
    if (typeof classNames!='string')
        classNames= elements[i].getAttribute('class') || '';
    // Class names may be separated by any whitespace, not just one space.
    if (classNames.split(/\s+/).indexOf('foo')!=-1)
        wrapWordsInDescendants(elements[i], 'span', 'xyz');
}

【讨论】：

@bobince，结构不错！我喜欢你的正则表达式（我不确定 * 前面的 \b 匹配）。但是，您将所有类名设置为“xyz”。星号 *abc 的类名必须是“abc”。
哦，好的，在这种情况下：“element.className= element.firstChild.data”（在 insertBefore 之后）。

【解决方案3】：

正则表达式看起来像这样（sed-ish 语法）：

s/\*\(\w+\)\b\(?![^<]*>\)/<span class="\1">*\1</span>/g

因此：

// For every li.foo, rewrite its markup so that each *word standing outside
// of a tag is wrapped in a span whose class is the word itself.
$('li.foo').each(function() {
    var $item = $(this);
    var starWordOutsideTags = /\*(\w+)\b(?![^<]*>)/g;
    $item.html($item.html().replace(starWordOutsideTags, "<span class=\"$1\">*$1</span>"));
});

\*(\w+)\b 段是重要的部分。它找到一个星号后跟一个或多个单词字符，然后是某种单词终止（例如，行尾或空格）。单词被捕获到$1，然后用作输出的文本和类。

紧随其后的部分 ((?![^<]*>)) 是负前瞻。它断言不跟随右尖括号，除非它之前有一个左尖括号。这可以防止字符串在 HTML 标记内的匹配。这不能处理格式错误的 HTML，但无论如何都不应该是这样。

【讨论】：

在替换字符串中，JavaScript 使用 $1 而不是 \\1。另外，我推荐使用 text() 而不是 html()，因为您可能在 <li> 中的某处有一个与标记或属性同名的类。非常好！

@system PAUSE，我不相信使用 text() 代替 html()。如果你更新 text()，它会被转义，不是吗？

@strager，你说得对，text(t) 会转义新的 span，所以应该是 html(t)。我正在尝试解决类似下面这样的情况（例如）：

<a title="*xyz">*abc</a>

使用 html() 时，提取出的文本会包含整个标签（连同其属性）—— 其中的 *xyz 也会被替换，导致 HTML 无效；而 text() 只会得到 *abc。

@system PAUSE，啊，我明白你的意思了。稍后我将处理正则表达式以满足您的要求。

@system PAUSE，更新了我的答案。根据 RegexBuddy 工作（它使用 .NET 正则表达式 AFAIK，但希望它也适用于 JS）。