lante lante - 1 year ago 45
Javascript Question

Regex for escaped characters highlighting

I am building an autocomplete in JavaScript that needs to highlight words when doing a search:

highlighting example

That works fine, but there's an issue with escaped characters.
When trying to highlight a text with escaped characters (for example

regex <br> example
), the following is happening:

regex highlighting

That's happening because I am doing the following:

// Runs the highlight replacer over the element's serialized (escaped) HTML
// for every "a". The string returned by replace() is not used here.
element.innerHTML.replace(/a/g, highlight)

/**
 * Wraps a matched substring in the highlight span.
 *
 * Passed as the replacer callback to String.prototype.replace, so it is
 * called with the matched text as its first argument.
 *
 * @param {string} str - Matched text; inserted into the markup as-is.
 * @returns {string} The match wrapped in <span class="foo">...</span>.
 */
function highlight(str) {
  return '<span class="foo">' + str + '</span>';
}

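element.innerHTML returns the escaped text (regex &lt;br&gt; example), and the entity names themselves can contain the searched characters, so it makes sense.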

In conclusion, I need a way to solve that so I would like a function that:

  • receives the search term "a" and the text
    regex <br> example
    and returns
    regex &lt;br&gt; ex<span class="foo">a</span>mple

  • receives the search term "r" and the text
    regex <br> example
    and returns
    <span class="foo">r</span>egex &lt;b<span class="foo">r</span>&gt; example

  • receives the search term "<" and the text
    regex <br> example
    and returns
    regex <span class="foo">&lt;</span>br&gt; example

The entries may or may not contain html blocks, see the issue here (search for

Answer Source

str.replace only returns a new string with the intended replacements. The original string is unchanged.

// replace() leaves `str` untouched; the modified copy is what it returns,
// so it has to be captured in a variable (here `str2`).
var str = 'replace me';
var str2 = str.replace(/e/g, 'E');

// For display only: print both strings as pretty-printed JSON so the
// unchanged original and the replaced copy can be compared.
document.write('<pre>' + JSON.stringify({
  str: str,
  str2: str2
}, null, 2) + '</pre>');

Therefore the code needs to set the returned value from the replace back to the desired element.

Also, innerHTML returns the escaped text rather than the unescaped text. It could be unescaped within the function itself, but why bother when you can read textContent instead? When it is time to write the highlighted text back, however, innerHTML must be used so that the highlight <span> is rendered as markup, which means the text around it has to be escaped first. :)

UPDATE: the values are passed to the function and then set to the element:


  • The regexp could probably be made a bit more robust to avoid having to handle the special case using lastIndex
  • There needs to be some protection on the input as someone could provide a nasty regexp pattern. There is a minimal protection check in this example.

// Demo calls: arguments are (element id, text to display, text to highlight).
higlightElemById('a', 'regex &>< example', 'a');
higlightElemById('b', 'regex &>< example', '&');
higlightElemById('c', 'regex <br> example', '<');
higlightElemById('d', 'regex <br> example', 'e');
// The brackets are regexp-escaped inside higlightElemById, so '[aex]' is
// searched for literally rather than treated as a character class.
higlightElemById('e', 'regex <br> example', '[aex]');

/**
 * Renders `str` into the element with the given id, wrapping every
 * occurrence of `match` in the highlight span and HTML-escaping all text
 * (matched or not) so it is displayed literally instead of parsed as markup.
 *
 * @param {string} id - id of the element whose innerHTML is replaced.
 * @param {string} str - Plain (unescaped) text to display.
 * @param {string} match - Literal text to highlight. Regexp metacharacters
 *   are escaped, so it is never interpreted as a pattern. An empty string
 *   highlights nothing.
 */
function higlightElemById(id, str, match) {
  var itemElem = document.getElementById(id);
  // minimal regexp escape to prevent shenanigans
  var safeMatch = match.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  var text = '';
  // End of the previous match; everything between it and the next match
  // (or the end of the string) is plain text that only needs escaping.
  var lastIndex = 0;
  var matches;
  var regexp;

  // An empty pattern matches at every position without advancing
  // lastIndex, which would make the exec loop below spin forever.
  if (safeMatch !== '') {
    regexp = new RegExp(safeMatch, 'g');
    while ((matches = regexp.exec(str)) !== null) {
      // Slice the prefix by index instead of capturing it with `(.*?)`:
      // `.` does not match line breaks, so a captured prefix would drop
      // any text that precedes a newline.
      text += escapeHTML(str.slice(lastIndex, matches.index));
      // The match itself is user text too and must be escaped before it
      // is wrapped, otherwise highlighting "<" would emit raw markup.
      text += highlight(escapeHTML(matches[0]));
      lastIndex = regexp.lastIndex;
    }
  }

  // Remainder after the last match (the whole string if nothing matched).
  text += escapeHTML(str.slice(lastIndex));

  itemElem.innerHTML = text;
}

/**
 * Wraps a piece of text in the highlight span.
 *
 * The argument is concatenated into the markup unchanged, so callers must
 * pass text that is already safe to insert as HTML.
 *
 * @param {string} str - Text to highlight.
 * @returns {string} `str` wrapped in <span class="myHighlightClass">...</span>.
 */
function highlight(str) {
  return '<span class="myHighlightClass">' + str + '</span>';
}

/**
 * Escapes plain text so it can be inserted into HTML as literal character
 * data.
 *
 * Replaces "&", "<", ">" and U+00A0 (no-break space) with their entities,
 * which are the characters HTML serialization escapes in text content.
 * Quotes are left alone, so the result is meant for element content, not
 * for attribute values.
 *
 * Implemented with string replacement rather than a cached DOM element
 * stored on `this`: in strict mode or an ES module `this` is undefined in
 * a plain function call, and in sloppy mode it is the global object.
 *
 * @param {string} html - Text to escape; null/undefined yield ''.
 * @returns {string} The escaped text.
 */
function escapeHTML(html) {
  // Same as assigning to textContent: null and undefined become ''.
  var text = html == null ? '' : String(html);

  return text
    // "&" must be replaced first so the entities added below survive.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\u00a0/g, '&nbsp;');
}
/* Appearance of the span that wraps each highlighted match. */
.myHighlightClass {
  text-decoration: underline;
  color: red;
}
<!-- Output containers: one per higlightElemById demo call, looked up by id. -->
<div id="a"></div>
<div id="b"></div>
<div id="c"></div>
<div id="d"></div>
<div id="e"></div>