Yasin Yaqoobi Yasin Yaqoobi - 13 days ago 5
Node.js Question

Express routing non ascii characters (Farsi)

I am trying to use this route http://localhost:3030/api/words/عشق
in my express app, so I can match the word in the dictionary.

The browser changes the URL to http://localhost:3030/api/words/%D8%B9%D8%B4%D9%82 (its percent-encoded form), but I have written a small middleware that converts it back to the original version before passing it on to the routes. In the route, I have a regex that checks for the Unicode range that encompasses Farsi/Persian characters.

I'm not sure what's going on, because the middleware prints

/words/عشق

and, if I remove the regex rule, the route also prints

/words/عشق

Why is Express not matching this? Does Express not use req.url to determine the route?

/**
 * Get word by string.
 *
 * Route as posted in the question. The :word parameter carries a custom
 * pattern: one or more runs of characters in the U+0600-U+06FF range, each
 * run optionally followed by a single whitespace character. When the route
 * matches, the handler logs req.url and sends req.params back as the response.
 *
 * NOTE(review): according to the question this route is NOT being matched;
 * the answer further down suggests the leading ^ inside the pattern is the
 * likely cause -- confirm against the Express version in use.
 */
api.get('/:word(^([\\u0600-\\u06FF]+\\s?)+$)', (req, res, next) =>{
console.log("persian version " + req.url);
res.send(req.params);
});


/**
 * URL decoder middleware.
 *
 * Replaces req.url with its percent-decoded form (for example
 * "/words/%D8%B9%D8%B4%D9%82" becomes "/words/عشق"), logs the decoded URL,
 * and hands control to the next middleware.
 *
 * decodeURIComponent throws a URIError when the URL contains a malformed
 * percent-escape sequence. Because req.url comes straight from the client,
 * that error is caught here and forwarded through next(err) with
 * err.status = 400 instead of escaping the middleware as an uncaught
 * exception. In that case req.url is left unchanged.
 *
 * @param {object} req - Request object; its `url` property is read and overwritten.
 * @param {object} res - Response object (unused).
 * @param {Function} next - Continuation callback; receives the error on failure.
 * @returns {void}
 */
function urlencoder(req, res, next) {
  let decodedUrl;
  try {
    decodedUrl = decodeURIComponent(req.url);
  } catch (err) {
    if (err instanceof URIError) {
      // Malformed escapes are a client mistake, not a server fault.
      err.status = 400;
    }
    next(err);
    return;
  }

  req.url = decodedUrl;
  console.log("Middleware " + req.url);
  next();
}

Answer

I think that the code that converts the route path to a regular expression already prefixes the regex with an anchor (^), so you shouldn't use an additional one in yours.

This seems to work:

// Percent-decoding helper from Node's built-in querystring module.
const { unescape } = require('querystring');

/**
 * Overwrites req.url with its decoded form so that the routes registered
 * afterwards see the raw (non-ASCII) characters, then continues the chain.
 */
const decodeRequestUrl = (req, res, next) => {
  req.url = unescape(req.url);
  next();
};

/**
 * Logs the (already decoded) request URL and echoes the matched route
 * parameters back to the client.
 */
const echoWordParams = (req, res) => {
  console.log(`persian version ${req.url}`);
  res.send(req.params);
};

api.use(decodeRequestUrl);

// Same pattern as in the question, minus the leading ^ anchor.
api.get('/:word(([\\u0600-\\u06FF]+\\s?)+$)', echoWordParams);