deadManN deadManN - 4 months ago 33
AngularJS Question

Angular(HTML5) with DotNet (rewrite config), For Seo, Read Whole URL

I'm using AngularJS and C#.
At my supervisor's insistence, I have to use HTML5 mode at all costs, meaning no '#' sign in the URLs.
Also at his request, I used a rewrite rule so that the Angular pages stay accessible even after a page refresh.
Recently we implemented PhantomJS. The first page worked well, but then I noticed that the other pages don't. After digging into the code, I found out that it doesn't capture the whole URL. For example, for:

http://localhost:xyzw/flights?_escaped_fragment_=
it only captures
http://localhost:1350/?_escaped_fragment_=
, which doesn't contain the 'flights' part. I also made some changes to my rewrite config; the most recent one was adding the following condition, so that these paths are ignored by the Angular app and processed directly:
<add input="{REQUEST_URI}" matchType="Pattern" pattern="(.*)_escaped_fragment_=(.*)" ignoreCase="false" />


I have split my code into parts for those who would otherwise ask me to shorten the question, since the whole of it is rather long:


  • First part: my configuration and short code snippets

  • Second part: the tutorial I read about PhantomJS

  • Third part: other files which may be required



Here are my main setup:

WebConfig Rewrite:

<rewrite>
<rules>
<!-- Disabled alternative: forwards requests whose query string contains "_escaped_fragment_=" to the external prerender.io service. -->
<!--<rule name="Seo rewrite rule" stopProcessing="true">
<conditions>
<add input="{QUERY_STRING}" pattern="(.*)_escaped_fragment_=(.*)" ignoreCase="false" />
</conditions>
<action type="Rewrite" url="http://service.prerender.io/http://{HTTP_HOST}{REQUEST_URI}" appendQueryString="false" />
</rule>-->
<!-- Rewrites a request to "/" only when every condition below holds (logicalGrouping="MatchAll"). -->
<rule name="Index Rule" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAll">
<!-- The request must not map to an existing file or directory. -->
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
<!-- Negated patterns: URIs starting with these prefixes are excluded from the rewrite. -->
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/api/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Token" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/bundles/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Content/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Scripts/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/SiteMap/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/CallBackBank/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Error/" negate="true" />
<!-- NOTE(review): unlike its neighbours this condition has no negate="true", so under MatchAll the rule only applies when the URI contains "_escaped_fragment_=". Confirm whether that is intended. -->
<add input="{REQUEST_URI}" matchType="Pattern" pattern="(.*)_escaped_fragment_=(.*)" ignoreCase="false" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/HtmlSnapshot[^s]?/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Flight[^s]?/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Hotel[^s]?/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Tour[^s]?/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/TravelAgency[^s]?/" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Users[^s]?/" negate="true" />
</conditions>
<!-- The requested path is replaced by the site root. -->
<action type="Rewrite" url="/" />
</rule>
</rules>
</rewrite>


Phantom JS required Filter:

/// <summary>
/// Action filter that detects crawler requests carrying the
/// <c>_escaped_fragment_</c> query parameter and redirects them to the
/// <c>HtmlSnapshot/returnHTML</c> action, passing the requested location
/// as a "port/path" string in the <c>url</c> route value.
/// </summary>
public class AjaxCrawlableAttribute : System.Web.Mvc.ActionFilterAttribute
{
    private const string Fragment = "_escaped_fragment_";
    private const string SnapshotAction = "HtmlSnapshot/returnHTML";

    /// <summary>
    /// Replaces the action result with a redirect to the snapshot action when
    /// the request has an <c>_escaped_fragment_</c> parameter; otherwise does nothing.
    /// </summary>
    /// <param name="filterContext">Context of the action about to execute.</param>
    public override void OnActionExecuting(ActionExecutingContext filterContext)
    {
        var request = filterContext.RequestContext.HttpContext.Request;
        var requestUri = request.Url;
        if (requestUri == null || request.QueryString[Fragment] == null)
        {
            return;
        }

        // NOTE(review): behind an IIS rewrite rule request.Url may be the rewritten URL;
        // request.RawUrl presumably keeps the originally requested path - confirm which is wanted.
        var fullUrl = requestUri.ToString();
        if (fullUrl.Contains(SnapshotAction))
        {
            // Already on the snapshot action; redirecting again would loop.
            return;
        }

        // Skip "scheme://authority" by locating the first '/' after the scheme separator.
        // Splitting on ':' instead throws when the URL has no explicit port and
        // truncates at any later ':' in the path or query.
        var schemePrefixLength = (requestUri.Scheme + "://").Length;
        var pathStart = fullUrl.IndexOf('/', schemePrefixLength);
        var pathAndQuery = pathStart >= 0 ? fullUrl.Substring(pathStart) : "/";

        // The marker is "?_escaped_fragment_=" when it is the only parameter and
        // "&_escaped_fragment_=" when it follows other parameters.
        pathAndQuery = pathAndQuery
            .Replace("?" + Fragment + "=", string.Empty)
            .Replace("&" + Fragment + "=", string.Empty);

        // Uri.Port is also populated for default ports, so the result is always "port/path".
        var url = requestUri.Port + pathAndQuery;

        filterContext.Result = new RedirectToRouteResult(
            new RouteValueDictionary { { "controller", "HtmlSnapshot" }, { "action", "returnHTML" }, { "url", url } });
    }
}


Route Config:

/// <summary>
/// Registers the MVC routes: the HtmlSnapshot route used for PhantomJS
/// snapshots, followed by the conventional controller/action/id route.
/// Generated URLs are lower-cased.
/// </summary>
/// <param name="routes">Route table to populate.</param>
public static void RegisterRoutes(RouteCollection routes)
{
    routes.IgnoreRoute("{resource}.axd/{*pathInfo}");
    routes.LowercaseUrls = true;

    // PhantomJS: everything after HtmlSnapshot/returnHTML/ is captured into "url".
    var snapshotDefaults = new { controller = "HtmlSnapshot", action = "returnHTML", url = UrlParameter.Optional };
    routes.MapRoute("HtmlSnapshot", "HtmlSnapshot/returnHTML/{*url}", snapshotDefaults);

    // A catch-all "SPA" route ("{*catchall}" -> Home/Index) is currently disabled:
    //routes.MapRoute("SPA", "{*catchall}", new {controller = "Home", action = "Index"});

    var fallbackDefaults = new { controller = "Home", action = "Index", id = UrlParameter.Optional };
    routes.MapRoute("Default", "{controller}/{action}/{id}", fallbackDefaults);
}


PhantomJS for C# - the tutorial that I read:
Since the tutorial I read about PhantomJS is not in English, I am posting the notes I wrote for my own later use:

1.Install Package

Install-Package PhantomJS.exe -version 1.9.2.1


2.Create Helper

/// <summary>
/// Action filter that detects crawler requests carrying the
/// <c>_escaped_fragment_</c> query parameter and redirects them to the
/// <c>HtmlSnapshot/returnHTML</c> action, passing the requested location
/// as a "port/path" string in the <c>url</c> route value.
/// </summary>
public class AjaxCrawlableAttribute : System.Web.Mvc.ActionFilterAttribute
{
    private const string Fragment = "_escaped_fragment_";
    private const string SnapshotAction = "HtmlSnapshot/returnHTML";

    /// <summary>
    /// Replaces the action result with a redirect to the snapshot action when
    /// the request has an <c>_escaped_fragment_</c> parameter; otherwise does nothing.
    /// </summary>
    /// <param name="filterContext">Context of the action about to execute.</param>
    public override void OnActionExecuting(ActionExecutingContext filterContext)
    {
        var request = filterContext.RequestContext.HttpContext.Request;
        var requestUri = request.Url;
        if (requestUri == null || request.QueryString[Fragment] == null)
        {
            return;
        }

        // NOTE(review): behind an IIS rewrite rule request.Url may be the rewritten URL;
        // request.RawUrl presumably keeps the originally requested path - confirm which is wanted.
        var fullUrl = requestUri.ToString();
        if (fullUrl.Contains(SnapshotAction))
        {
            // Already on the snapshot action; redirecting again would loop.
            return;
        }

        // Skip "scheme://authority" by locating the first '/' after the scheme separator.
        // Splitting on ':' instead throws when the URL has no explicit port and
        // truncates at any later ':' in the path or query.
        var schemePrefixLength = (requestUri.Scheme + "://").Length;
        var pathStart = fullUrl.IndexOf('/', schemePrefixLength);
        var pathAndQuery = pathStart >= 0 ? fullUrl.Substring(pathStart) : "/";

        // The marker is "?_escaped_fragment_=" when it is the only parameter and
        // "&_escaped_fragment_=" when it follows other parameters.
        pathAndQuery = pathAndQuery
            .Replace("?" + Fragment + "=", string.Empty)
            .Replace("&" + Fragment + "=", string.Empty);

        // Uri.Port is also populated for default ports, so the result is always "port/path".
        var url = requestUri.Port + pathAndQuery;

        filterContext.Result = new RedirectToRouteResult(
            new RouteValueDictionary { { "controller", "HtmlSnapshot" }, { "action", "returnHTML" }, { "url", url } });
    }
}


3.Replace Default Routes With:

/// <summary>
/// Registers the routes for the snapshot setup: the HtmlSnapshot route
/// first, then a catch-all route that sends every other URL to Home/Index.
/// </summary>
/// <param name="routes">Route table to populate.</param>
public static void RegisterRoutes(RouteCollection routes)
{
    routes.IgnoreRoute("{resource}.axd/{*pathInfo}");

    // Everything after HtmlSnapshot/returnHTML/ is captured into "url".
    var snapshotDefaults = new { controller = "HtmlSnapshot", action = "returnHTML", url = UrlParameter.Optional };
    routes.MapRoute("HtmlSnapshot", "HtmlSnapshot/returnHTML/{*url}", snapshotDefaults);

    // If this catch-all route does not work, use the default route instead.
    var spaDefaults = new { controller = "Home", action = "Index" };
    routes.MapRoute("SPA", "{*catchall}", spaDefaults);
}


4.Add AjaxCrawlableAttribute As A Filter

/// <summary>
/// Holds the registration of application-wide MVC filters.
/// </summary>
public class FilterConfig
{
    /// <summary>
    /// Adds an <see cref="AjaxCrawlableAttribute"/> instance to the global filter collection.
    /// </summary>
    /// <param name="filters">Global filter collection to add to.</param>
    public static void RegisterGlobalFilters(GlobalFilterCollection filters)
    {
        var crawlableFilter = new AjaxCrawlableAttribute();
        filters.Add(crawlableFilter);
    }
}


5.Create HtmlSnapshot Controller

/// <summary>
/// Renders the requested page with PhantomJS (Scripts\seo.js) and returns the
/// captured HTML, with Angular ng-view / ui-view / ng-app markers stripped,
/// to the view through <c>ViewData["result"]</c>.
/// </summary>
/// <param name="url">
/// Location to render in the form "port/path"; it is appended to
/// "scheme://host:" of the current request.
/// </param>
/// <returns>
/// The snapshot view, or an HTTP 400 result when <paramref name="url"/> is
/// missing, malformed, or resolves to a host other than the current one.
/// </returns>
public ActionResult returnHTML(string url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return new HttpStatusCodeResult(400, "Missing snapshot URL");
    }

    var requestHost = HttpContext.Request.Url.Host;
    var prefix = HttpContext.Request.Url.Scheme + "://" + requestHost + ":";

    // The value comes from the request, so validate it before it reaches a command line.
    // The host check rejects inputs such as "80@other.host/" that would move the
    // render to another server.
    Uri target;
    if (!Uri.TryCreate(prefix + url, UriKind.Absolute, out target)
        || !string.Equals(target.Host, requestHost, StringComparison.OrdinalIgnoreCase))
    {
        return new HttpStatusCodeResult(400, "Invalid snapshot URL");
    }

    // AbsoluteUri is percent-encoded; also neutralise any remaining quote so the
    // URL stays a single quoted argument.
    var safeUrl = target.AbsoluteUri.Replace("\"", "%22");

    string appRoot = Path.GetDirectoryName(AppDomain.CurrentDomain.BaseDirectory);
    var startInfo = new ProcessStartInfo
    {
        Arguments = string.Format("{0} {1}", "\"" + Path.Combine(appRoot, "Scripts\\seo.js") + "\"", "\"" + safeUrl + "\""),
        FileName = "\"" + Path.Combine(appRoot, "bin\\phantomjs.exe") + "\"",
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        RedirectStandardInput = true,
        StandardOutputEncoding = System.Text.Encoding.UTF8
    };

    string output1;
    using (var p = new Process())
    {
        p.StartInfo = startInfo;
        // Drain stderr asynchronously: a redirected stream that is never read can
        // fill its buffer and block the child while we wait on stdout.
        p.ErrorDataReceived += (sender, e) => { };
        p.Start();
        p.BeginErrorReadLine();
        output1 = p.StandardOutput.ReadToEnd();
        p.WaitForExit();
    }

    // Strip Angular bootstrapping markers from the rendered markup.
    var removeNgUiView = output1.Replace("<!-- ngView: -->", "").Replace("ng-view=\"\"", "");
    removeNgUiView = Regex.Replace(removeNgUiView, "<!--\\suiView:\\s\\w*\\s-->", "");
    removeNgUiView = Regex.Replace(removeNgUiView, "(<\\w+[^>]*)(ui-view(=\"\\w*\")?)([^<]*>)", "$1$4");
    removeNgUiView = Regex.Replace(removeNgUiView, "(<\\w+[^>]*)(ng-app(=\"\\w*\")?)([^<]*>)", "$1$4");
    ViewData["result"] = removeNgUiView;
    return View();
}


6.Create Views of Controller

@* Snapshot view: uses no layout and writes the markup stored in ViewData["result"] without HTML encoding. *@
@{
Layout = null;
}
@Html.Raw(ViewData["result"])


7.Create seo.js in Script (!Important) Folder

// PhantomJS script: opens the URL given as the first argument, waits until the
// page looks fully rendered, prints the resulting HTML to stdout and exits.
// Usage: phantomjs seo.js <url>
var page = require('webpage').create();
var system = require('system');

var POLL_INTERVAL_MS = 300;  // polling period, and the required quiet time after the last response
var MAX_WAIT_MS = 10000;     // hard limit on total rendering time

var lastReceived = new Date().getTime();
var requestCount = 0;
var responseCount = 0;
var requestIds = [];
var startTime = new Date().getTime();
var checkCompleteInterval = null;

// Count each tracked request once when a response for it arrives.
page.onResourceReceived = function (response) {
    var index = requestIds.indexOf(response.id);
    if (index !== -1) {
        lastReceived = new Date().getTime();
        responseCount++;
        requestIds[index] = null;
    }
};

// Track every distinct outgoing request.
page.onResourceRequested = function (request) {
    if (requestIds.indexOf(request.id) === -1) {
        requestIds.push(request.id);
        requestCount++;
    }
};

// True when the page exposes an element named "compositionComplete".
function checkLoaded() {
    return page.evaluate(function () {
        return document.all["compositionComplete"];
    }) != null;
}

// Print the page and exit once all requests have finished and the network has
// been quiet for one polling period, or the page signals completion, or
// MAX_WAIT_MS has elapsed.
function checkComplete() {
    var now = new Date().getTime();
    var networkIdle = now - lastReceived > POLL_INTERVAL_MS && requestCount === responseCount;
    var timedOut = now - startTime > MAX_WAIT_MS;
    if (networkIdle || timedOut || checkLoaded()) {
        clearInterval(checkCompleteInterval);
        console.log(page.content);
        phantom.exit();
    }
}

if (system.args.length < 2) {
    system.stderr.writeLine('Usage: phantomjs seo.js <url>');
    phantom.exit(1);
} else {
    // Open the page; stop with a non-zero exit code if it cannot be loaded.
    page.open(system.args[1], function (status) {
        if (status !== 'success') {
            clearInterval(checkCompleteInterval);
            system.stderr.writeLine('Unable to load ' + system.args[1]);
            phantom.exit(1);
        }
    });

    // Poll until the page is finished rendering.
    checkCompleteInterval = setInterval(checkComplete, POLL_INTERVAL_MS);
}


8.Layout.cshtml Based On:

<!DOCTYPE html>
<html ng-app="appOne">
<head>
<!-- The "fragment" meta tag below is part of the _escaped_fragment_ crawling setup described in this tutorial. -->
<meta name="fragment" content="!">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta charset="utf-8" />
<link href="~/favicon.ico" rel="shortcut icon" type="image/x-icon" />
<meta name="viewport" content="width=device-width" />
<!-- Base URL for relative links; the app below enables HTML5 mode. -->
<base href="/">
@Styles.Render("~/Content/css")
@Scripts.Render("~/bundles/modernizr")
<script src="~/Scripts/angular/angular.js"></script>
<script src="~/Scripts/angular/angular-route.js"></script>
<script src="~/Scripts/angular/angular-animate.js"></script>
<script>
// Demo module: three inline-template routes ('/one', '/two', '/') with HTML5-mode URLs.
angular.module('appOne', ['ngRoute'], function ($routeProvider, $locationProvider) {
$routeProvider.when('/one', {
template: "<div>one</div>", controller: function ($scope) {
}
})
.when('/two', {
template: "<div>two</div>", controller: function ($scope) {
}
}).when('/', {
template: "<div>home</div>", controller: function ($scope) {
}
});
// HTML5 mode: routes use plain paths instead of '#' URLs.
$locationProvider.html5Mode({
enabled: true
});
});
</script>
</head>
<body>
<div id="body">
<!-- The template of the active Angular route is rendered in this element. -->
<section ng-view></section>
@RenderBody()
</div>
<div id="footer">
<ul class='xoxo blogroll'>
<li><a href="one">one</a></li>
<li><a href="two">two</a></li>
</ul>
</div>
</body>
</html>


NOTE: PhantomJS cannot process Persian Links(UTF-8)

Third part: other things you may need to know...
I don't see anything else that may be involved; if you spot something, ask me and I'll edit my question.

Answer

Here's what i did:

for configuration:

<!-- First rule: applies to requests whose URI does not contain "_escaped_fragment_="; stopProcessing="false" lets later rules run as well. -->
<rule name="Crawler" stopProcessing="false">
  <match url=".*"/> <!-- matches every URL; the whole match is available as {R:0} -->
  <conditions logicalGrouping="MatchAll">
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="(.*)_escaped_fragment_=(.*)" ignoreCase="true" negate="true" /><!-- negated: holds only when the URI does not contain "_escaped_fragment_=" -->
  </conditions>
  <action type="Rewrite" url="{R:0}" /><!-- rewrites to the URL matched by the rule ({R:0}); no condition back-reference is used -->
</rule>
<!-- Second rule: rewrites to "/" when the request is not an existing file or directory and does not start with one of the excluded prefixes. -->
<rule name="Index Rule" stopProcessing="true">
  <match url=".*" />
  <conditions logicalGrouping="MatchAll">
    <add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
    <add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/api/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Token" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/bundles/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Content/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Scripts/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/SiteMap/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/CallBackBank/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Error/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/HtmlSnapshot[^s]?/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Flight[^s]?/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Hotel[^s]?/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Tour[^s]?/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/TravelAgency[^s]?/" negate="true" />
    <add input="{REQUEST_URI}" matchType="Pattern" pattern="^/Users[^s]?/" negate="true" />
  </conditions>
  <action type="Rewrite" url="/" />
</rule>

And here's for Filter:

var request = filterContext.RequestContext.HttpContext.Request;
        if (request.Url == null)
            return;
var url = request.Url.Scheme + "://" + request.Url.Authority + request.RawUrl; //THIS LINE (`RawUrl` contains the rest of the path)