zi3guw zi3guw - 3 months ago 18
C# Question

WebUtility.HtmlDecode vs HttpUtilty.HtmlDecode

I was using

WebUtilty.HtmlDecode
to decode HTML. It turns out that it doesn't decode properly, for example,
–
is supposed to decode to a "–" character, but
WebUtilty.HtmlDecode
does not decode it.
HttpUtilty.HtmlDecode
, however, does.

Debug.WriteLine(WebUtility.HtmlDecode("–"));
Debug.WriteLine(HttpUtility.HtmlDecode("–"));


> –
> –


decode screenshot

The documentation for both of these is the same:
Converts a string that has been HTML-encoded for HTTP transmission into a decoded string.

Why are they different, which one should I be using, and what will change if I switch to WebUtility.HtmlDecode to get "–" to decode correctly?

Answer

The implementation of the two methods are indeed different on Windows Phone.

WebUtility.HtmlDecode:

public static void HtmlDecode(string value, TextWriter output)
{
    if (value != null)
    {
        if (output == null)
        {
            throw new ArgumentNullException("output");
        }
        if (!StringRequiresHtmlDecoding(value))
        {
            output.Write(value);
        }
        else
        {
            int length = value.Length;
            for (int i = 0; i < length; i++)
            {
                bool flag;
                uint num4;
                char ch = value[i];
                if (ch != '&')
                {
                    goto Label_01B6;
                }
                int num3 = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
                if ((num3 <= 0) || (value[num3] != ';'))
                {
                    goto Label_01B6;
                }
                string entity = value.Substring(i + 1, (num3 - i) - 1);
                if ((entity.Length <= 1) || (entity[0] != '#'))
                {
                    goto Label_0188;
                }
                if ((entity[1] == 'x') || (entity[1] == 'X'))
                {
                    flag = uint.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out num4);
                }
                else
                {
                    flag = uint.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out num4);
                }
                if (flag)
                {
                    switch (_htmlDecodeConformance)
                    {
                        case UnicodeDecodingConformance.Strict:
                            flag = (num4 < 0xd800) || ((0xdfff < num4) && (num4 <= 0x10ffff));
                            goto Label_0151;

                        case UnicodeDecodingConformance.Compat:
                            flag = (0 < num4) && (num4 <= 0xffff);
                            goto Label_0151;

                        case UnicodeDecodingConformance.Loose:
                            flag = num4 <= 0x10ffff;
                            goto Label_0151;
                    }
                    flag = false;
                }
            Label_0151:
                if (!flag)
                {
                    goto Label_01B6;
                }
                if (num4 <= 0xffff)
                {
                    output.Write((char) num4);
                }
                else
                {
                    char ch2;
                    char ch3;
                    ConvertSmpToUtf16(num4, out ch2, out ch3);
                    output.Write(ch2);
                    output.Write(ch3);
                }
                i = num3;
                goto Label_01BD;
            Label_0188:
                i = num3;
                char ch4 = HtmlEntities.Lookup(entity);
                if (ch4 != '\0')
                {
                    ch = ch4;
                }
                else
                {
                    output.Write('&');
                    output.Write(entity);
                    output.Write(';');
                    goto Label_01BD;
                }
            Label_01B6:
                output.Write(ch);
            Label_01BD:;
            }
        }
    }
}

HttpUtility.HtmlDecode:

public static string HtmlDecode(string html)
{
    if (html == null)
    {
        return null;
    }
    if (html.IndexOf('&') < 0)
    {
        return html;
    }
    StringBuilder sb = new StringBuilder();
    StringWriter writer = new StringWriter(sb, CultureInfo.InvariantCulture);
    int length = html.Length;
    for (int i = 0; i < length; i++)
    {
        char ch = html[i];
        if (ch == '&')
        {
            int num3 = html.IndexOfAny(s_entityEndingChars, i + 1);
            if ((num3 > 0) && (html[num3] == ';'))
            {
                string entity = html.Substring(i + 1, (num3 - i) - 1);
                if ((entity.Length > 1) && (entity[0] == '#'))
                {
                    try
                    {
                        if ((entity[1] == 'x') || (entity[1] == 'X'))
                        {
                            ch = (char) int.Parse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            ch = (char) int.Parse(entity.Substring(1), CultureInfo.InvariantCulture);
                        }
                        i = num3;
                    }
                    catch (FormatException)
                    {
                        i++;
                    }
                    catch (ArgumentException)
                    {
                        i++;
                    }
                }
                else
                {
                    i = num3;
                    char ch2 = HtmlEntities.Lookup(entity);
                    if (ch2 != '\0')
                    {
                        ch = ch2;
                    }
                    else
                    {
                        writer.Write('&');
                        writer.Write(entity);
                        writer.Write(';');
                        continue;
                    }
                }
            }
        }
        writer.Write(ch);
    }
    return sb.ToString();
}

Interestingly, WebUtility doesn't exist on WP7. Also, the WP8 implementation of WebUtility is identical to the desktop one. The desktop implementation of HttpUtility.HtmlDecode is just a wrapper around WebUtility.HtmlDecode. Last but not least, Silverlight 5 has the same implementation of HttpUtility.HtmlDecode as Windows Phone, and does not implement WebUtility.

From there, I can venture a guess: since the Windows Phone 7 runtime is based on Silverlight, WP7 inherited of the Silverlight version of HttpUtility.HtmlDecode, and WebUtility wasn't present. Then came WP8, whose runtime is based on WinRT. WinRT brought WebUtility, and the old version of HttpUtility.HtmlDecode was kept to ensure the compatibility with the legacy WP7 apps.

As to know which one you should use... If you want to target WP7 then you have no choice but to use HttpUtility.HtmlDecode. If you're targeting WP8, then just pick the method whose behavior suits your needs the best. WebUtility is probably the future-proof choice, just in case Microsoft decides to ditch the Silverlight runtime in an upcoming version of Windows Phone. But I'd just go with the practical choice of picking HttpUtility to not have to worry about manually supporting the example you've put in your question.