【问题标题】:Validate HTML5 in C#(在 C# 中验证 HTML5)
【发布时间】:2013-10-12 05:11:25
【问题描述】:

我们目前正在用 C# 构建一个全新的应用程序。我们有大量使用 Selenium Web 驱动程序的 UI 测试。这些测试(以及单元测试)由我们的 CI 服务器运行。

Selenium 公开了一个 .PageSource 属性,并且(对我而言)通过 HTML5 验证器运行该源代码作为每个 UI 测试的另一部分是有意义的。

我想检查出与 http://validator.w3.org/ 所能发现的同类问题。如果还能顺带检查 Section 508(无障碍访问)方面的问题,那就更好了。

我的问题是,我找不到任何可以在本地完成这件事、并且很容易集成到我的 UI 测试中的工具。W3C 站点公开了一个 SOAP API,但是我不想在 CI 过程中访问他们的站点。它们似乎也不支持返回 SOAP 响应。我想避免在本地安装完整的 W3C 服务器。

我看到的最接近的是http://www.totalvalidator.com/,使用它需要编写临时文件和解析报告。

在我走这条路之前,我想看看是否有人知道另一种方法。最好是我可以调用的 DotNet 程序集。

c

【问题讨论】:

标签: c# html selenium


【解决方案1】:

最好的 HTML5 验证器 nu checker 是用 Java 编写的,很难与 .NET 交互。但是 libtidy 可以封装成 C++ dll,以便从托管代码中调用。他们发布的示例程序稍作修改后,对我来说效果很好。

LibTidy.h:

// Managed (C++/CLI) entry point that lets .NET callers run HTML Tidy
// over a string of markup.
public ref class LibTidy
{
public:
    // input:   the markup to check, as a managed string.
    // returns: a managed string built from the diagnostics buffer that the
    //          implementation in LibTidy.cpp registers with tidy.
    System::String^ __clrcall Test(System::String^ input);
};

LibTidy.cpp:

// Runs HTML Tidy over `input` and returns whatever diagnostics tidy wrote to
// its error buffer, as a managed string.
//
// input:   the markup to check.
// returns: the captured diagnostics text; an empty string when tidy produced
//          no output (or the error buffer was never populated).
System::String^ __clrcall LibTidy::Test(System::String^ input)
{
    // Convert the managed UTF-16 string to a narrow string for libtidy.
    CStringW cstring(input);

    const size_t newsizew = (cstring.GetLength() + 1) * 2;
    char* nstringw = new char[newsizew];
    size_t convertedCharsw = 0;
    // _TRUNCATE keeps the conversion inside the buffer instead of failing
    // when the multibyte form is longer than the space reserved above.
    wcstombs_s(&convertedCharsw, nstringw, newsizew, cstring, _TRUNCATE);

    TidyBuffer errbuf = { 0 };
    int rc = -1;
    Bool ok;

    TidyDoc tdoc = tidyCreate();                     // Initialize "document"

    ok = tidyOptSetBool(tdoc, TidyShowInfo, no);
    ok = tidyOptSetBool(tdoc, TidyQuiet, yes);
    ok = tidyOptSetBool(tdoc, TidyEmacs, yes);
    if (ok)
        rc = tidySetErrorBuffer(tdoc, &errbuf);      // Capture diagnostics
    if (rc >= 0)
        rc = tidyParseString(tdoc, nstringw);        // Parse the input
    if (rc >= 0)
        rc = tidyCleanAndRepair(tdoc);               // Tidy it up!
    if (rc >= 0)
        rc = tidyRunDiagnostics(tdoc);               // Kvetch

    // Copy the diagnostics into a managed string BEFORE releasing the tidy
    // buffer: errbuf.bp is owned by errbuf and becomes dangling once
    // tidyBufFree runs.
    // NOTE(review): assumes tidy keeps errbuf.bp NUL-terminated - confirm
    // against the libtidy buffer documentation.
    System::String^ result = (errbuf.bp != NULL)
        ? gcnew System::String((char*)errbuf.bp)
        : System::String::Empty;

    if (errbuf.allocator != NULL) tidyBufFree(&errbuf);
    tidyRelease(tdoc);
    delete[] nstringw;                               // Was leaked on every call

    return result;
}

【讨论】:

    【解决方案2】:

    在这个问题上花了整个周末后,我能看到的唯一解决方案是一个名为 CSE HTML Validator 的商业库

    它位于这里http://www.htmlvalidator.com/htmldownload.html

    我为它写了一个简单的包装器。这是代码

    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.IO;
    using System.Linq;
    
    [assembly: CLSCompliant(true)]
    namespace HtmlValidator
    {
    
    /// <summary>
    /// Wraps the CSE HTML Validator command-line processor: writes the HTML to a
    /// temporary file, runs the external validator against it, and sorts the JSON
    /// messages it prints into errors, warnings and other messages.
    /// </summary>
    public class Validator
    {
        #region Constructors...

        /// <summary>
        /// Creates a validator for the given markup. Nothing is executed until
        /// <see cref="ValidateHtmlFile"/> is called.
        /// </summary>
        /// <param name="htmlToValidate">The HTML source to validate.</param>
        public Validator(string htmlToValidate)
        {
            HtmlToValidate = htmlToValidate;
            HasExecuted = false;
            Errors = new List<ValidationResult>();
            Warnings = new List<ValidationResult>();
            OtherMessages = new List<ValidationResult>();

        }

        #endregion



        #region Properties...
        /// <summary>Messages whose type is "ERROR".</summary>
        public IList<ValidationResult> Errors { get; private set; }
        /// <summary>True once <see cref="ValidateHtmlFile"/> has completed successfully.</summary>
        public bool HasExecuted { get; private set; }
        /// <summary>The markup passed to the constructor.</summary>
        public string HtmlToValidate { get; private set; }
        /// <summary>Messages that are neither "ERROR" nor "WARNING".</summary>
        public IList<ValidationResult> OtherMessages { get; private set; }
        /// <summary>Raw standard output captured from the validator process.</summary>
        public string ResultsString { get; private set; }
        /// <summary>Path of the temporary file used for the most recent run.</summary>
        public string TempFilePath { get; private set; }
        /// <summary>Messages whose type is "WARNING".</summary>
        public IList<ValidationResult> Warnings { get; private set; }
        #endregion



        #region Public methods...
        /// <summary>
        /// Runs the external validator over <see cref="HtmlToValidate"/> and fills
        /// <see cref="Errors"/>, <see cref="Warnings"/> and <see cref="OtherMessages"/>.
        /// </summary>
        public void ValidateHtmlFile()
        {
            WriteTempFile();

            try
            {
                ExecuteValidator();
            }
            finally
            {
                // Remove the temp file even when the validator could not be run.
                DeleteTempFile();
            }

            ParseResults();

            HasExecuted = true;
        }

        #endregion



        #region Private methods...
        /// <summary>Deletes the temporary file created by <see cref="WriteTempFile"/>.</summary>
        private void DeleteTempFile()
        {
            // Delete the file that was actually written. Asking for a new temp
            // file name here would create a second file and leave the first behind.
            if (!string.IsNullOrEmpty(TempFilePath))
            {
                File.Delete(TempFilePath);
            }
        }


        /// <summary>
        /// Starts the command-line validator on the temp file and captures its
        /// standard output into <see cref="ResultsString"/>.
        /// </summary>
        private void ExecuteValidator()
        {
            var psi = new ProcessStartInfo(GetHTMLValidatorPath())
            {
                RedirectStandardInput = false,
                RedirectStandardOutput = true,
                RedirectStandardError = false,
                UseShellExecute = false,
                Arguments = String.Format(@"-e,(stdout),0,16 ""{0}""", TempFilePath)
            };

            using (var process = new Process { StartInfo = psi })
            {
                process.Start();

                // Read the redirected stream to the end before waiting, so the
                // child can never block on a full stdout pipe.
                ResultsString = process.StandardOutput.ReadToEnd();
                process.WaitForExit();
            }
        }


        /// <summary>Returns the install location of the CSE HTML Validator command-line processor.</summary>
        private static string GetHTMLValidatorPath()
        {
            return @"C:\Program Files (x86)\HTMLValidator120\cmdlineprocessor.exe";
        }


        /// <summary>
        /// Deserializes <see cref="ResultsString"/> and distributes each message
        /// into the list matching its message type.
        /// </summary>
        private void ParseResults()
        {
            // Start from a clean slate so a repeated run does not duplicate messages.
            Errors.Clear();
            Warnings.Clear();
            OtherMessages.Clear();

            var results = JsonConvert.DeserializeObject<dynamic>(ResultsString);
            IList<InternalValidationResult> messages = results.messages.ToObject<List<InternalValidationResult>>();


            foreach (InternalValidationResult internalValidationResult in messages)
            {
                ValidationResult result = new ValidationResult()
                {
                    Message = internalValidationResult.message,
                    LineNumber = internalValidationResult.linenumber,
                    MessageCategory = internalValidationResult.messagecategory,
                    MessageType = internalValidationResult.messagetype,
                    CharLocation = internalValidationResult.charlocation
                };

                switch (internalValidationResult.messagetype)
                {
                    case "ERROR":
                        Errors.Add(result);
                        break;

                    case "WARNING":
                        Warnings.Add(result);
                        break;

                    default:
                        OtherMessages.Add(result);
                        break;
                }
            }
        }


        /// <summary>
        /// Creates a temporary file, records its path in <see cref="TempFilePath"/>
        /// and writes <see cref="HtmlToValidate"/> into it.
        /// </summary>
        private void WriteTempFile()
        {
            TempFilePath = Path.GetTempFileName();

            // The using block flushes and closes the writer even if the write throws.
            using (StreamWriter streamWriter = File.AppendText(TempFilePath))
            {
                streamWriter.WriteLine(HtmlToValidate);
            }
        }
        #endregion
    }
    }
    
    
    
    
    /// <summary>
    /// A single message reported by the validator, together with its location.
    /// </summary>
    public class ValidationResult
    {
        /// <summary>Message type as reported by the validator, e.g. "ERROR" or "WARNING".</summary>
        public string MessageType { get; set; }

        /// <summary>Category string reported alongside the message.</summary>
        public string MessageCategory { get; set; }

        /// <summary>The message text.</summary>
        public string Message { get; set; }

        /// <summary>Line number reported for the message.</summary>
        public int LineNumber { get; set; }

        /// <summary>Character location reported for the message.</summary>
        public int CharLocation { get; set; }

        /// <summary>
        /// Formats the result as "{type} Line {line} Char {char}:: {message}".
        /// </summary>
        public override string ToString()
        {
            const string template = "{0} Line {1} Char {2}:: {3}";
            string formatted = String.Format(template, MessageType, LineNumber, CharLocation, Message);
            return formatted;
        }
    }
    
    
    /// <summary>
    /// Intermediate store for the messages that come back from the underlying
    /// validator. The property names must be cased exactly like the fields of
    /// the underlying JSON object, which is why the naming code-analysis rules
    /// are suppressed on every property.
    /// </summary>
    public class InternalValidationResult
    {
        #region Properties...
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "charlocation")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "charlocation")]
        public int charlocation { get; set; }

        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "linenumber")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "linenumber")]
        public int linenumber { get; set; }

        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "message")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "message")]
        public string message { get; set; }

        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagecategory")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagecategory")]
        public string messagecategory { get; set; }

        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagetype")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagetype")]
        public string messagetype { get; set; }
        #endregion
    }
    

    使用/测试

       // Complete document; the test below expects the validator to report no errors for it.
       private const string ValidHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body></html>";
        // Same markup with the closing </html> tag left out; the test below expects at least one error for it.
        private const string BrokenHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body>";
    
        /// <summary>
        /// Validating the well-formed sample must complete and produce no errors.
        /// </summary>
        [TestMethod]
        public void CanValidHtmlStringReturnNoErrors()
        {
            var validator = new Validator(ValidHtml);

            validator.ValidateHtmlFile();

            Assert.IsTrue(validator.HasExecuted);
            Assert.IsTrue(validator.Errors.Count == 0);
        }
    
    
        /// <summary>
        /// Validating the broken sample must complete and report at least one
        /// error whose text contains "ERROR".
        /// </summary>
        [TestMethod]
        public void CanInvalidHtmlStringReturnErrors()
        {
            var validator = new Validator(BrokenHtml);

            validator.ValidateHtmlFile();

            Assert.IsTrue(validator.HasExecuted);
            Assert.IsTrue(validator.Errors.Count > 0);

            string firstError = validator.Errors[0].ToString();
            Assert.IsTrue(firstError.Contains("ERROR"));
        }
    

    【讨论】:

      【解决方案3】:

      看起来这个链接可能有你想要的:Automated W3C Validation

      您可以在接受的答案中下载标记验证器并将您的 HTML 传递给它。抱歉,它们不是 .NET 程序集:/,但如果你真的想的话,你可以将它包装在 DLL 中。

      此外,该问题的其中一个回答表明,W3C 服务实际上公开了一个 RESTful API,并且可以返回 SOAP 响应:How might I use the W3C Markup Validator API in my .NET application?

      【讨论】:

      • 感谢您的回复。第一个链接中的二进制工具都不能处理 HTML5。 W3C 服务似乎不允许您将 SOAP 响应与发送要验证的任意内容结合起来(即,您只能给它们提供指向要检查的站点的链接)。这个htmlvalidator.com/htmlval/developer.html 似乎是迄今为止最好的解决方案。它是命令行驱动的,可以处理 HTML5。
      猜你喜欢
      • 2014-06-12
      • 2012-03-15
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2023-03-19
      • 2017-12-13
      • 2021-08-01
      • 2012-07-20
      相关资源
      最近更新 更多