Aspose.HTML - 解析HTML

Aspose.HTML 是一款商业授权的 HTML 解析库，可以实现 HTML 的解析、渲染、格式转换等功能。

Aspose.HTML - 解析HTML

1 example

using System;
using Aspose.Html;

namespace HtmlTest
{
    /// <summary>
    /// Console sample for Aspose.HTML: loads an HTML file, prints parts of it,
    /// changes the text of one element, lists the paragraphs in the body and
    /// saves the modified document.
    /// </summary>
    class HtmlTest
    {
        /// <summary>
        /// Runs the sample against a hard-coded input file and writes the result to "out.html".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // HTMLDocument is disposable; the using statement releases it even when
            // one of the calls below throws.
            using (HTMLDocument htmlPage = new HTMLDocument(@"C:\Dev\Test\AsposeTest\HtmlTest\Sources\page1.html"))
            {
                // Rebuild the page source as doctype + outer HTML of the root element.
                // Doctype is null for a document without a doctype declaration, so it
                // is guarded instead of being dereferenced directly.
                string doctypeText = htmlPage.Doctype != null ? htmlPage.Doctype.ToString() : string.Empty;
                string rawPage = doctypeText + htmlPage.DocumentElement.OuterHTML;
                Console.WriteLine(rawPage);

                // Print the title and the inner HTML of the body.
                Console.WriteLine(htmlPage.Title);
                Console.WriteLine(htmlPage.Body.InnerHTML);

                // Look up a DOM node by id and replace its text. GetElementById yields
                // null when no element carries that id, so report it rather than crash.
                var paragraph = htmlPage.GetElementById("p1");
                if (paragraph != null)
                {
                    paragraph.TextContent = "modified paragraph1";
                }
                else
                {
                    Console.Error.WriteLine("Element with id 'p1' not found; skipping text update.");
                }

                // Walk every <p> element under the body and print its markup.
                foreach (var para in htmlPage.Body.GetElementsByTagName("p"))
                {
                    Console.WriteLine(para.OuterHTML);
                }

                // Write the processed document to disk.
                htmlPage.Save("out.html");
            }
        }
    }
}

2 references

Product Page: Aspose.HTML - aspose.com

Documentation: Aspose.HTML Documentation - aspose.com