Source: https://www.cnblogs.com/wangchuang/archive/2013/03/11/2953638.html
Before working with XPath on HTML, first download the HtmlAgilityPack DLL file
Download address: http://htmlagilitypack.codeplex.com/
Click the following picture to download
<ignore_js_op>xpath1.jpg
The next step is to add a reference to the DLL in your project,
<ignore_js_op>xpath2.jpg
Then you can call it directly. Let's have a look
Code bar
Common browse copy code
// The HtmlDocument object is used to access the HTML document.
HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

// Load the HTML text (strhtml holds the page source).
hd.LoadHtml(strhtml);

// SelectSingleNode returns null when the XPath matches nothing, so check the
// result before reading OuterHtml instead of dereferencing it directly.
HtmlAgilityPack.HtmlNode node = hd.DocumentNode.SelectSingleNode("//*[@id='e_font']");
string str = node == null ? string.Empty : node.OuterHtml;
In this way, you can get the HTML code of a tag.
OuterHtml returns the tag's HTML including the tag itself; InnerHtml returns only the HTML contained inside the tag.
We should pay attention to this
If you want to get the Xpath path of Html code, this is the part
//*[@id='e_font']
Copy code
This is actually very simple. Just install Firebug,
Look at the picture
<ignore_js_op>xpath3.jpg
Just enter the selection mode, select the content you want, and then right-click to copy it.
Then put it in the SelectSingleNode () method
Let me talk about the meaning of several methods and attributes
method
SelectNodes gets a collection
SelectSingleNode gets a single node (the first node that matches the XPath)
SetAttributeValue sets the attribute value of the tag, for example: SetAttributeValue("name","xpath-89"); This means that the value of the name attribute is changed to xpath-89
attribute
OuterHtml is the Html containing itself
InnerHtml takes all Html codes contained in this tag
XPath gets the XPath expression of the node
Attributes gets the attributes of the tag; a single attribute is read by name, for example: Attributes["name"]
You can also add attributes, such as:
Common browse copy code
// SelectSingleNode returns null when the XPath (item.Key) matches nothing,
// so only add the attribute when a node was actually found.
HtmlAgilityPack.HtmlNode target = hd.DocumentNode.SelectSingleNode(item.Key);
if (target != null)
{
    target.Attributes.Add("xpathid", "xpath_1");
}
Next, I wrote a method to recursively obtain all Xpath values of Html pages. Let's have a look
Common browse copy code
// One entry per visited node: Key holds the node's XPath, Value is left empty here.
public List<ObjXpath> XpathList = new List<ObjXpath>();

// Put your HTML code here. It can be fetched with the HttpHelper class
// (see http://www.sufeinet.com/thread-3-1-1.Html).
public string strhtml = "";

// Running counter used as the id of the next ObjXpath entry.
private int Index = 0;

/// <summary>
/// Parses <c>strhtml</c> and rebuilds <c>XpathList</c> from scratch.
/// </summary>
private void SartNode()
{
    // The HtmlDocument object is used to access the HTML document.
    HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

    // Load the HTML text.
    hd.LoadHtml(strhtml);
    HtmlNodeCollection htmllist = hd.DocumentNode.ChildNodes;

    // Reset the results of any previous run.
    Index = 0;
    XpathList.Clear();

    // NOTE(review): Setxpath records the children of the node it is given, not the
    // node itself, so the top-level nodes in htmllist are not added to XpathList.
    // Confirm whether that is intended.
    foreach (HtmlNode em in htmllist)
    {
        Setxpath(em);
    }
}

/// <summary>
/// Walks the HTML DOM recursively and records the XPath of every descendant of
/// <paramref name="node"/>, skipping nodes whose XPath contains '#'.
/// </summary>
/// <param name="node">Node whose children are processed.</param>
private void Setxpath(HtmlNode node)
{
    foreach (HtmlNode item in node.ChildNodes)
    {
        // Read the XPath once; it is used for both the filter and the entry.
        string xpath = item.XPath;
        if (xpath.Contains("#"))
        {
            continue;
        }

        XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = xpath, Value = "" });
        Index++;

        // Only descend when there is something below this node.
        if (item.ChildNodes.Count > 0)
        {
            Setxpath(item);
        }
    }
}

/// <summary>
/// Simple record describing one discovered node.
/// </summary>
public class ObjXpath
{
    /// <summary>Sequential index of the entry, stored as a string.</summary>
    public string id { get; set; }

    /// <summary>XPath of the node.</summary>
    public string Key { get; set; }

    /// <summary>Associated value; the code above always stores an empty string.</summary>
    public string Value { get; set; }
}
XpathList is all the obtained Xpath values. If you are interested, you can try it
Let's see the effect first
<ignore_js_op>xpath4.jpg
Well, let's release all the code to you
Common browse copy code
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.Threading;
using HtmlAgilityPack;
using System.IO;
using System.Runtime.Serialization.Json;

namespace AutoXpathTools
{
    /// <summary>
    /// Main form: downloads a page, lists the XPath of every element in it and
    /// shows the outer HTML of the node selected by an XPath.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        #region private variables and methods

        // Delegate that passes a string; used to marshal UIContorol onto the UI thread.
        private delegate void SetListBox(string str);

        // One entry per visited node: Key holds the node's XPath, Value is left empty.
        List<ObjXpath> XpathList = new List<ObjXpath>();

        // Running counter used as the id of the next ObjXpath entry.
        private int Index = 0;

        // Document loaded on the UI thread; button3_Click queries it.
        HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

        #endregion

        /// <summary>
        /// Downloads the page at the URL in textBox1, shows its source in txtXml and
        /// starts a worker thread that lists the XPath of every node.
        /// </summary>
        private void btnGetXpath_Click(object sender, EventArgs e)
        {
            try
            {
                HttpHelper http = new HttpHelper();
                HttpItem item = new HttpItem()
                {
                    URL = textBox1.Text.Trim(),
                    IsToLower = false,
                    Encoding = "gbk"
                };
                txtXml.Text = http.GetHtml(item);

                // Read the text once, here on the UI thread, so the worker thread
                // never has to touch the TextBox.
                string html = txtXml.Text;

                if (!string.IsNullOrWhiteSpace(html)
                    && !string.Equals(html.Trim(), "error", StringComparison.OrdinalIgnoreCase))
                {
                    // Load Html document
                    hd.LoadHtml(html);

                    // XpathList is rebuilt by SartNode; clear the list box as well so
                    // entries from a previous page do not accumulate.
                    listBox1.Items.Clear();

                    Thread pingTask = new Thread(new ThreadStart(delegate
                    {
                        // Code to be executed by the thread
                        SartNode(html);
                    }));
                    // Background thread: it must not keep the process alive after the form closes.
                    pingTask.IsBackground = true;
                    pingTask.Start();
                }
                else
                {
                    txtXml.Text = "According to your ULR: " + textBox1.Text.Trim() + "Can't get anything";
                }
            }
            catch (Exception ex)
            {
                txtXml.Text = ex.Message.Trim();
            }
        }

        /// <summary>
        /// Parses <paramref name="strhtml"/> and rebuilds <c>XpathList</c>.
        /// Runs on the worker thread started by btnGetXpath_Click.
        /// </summary>
        /// <param name="strhtml">HTML source to analyse.</param>
        private void SartNode(string strhtml)
        {
            // Separate document instance for the worker thread; the hd field stays
            // with the UI thread.
            HtmlAgilityPack.HtmlDocument workerDoc = new HtmlAgilityPack.HtmlDocument();

            // Load Html document
            workerDoc.LoadHtml(strhtml);
            HtmlNodeCollection htmllist = workerDoc.DocumentNode.ChildNodes;

            // Reset the results of any previous run.
            Index = 0;
            XpathList.Clear();

            foreach (HtmlNode em in htmllist)
            {
                Setxpath(em);
            }
        }

        /// <summary>
        /// Get Html Dom recursively: records the XPath of every descendant of
        /// <paramref name="node"/>, skipping nodes whose XPath contains '#'.
        /// </summary>
        /// <param name="node">node to process</param>
        private void Setxpath(HtmlNode node)
        {
            foreach (HtmlNode item in node.ChildNodes)
            {
                // Read the XPath once; it is used for the filter, the entry and the UI.
                string xpath = item.XPath;
                if (xpath.Contains("#"))
                {
                    continue;
                }

                XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = xpath, Value = "" });
                UIContorol(xpath);
                Index++;

                // Only descend when there is something below this node.
                if (item.ChildNodes.Count > 0)
                {
                    Setxpath(item);
                }
            }
        }

        /// <summary>
        /// Appends <paramref name="str"/> to the list box and shows it in the status bar.
        /// Safe to call from the worker thread: the call is marshalled to the UI
        /// thread through the SetListBox delegate.
        /// </summary>
        /// <param name="str">XPath text to display.</param>
        private void UIContorol(string str)
        {
            // The form may already be closed while the worker is still running.
            if (IsDisposed || !IsHandleCreated)
            {
                return;
            }

            if (InvokeRequired)
            {
                // Windows Forms controls may only be touched from the thread that created them.
                Invoke(new SetListBox(UIContorol), new object[] { str });
                return;
            }

            listBox1.Items.Add(str);
            toolStripStatusLabel1.Text = str;
        }

        /// <summary>
        /// Copies the XPath selected in the list box into txtPath.
        /// </summary>
        private void listBox1_SelectedValueChanged(object sender, EventArgs e)
        {
            if (listBox1.SelectedItem != null)
            {
                txtPath.Text = listBox1.SelectedItem.ToString().Trim();
            }
        }

        /// <summary>
        /// Shows the outer HTML of the node addressed by the XPath in txtPath.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            string xpath = txtPath.Text.Trim();
            if (xpath.Length == 0)
            {
                // Nothing to look up.
                txtContents.Text = string.Empty;
                return;
            }

            // SelectSingleNode returns null when the XPath matches nothing.
            HtmlNode node = hd.DocumentNode.SelectSingleNode(xpath);
            txtContents.Text = node == null ? string.Empty : node.OuterHtml;
        }

        /// <summary>
        /// Form load handler; intentionally empty.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // The commented-out login sample that used to be here was removed because
            // it embedded an account name, password and session cookie in the source.
        }
    }

    /// <summary>
    /// Simple record describing one discovered node.
    /// </summary>
    public class ObjXpath
    {
        /// <summary>Sequential index of the entry, stored as a string.</summary>
        public string id { get; set; }

        /// <summary>XPath of the node.</summary>
        public string Key { get; set; }

        /// <summary>Associated value; the code in this file always stores an empty string.</summary>
        public string Value { get; set; }
    }
}
That's it. You can download my source code to try
Package download:
<ignore_js_op>AutoXpathTools.zip (76.32 KB, downloads: 0)
If you feel OK, recommend it to me. Thank you