通過 C# 來實現對目標內容的抓取工作,然後直接寫入 TXT 文檔中,這樣的操作在工作中可以節省很多時間。想嘗試用代碼的方式解決這些問題嗎?一起來學習試試看吧。
/// <summary>
/// Console scraper: downloads the chapter index page of one novel, follows every
/// chapter link found under the element with id "list", and appends each chapter's
/// title and text to "xs.txt" in the application base directory.
/// </summary>
public class Program
{
    // Directory the output file is written into (the application's base directory).
    private static readonly string baseUrl = System.AppDomain.CurrentDomain.SetupInformation.ApplicationBase;

    // Matches <br>, <br/> and <br /> in any letter case, so every line-break
    // spelling is converted, not just the literal "<br>".
    private static readonly System.Text.RegularExpressions.Regex LineBreakTag =
        new System.Text.RegularExpressions.Regex(
            @"<br\s*/?>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    /// <summary>
    /// Program entry point. Runs the scrape to completion.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        // GetAwaiter().GetResult() surfaces the original exception instead of the
        // AggregateException that Task.Result would wrap it in.
        RunAsync().GetAwaiter().GetResult();
    }

    /// <summary>
    /// Fetches the chapter index, then each chapter in document order, appending
    /// the results to the output file. Chapters that cannot be fetched or parsed
    /// are reported on standard error and skipped.
    /// </summary>
    private static async Task RunAsync()
    {
        string baseReUrl = "https://www.biduoxs.com/";
        var indexUri = new System.Uri("https://www.biduoxs.com/biquge/17_17005/");

        // Fall back to a relative path (current directory) if the base directory is unavailable.
        string outputPath = System.IO.Path.Combine(baseUrl ?? string.Empty, "xs.txt");

        // One client is reused for every request instead of creating one per chapter.
        var client = new RestClient(baseReUrl);

        string indexHtml = await FetchHtmlAsync(client, indexUri);
        if (indexHtml == null)
        {
            return;
        }

        var indexDoc = new HtmlAgilityPack.HtmlDocument();
        indexDoc.LoadHtml(indexHtml);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var chapterLinks = indexDoc.DocumentNode.SelectNodes("//*[@id=\"list\"]/dl/dd/a");
        if (chapterLinks == null)
        {
            Console.Error.WriteLine("No chapter links found at " + indexUri);
            return;
        }

        foreach (var link in chapterLinks)
        {
            string title = link.InnerText;
            Console.WriteLine(title);

            var hrefAttribute = link.Attributes["href"];
            System.Uri chapterUri;
            if (hrefAttribute == null
                || string.IsNullOrWhiteSpace(hrefAttribute.Value)
                // Resolve against the index page so root-relative, page-relative and
                // absolute hrefs all yield a valid URL (plain concatenation does not).
                || !System.Uri.TryCreate(indexUri, hrefAttribute.Value, out chapterUri))
            {
                Console.Error.WriteLine("Skipping chapter without a usable link: " + title);
                continue;
            }

            string chapterHtml = await FetchHtmlAsync(client, chapterUri);
            if (chapterHtml == null)
            {
                continue;
            }

            var chapterDoc = new HtmlAgilityPack.HtmlDocument();
            chapterDoc.LoadHtml(chapterHtml);

            var contentNode = chapterDoc.GetElementbyId("content");
            if (contentNode == null)
            {
                Console.Error.WriteLine("No element with id \"content\" at " + chapterUri);
                continue;
            }

            // Title and body are written together, with a trailing newline so the next
            // chapter's title does not run on from the end of this chapter's text.
            System.IO.File.AppendAllText(
                outputPath,
                title + System.Environment.NewLine
                    + ToPlainText(contentNode.InnerHtml) + System.Environment.NewLine);
        }
    }

    /// <summary>
    /// Downloads one page and returns its body.
    /// </summary>
    /// <param name="client">Client used to execute the request.</param>
    /// <param name="pageUri">Absolute URL of the page to download.</param>
    /// <returns>The response body, or null when the request failed or the body was empty.</returns>
    private static async Task<string> FetchHtmlAsync(RestClient client, System.Uri pageUri)
    {
        RestResponse response = await client.ExecuteAsync(new RestRequest(pageUri.AbsoluteUri));
        if (!response.IsSuccessful || string.IsNullOrEmpty(response.Content))
        {
            Console.Error.WriteLine("Request failed for " + pageUri + ": " + response.StatusCode);
            return null;
        }

        return response.Content;
    }

    /// <summary>
    /// Converts a chapter's inner HTML into text: line-break tags become the
    /// platform newline and non-breaking spaces become ordinary spaces.
    /// </summary>
    /// <param name="innerHtml">Inner HTML of the chapter content element.</param>
    /// <returns>The converted text.</returns>
    private static string ToPlainText(string innerHtml)
    {
        string withLineBreaks = LineBreakTag.Replace(innerHtml, System.Environment.NewLine);

        // NOTE(review): the original Replace call had two visually identical arguments;
        // presumably it was meant to normalise non-breaking spaces, so both the entity
        // and the raw U+00A0 character are handled here - confirm against the site markup.
        return withLineBreaks.Replace("&nbsp;", " ").Replace('\u00A0', ' ');
    }
}
免責聲明:文中圖文均來自網絡,如有侵權請聯繫刪除。
歐普軟件發布此文僅為傳遞信息,不代表歐普軟件認同其觀點或證實其描述。