c从网页提取数据.docx
《c从网页提取数据.docx》由会员分享,可在线阅读,更多相关《c从网页提取数据.docx(12页珍藏版)》请在冰豆网上搜索。
c从网页提取数据
下面这个函数的作用是将DataTable导出到EXCEL文件:
/// <summary>
/// Exports a DataTable to a new Excel workbook and saves a copy of it as
/// <c>strFileName + ".xls"</c>. Row 1 receives the column names; every data
/// row follows, each cell written as the value's <c>ToString()</c> text.
/// </summary>
/// <param name="tmpDataTable">Table to export. When null the method returns without doing anything.</param>
/// <param name="strFileName">Target path without extension; ".xls" is appended.</param>
private void DataTabletoExcel(System.Data.DataTable tmpDataTable, string strFileName)
{
    if (tmpDataTable == null)
    {
        return;
    }

    int rowNum = tmpDataTable.Rows.Count;
    int columnNum = tmpDataTable.Columns.Count;
    int rowIndex = 1;       // Excel cells are 1-based; row 1 holds the header
    int columnIndex = 0;

    Excel.Application xlApp = new Excel.ApplicationClass();
    Excel.Workbook xlBook = null;
    try
    {
        xlApp.DefaultFilePath = "";
        xlApp.DisplayAlerts = true;
        xlApp.SheetsInNewWorkbook = 1;
        xlBook = xlApp.Workbooks.Add(true);

        // Write the DataTable column names into the first worksheet row.
        foreach (System.Data.DataColumn dc in tmpDataTable.Columns)
        {
            columnIndex++;
            xlApp.Cells[rowIndex, columnIndex] = dc.ColumnName;
        }

        // Write the DataTable rows below the header.
        for (int i = 0; i < rowNum; i++)
        {
            rowIndex++;
            columnIndex = 0;
            for (int j = 0; j < columnNum; j++)
            {
                columnIndex++;
                xlApp.Cells[rowIndex, columnIndex] = tmpDataTable.Rows[i][j].ToString();
            }
        }

        xlBook.SaveCopyAs(strFileName + ".xls");
    }
    finally
    {
        // Without Close/Quit and releasing the COM wrappers, every call
        // leaves a hidden EXCEL.EXE process running.
        if (xlBook != null)
        {
            // SaveChanges=false: the copy has already been written by SaveCopyAs.
            xlBook.Close(false, System.Type.Missing, System.Type.Missing);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(xlBook);
        }
        xlApp.Quit();
        System.Runtime.InteropServices.Marshal.ReleaseComObject(xlApp);
    }
}
C#code
usingSystem;
usingSystem.Text.RegularExpressions;
usingSystem.IO;
usingSystem.Text;
usingSystem.Net;
usingSystem.IO.Compression;
usingSystem.Web;
usingSystem.Collections;
namespace 控制台测试
{
    /// <summary>
    /// Console sample: downloads a resource, decodes it as UTF-8 and prints the
    /// issue number, bonus number and bonus time of every record the regex finds.
    /// </summary>
    class Program
    {
        // Size of the scratch buffer used when draining the response stream (10 KB).
        private const int BufferSize = 1024 * 10;

        // Named groups: num = BonusNumberString (part before the first '|'),
        // kai = BonusTime, qi = IssueNumber.
        private static readonly Regex RecordPattern = new Regex(
            @"\{""BonusNumberString"":""(?<num>[^|]*)\|\d\|\d"",""BonusTime"":""(?<kai>[^""]*)"",""IssueNumber"":""(?<qi>[^""]*)""\}");

        /// <summary>
        /// Entry point. The URL to fetch is taken from the first command-line
        /// argument because the literal in the published listing was lost.
        /// </summary>
        /// <param name="args">args[0] is the URL to download.</param>
        static void Main(string[] args)
        {
            if (args == null || args.Length == 0)
            {
                Console.WriteLine("Usage: pass the URL to fetch as the first argument.");
                return;
            }

            byte[] buffer = getBytes(args[0], null, null);
            string html = Encoding.UTF8.GetString(buffer);

            foreach (Match m in RecordPattern.Matches(html))
            {
                // NOTE(review): the separators are assumed to be single spaces;
                // the published listing shows "" after whitespace was stripped.
                Console.WriteLine(
                    m.Groups["qi"].Value + " " +
                    m.Groups["num"].Value.Replace(",", "") + " " +
                    m.Groups["kai"].Value);
            }

            Console.WriteLine("程序运行结束,按任意键关闭窗口!");
            Console.ReadKey();
        }

        /// <summary>
        /// Fetches a network resource and returns its body as a byte array.
        /// </summary>
        /// <param name="url">Absolute HTTP/HTTPS URL to request.</param>
        /// <param name="cookie">Optional cookie container attached to the request; may be null.</param>
        /// <param name="postData">
        /// Optional form body. When non-null the request is sent as a POST with
        /// content type application/x-www-form-urlencoded; otherwise the default method is used.
        /// </param>
        /// <returns>The response body, already decompressed if the server sent gzip or deflate.</returns>
        private static byte[] getBytes(string url, CookieContainer cookie, byte[] postData)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.AllowAutoRedirect = true;
            if (cookie != null)
            {
                request.CookieContainer = cookie;
            }

            // Referer is the scheme + host part of the URL. Deriving it from the
            // parsed Uri avoids the ArgumentOutOfRangeException that
            // url.IndexOf("/", 10) raised for URLs shorter than 10 characters.
            request.Referer = request.RequestUri.GetLeftPart(UriPartial.Authority);
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";

            // Let the framework advertise and decode gzip/deflate. The manual
            // version advertised deflate but only decoded an exact "gzip" header.
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            if (postData != null)
            {
                request.Method = "POST";
                request.ContentType = "application/x-www-form-urlencoded";
                request.ContentLength = postData.Length;
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(postData, 0, postData.Length);
                }
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (MemoryStream collected = new MemoryStream())
            {
                // Always read until end of stream. Looping on the declared
                // Content-Length never terminated when the connection closed
                // early, because Read() then keeps returning 0.
                byte[] chunk = new byte[BufferSize];
                int read;
                while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                {
                    collected.Write(chunk, 0, read);
                }
                return collected.ToArray();
            }
        }
    }
}
运行结果:
20100625-083907232010-06-2519:
50
20100625-082613712010-06-2519:
40
20100625-081309382010-06-2519:
30
20100625-080138602010-06-2519:
20
20100625-079432382010-06-2519:
10
20100625-078681452010-06-2519:
00
20100625-077203032010-06-2518:
20100625-076533652010-06-2518:
20100625-075803612010-06-2518:
20100625-074761212010-06-2518:
20100625-073508112010-06-2518:
20100625-072360942010-06-2518:
20100625-071913812010-06-2517:
20100625-070065152010-06-2517:
20100625-069689052010-06-2517:
20100625-068622752010-06-2517:
20100625-067437852010-06-2517:
20100625-066435082010-06-2517:
20100625-065823072010-06-2516:
20100625-064630792010-06-2516:
20100625-063171782010-06-2516:
20100625-062296232010-06-2516:
20100625-061153362010-06-2516:
20100625-060253502010-06-2516:
20100625-059478022010-06-2515:
20100625-058022212010-06-2515:
20100625-057578882010-06-2515:
20100625-056410192010-06-2515:
20100625-055005002010-06-2515:
20100625-054095622010-06-2515:
20100625-053003372010-06-2514:
20100625-052694352010-06-2514:
20100625-051453142010-06-2514:
20100625-050450872010-06-2514:
20100625-049937432010-06-2514:
20100625-048562802010-06-2514:
20100625-047112072010-06-2513:
20100625-046556422010-06-2513:
20100625-045320702010-06-2513:
20100625-044676242010-06-2513:
20100625-043742272010-06-2513:
20100625-042508082010-06-2513:
20100625-041636572010-06-2512:
20100625-040019642010-06-2512:
20100625-039873152010-06-2512:
20100625-038867832010-06-2512:
20100625-037552632010-06-2512:
20100625-036595782010-06-2512:
20100625-035183962010-06-2511:
20100625-034678262010-06-2511:
20100625-033064462010-06-2511:
20100625-032439162010-06-2511:
20100625-031429442010-06-2511:
20100625-030631492010-06-2511:
20100625-029047562010-06-2510:
20100625-028565872010-06-2510:
20100625-027221102010-06-2510:
20100625-026522932010-06-2510:
20100625-025060402010-06-2510:
20100625-024979922010-06-2510:
20100625-023049152010-06-2501:
55
20100625-022812202010-06-2501:
20100625-021336832010-06-2501:
45
20100625-020411642010-06-2501:
20100625-019980972010-06-2501:
35
20100625-018056272010-06-2501:
20100625-017126362010-06-2501:
25
20100625-016046672010-06-2501:
20100625-015887182010-06-2501:
15
20100625-014590382010-06-2501:
20100625-013669442010-06-2501:
05
20100625-012076632010-06-2501:
20100625-011181992010-06-2500:
20100625-010379632010-06-2500:
20100625-009168432010-06-2500:
20100625-008310372010-06-2500:
20100625-007693502010-06-2500:
20100625-006597642010-06-2500:
20100625-005793752010-06-2500:
20100625-004414392010-06-2500:
20100625-003801382010-06-2500:
20100625-002427172010-06-2500:
20100625-001835542010-06-2500:
20100624-120612802010-06-2500:
20100624-119007472010-06-2423:
20100624-118614352010-06-2423:
20100624-117918162010-06-2423:
20100624-116294732010-06-2423:
20100624-115283882010-06-2423:
20100624-114890152010-06-2423:
20100624-113565862010-06-2423:
20100624-112920122010-06-2423:
20100624-111043332010-06-2423:
20100624-110613662010-06-2423:
20100624-109302312010-06-2423:
20100624-108325482010-06-2423:
20100624-107315222010-06-2422:
20100624-106347672010-06-2422:
20100624-105870772010-06-2422:
20100624-104965932010-06-2422:
20100624-103073992010-06-2422:
20100624-102617732010-06-2422:
20100624-101046222010-06-2422:
20100624-100753522010-06-2422:
20100624-099303352010-06-2422:
20100624-098394592010-06-2422:
20100624-097281542010-06-2422:
20100624-096200112010-06-2422:
20100624-095327782010-06-2421:
20100624-094382442010-06-2421:
20100624-093755902010-06-2421:
20100624-092813222010-06-2421:
20100624-091685412010-06-2421:
20100624-090818552010-06-2421:
20100624-089883212010-06-2420:
20100624-088952302010-06-2420:
20100624-087793022010-06-2420:
20100624-086086242010-06-2420:
20100624-085870482010-06-2420:
20100624-084827102010-06-2420:
程序运行结束,按任意键关闭窗口!
抓取Web网页数据分析(c#)
通过程序自动的读取其它网站网页显示的信息,类似于爬虫程序。
比方说我们有一个系统,要提取BaiDu网站上歌曲搜索排名。
分析系统再根据得到的数据进行数据分析。
为业务提供参考数据。
为了完成以上的需求,我们就需要模拟浏览器浏览网页,得到页面的数据再进行分析,最后把分析的结果(即整理好的数据)写入数据库。
那么我们的思路就是:
1、发送HttpRequest请求。
2、接收HttpResponse返回的结果。
得到特定页面的html源文件。
3、取出包含数据的那一部分源码。
4、根据html源码生成HtmlDocument,循环取出数据。
5、写入数据库。
程序如下:
//根据Url地址得到网页的html源码
/// <summary>
/// Downloads the page at the given URL and returns its HTML source decoded as GB2312.
/// </summary>
/// <param name="Url">Address of the page to fetch.</param>
/// <returns>
/// The page source, or an empty string when anything fails (a message box
/// with the text "出错" is shown in that case).
/// </returns>
private string GetWebContent(string Url)
{
    string strResult = "";
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
        request.Timeout = 30000;                     // request timeout, in milliseconds
        request.Headers.Set("Pragma", "no-cache");

        // The response, its stream and the reader are all disposed here,
        // even when ReadToEnd throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream streamReceive = response.GetResponseStream())
        {
            Encoding encoding = Encoding.GetEncoding("GB2312");
            using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
            {
                strResult = streamReader.ReadToEnd();
            }
        }
    }
    catch
    {
        // Best-effort by design: report the failure and return the empty default.
        MessageBox.Show("出错");
    }
    return strResult;
}
为了使用HttpWebRequest和HttpWebResponse,需添加名字空间引用:using System.Net;
以下是程序具体实现过程:
privatevoidbutton1_Click(objectsender,EventArgse)
//要抓取的URL地址
stringUrl="
//得到指定Url的源码
stringstr
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1