c从网页提取数据.docx

上传人:b****7 文档编号:8996356 上传时间:2023-02-02 格式:DOCX 页数:12 大小:18.72KB
下载 相关 举报
c从网页提取数据.docx_第1页
第1页 / 共12页
c从网页提取数据.docx_第2页
第2页 / 共12页
c从网页提取数据.docx_第3页
第3页 / 共12页
c从网页提取数据.docx_第4页
第4页 / 共12页
c从网页提取数据.docx_第5页
第5页 / 共12页
点击查看更多>>
下载资源
资源描述

c从网页提取数据.docx

《c从网页提取数据.docx》由会员分享,可在线阅读,更多相关《c从网页提取数据.docx(12页珍藏版)》请在冰豆网上搜索。

c从网页提取数据.docx

C#从网页提取数据

下面函数的作用是将DataTable导出到Excel文件:

/// <summary>
/// Exports a <see cref="System.Data.DataTable"/> to an Excel workbook via COM
/// automation. Row 1 receives the column names; every following row receives
/// the string form of the corresponding data row.
/// </summary>
/// <param name="tmpDataTable">Table to export. When null the method does nothing.</param>
/// <param name="strFileName">
/// Target path without extension; ".xls" is appended before saving.
/// </param>
private void DataTabletoExcel(System.Data.DataTable tmpDataTable, string strFileName)
{
    if (tmpDataTable == null)
    {
        return;
    }

    int rowNum = tmpDataTable.Rows.Count;
    int columnNum = tmpDataTable.Columns.Count;

    Excel.Application xlApp = new Excel.ApplicationClass();
    Excel.Workbook xlBook = null;
    try
    {
        xlApp.DefaultFilePath = "";
        xlApp.DisplayAlerts = true;
        xlApp.SheetsInNewWorkbook = 1;
        xlBook = xlApp.Workbooks.Add(true);

        // Write the DataTable column names into the first worksheet row.
        // Excel cell coordinates are 1-based.
        int rowIndex = 1;
        int columnIndex = 0;
        foreach (DataColumn dc in tmpDataTable.Columns)
        {
            columnIndex++;
            xlApp.Cells[rowIndex, columnIndex] = dc.ColumnName;
        }

        // Write the data rows below the header.
        // NOTE(review): the loop bounds were truncated in the published text;
        // they are reconstructed from rowNum/columnNum, which had no other use.
        for (int i = 0; i < rowNum; i++)
        {
            rowIndex++;
            columnIndex = 0;
            for (int j = 0; j < columnNum; j++)
            {
                columnIndex++;
                xlApp.Cells[rowIndex, columnIndex] = tmpDataTable.Rows[i][j].ToString();
            }
        }

        xlBook.SaveCopyAs(strFileName + ".xls");
    }
    finally
    {
        // Without an explicit Close/Quit/Release the automation server
        // (EXCEL.EXE) keeps running after this method returns.
        if (xlBook != null)
        {
            // SaveChanges = false: the copy is already on disk, so do not prompt.
            xlBook.Close(false, System.Type.Missing, System.Type.Missing);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(xlBook);
        }

        xlApp.Quit();
        System.Runtime.InteropServices.Marshal.ReleaseComObject(xlApp);
    }
}

 

C# code

using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;
using System.Net;
using System.IO.Compression;
using System.Web;
using System.Collections;

namespace 控制台测试
{
    /// <summary>
    /// Console sample: downloads a resource, extracts issue number, bonus
    /// number and bonus time from its JSON-like text with a regular
    /// expression, and prints one line per match.
    /// </summary>
    class Program
    {
        /// <summary>Chunk size used when draining streams (10 KB).</summary>
        private const int BufferSize = 1024 * 10;

        /// <summary>
        /// Entry point. Expects the URL of the data page as the first argument.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the URL to fetch.</param>
        static void Main(string[] args)
        {
            // NOTE(review): the URL literal of the original listing was lost when
            // the article was published (only `"null,null)` survived), so the
            // address is taken from the command line instead of being guessed.
            if (args.Length == 0)
            {
                Console.WriteLine("用法: 程序名 <数据地址URL>");
                return;
            }

            byte[] buffer = getBytes(args[0], null, null);
            string html = Encoding.UTF8.GetString(buffer);

            // Group names are restored from the m.Groups[...] lookups below:
            //   num = BonusNumberString (up to the first '|'),
            //   kai = BonusTime, qi = IssueNumber.
            MatchCollection mc = Regex.Matches(
                html,
                @"{""BonusNumberString"":""(?<num>[^|]*)\|\d\|\d"",""BonusTime"":""(?<kai>[^""]*)"",""IssueNumber"":""(?<qi>[^""]*)""}");

            foreach (Match m in mc)
            {
                // NOTE(review): the separator literals were emptied by the same
                // whitespace stripping; a single space is assumed — confirm.
                Console.WriteLine(
                    m.Groups["qi"] + " " +
                    m.Groups["num"].Value.Replace(",", "") + " " +
                    m.Groups["kai"]);
            }

            Console.WriteLine("程序运行结束,按任意键关闭窗口!");
            Console.ReadKey();
        }

        /// <summary>
        /// Fetches a network resource and returns its body as a byte array,
        /// transparently undoing gzip/deflate content encoding.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="cookie">Cookie container to attach, or null for none.</param>
        /// <param name="postData">
        /// Form-encoded body; when non-null the request is sent as a POST,
        /// otherwise the request method is left at its default.
        /// </param>
        /// <returns>The (decompressed) response body.</returns>
        private static byte[] getBytes(string url, CookieContainer cookie, byte[] postData)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.AllowAutoRedirect = true;
            if (cookie != null)
            {
                request.CookieContainer = cookie;
            }

            // Use scheme + host as the referer: look for the first '/' after the
            // "http://x" prefix. Guard the start index so short URLs cannot throw.
            int pathStart = url.Length > 10 ? url.IndexOf('/', 10) : -1;
            request.Referer = pathStart > 0 ? url.Substring(0, pathStart) : url;
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
            request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip,deflate";

            if (postData != null)
            {
                // Data has to be posted.
                request.Method = "POST";
                request.ContentType = "application/x-www-form-urlencoded";
                request.ContentLength = postData.Length;
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(postData, 0, postData.Length);
                }
            }

            byte[] data;
            string contentEncoding;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            {
                contentEncoding = response.Headers[HttpResponseHeader.ContentEncoding];

                // Always read until end-of-stream instead of trusting
                // Content-Length: a connection that closes early would otherwise
                // loop forever, and a length above int.MaxValue would overflow.
                data = readAllBytes(body);
            }

            // Both encodings advertised in Accept-Encoding must be decodable.
            if (string.Equals(contentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
            {
                using (MemoryStream compressed = new MemoryStream(data))
                using (GZipStream gzip = new GZipStream(compressed, CompressionMode.Decompress))
                {
                    data = readAllBytes(gzip);
                }
            }
            else if (string.Equals(contentEncoding, "deflate", StringComparison.OrdinalIgnoreCase))
            {
                using (MemoryStream compressed = new MemoryStream(data))
                using (DeflateStream deflate = new DeflateStream(compressed, CompressionMode.Decompress))
                {
                    data = readAllBytes(deflate);
                }
            }

            return data;
        }

        /// <summary>Drains <paramref name="source"/> to its end and returns the bytes read.</summary>
        /// <param name="source">Readable stream; it is not closed by this method.</param>
        /// <returns>All bytes read from the stream's current position to its end.</returns>
        private static byte[] readAllBytes(Stream source)
        {
            byte[] chunk = new byte[BufferSize];
            using (MemoryStream collected = new MemoryStream())
            {
                int read;
                while ((read = source.Read(chunk, 0, chunk.Length)) > 0)
                {
                    collected.Write(chunk, 0, read);
                }

                return collected.ToArray();
            }
        }
    }
}

运行结果:

20100625-083 90723 2010-06-25 19:50

20100625-082 61371 2010-06-25 19:40

20100625-081 30938 2010-06-25 19:30

20100625-080 13860 2010-06-25 19:20

20100625-079 43238 2010-06-25 19:10

20100625-078 68145 2010-06-25 19:00

20100625-077 20303 2010-06-25 18:50

20100625-076 53365 2010-06-25 18:40

20100625-075 80361 2010-06-25 18:30

20100625-074 76121 2010-06-25 18:20

20100625-073 50811 2010-06-25 18:10

20100625-072 36094 2010-06-25 18:00

20100625-071 91381 2010-06-25 17:50

20100625-070 06515 2010-06-25 17:40

20100625-069 68905 2010-06-25 17:30

20100625-068 62275 2010-06-25 17:20

20100625-067 43785 2010-06-25 17:10

20100625-066 43508 2010-06-25 17:00

20100625-065 82307 2010-06-25 16:50

20100625-064 63079 2010-06-25 16:40

20100625-063 17178 2010-06-25 16:30

20100625-062 29623 2010-06-25 16:20

20100625-061 15336 2010-06-25 16:10

20100625-060 25350 2010-06-25 16:00

20100625-059 47802 2010-06-25 15:50

20100625-058 02221 2010-06-25 15:40

20100625-057 57888 2010-06-25 15:30

20100625-056 41019 2010-06-25 15:20

20100625-055 00500 2010-06-25 15:10

20100625-054 09562 2010-06-25 15:00

20100625-053 00337 2010-06-25 14:50

20100625-052 69435 2010-06-25 14:40

20100625-051 45314 2010-06-25 14:30

20100625-050 45087 2010-06-25 14:20

20100625-049 93743 2010-06-25 14:10

20100625-048 56280 2010-06-25 14:00

20100625-047 11207 2010-06-25 13:50

20100625-046 55642 2010-06-25 13:40

20100625-045 32070 2010-06-25 13:30

20100625-044 67624 2010-06-25 13:20

20100625-043 74227 2010-06-25 13:10

20100625-042 50808 2010-06-25 13:00

20100625-041 63657 2010-06-25 12:50

20100625-040 01964 2010-06-25 12:40

20100625-039 87315 2010-06-25 12:30

20100625-038 86783 2010-06-25 12:20

20100625-037 55263 2010-06-25 12:10

20100625-036 59578 2010-06-25 12:00

20100625-035 18396 2010-06-25 11:50

20100625-034 67826 2010-06-25 11:40

20100625-033 06446 2010-06-25 11:30

20100625-032 43916 2010-06-25 11:20

20100625-031 42944 2010-06-25 11:10

20100625-030 63149 2010-06-25 11:00

20100625-029 04756 2010-06-25 10:50

20100625-028 56587 2010-06-25 10:40

20100625-027 22110 2010-06-25 10:30

20100625-026 52293 2010-06-25 10:20

20100625-025 06040 2010-06-25 10:10

20100625-024 97992 2010-06-25 10:00

20100625-023 04915 2010-06-25 01:55

20100625-022 81220 2010-06-25 01:50

20100625-021 33683 2010-06-25 01:45

20100625-020 41164 2010-06-25 01:40

20100625-019 98097 2010-06-25 01:35

20100625-018 05627 2010-06-25 01:30

20100625-017 12636 2010-06-25 01:25

20100625-016 04667 2010-06-25 01:20

20100625-015 88718 2010-06-25 01:15

20100625-014 59038 2010-06-25 01:10

20100625-013 66944 2010-06-25 01:05

20100625-012 07663 2010-06-25 01:00

20100625-011 18199 2010-06-25 00:55

20100625-010 37963 2010-06-25 00:50

20100625-009 16843 2010-06-25 00:45

20100625-008 31037 2010-06-25 00:40

20100625-007 69350 2010-06-25 00:35

20100625-006 59764 2010-06-25 00:30

20100625-005 79375 2010-06-25 00:25

20100625-004 41439 2010-06-25 00:20

20100625-003 80138 2010-06-25 00:15

20100625-002 42717 2010-06-25 00:10

20100625-001 83554 2010-06-25 00:05

20100624-120 61280 2010-06-25 00:00

20100624-119 00747 2010-06-24 23:55

20100624-118 61435 2010-06-24 23:50

20100624-117 91816 2010-06-24 23:45

20100624-116 29473 2010-06-24 23:40

20100624-115 28388 2010-06-24 23:35

20100624-114 89015 2010-06-24 23:30

20100624-113 56586 2010-06-24 23:25

20100624-112 92012 2010-06-24 23:20

20100624-111 04333 2010-06-24 23:15

20100624-110 61366 2010-06-24 23:10

20100624-109 30231 2010-06-24 23:05

20100624-108 32548 2010-06-24 23:00

20100624-107 31522 2010-06-24 22:55

20100624-106 34767 2010-06-24 22:50

20100624-105 87077 2010-06-24 22:45

20100624-104 96593 2010-06-24 22:40

20100624-103 07399 2010-06-24 22:35

20100624-102 61773 2010-06-24 22:30

20100624-101 04622 2010-06-24 22:25

20100624-100 75352 2010-06-24 22:20

20100624-099 30335 2010-06-24 22:15

20100624-098 39459 2010-06-24 22:10

20100624-097 28154 2010-06-24 22:05

20100624-096 20011 2010-06-24 22:00

20100624-095 32778 2010-06-24 21:50

20100624-094 38244 2010-06-24 21:40

20100624-093 75590 2010-06-24 21:30

20100624-092 81322 2010-06-24 21:20

20100624-091 68541 2010-06-24 21:10

20100624-090 81855 2010-06-24 21:00

20100624-089 88321 2010-06-24 20:50

20100624-088 95230 2010-06-24 20:40

20100624-087 79302 2010-06-24 20:30

20100624-086 08624 2010-06-24 20:20

20100624-085 87048 2010-06-24 20:10

20100624-084 82710 2010-06-24 20:00

程序运行结束,按任意键关闭窗口!

 

抓取Web网页数据分析(c#)

通过程序自动地读取其他网站网页上显示的信息,类似于爬虫程序。

比方说我们有一个系统,要提取BaiDu网站上歌曲搜索排名。

分析系统再根据得到的数据进行数据分析。

为业务提供参考数据。

 为了完成以上的需求,我们就需要模拟浏览器浏览网页,得到页面的数据再进行分析,最后把分析的结果,即整理好的数据写入数据库。

那么我们的思路就是:

  1、发送HttpRequest请求。

  2、接收HttpResponse返回的结果。

得到特定页面的html源文件。

  3、取出包含数据的那一部分源码。

  4、根据html源码生成HtmlDocument,循环取出数据。

  5、写入数据库。

 程序如下:

  

 

 //根据Url地址得到网页的html源码

       privatestringGetWebContent(stringUrl)

{

stringstrResult="";

try

{

HttpWebRequestrequest=(HttpWebRequest)WebRequest.Create(Url);

    //声明一个HttpWebRequest请求

               request.Timeout=30000;

//设置连接超时时间

               request.Headers.Set("Pragma","no-cache");

HttpWebResponseresponse=(HttpWebResponse)request.GetResponse();

StreamstreamReceive=response.GetResponseStream();

Encodingencoding=Encoding.GetEncoding("GB2312");

StreamReaderstreamReader=newStreamReader(streamReceive,encoding);

strResult=streamReader.ReadToEnd();

}

catch

{

MessageBox.Show("出错");

}

returnstrResult;

}

为了使用HttpWebRequest和HttpWebResponse,需添加命名空间引用:

  using System.Net;

以下是程序具体实现过程:

privatevoidbutton1_Click(objectsender,EventArgse)

{

//要抓取的URL地址

           stringUrl="

//得到指定Url的源码

   stringstr

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 高中教育 > 高考

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1