抓取网站信息源码.docx

上传人:b****8 文档编号:9370017 上传时间:2023-02-04 格式:DOCX 页数:15 大小:17.18KB
下载 相关 举报
抓取网站信息源码.docx_第1页
第1页 / 共15页
抓取网站信息源码.docx_第2页
第2页 / 共15页
抓取网站信息源码.docx_第3页
第3页 / 共15页
抓取网站信息源码.docx_第4页
第4页 / 共15页
抓取网站信息源码.docx_第5页
第5页 / 共15页
点击查看更多>>
下载资源
资源描述

抓取网站信息源码.docx

《抓取网站信息源码.docx》由会员分享,可在线阅读,更多相关《抓取网站信息源码.docx(15页珍藏版)》请在冰豆网上搜索。

抓取网站信息源码.docx

抓取网站信息源码

用 HttpURLConnection 抓取需要登录的网站信息的源码:

供参考。

一共2个类(CrawWeb 和 HeaderEntiy),另附一个测试类(Test)。

package dev.smart.craw.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.ProtocolException;
import java.net.URL;
import java.util.List;
import java.util.Map;

/**

*

*@authorroyoky

*

*/

publicclassCrawWeb{

/**

*获得一个HttpURLConnection对象

*

*@paramcookie

*@paramreferer

*上一个引用页面

*@paramurl

*请求的URL地址

*@parammethod

*方法类型(POSTGET)

*@return

*@throwsIOException

*@throwsProtocolException

*/

publicHttpURLConnectiongetHttpURLConnection(HeaderEntiyentity)

throwsIOException,ProtocolException{

URLcookieUrl=newURL(entity.getUrl());

HttpURLConnectioncookiecon=(HttpURLConnection)cookieUrl

.openConnection();

cookiecon.setInstanceFollowRedirects(false);

if(entity.getHost()!

=null&&!

"".equals(entity.getHost()))

cookiecon.setRequestProperty("Host",entity.getHost());

if(entity.getUser_Agent()!

=null

&&!

"".equals(entity.getUser_Agent()))

cookiecon.setRequestProperty("User-Agent",entity.getUser_Agent());

if(entity.getAccept()!

=null&&!

"".equals(entity.getAccept()))

cookiecon.setRequestProperty("Accept",entity.getAccept());

if(entity.getAccept_Language()!

=null

&&!

"".equals(entity.getAccept_Language()))

cookiecon.setRequestProperty("Accept-Language",entity

.getAccept_Language());

if(entity.getAccept_Encoding()!

=null

&&!

"".equals(entity.getAccept_Encoding()))

cookiecon.setRequestProperty("Accept-Encoding",entity

.getAccept_Encoding());

if(entity.getContent_Type()!

=null

&&!

"".equals(entity.getContent_Type()))

cookiecon.setRequestProperty("Content-Type",entity

.getContent_Type());

if(entity.getContent_Length()!

=null

&&!

"".equals(entity.getContent_Length()))

cookiecon.setRequestProperty("Content-Length",entity

.getContent_Length());

//map.put("Accept-Charset","x-gbk,utf-8;q=0.7,*;q=0.7");

if(entity.getAccept_Charset()!

=null

&&!

"".equals(entity.getAccept_Charset()))

cookiecon.setRequestProperty("Accept-Charset",entity

.getAccept_Charset());

if(entity.getConnection()!

=null

&&!

"".equals(entity.getConnection()))

cookiecon.setRequestProperty("Connection",entity.getConnection());

if(entity.getCache_Control()!

=null

&&!

"".equals(entity.getCache_Control()))

cookiecon.setRequestProperty("Cache-Control",entity

.getCache_Control());

if(entity.getReferer()!

=null&&!

"".equals(entity.getReferer())){

cookiecon.setRequestProperty("Referer",entity.getReferer());

}

if(entity.getCookie()!

=null&&!

"".equals(entity.getCookie())){

cookiecon.setRequestProperty("Cookie",entity.getCookie());

}

cookiecon.setRequestMethod(entity.getFormWay());

cookiecon.setConnectTimeout(90000);

cookiecon.setReadTimeout(120000);

cookiecon.setDoOutput(true);

cookiecon.setDoInput(true);

cookiecon.setUseCaches(false);

returncookiecon;

}

/**

*POST做参数传递

*

*@paramcon

*@paramparam

*@throwsIOException

*/

publicvoidsendParameter(HttpURLConnectioncon,HeaderEntiyentity)

throwsIOException{

con.connect();

OutputStreamout=con.getOutputStream();

BufferedWriterbw=newBufferedWriter(newOutputStreamWriter(out));

if(entity.getParamPost()!

=null)

bw.write(entity.getParamPost());

bw.flush();

bw.close();

out.close();

}

/**

*获得请求返回的文本信息

*

*@paramcon

*@paramencode

*@return

*@throwsException

*/

publicStringgetText(HttpURLConnectioncon,Stringencode)

throwsException{

//Stringhost="";

//String

//url="

//String

//parm="startDate="+startDate+"&endDate="+endDate+"&payType=all&orderStatus=&hostType=D&tripType=all&isGroup=all&userid=DZC001&orgID=TUAIR&officeCode=SZX348&iataNo=08017074&grp=SZXYYB&CanViewOrhersRight=true&downtype=b2b_pay&fileType=csv";

returnthis.doString(con,con.getInputStream(),encode);

}

publicStringgetCookie(HttpURLConnectioncon){

Listcookie=null;

Stringsession="";

Map>map=con.getHeaderFields();

if(map!

=null){

cookie=map.get("Set-Cookie");

}

if(cookie!

=null){

for(Strings:

cookie){

session+=s.replace("path=/","");

}

}

returnsession;

}

/**

*得到请求返回的状态200表示请求成功

*

*@paramcon

*@return

*/

publicintgetResponseState(HttpURLConnectioncon){

try{

if(con!

=null)

returncon.getResponseCode();

}catch(IOExceptione){

//TODOAuto-generatedcatchblock

e.printStackTrace();

}

return0;

}

privateStringdoString(HttpURLConnectioncon,InputStreamin,Stringencode)

throwsException{

Stringtemp=null;

String__viewstate="";

if(encode==null||"".equals(encode)){

encode="UTF-8";

}

BufferedReaderbr=newBufferedReader(

newInputStreamReader(in,encode));

while((temp=br.readLine())!

=null){

//System.out.println(temp);

__viewstate+=temp+"\r\n";

}

con.disconnect();

br.close();

in.close();

return__viewstate;

}

}

packagedev.smart.craw.util;

/**
 * Plain value holder describing one HTTP request: the target URL, the request
 * method, the request body and a set of request header values.
 *
 * <p>Every property starts out {@code null}, except the request method
 * ({@link #getFormWay()}), which defaults to "GET". Accessor names keep their
 * original underscore spelling (for example {@code getUser_Agent}) so existing
 * callers continue to compile.
 */
public class HeaderEntiy {

    // Example header values:
    //   Accept:          image/gif,image/jpeg,image/pjpeg,image/pjpeg,application/x-shockwave-flash,application/msword,application/vnd.ms-excel,application/vnd.ms-powerpoint,*/*
    //   Accept-Language: zh-cn
    //   Accept-Encoding: GBK
    //   User-Agent:      Mozilla/4.0(compatible;MSIE8.0;WindowsNT5.1;Trident/4.0;.NETCLR2.0.50727)
    //   Host:            www.cococ.cc
    //   Connection:      Keep-Alive
    //   Content-Type:    application/x-www-form-urlencoded
    //   Cache-Control:   no-cache
    //   Accept-Charset:  x-gbk,utf-8;q=0.7,*;q=0.7
    //   Referer:         http://www.cococ.cc/member/?act=login&jumpUrl=http://www.cococ.cc/

    private String host;
    private String userAgent;
    private String accept;
    private String acceptLanguage;
    private String acceptEncoding;
    private String contentType;
    private String contentLength;
    private String acceptCharset;
    private String connection;
    private String cacheControl;
    private String referer;
    private String cookie;

    /** Address the request is sent to. */
    private String url;

    /** Request method; defaults to "GET". */
    private String formWay = "GET";

    /** Request body text. */
    private String paramPost;

    /** @return the Accept value, or {@code null} if unset */
    public String getAccept() {
        return accept;
    }

    /** @param accept the Accept value */
    public void setAccept(String accept) {
        this.accept = accept;
    }

    /** @return the Accept-Charset value, or {@code null} if unset */
    public String getAccept_Charset() {
        return acceptCharset;
    }

    /** @param accept_Charset the Accept-Charset value */
    public void setAccept_Charset(String accept_Charset) {
        this.acceptCharset = accept_Charset;
    }

    /** @return the Accept-Encoding value, or {@code null} if unset */
    public String getAccept_Encoding() {
        return acceptEncoding;
    }

    /** @param accept_Encoding the Accept-Encoding value */
    public void setAccept_Encoding(String accept_Encoding) {
        this.acceptEncoding = accept_Encoding;
    }

    /** @return the Accept-Language value, or {@code null} if unset */
    public String getAccept_Language() {
        return acceptLanguage;
    }

    /** @param accept_Language the Accept-Language value */
    public void setAccept_Language(String accept_Language) {
        this.acceptLanguage = accept_Language;
    }

    /** @return the Cache-Control value, or {@code null} if unset */
    public String getCache_Control() {
        return cacheControl;
    }

    /** @param cache_Control the Cache-Control value */
    public void setCache_Control(String cache_Control) {
        this.cacheControl = cache_Control;
    }

    /** @return the Connection value, or {@code null} if unset */
    public String getConnection() {
        return connection;
    }

    /** @param connection the Connection value */
    public void setConnection(String connection) {
        this.connection = connection;
    }

    /** @return the Content-Length value, or {@code null} if unset */
    public String getContent_Length() {
        return contentLength;
    }

    /** @param content_Length the Content-Length value */
    public void setContent_Length(String content_Length) {
        this.contentLength = content_Length;
    }

    /** @return the Content-Type value, or {@code null} if unset */
    public String getContent_Type() {
        return contentType;
    }

    /** @param content_Type the Content-Type value */
    public void setContent_Type(String content_Type) {
        this.contentType = content_Type;
    }

    /** @return the Cookie value, or {@code null} if unset */
    public String getCookie() {
        return cookie;
    }

    /** @param cookie the Cookie value */
    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    /** @return the Host value, or {@code null} if unset */
    public String getHost() {
        return host;
    }

    /** @param host the Host value */
    public void setHost(String host) {
        this.host = host;
    }

    /** @return the Referer value, or {@code null} if unset */
    public String getReferer() {
        return referer;
    }

    /** @param referer the Referer value */
    public void setReferer(String referer) {
        this.referer = referer;
    }

    /** @return the User-Agent value, or {@code null} if unset */
    public String getUser_Agent() {
        return userAgent;
    }

    /** @param user_Agent the User-Agent value */
    public void setUser_Agent(String user_Agent) {
        this.userAgent = user_Agent;
    }

    /** @return the request body text, or {@code null} if unset */
    public String getParamPost() {
        return paramPost;
    }

    /** @param paramPost the request body text */
    public void setParamPost(String paramPost) {
        this.paramPost = paramPost;
    }

    /** @return the request URL, or {@code null} if unset */
    public String getUrl() {
        return url;
    }

    /** @param url the request URL */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the request method; "GET" unless changed */
    public String getFormWay() {
        return formWay;
    }

    /** @param formWay the request method, e.g. "GET" or "POST" */
    public void setFormWay(String formWay) {
        this.formWay = formWay;
    }
}

packagedev.smart.craw.util;

import.HttpURLConnection;

publicclassTestextendsCrawWeb{

publicStringgetInfo()throwsException{

HeaderEntiyvo=newHeaderEntiy();

vo.setAccept("image/gif,image/jpeg,image/pjpeg,image/pjpeg,application/x-shockwave-flash,application/msword,application/vnd.ms-excel,application/vnd.ms-powerpoint,*/*");

vo.setAccept_Charset("x-gbk,utf-8;q=0.7,*;q=0.7");

vo.setAccept_Encoding("GBK");

vo.setAccept_Language("zh-cn");

vo.setCache_Control("no-cache");

vo.setConnection("Keep-Alive");

vo.setContent_Length("");

vo.setContent_Type("application/x-www-form-urlencoded");

vo.setCookie("");

vo.setHost("www.cococ.cc");

vo.setReferer("http:

//www.cococ.cc/member/?

act=login&jumpUrl=http:

//www.cococ.cc/");

vo.setUser_Agent("Mozilla/4.0(compatible;MSIE8.0;WindowsNT5.1;Trident/4.0;.NETCLR2.0.50727)");

vo.setUrl("http:

//www.cococ.cc/member/?

act=login");

vo.setParamPost("username=royoks&password=123456&+%B5%C7+%C2%BD+=++++&handler=login&jumpurl=http%3A%2F%2Fwww.cococ.cc%2FPOST/member/?

act=login");

vo.setFormWay("POST");

HttpURLConnectioncon=super.getHttpURLConnection(vo

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 解决方案 > 学习计划

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1