java正則表達式解析html示例分享
package work;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
 * Demo scraper: downloads a single HTML page with Commons HttpClient and uses
 * regular expressions to print the name, link and picture of every
 * {@code <li>} entry found inside the page's {@code compareForm} form.
 *
 * <p>Regular expressions are a fragile way to read HTML; the patterns below are
 * tied to the markup of the page this example was written for.
 */
public class chuanboyi {

    /**
     * Page to download. This has to be an absolute URL (scheme included):
     * a scheme-less value such as {@code //host} is treated as a relative
     * reference, which leaves the client without a host to connect to.
     */
    private static final String PAGE_URL = "http://chabaoo.cn";

    /** Character set used to decode the response body. */
    private static final String PAGE_CHARSET = "utf-8";

    /** Matches the {@code compareForm} form up to the script tag that follows it. */
    private static final Pattern FORM_PATTERN = Pattern.compile(
            "<form name=\"compareForm\"[\\s\\S]+>[\\s\\S]+</form>.*<script.*>");

    /** Matches the content between {@code <li>} and {@code </li>}, shortest match first. */
    private static final Pattern ITEM_PATTERN = Pattern.compile("(?<=<li>)[\\s\\S]+?(?=</li>)");

    /** Matches a relative link such as {@code abc-123.html}. */
    private static final Pattern URL_PATTERN = Pattern.compile("[a-z]+-[0-9]+\\.html");

    /**
     * Matches the value of a {@code title="..."} attribute. The value is
     * "everything up to the closing quote", so it can neither run past the
     * quote nor be rejected because of punctuation inside the name.
     */
    private static final Pattern NAME_PATTERN = Pattern.compile("(?<=title=\")[^\"]+(?=\")");

    /**
     * Matches an image path starting at {@code images} and ending at the first
     * {@code .jpg}; the reluctant quantifier keeps two images on the same line
     * from being reported as one.
     */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("images.*?\\.jpg");

    /**
     * Downloads {@link #PAGE_URL}, prints the attributes of every list item
     * found and finally prints how many items were seen.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HttpClient httpclient = new HttpClient();
        // Create the GET request.
        GetMethod getMethod = new GetMethod(PAGE_URL);
        // Use the default retry handler supplied by the library.
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
        try {
            // Execute the GET request.
            int statusCode = httpclient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.out.println("Method is wrong " + getMethod.getStatusLine());
                // An error page would not contain the expected markup; stop here.
                return;
            }
            String html = readBody(getMethod.getResponseBodyAsStream());
            int count = printItems(html);
            System.out.println("共有" + count + "條數據!");
        } catch (HttpException e) {
            System.out.println("Please check your provided http address!");
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("the line is wrong!");
            e.printStackTrace();
        } finally {
            // Release the connection, also on the early return above.
            getMethod.releaseConnection();
        }
    }

    /**
     * Reads the whole response body as text, normalising line ends to {@code \n}.
     *
     * @param body response stream; may be {@code null} when there is no body
     * @return the decoded text, or an empty string when {@code body} is {@code null}
     * @throws IOException if reading or decoding fails
     */
    private static String readBody(InputStream body) throws IOException {
        if (body == null) {
            return "";
        }
        StringBuilder html = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(body, PAGE_CHARSET));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\n");
            }
        } finally {
            reader.close();
        }
        return html.toString();
    }

    /**
     * Finds every list item inside the matched form markup and prints its attributes.
     *
     * @param html full page text
     * @return number of list items found
     */
    private static int printItems(CharSequence html) {
        // Collect the form markup first so that <li> elements elsewhere on the page are ignored.
        StringBuilder forms = new StringBuilder();
        Matcher formMatcher = FORM_PATTERN.matcher(html);
        while (formMatcher.find()) {
            forms.append(formMatcher.group());
        }

        int count = 0;
        Matcher itemMatcher = ITEM_PATTERN.matcher(forms);
        while (itemMatcher.find()) {
            attrs(itemMatcher.group());
            count++;
        }
        return count;
    }

    /**
     * Prints the name, link and picture found in one list item, each on its
     * own line. An attribute that cannot be found is skipped.
     *
     * @param str markup of a single list item; {@code null} prints nothing
     */
    public static void attrs(String str) {
        if (str == null) {
            return;
        }
        Matcher mName = NAME_PATTERN.matcher(str);
        if (mName.find()) {
            System.out.println("名字:" + mName.group());
        }
        Matcher mURL = URL_PATTERN.matcher(str);
        if (mURL.find()) {
            System.out.println("鏈接:" + mURL.group());
        }
        Matcher mPicture = PICTURE_PATTERN.matcher(str);
        if (mPicture.find()) {
            System.out.println("圖片:" + mPicture.group());
        }
    }
}
相關文章
Java中常見延時隊列的實現方案小結(建議收藏)
延時隊列它要具有隊列的特性,再給它附加一個延遲消費隊列消息的功能,也就是說可以指定隊列中的消息在哪個時間點被消費,這篇文章主要介紹了Java中常見延時隊列的實現方案總結,需要的朋友可以參考下
2024-04-04
IDEA下從零開始搭建SpringBoot工程的方法步驟
這篇文章主要介紹了IDEA下從零開始搭建SpringBoot工程的方法步驟,小編覺得挺不錯的,現在分享給大家,也給大家做個參考。一起跟隨小編過來看看吧
2019-01-01
使用feign調用接口時調不到get方法的問題及解決
這篇文章主要介紹了使用feign調用接口時調不到get方法的問題及解決,具有很好的參考價值,希望對大家有所幫助。如有錯誤或未考慮完全的地方,望不吝賜教
2022-03-03
springboot aop切到service層,不生效問題
這篇文章主要介紹了springboot aop切到service層,不生效問題,具有很好的參考價值,希望對大家有所幫助,如有錯誤或未考慮完全的地方,望不吝賜教
2024-05-05

Hibernate Validator實現更簡潔的參數校驗及一個util

springboot在filter中如何用threadlocal存放用戶身份信息