Java获取网址HTML页面文本内容



Java获取网址HTML页面文本内容。
示例代码（Java）：
public static String getWebHtml(String domain) {
// http://www.baidu.com
StringBuffer sb = new StringBuffer();
InputStream is = null;
InputStreamReader isr = null;
BufferedReader in = null;
try {
java.net.URL url = new java.net.URL(domain);
is = url.openStream();
isr = new InputStreamReader(is,”utf-8″);
in = new BufferedReader(isr);
String line;
while ((line = in.readLine()) != null) {
sb.append(line).append(“\n”);
// System.out.println(line);
}
in.close();

} catch (IOException e) {
e.printStackTrace();
}finally {
try {
if(in!=null){
in.close();
in=null;
}
if(isr!=null){
isr.close();
isr=null;
}
if(is!=null){
is.close();
is=null;
}
} catch (IOException e) {
e.printStackTrace();
}
}
return sb.toString();
}