本文的主要工作是演示如何通过 Java 抓取百度(Baidu)上好听的歌曲。抓取的内容主要包括 3 个属性:歌名、歌曲在线播放地址和歌词内容(符合 LRC 歌词格式)。目前已完成歌名和歌曲地址的抓取,歌词抓取尚未实现。由于百度的歌曲地址很多是通过 JS 动态获取的,歌曲地址的获取这里改用搜狗音乐搜索,更方便一些(注意:下面代码中实际请求的搜索地址是 so.mp3.qihoo.com)。所有的源码如下:

创新互联专注于企业成都全网营销推广、网站重做改版、大连网站定制设计、自适应品牌网站建设、H5响应式网站、商城网站建设、集团公司官网建设、成都外贸网站建设公司、高端网站制作、响应式网页设计等建站业务,价格优惠性价比高,为大连等各大城市提供网站开发制作服务。
- /**
 - http://www.bt285.cn http://www.5a520.cn
 - */
 - package com.common.utils;
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLConnection;
 import java.net.URLDecoder;
 import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.htmlparser.Node;
 import org.htmlparser.NodeFilter;
 import org.htmlparser.Parser;
 import org.htmlparser.filters.NodeClassFilter;
 import org.htmlparser.filters.OrFilter;
 import org.htmlparser.nodes.TextNode;
 import org.htmlparser.tags.LinkTag;
 import org.htmlparser.util.NodeList;
 import org.htmlparser.util.ParserException;

 import com.common.doc.FileOperUtils;
 /**
  * A scraped song: its title, the URL it can be played or downloaded from,
  * and its lyric text.
  */
 class Song {

     /** Song title as shown in the chart listing. */
     private String name;

     /** Address the song can be fetched from; may be empty when none was found. */
     private String url;

     /** Lyric text; the empty string when no lyric has been set. */
     private String lrc;

     /**
      * Creates a song with an empty lyric.
      *
      * @param name song title
      * @param url  playback or download address
      */
     public Song(String name, String url) {
         this(name, url, "");
     }

     /**
      * Creates a song with an explicit lyric.
      *
      * @param name song title
      * @param url  playback or download address
      * @param lrc  lyric text; {@code null} is stored as the empty string so a
      *             freshly constructed song never has a {@code null} lyric
      */
     public Song(String name, String url, String lrc) {
         this.name = name;
         this.url = url;
         this.lrc = (lrc == null) ? "" : lrc;
     }

     /** @return the song title */
     public String getName() {
         return name;
     }

     /** @param name the new song title */
     public void setName(String name) {
         this.name = name;
     }

     /** @return the playback or download address */
     public String getUrl() {
         return url;
     }

     /** @param url the new playback or download address */
     public void setUrl(String url) {
         this.url = url;
     }

     /** @return the lyric text */
     public String getLrc() {
         return lrc;
     }

     /** @param lrc the new lyric text, stored as given */
     public void setLrc(String lrc) {
         this.lrc = lrc;
     }
 }
 - public class BaiduMP3 {
 - public static String visitURL(String strUrl) {
 - URL url = null;
 - try {
 - url = new URL(strUrl);
 - } catch (MalformedURLException e) {
 - e.printStackTrace();
 - }
 - URLConnection conn = null;
 - try {
 - conn = url.openConnection();
 - conn.setDoOutput(true);
 - } catch (IOException e) {
 - System.out.println("e:"+e.getMessage());
 - }
 - OutputStreamWriter out;
 - try {
 - out = new OutputStreamWriter(conn.getOutputStream(), "GBK");
 - out.flush();
 - out.close();
 - } catch (UnsupportedEncodingException e2) {
 - e2.printStackTrace();
 - } catch (IOException e2) {
 - e2.printStackTrace();
 - }
 - // 接收返回信息
 - BufferedReader rd = null;
 - try {
 - rd = new BufferedReader(
 - new InputStreamReader(conn.getInputStream()));
 - return rd.readLine();
 - } catch (IOException e1) {
 - e1.printStackTrace();
 - }
 - return "";
 - }
 - /** *//**
 - * 功能说明:访问指定的URL并检查返回结果。
 - * @param strUrl
 - * @param successFlag 请求成功的标识,比如包含“_SUCCESS”字。
 - * @return
 - */
 - public static String visitURL(String strUrl, String successFlag) {
 - boolean rs = false;
 - HttpURLConnection jconn = null;
 - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
 - try {
 - URL url = new URL(strUrl);
 - jconn = (HttpURLConnection) url.openConnection();
 - jconn.setDoOutput(true);
 - jconn.setDoInput(true);
 - jconn.connect();
 - InputStream in = jconn.getInputStream();
 - byte[] buf = new byte[4096];
 - int bytesRead;
 - while ((bytesRead = in.read(buf)) != -1) {
 - byteArrayOutputStream.write(buf, 0, bytesRead);
 - }
 - String strRead = new String(byteArrayOutputStream.toByteArray(),"GBK");
 - return strRead;
 - } catch (MalformedURLException e) {
 - e.printStackTrace();
 - } catch (IOException e) {
 - e.printStackTrace();
 - } finally {
 - jconn.disconnect();
 - try {
 - byteArrayOutputStream.close();
 - } catch (IOException e) {
 - e.printStackTrace();
 - }
 - }
 - return "";
 - }
 - private static boolean isTrimEmptyOrBlank(String astr) {
 - if ((null == astr) || (astr.length() == 0) || " ".equals(astr)) {
 - return true;
 - }
 - astrastr = astr.trim();
 - if ((null == astr) || (astr.length() == 0)) {
 - return true;
 - }
 - return false;
 - }
 - private static String getFilteredContent(String htmlContent, String reg,int i) {
 - String content = "";
 - int k=1;
 - Pattern pp = Pattern.compile(reg, Pattern.DOTALL);
 - Matcher m = pp.matcher(htmlContent);
 - while (m.find()) {
 - content = m.group();
 - if(k++==i)
 - break;
 - }
 - return content;
 - }
 - public static List getBaiduSongs(){
 - List ss = new ArrayList();
 - String htmlContent = visitURL("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2","s");
 - String encode = "GBK";
 - // System.out.println("===========================================================================");
 - // System.out.println(htmlContent);
 - // System.out.println("===========================================================================");
 - String reg = "(.*?)";
 - htmlContent = getFilteredContent(htmlContent,reg,0);
 - //FileOperUtils.writeFile("c:\\1.html", htmlContent, false);
 - String line = "",lineurl="";
 - Node anode = null;
 - TextNode textnode = null;
 - try {
 - Parser parser = Parser.createParser(htmlContent, encode);
 - NodeClassFilter textFilter = new NodeClassFilter(LinkTag.class);
 - OrFilter lastFilter = new OrFilter();
 - lastFilter.setPredicates(new NodeFilter[] { textFilter });
 - NodeList nodeList = parser.parse(lastFilter);
 - Node[] nodes = nodeList.toNodeArray();
 - for (int i = 0; i < nodes.length; i++) {
 - anode = (Node) nodes[i];
 - if(anode instanceof LinkTag){
 - LinkTag txt = (LinkTag)anode;
 - line = txt.getLinkText();
 - if(txt.getPreviousSibling()!=null){
 - if(txt.getPreviousSibling().toString().indexOf("(")>=0)
 - continue;
 - }
 - line = txt.getLinkText();
 - lineurl = txt.getAttribute("href");
 - //System.out.println(txt.getLink());
 - }
 - if (isTrimEmptyOrBlank(line)||isTrimEmptyOrBlank(lineurl))
 - continue;
 - ss.add(new Song(line,getSongURL(line)));
 - }
 - } catch (ParserException pe) {
 - pe.printStackTrace();
 - }
 - return ss;
 - }
 - private static String getSongURL(String songname){
 - try {
 - String ss = URLEncoder.encode(songname,"GBK");
 - String htmlContent = visitURL("http://so.mp3.qihoo.com/?type=0&ssrc=s&kw="+ss,"s");
 - String encode = "GBK";
 - http://www.feng123.com
 - String reg = "(.*?)"; http://www.5a520.cn
 - htmlContent = getFilteredContent(htmlContent,reg,1);
 - String line = "",lineurl="";
 - Node anode = null;
 - TextNode textnode = null;
 - Parser parser = Parser.createParser(htmlContent, encode);
 - NodeClassFilter textFilter = new NodeClassFilter(LinkTag.class);
 - OrFilter lastFilter = new OrFilter();
 - lastFilter.setPredicates(new NodeFilter[] { textFilter });
 - NodeList nodeList = parser.parse(lastFilter);
 - Node[] nodes = nodeList.toNodeArray();
 - for (int i = 0; i < nodes.length; i++) {
 - anode = (Node) nodes[i];
 - if(anode instanceof LinkTag){
 - LinkTag txt = (LinkTag)anode;
 - line = txt.getLinkText();
 - lineurl = txt.getAttribute("href");
 - if(!isTrimEmptyOrBlank(lineurl) && lineurl.startsWith("down.html")){
 - String s = getFilteredContent(lineurl,"u=(.*?)\\&",0);
 - if(!s.equals("")&&s.length()>5){
 - s = Utils.replace(s, "u=", "");
 - s = Utils.replace(s, "&", "");
 - s = URLDecoder.decode(s,"GBK");
 - return s;
 - }
 - }
 - }
 - }
 - } catch (Exception pe) {
 - pe.printStackTrace();
 - }
 - return "";
 - }
 - public static void main(String[] args) throws Exception{
 - List ss = getBaiduSongs();
 - int idx = 0;
 - for(Song s:ss){
 - System.out.println((++idx)+":"+s.getName()+"->"+s.getUrl());
 - }
 - // String ss = getSongURL("国家");
 - // System.out.println(ss);
 - // String s = URLDecoder.decode("http%3A%2F%2F http://www.5a520.cn %2F%B9%FA%BC%D2.mp3","GBK");
 - // System.out.println(s);
 - }
 - }
 
至此Java抓取百度Top500歌曲及源码的工作完成。
                标题名称:开发者体验:Java抓取百度Top500歌曲及源码
                
                网页地址:http://www.csdahua.cn/qtweb/news30/92680.html
            
网站建设、网络推广公司-快上网,是专注品牌与效果的网站制作,网络营销seo公司;服务项目有等
声明:本网站发布的内容(图片、视频和文字)以用户投稿、用户转载内容为主,如果涉及侵权请尽快告知,我们将会在第一时间删除。文章观点不代表本网站立场,如需处理请联系客服。电话:028-86922220;邮箱:631063699@qq.com。内容未经允许不得转载,或转载时需注明来源: 快上网