且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

一个Java实现的有道云笔记中包含的图片文件的批量下载工具

更新时间:2022-09-11 18:16:16

package youdao;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Task that downloads one image and stores it as
 * {@code <parent>/clipboard<N>.png}, where {@code N} is {@code index + 1}.
 *
 * <p>Any failure (bad URL, network error, unwritable target) is printed with
 * {@code printStackTrace()} and not rethrown.
 */
class ImageDownloader implements Runnable {
    /** Address the image is read from. */
    private final String mUrl;
    /** Zero-based position of the image; the file name uses {@code index + 1}. */
    private final int index;
    /** Directory the image is written into. */
    private final String parent;

    /**
     * @param parent directory that receives the downloaded file
     * @param url    address of the image to download
     * @param index  zero-based position of the image, used to number the file
     */
    public ImageDownloader(String parent, String url, int index){
        this.mUrl = url;
        this.index = index;
        this.parent = parent;
    }

    /**
     * Reads the whole image into memory, then writes it to the target file.
     */
    @Override
    public void run() {
        try {
            ByteArrayOutputStream downloaded = new ByteArrayOutputStream();

            // try-with-resources closes the stream even when a read fails.
            try (DataInputStream in = new DataInputStream(new URL(this.mUrl).openStream())) {
                byte[] buffer = new byte[8192];
                int length;
                // read() returns -1 at end of stream.
                while ((length = in.read(buffer)) != -1) {
                    downloaded.write(buffer, 0, length);
                }
            }

            // The file number is derived without mutating the field, so running
            // the same task again rewrites the same file instead of the next one.
            File target = new File(parent + "/clipboard" + (this.index + 1) + ".png");

            // The target is opened only after the download completed, so a failed
            // download never leaves an empty or truncated file behind.
            try (FileOutputStream out = new FileOutputStream(target)) {
                out.write(downloaded.toByteArray());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * Command-line tool that walks a shared Youdao notebook and downloads the
 * images referenced by each note into one folder per note under
 * {@link #RESULTFOLDER}.
 *
 * <p>Flow: {@link #main} calls {@link #getPictureList()}, which fetches the
 * notebook listing and calls {@link #download(JSONObject)} for every note.
 */
public class YoudaoNoteBatch {

    /** Share id appended to both the notebook and the note endpoints. */
    private static final String SHAREID = "a9d31900bb8d0b89ce7b15fa30d1f0d3";
    /** Endpoint prefix for the notebook listing. */
    private static final String FOLDERURL = "https://note.youdao.com/yws/public/notebook/";
    /** Endpoint prefix for a single note. */
    private static final String NOTEURL = "https://note.youdao.com/yws/public/note/";
    /** Local root folder; one sub-folder per note is created below it. */
    private static final String RESULTFOLDER = "C:/Users/i042416/Pictures/wiki/";

    /**
     * Suffix removed from a note title to build its folder name.
     * NOTE(review): the previous code cut a fixed 5 characters, which matches
     * the length of this suffix - confirm that titles always end this way.
     */
    private static final String NOTE_SUFFIX = ".note";

    /** Matches a span starting at an img tag that carries a src attribute. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img.*src\\s*=\\s*(.*?)[^>]*?>", Pattern.CASE_INSENSITIVE);
    /** Captures the value of a src attribute in group 1. */
    private static final Pattern SRC_ATTRIBUTE =
            Pattern.compile("src\\s*=\\s*\"?(.*?)(\"|>|\\s+)");

    /** Single client reused for every request instead of one unclosed client per call. */
    private static final HttpClient HTTP_CLIENT = HttpClients.createDefault();

    /**
     * Performs a GET request and returns the response body decoded as UTF-8.
     *
     * @param url address to fetch
     * @return the response body
     * @throws IOException if the request or reading the body fails
     */
    private static String fetch(String url) throws IOException {
        HttpResponse response = HTTP_CLIENT.execute(new HttpGet(url));
        HttpEntity entity = response.getEntity();
        return EntityUtils.toString(entity, "UTF-8");
    }

    /**
     * Creates the folder, including missing parent folders, and reports on
     * stderr when it could not be created.
     *
     * @param title path of the folder to create
     */
    private static void createFolder(String title){
        File folder = new File(title);
        // mkdirs() returns false when the folder already exists, hence the second check.
        if (!folder.mkdirs() && !folder.isDirectory()) {
            System.err.println("could not create folder: " + title);
        }
    }

    /**
     * Fetches one note and collects the src values found in its content.
     *
     * <p>The note id is the third "/"-separated segment of the note's "p"
     * field. If any src value contains "https://wiki.jerry/wiki/", the content
     * is printed and an empty list is returned. Any exception is printed and
     * terminates the JVM with exit code -1.
     *
     * @param note entry from the notebook listing
     * @return the collected src values, possibly empty
     */
    private static List<String> getImageURLArray(JSONObject note){
        String[] uniqueIDs = note.getString("p").split("/");
        String uniqueID = uniqueIDs[2];
        String noteurl = NOTEURL + SHAREID + "/" + uniqueID;

        List<String> imageList = new ArrayList<String>();
        try {
            JSONObject noteDetail = (JSONObject) JSON.parse(fetch(noteurl));
            String content = noteDetail.getString("content");

            Matcher tagMatcher = IMG_TAG.matcher(content);
            while (tagMatcher.find()) {
                // A single tag match may span several src attributes, so scan them all.
                Matcher srcMatcher = SRC_ATTRIBUTE.matcher(tagMatcher.group());
                while (srcMatcher.find()) {
                    String image = srcMatcher.group(1);

                    // Jerry: why this can happen??
                    if (image.contains("https://wiki.jerry/wiki/")) {
                        imageList.clear();
                        System.out.println("content: " + content);
                        return imageList;
                    }
                    imageList.add(image);
                }
            }
        }
        catch(Exception e){
            e.printStackTrace();
            System.exit(-1);
        }
        return imageList;
    }

    /**
     * Downloads all images of one note on a pool of 10 threads and blocks
     * until every download task has finished.
     *
     * @param images image addresses to download
     * @param parent folder that receives the files
     * @param title  note title, used for log output only
     */
    private static void start(List<String> images, String parent, String title){
        System.out.println("note image size : " + images.size() + " note: " + title );
        ExecutorService executor = Executors.newFixedThreadPool(10);

        for (int i = 0; i < images.size(); i++) {
            executor.execute(new ImageDownloader(parent, images.get(i), i));
        }

        executor.shutdown();
        try {
            // Block instead of spinning on isTerminated(), which burned a CPU core.
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            executor.shutdownNow();
            // Restore the interrupt flag for whoever called us.
            Thread.currentThread().interrupt();
            System.err.println("download interrupted for note: " + parent);
            return;
        }

        System.out.println("download finished for note: " + parent);
    }

    /**
     * Downloads the images of one note into a folder named after the note's
     * "tl" field with a trailing ".note" removed.
     *
     * @param note entry from the notebook listing
     */
    private static void download(JSONObject note){
        List<String> images = getImageURLArray(note);
        if (images.isEmpty()) {
            System.err.println("note: " + note.getString("tl") + " image corrupted. Please retry.");
            return;
        }

        String name = note.getString("tl");
        // Strip the suffix only when present; blindly cutting 5 characters threw
        // on short titles and truncated titles without the suffix.
        String folderName = name.endsWith(NOTE_SUFFIX)
                ? name.substring(0, name.length() - NOTE_SUFFIX.length())
                : name;
        String parent = RESULTFOLDER + folderName;

        createFolder(parent);
        start(images, parent, folderName);
    }

    /**
     * Fetches the notebook listing and downloads every note in it.
     *
     * <p>The response is parsed as a JSON array whose element 0 is read as the
     * note count and whose element 2 is read as the array of notes. Any
     * exception is printed and terminates the JVM with exit code -1.
     */
    private static void getPictureList(){
        String entry = FOLDERURL + SHAREID;
        try {
            JSONArray aResult = (JSONArray) JSON.parse(fetch(entry));
            int totalCount = (int) aResult.get(0);
            JSONArray aNoteList = (JSONArray) aResult.get(2);
            System.out.println("Total note: " + totalCount);

            // Never read past the notes actually returned, even if the reported count is larger.
            int available = Math.min(totalCount, aNoteList.size());
            for (int i = 0; i < available; i++) {
                JSONObject note = (JSONObject) aNoteList.get(i);
                download(note);
            }
        }
        catch(Exception e){
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Entry point: downloads the whole notebook, then prints a completion banner.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        getPictureList();
        System.out.println("*********************** DONE !!! *************************");
    }
}