자바 셀레니움으로 웹업무 자동화하기(개선) _(전체화면 캡쳐)

개발개발·2024년 4월 10일

작년에 공익제보단 활동을 편하게 할 목적으로 웹사이트에서 신고 결과화면을 캡쳐하는 프로그램을 만들었었다.

작년에 사용한 웹사이트는 "스마트 국민제보"였고 올해부터는 "안전신문고" 사이트로 변경한다고 해서 기존 소스를 수정하게 되었다. 작년에 만든 소스를 다시 보니 몇 가지 고치고 싶은 것들이 보였다.

기능 및 공통점에 따라 class로 묶여있거나 메소드로 작성되어 있지 않다.
같은 기능을 다른 웹사이트에서 하게 된다.

그래서 인터페이스를 만들고, 크롤링하고자 하는 사이트마다 하나의 class로 구현하기로 했다.

주요 기능
1. 로그인(인터페이스)
2. 캡쳐할 화면 URL 수집하기(인터페이스)
3. 이미지 저장하기(공통 기능)

부가적인 기능
1. 파일 이름 설정(인터페이스)
-> 신고 상세정보는 캡쳐할 화면에 있기 때문에 URL을 수집할때는 알 수 없다. 그렇기 때문에 새롭게 인터페이스로 만들어서 이미지 저장할때 람다식으로 넘겨준다.

화면 마운트될때까지 대기하는 기능
-> 이미지로 저장할 화면을 window.open으로 실행하고 화면이 마운트 될때까지 기다린다.

아래부터는 기능별로 실제 자바 소스다.

/**
 * Contract for logging in to a web page through a {@code WebDriver}.
 */
public interface WebPageLogin {
    /**
     * Implementations receive the driver and the login information, put the id and
     * password into the page's input fields and press the login button.
     *
     * @param driver    driver showing the login page
     * @param loginInfo credentials to enter
     */
    void login(WebDriver driver,LoginInfo loginInfo);
}

/**
 * Contract for collecting the screens that should later be opened and captured.
 */
public interface WebPageLoading {
    /**
     * Filters the pages by some condition and gathers only the screens that are to
     * be opened.
     *
     * @param driver driver showing the page to collect from
     * @return one string per screen to open; the exact format is up to the
     *         implementation
     */
    List<String> loading(WebDriver driver);
}

/**
 * Contract for deciding the name of the file a captured page is saved under.
 */
public interface FilenameSetter {
	/**
	 * Crawls the detail page for the file name to save under.
	 *
	 * @param webdriver driver showing the detail page
	 * @return the file name to use
	 */
	String setFileName(WebDriver webdriver);
}

/**
 * Contract for waiting until a page is ready to be captured.
 */
public interface Waiter {
	/**
	 * Gives the screen that is going to be saved time to finish mounting.
	 *
	 * @param driver driver showing the page being waited for
	 * @return NOTE(review): presumably {@code true} once the page is ready - the
	 *         meaning of the result is not visible here, confirm against callers
	 */
    boolean until(WebDriver driver);
}

아래는 안전신문고 사이트에서 사용하기 위해서 구현한 자바 소스다.

// 1. 로그인
@Override
    public void login(WebDriver driver, LoginInfo loginInfo) {
        // Types the credentials into the login form and submits it.
        // The XPath expressions below were looked up with the browser's developer tools.
        final By idInput = By.xpath("//*[@id=\"username\"]");
        final By passwordInput = By.xpath("//*[@id=\"password\"]");
        final By loginButton =
                By.xpath("//*[@id=\"contents\"]/div/ul/li[1]/article/div[1]/p[3]/button");

        driver.findElement(idInput).sendKeys(loginInfo.getId());
        driver.findElement(passwordInput).sendKeys(loginInfo.getPw());
        driver.findElement(loginButton).click();

        // Fixed pause after the click before the caller continues.
        MyUtils.sleep(500);
    }

//2. 캡쳐할 화면 URL 수집하기
    /**
     * Walks through the paginated report list and collects, for every report whose
     * state icon has the class {@code ico_state_end} (i.e. the report is closed), a
     * {@code window.open(...)} script that opens its detail page.
     *
     * <p>The list URL does not change when paging, so paging is done by clicking the
     * "next" button. The button is looked up again right before every click because
     * the element from a previous page is no longer valid after the table re-renders.
     *
     * <p>Rows that lack the hidden id input or the state icon are skipped instead of
     * aborting the whole run: {@code findElement} throws when nothing matches (it
     * never returns {@code null}), so such rows are probed with {@code findElements}.
     *
     * @param driver driver showing the first page of the report list
     * @return scripts of the form {@code window.open('<detail url>');}, in list order
     * @throws NumberFormatException if the total count shown on the page is not a number
     */
    @Override
    public List<String> loading(WebDriver driver) {
        final By totalCountLocator =
                By.cssSelector("#contents > div.table_bbs.list.tb_sty01 > p > strong");
        final By nextButtonLocator =
                By.cssSelector("#table1 > tfoot > tr > td > ul > li:nth-last-child(2) > a");
        final By tableBodyLocator = By.cssSelector("#table1Body");
        final By hiddenIdLocator = By.cssSelector("td > input[type=hidden]");
        final By stateIconLocator = By.cssSelector("td.bbs_subject > span");

        List<String> list = new ArrayList<>();

        // Total number of reports. Surrounding whitespace and thousands separators
        // are tolerated (assumption: a large count may be rendered like "1,234").
        String countText = driver.findElement(totalCountLocator).getText().trim().replace(",", "");
        int count = Integer.parseInt(countText);

        // Number of pages, assuming 10 rows per page.
        int loop = count / 10 + (count % 10 > 0 ? 1 : 0);
        System.out.println(loop);

        for (int i = 0; i < loop; i++) {
            MyUtils.sleep(300);

            WebElement el = driver.findElement(tableBodyLocator);
            List<WebElement> tr = el.findElements(By.tagName("tr"));

            // Collect the addresses. Filtering by earlier reports / period would go here.
            for (WebElement row : tr) {
                count -= 1;

                List<WebElement> hiddenInputs = row.findElements(hiddenIdLocator);
                List<WebElement> stateIcons = row.findElements(stateIconLocator);
                if (hiddenInputs.isEmpty() || stateIcons.isEmpty()) {
                    continue;
                }

                // Only reports whose progress state is "closed" are saved.
                String state = stateIcons.get(0).getAttribute("class");
                if (!"ico_state_end".equals(state)) {
                    continue;
                }

                String reportId = hiddenInputs.get(0).getDomAttribute("value");
                if (reportId == null) {
                    continue;
                }
                list.add(String.format(
                        "window.open('https://www.safetyreport.go.kr/#mypage/mysafereport/%s');",
                        reportId));
            }

            // Move on only while rows remain; the button is resolved here, and only
            // here, so an empty or single-page list never requires it to exist.
            if (count > 0) {
                driver.findElement(nextButtonLocator).click();
                MyUtils.sleep(500);
            }
        }

        return list;
    }


//3 이미지 저장하기(공통기능) - ImageMerge.java파일의 일부...
/*
	해당 내용은 속도 및 메모리 사용을 개선하기 위해서 수정중이다.
	3-1 이미지 캡쳐하기
	3-2 캡쳐된 이미지 하나로 합치기
*/

	// 3-1 이미지 캡쳐하기
    /**
     * Captures the page shown in {@code windowName} as a sequence of screenshots,
     * scrolling down by one screenshot height between captures, then closes that
     * window.
     *
     * <p>The number of captures is not known up front: it depends on the height of
     * the screenshots actually produced, not on the browser window height, and the
     * loop allows 100px of slack. The captures are therefore gathered in a growable
     * list; a fixed array sized from the window height could be overrun.
     *
     * <p>Side effects: stores a {@code MergeInfo} (file name, content height, final
     * scroll offset) in {@code concurrentHashMap} under {@code windowName}, and
     * closes the captured window.
     *
     * <p>NOTE(review): {@code mainWindow} is not used. After {@code driver.close()}
     * the driver still points at the closed window, so the caller presumably has to
     * switch back itself - confirm.
     *
     * @param mainWindow handle of the main window (currently unused)
     * @param windowName handle of the window to capture
     * @return the screenshot files from top to bottom, with no {@code null} entries
     */
    public File[] imageSave(String mainWindow, String windowName) {
        // Switch to the window that is going to be captured.
        driver.switchTo().window(windowName);
        webElement = driver.findElement(By.xpath("/html/body"));

        // Only vertical scrolling is handled. If the window were narrower than the
        // content, horizontal scrolling would be needed as well and the result would
        // have to become a two-dimensional File array.
        int contentHeight = webElement.getSize().getHeight();
        String fileName = filenameSetter.setFileName(driver);

        // Take one probe screenshot to learn how tall a single capture really is.
        BufferedImage temp = MyUtils.getBufferedImage(webElement.getScreenshotAs(OutputType.FILE));
        int imgHeight = temp.getHeight();

        java.util.List<File> captured = new java.util.ArrayList<>();
        int totalCapturedImg = 0;
        Object lastY = null;

        while (totalCapturedImg <= contentHeight + 100) {
            // Screenshot files land in the temp directory, e.g.
            // C:/Users/<user name>/AppData/Local/Temp
            captured.add(webElement.getScreenshotAs(OutputType.FILE));

            // Scroll down by the height of the images captured so far.
            js.executeScript("window.scrollTo(0," + imgHeight * captured.size() + ")");
            totalCapturedImg += imgHeight;

            // If the scroll position did not move, the bottom has been reached.
            Object currentY = js.executeScript("return window.scrollY");
            if (lastY != null && lastY.equals(currentY)) {
                break;
            }
            lastY = currentY;
            sleep(10);
        }

        // The final scrollY is needed when merging: the last capture overlaps the
        // previous one and has to be drawn at this offset.
        int lastScrollHeigh = Math.round(Float.parseFloat(String.valueOf(lastY)));

        // Stored in an object with multi-threading in mind.
        concurrentHashMap.put(windowName, new MergeInfo(fileName, contentHeight, lastScrollHeigh));
        driver.close();

        return captured.toArray(new File[0]);
    }

// 3-2 캡쳐된 이미지 하나로 합치기
/**
 * Stitches the captured screenshots into one PNG and writes it to
 * {@code saveDir + fileName + ".png"}; if that file already exists, the first free
 * {@code fileName_(n).png} is used instead.
 *
 * <p>Images are drawn top to bottom, each below the previous one. The last image
 * overlaps the one before it, so it is drawn at the recorded final scroll offset
 * rather than at the running height; this reproduces the original page.
 *
 * <p>{@code null} entries in {@code images} are ignored wherever they occur. Each
 * input stream is closed as soon as its image has been decoded, and the graphics
 * context and image buffers are released even when writing fails.
 *
 * @param images    screenshot files from top to bottom; may contain {@code null} entries
 * @param mergeInfo file name, total content height and final scroll offset of the capture
 * @throws IOException              if an image cannot be read or the result cannot be written
 * @throws IllegalArgumentException if {@code images} holds no file at all
 */
public void mergeImage(File[] images, MergeInfo mergeInfo) throws IOException {
    String fileName = mergeInfo.getFileName();
    int contentHeight = mergeInfo.getContentHeight();
    int lastScrollHeigh = mergeInfo.getLastScrollHeigh();

    // Drop the null slots first so that array indexes and image count always agree.
    File[] sources = Arrays.stream(images).filter(f -> f != null).toArray(File[]::new);
    if (sources.length == 0) {
        throw new IllegalArgumentException("No captured images to merge for " + fileName);
    }

    BufferedImage[] frames = new BufferedImage[sources.length];
    int width = 0;
    for (int i = 0; i < sources.length; i++) {
        // ImageIO.read decodes the whole image, so the stream can be closed right away.
        try (FileInputStream in = new FileInputStream(sources[i])) {
            frames[i] = ImageIO.read(in);
        }
        if (frames[i] == null) {
            throw new IOException("Not a readable image: " + sources[i]);
        }
        width = Math.max(width, frames[i].getWidth());
    }

    BufferedImage mergedImage = new BufferedImage(width, contentHeight, BufferedImage.TYPE_INT_RGB);
    try {
        Graphics2D graphics = (Graphics2D) mergedImage.getGraphics();
        try {
            // setBackground only takes effect through clearRect; without it the
            // areas not covered by a screenshot would stay black.
            graphics.setBackground(Color.WHITE);
            graphics.clearRect(0, 0, width, contentHeight);

            int tempHeight = 0;
            for (int i = 0; i < frames.length; i++) {
                boolean isLast = i == frames.length - 1;
                int cutHeight = isLast ? lastScrollHeigh : tempHeight;
                graphics.drawImage(frames[i], 0, cutHeight, null);
                tempHeight += frames[i].getHeight();
            }
        } finally {
            graphics.dispose();
        }

        File saveFile = new File(saveDir + fileName + ".png");
        int i = 0;
        while (saveFile.exists()) {
            saveFile = new File(saveDir + fileName + "_(" + i + ").png");
            i += 1;
        }

        if (!ImageIO.write(mergedImage, "png", saveFile)) {
            throw new IOException("No PNG writer available for " + saveFile);
        }
    } finally {
        // Release the pixel buffers whether or not the write succeeded.
        for (BufferedImage frame : frames) {
            if (frame != null) {
                frame.flush();
            }
        }
        mergedImage.flush();
    }
}

전체 소스는 깃허브에서 확인할 수 있다.

개발개발

청포도루이보스민트티

이전 포스트

FileOutputStream으로 이미지속에 이미지 숨기기

다음 포스트

자바 셀레니움으로 웹업무 자동화하기(개선) _(전체화면 캡쳐)

FileOutputStream으로 이미지속에 이미지 숨기기

자바 셀레니움으로 웹업무 자동화하기(부록) _(성능 개선기)

0개의 댓글