[제로베이스 데이터 취업 스쿨 15기] 6주차 (EDA: 4. Selenium Basics)

김지환·2023년 6월 10일
0
post-thumbnail

6주차: 6/5/2023 - 6/11/2023


Selenium webdriver


# !pip install webdriver_manager

from selenium import webdriver

from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Let webdriver_manager supply the ChromeDriver that the Service object wraps.
chrome_service = Service(ChromeDriverManager().install())

# Start a Chrome session through that service and open the first page.
driver = webdriver.Chrome(service=chrome_service)
driver.get("https://pinkwink.kr")

Basic commands

# --- Window geometry ---------------------------------------------------
# Maximize, then minimize, then pin the window to an explicit size.
driver.maximize_window()
driver.minimize_window()
driver.set_window_size(width=600, height=600)

# --- Navigation --------------------------------------------------------
# Reload the current page, then step backward and forward in the history.
driver.refresh()
driver.back()
driver.forward()

# --- Click -------------------------------------------------------------
from selenium.webdriver.common.by import By

# CSS path of the first <li> inside the masonry cover list.
first_item_css = '#content > div.cover-masonry > div > ul > li:nth-child(1)'
first_content = driver.find_element(By.CSS_SELECTOR, first_item_css)
first_content.click()

# --- Tabs --------------------------------------------------------------
# Open a second tab from JavaScript.
open_naver_js = 'window.open("https://www.naver.com")'
driver.execute_script(open_naver_js)

# Move the driver's focus to the second handle (index 1), then check how
# many handles the session currently has.
tab_handles = driver.window_handles
driver.switch_to.window(tab_handles[1])
len(driver.window_handles)

# Close only the tab that currently has focus.
driver.close()

# End the whole session (every remaining tab goes with it).
driver.quit()

Scroll

# NOTE(review): `driver.quit()` at the end of the basic-commands section ends
# the session, so when this file is run top to bottom a new driver has to be
# created (and the page loaded) before the calls below can work.

# JavaScript snippets used for scrolling.
get_height_js = 'return document.body.scrollHeight'
to_bottom_js = 'window.scrollTo(0, document.body.scrollHeight);'
to_top_js = 'window.scrollTo(0, 0);'

# Ask the page for the full height of <body>.
driver.execute_script(get_height_js)

# Jump to the bottom of the page.
driver.execute_script(to_bottom_js)

# Save a screenshot of the browser at this point.
driver.save_screenshot('./last_height.png')

# Jump back to the top of the page.
driver.execute_script(to_top_js)

# Scroll to a specific tag by moving the (virtual) mouse onto it.
from selenium.webdriver import ActionChains

target_css = '#content > div.cover-thumbnail-list > div > ul > li:nth-child(1)'
some_tag = driver.find_element(By.CSS_SELECTOR, target_css)
action = ActionChains(driver)
action.move_to_element(some_tag)
action.perform()

Search word

# Start a fresh Chrome session and open Naver.
naver_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=naver_service)
driver.get('https://www.naver.com')

# Locate the search box, empty it, and type the search word.
query_css = '#query'
keyword = driver.find_element(By.CSS_SELECTOR, query_css)
keyword.clear()
keyword.send_keys('파이썬')

# Locate the submit button of the search form and press it.
submit_css = '#sform > fieldset > button'
search_btn = driver.find_element(By.CSS_SELECTOR, submit_css)
search_btn.click()
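  • XPATH
    - '//': search the whole document (matches nodes at any depth, not only the top level)
    - '*': wildcard that matches any element, whatever its tag name
    - '/': step to a direct child element (a leading '/' starts at the document root)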
# Find the element whose id is "query" via XPath and type into it.
query_box = driver.find_element(By.XPATH, '//*[@id="query"]')
query_box.send_keys('xpath')

# Find the button under the "sform" fieldset via XPath and click it.
submit_button = driver.find_element(By.XPATH, '//*[@id="sform"]/fieldset/button')
submit_button.click()
# Search through a header search box that only opens after its icon is clicked.
#
# NOTE(review): the preceding section leaves the browser on naver.com, while
# the selectors below ('.search', '#header > div.search.on ...') presumably
# belong to the https://pinkwink.kr page opened at the top of this file.
# Load that page first; TODO confirm.

# 1. Select the search button
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Explicit wait: poll for up to 10 seconds instead of failing on the first
# lookup. Raises TimeoutException if the element never becomes clickable.
wait = WebDriverWait(driver, 10)

search_tag = driver.find_element(By.CSS_SELECTOR, '.search')
action = ActionChains(driver)
action.click(search_tag)
action.perform()

# 2. Enter the search word
# The selector requires the `on` class on div.search, so wait for the input
# to be clickable rather than looking it up right after the click above.
search_input = wait.until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '#header > div.search.on > input[type=text]')
    )
)
search_input.send_keys('딥러닝')

# 3. Click the search button
submit_button = wait.until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '#header > div.search.on > button')
    )
)
submit_button.click()

Selenium + BeautifulSoup

# The HTML of the page the driver is currently on.
driver.page_source

from bs4 import BeautifulSoup

# Hand that HTML to BeautifulSoup, using the built-in 'html.parser'.
req = driver.page_source
soup = BeautifulSoup(markup=req, features='html.parser')

# Every element carrying the "post-item" class.
post_item_css = '.post-item'
soup.select(post_item_css)

# Keep the matches and check how many there are.
contents = soup.select(post_item_css)
len(contents)

# Inspect the third match (index 2).
contents[2]
profile
데이터 분석 공부하고 있습니다

0개의 댓글