===============================================
크롤링 요청 페이지 작성
===============================================
c:\dev> tree /F Flask4Crawling2
Web.
│ app.py
│
├─static
│ index.css
│
└─templates
index.html
result.html <- 추가하세요
-------------------
> index.html <
-------------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"></script>
    <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}">
    <title>Document</title>
</head>
<body>
    <div id="wrap">
        <h2>Form Grid</h2>
        <p>Create two form elements that appear side by side with .row and .col:</p>
        <!-- Posts the keyword (input1) and the page count (input2) to /result. -->
        <form action="/result" method="POST">
            <div class="col">
                <!-- "required" and min="1" stop the browser from submitting an
                     empty keyword or an empty / non-positive page count. -->
                <input type="text" class="form-control" placeholder="키워드 입력" name="input1" required>
                <input type="number" class="form-control" placeholder="페이지수 설정" name="input2" min="1" required>
                <button type="submit" class="btn btn-primary mt-3">Submit</button>
            </div>
        </form>
    </div>
</body>
</html>
----------------
> index.html <
----------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"></script>
    <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}">
    <title>Document</title>
</head>
<body>
    <div id="wrap">
        <h2>Form Grid</h2>
        <p>Create two form elements that appear side by side with .row and .col:</p>
        <!-- Posts the keyword (input1) and the page count (input2) to /result. -->
        <form action="/result" method="POST">
            <div class="col">
                <!-- "required" and min="1" stop the browser from submitting an
                     empty keyword or an empty / non-positive page count. -->
                <input type="text" class="form-control" placeholder="키워드 입력" name="input1" required>
                <input type="number" class="form-control" placeholder="페이지수 설정" name="input2" min="1" required>
                <button type="submit" class="btn btn-primary mt-3">Submit</button>
            </div>
        </form>
    </div>
</body>
</html>
----------------
> index.css <
----------------
/* Fixed 600px-wide column, centred horizontally via auto margins. */
#wrap {
    margin: auto;
    width: 600px;
}
-------------------
> result.html <
-------------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <!-- Static placeholder: this version of the page shows no search data yet. -->
    <p>결과 페이지입니다</p>
</body>
</html>
-------------------
> app.py <
-------------------
from flask import Flask, render_template, request
# Application object; the route decorators below register their views on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the search form page (``index.html``)."""
    return render_template("index.html")
@app.route('/result', methods=['POST'])
def result():
    """Handle the search form submission and show the placeholder result page.

    Reads ``input1`` (keyword) and ``input2`` (page count) from the POSTed
    form, prints both to the console and renders ``result.html``.
    """
    # The route is registered for POST only, so no request.method check is
    # needed; every request reaching this view is a POST.
    keyword = request.form['input1']
    page = request.form['input2']
    print(keyword)
    print(page)
    return render_template('result.html')
if __name__ == '__main__':
    # Listens on all interfaces with debug mode on. Keep this for local
    # practice only: Flask's debug mode enables an interactive debugger that
    # can execute arbitrary code, so never expose it on a public network.
    app.run(host='0.0.0.0', debug=True)
===============================================
Page 숫자 설정
===============================================
-------------------
> result.html <
-------------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <!-- One list item per entry of the daum_list template variable. -->
    <ul>
        {% for headline in daum_list %}
        <li>{{ headline }}</li>
        {% endfor %}
    </ul>
</body>
</html>
-------------------
> app.py <
-------------------
import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request
# Application object; the route decorators below register their views on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the search form page (``index.html``)."""
    return render_template("index.html")
@app.route('/result', methods=['POST'])
def result():
    """Search Daum news for the submitted keyword and list the headlines.

    Form fields:
        input1: search keyword.
        input2: number of result pages to fetch; falls back to 1 when the
            field is missing or is not an integer.

    Returns:
        The rendered ``result.html`` with ``daum_list`` set to the text of
        every ``a.tit_main`` element found on the fetched pages.
    """
    keyword = request.form['input1']
    # type=int converts the field and returns the default on a failed
    # conversion, instead of raising ValueError for an empty input.
    page_count = request.form.get('input2', default=1, type=int)

    daum_news_list = []
    for page_no in range(1, page_count + 1):
        # Passing the query via params lets requests URL-encode the keyword,
        # so characters such as '&' or '#' cannot break the query string.
        resp = requests.get(
            'https://search.daum.net/search',
            params={'w': 'news', 'q': keyword, 'p': page_no},
            timeout=10,  # do not hang the request forever on a stalled server
        )
        soup = BeautifulSoup(resp.text, 'html.parser')
        daum_news_list.extend(
            link.text for link in soup.find_all("a", class_="tit_main")
        )
    return render_template('result.html', daum_list=daum_news_list)
if __name__ == '__main__':
    # Listens on all interfaces with debug mode on. Keep this for local
    # practice only: Flask's debug mode enables an interactive debugger that
    # can execute arbitrary code, so never expose it on a public network.
    app.run(host='0.0.0.0', debug=True)
===============================================
엑셀 파일로 데이터 저장
===============================================
-------------------
> result.html <
-------------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <!-- One list item per entry of the daum_list template variable. -->
    <ul>
        {% for i in daum_list %}
        <li>{{ i }}</li>
        {% endfor %}
    </ul>
    <ul>
        <!-- url_for builds the static URL the same way index.html does for
             index.css, instead of relying on a relative path. -->
        <li><a href="{{ url_for('static', filename='result.xlsx') }}">검색 결과 엑셀 파일로 다운로드</a></li>
    </ul>
</body>
</html>
-------------------
> app.py <
-------------------
from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import requests
from openpyxl import Workbook
# Application object; the route decorators below register their views on it.
app = Flask(__name__)
# The workbook and its active sheet are created once at import time, so these
# objects live for the whole process rather than per request.
write_wb = Workbook()
write_ws = write_wb.active
@app.route('/')
def hello_world():
    """Serve the search form page (``index.html``)."""
    return render_template("index.html")
@app.route('/result', methods=['POST'])
def result():
    """Search Daum news, save the headlines to Excel and list them.

    Form fields:
        input1: search keyword.
        input2: number of result pages to fetch; falls back to 1 when the
            field is missing or is not an integer.

    Side effects:
        Overwrites ``result.xlsx`` in the application's static folder with
        one headline per row in column 1.

    Returns:
        The rendered ``result.html`` with ``daum_list`` set to the text of
        every ``a.tit_main`` element found on the fetched pages.
    """
    import os

    keyword = request.form['input1']
    # type=int converts the field and returns the default on a failed
    # conversion, instead of raising ValueError for an empty input.
    page_count = request.form.get('input2', default=1, type=int)

    daum_news_list = []
    for page_no in range(1, page_count + 1):
        # Passing the query via params lets requests URL-encode the keyword,
        # so characters such as '&' or '#' cannot break the query string.
        resp = requests.get(
            'https://search.daum.net/search',
            params={'w': 'news', 'q': keyword, 'p': page_no},
            timeout=10,  # do not hang the request forever on a stalled server
        )
        soup = BeautifulSoup(resp.text, 'html.parser')
        daum_news_list.extend(
            link.text for link in soup.find_all("a", class_="tit_main")
        )

    # Use a fresh workbook per request: a workbook reused across requests
    # keeps rows from an earlier, longer search below the new results.
    workbook = Workbook()
    sheet = workbook.active
    for row, title in enumerate(daum_news_list, start=1):
        # Same as sheet.cell(row=row, column=1, value=title).
        sheet.cell(row, 1, title)
    # Resolve the path from the app's static folder so the save does not
    # depend on the current working directory.
    workbook.save(os.path.join(app.static_folder, 'result.xlsx'))
    return render_template('result.html', daum_list=daum_news_list)
if __name__ == '__main__':
    # Listens on all interfaces with debug mode on. Keep this for local
    # practice only: Flask's debug mode enables an interactive debugger that
    # can execute arbitrary code, so never expose it on a public network.
    app.run(host='0.0.0.0', debug=True)
===============================================
셀레니움을 활용해서 크롤링
===============================================
c:\dev> tree /F Flask4Crawling2
Web.
│ app.py
│
├─static
│ index.css
│
└─templates
index.html
shopping_results.html <- 추가하세요
result.html
-------------------
> index.html <
-------------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"></script>
    <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}">
    <title>Document</title>
</head>
<body>
    <div id="wrap">
        <!-- Daum news search: posts keyword (input1) and page count (input2) to /result. -->
        <div class="news">
            <h3>다음 뉴스</h3>
            <p>키워드와 페이지수를 설정하고 확인 버튼 클릭하세요</p>
            <form action="/result" method="POST">
                <div class="col">
                    <!-- "required" and min="1" stop the browser from submitting an
                         empty keyword or an empty / non-positive page count. -->
                    <input type="text" class="form-control" placeholder="키워드 입력" name="input1" required>
                    <input type="number" class="form-control" placeholder="페이지수 설정" name="input2" min="1" required>
                    <button type="submit" class="btn btn-primary mt-3">확인</button>
                </div>
            </form>
        </div><br>
        <!-- Naver shopping search: posts the product name (input3) to /shopping_results. -->
        <div class="naver">
            <h3>네이버 쇼핑</h3>
            <p>쇼핑할 물품을 입력하세요</p>
            <form action="/shopping_results" method="POST">
                <div class="col">
                    <input type="text" class="form-control" name="input3" required>
                    <button type="submit" class="btn btn-primary mt-3">확인</button>
                </div>
            </form>
        </div>
    </div>
</body>
</html>
-------------------
> index.css <
-------------------
/* Fixed 600px-wide column, centred horizontally via auto margins. */
#wrap {
    margin: auto;
    width: 600px;
}

/* Both search sections get the same top spacing. */
.news,
.naver {
    margin-top: 30px;
}
---------------------------
> shopping_results.html <
---------------------------
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <!-- One list item per entry of the search_list template variable. -->
    <ul>
        {% for product in search_list %}
        <li>{{ product }}</li>
        {% endfor %}
    </ul>
</body>
</html>
-------------------
> app.py <
-------------------
from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import requests
from openpyxl import Workbook
from selenium import webdriver
# Application object; the route decorators below register their views on it.
app = Flask(__name__)
# The workbook and its active sheet are created once at import time, so these
# objects live for the whole process rather than per request.
write_wb = Workbook()
write_ws = write_wb.active
@app.route('/')
def hello_world():
    """Serve the search form page (``index.html``)."""
    return render_template("index.html")
@app.route('/result', methods=['POST'])
def result():
    """Search Daum news, save the headlines to Excel and list them.

    Form fields:
        input1: search keyword.
        input2: number of result pages to fetch; falls back to 1 when the
            field is missing or is not an integer.

    Side effects:
        Overwrites ``result.xlsx`` in the application's static folder with
        one headline per row in column 1.

    Returns:
        The rendered ``result.html`` with ``daum_list`` set to the text of
        every ``a.f_link_b`` element found on the fetched pages.
    """
    import os

    keyword = request.form['input1']
    # type=int converts the field and returns the default on a failed
    # conversion, instead of raising ValueError for an empty input.
    page_count = request.form.get('input2', default=1, type=int)

    daum_news_list = []
    for page_no in range(1, page_count + 1):
        # Passing the query via params lets requests URL-encode the keyword,
        # so characters such as '&' or '#' cannot break the query string.
        resp = requests.get(
            'https://search.daum.net/search',
            params={'w': 'news', 'q': keyword, 'p': page_no},
            timeout=10,  # do not hang the request forever on a stalled server
        )
        soup = BeautifulSoup(resp.text, 'html.parser')
        daum_news_list.extend(
            link.text for link in soup.find_all("a", class_="f_link_b")
        )

    # Use a fresh workbook per request: a workbook reused across requests
    # keeps rows from an earlier, longer search below the new results.
    workbook = Workbook()
    sheet = workbook.active
    for row, title in enumerate(daum_news_list, start=1):
        # Same as sheet.cell(row=row, column=1, value=title).
        sheet.cell(row, 1, title)
    # Resolve the path from the app's static folder so the save does not
    # depend on the current working directory.
    workbook.save(os.path.join(app.static_folder, 'result.xlsx'))
    return render_template('result.html', daum_list=daum_news_list)
# @app.route('/shopping_results')
# def naver_shopping():
# driver = webdriver.Chrome()
# driver.implicitly_wait(3)
# driver.get("https://search.shopping.naver.com/search/all_search.nhn?query=과일")
# soup = BeautifulSoup(driver.page_source, 'html.parser')
# print(soup)
# return render_template('shopping_results.html')
def _extract_link_texts(page_source, selector):
    """Return the text of the first ``a.link`` in each element matching *selector*."""
    soup = BeautifulSoup(page_source, 'html.parser')
    texts = []
    for item in soup.select(selector):
        link = item.find("a", class_="link")
        # Skip items without a product link instead of failing on None.text.
        if link is not None:
            texts.append(link.text)
    return texts


@app.route('/shopping_results', methods=['POST'])
def naver_shopping():
    """Scrape Naver shopping product names for the submitted search term.

    Reads ``input3`` from the POSTed form, opens the search page in Chrome
    through Selenium, and collects product link texts from the initial
    listing, then after clicking the ``_productSet_hotdeal`` element, then
    after clicking the ``_productSet_overseas`` element.

    Returns:
        The rendered ``shopping_results.html`` with ``search_list`` set to
        the collected texts.
    """
    from urllib.parse import quote

    from selenium.webdriver.common.by import By

    search = request.form['input3']
    search_list = []
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(3)
        # Percent-encode the term so '&', '#' or spaces cannot alter the URL.
        driver.get(
            "https://search.shopping.naver.com/search/all_search.nhn?query="
            + quote(search, safe='')
        )
        search_list.extend(
            _extract_link_texts(driver.page_source, "li._itemSection")
        )

        # NOTE(review): page_source is read right after click(); the implicit
        # wait only applies to element lookups, so the refreshed listing may
        # not be loaded yet - confirm whether an explicit wait is needed.
        driver.find_element(By.CLASS_NAME, "_productSet_hotdeal").click()
        search_list.extend(
            _extract_link_texts(driver.page_source, "li._itemSection")
        )

        driver.find_element(By.CLASS_NAME, "_productSet_overseas").click()
        search_list.extend(
            _extract_link_texts(
                driver.page_source, "div.search_list.basis > ul > li"
            )
        )
    finally:
        # Always close the browser; otherwise every request leaves a Chrome
        # process running, including when a lookup above raises.
        driver.quit()
    return render_template('shopping_results.html', search_list=search_list)
if __name__ == '__main__':
    # Listens on all interfaces with debug mode on. Keep this for local
    # practice only: Flask's debug mode enables an interactive debugger that
    # can execute arbitrary code, so never expose it on a public network.
    app.run(host='0.0.0.0', debug=True)
'인공지능 > PYTHON' 카테고리의 다른 글
Do it ! - Python (1) (0) | 2021.06.25 |
---|---|
크롤링 (2) (0) | 2021.06.25 |
셀레늄 (0) | 2021.06.25 |
Crawling and Flask (0) | 2021.06.24 |
크롤링 (2) (0) | 2021.06.23 |