본문 바로가기
인공지능/PYTHON

Crawling and Flask (2)

by bibibig_data 2021. 6. 25.

===============================================
 크롤링 요청 페이지 작성
===============================================

 

 c:\dev> tree /F Flask4Crawling2

  Web.
  │  app.py
  │
  ├─static
  │      index.css
  │
  └─templates
          index.html
          result.html    <- 추가하세요

 

-------------------
> index.html      <
-------------------

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"></script>
  <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}"> 
  <title>Document</title>
</head>
<body>
  
  <div id="wrap">
    <h2>Form Grid</h2>
    <p>Create two form elements that appear side by side with .row and .col:</p>
    <form action="/result" method="POST">
        <div class="col">
          <input type="text" class="form-control" placeholder="키워드 입력"     name="input1">
          <input type="number" class="form-control" placeholder="페이지수 설정" name="input2">
          <button type="submit" class="btn btn-primary mt-3">Submit</button>
        </div>
    </form>
  </div>    

</body>  
</html>

----------------
> index.html (위 내용과 동일) <
----------------

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"></script>
  <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}"> 
  <title>Document</title>
</head>
<body>
  
  <div id="wrap">
    <h2>Form Grid</h2>
    <p>Create two form elements that appear side by side with .row and .col:</p>
    <form action="/result" method="POST">
        <div class="col">
          <input type="text" class="form-control" placeholder="키워드 입력"     name="input1">
          <input type="number" class="form-control" placeholder="페이지수 설정" name="input2">
          <button type="submit" class="btn btn-primary mt-3">Submit</button>
        </div>
    </form>
  </div>    

</body>  
</html>

----------------
> index.css    <
----------------

#wrap {
  width: 600px;
  margin: auto;
}

-------------------
> result.html     <
-------------------

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Title</title>
</head>
<body>
  <p>결과 페이지입니다</p>
</body>
</html>

-------------------
> app.py          <
-------------------

from flask import Flask, render_template, request

app = Flask(__name__)


@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    form_page = render_template("index.html")
    return form_page


@app.route('/result', methods=['POST'])
def result():
    """Receive the search form and render the result page.

    Reads ``input1`` (keyword) and ``input2`` (page count) from the posted
    form, echoes both to stdout, and renders ``result.html``.

    The route is registered for POST only, so no extra method check is
    needed; the original ``if request.method == 'POST'`` guard only added an
    implicit ``return None`` path.
    """
    keyword = request.form['input1']
    page = request.form['input2']

    # Echo the submitted values to the console to confirm the form arrived.
    print(keyword)
    print(page)

    return render_template('result.html')


if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    # host='0.0.0.0' binds every network interface; debug=True turns on
    # debug mode.
    # NOTE(review): Flask's documentation warns that the debugger permits
    # arbitrary code execution, so this combination should stay on a trusted
    # network — confirm before deploying.
    app.run(host='0.0.0.0', debug=True)

===============================================
 Page 숫자 설정
===============================================

-------------------
> result.html     <
-------------------

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Title</title>
</head>
<body>

  <ul>
    {% for i in daum_list %}
    <li>{{ i }}</li>
    {% endfor %}
  </ul>

</body>
</html>


-------------------
> app.py          <
-------------------

import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request

app = Flask(__name__)


@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    form_page = render_template("index.html")
    return form_page


@app.route('/result', methods=['POST'])
def result():
    """Search Daum news for the submitted keyword and list the headlines.

    Reads ``input1`` (keyword) and ``input2`` (number of result pages) from
    the posted form, requests each result page from Daum's news search,
    collects the text of every ``a.tit_main`` element, and renders
    ``result.html`` with the collected list as ``daum_list``.

    A blank or non-numeric page count falls back to a single page instead of
    failing the request with an unhandled ``ValueError``. Each HTTP request
    uses a 10-second timeout, so an unresponsive server raises a ``requests``
    exception rather than hanging the worker.
    """
    keyword = request.form['input1']
    try:
        page_count = int(request.form['input2'])
    except ValueError:
        # The number field may be submitted empty; fetch one page by default.
        page_count = 1

    daum_news_list = []

    for page_no in range(1, page_count + 1):
        # Pass the query through ``params`` so requests URL-encodes the
        # keyword; raw concatenation breaks on spaces, '&', '#', etc.
        resp = requests.get(
            'https://search.daum.net/search',
            params={'w': 'news', 'q': keyword, 'p': page_no},
            timeout=10,
        )
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Separate loop name: the original reused ``i`` for both loops.
        daum_news_list.extend(
            anchor.text for anchor in soup.find_all("a", class_="tit_main")
        )

    return render_template('result.html', daum_list=daum_news_list)


if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    # host='0.0.0.0' binds every network interface; debug=True turns on
    # debug mode.
    # NOTE(review): Flask's documentation warns that the debugger permits
    # arbitrary code execution, so this combination should stay on a trusted
    # network — confirm before deploying.
    app.run(host='0.0.0.0', debug=True)

===============================================
 엑셀 파일로 데이터 저장
===============================================

-------------------
> result.html     <
-------------------

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Title</title>
</head>
<body>

  {# One list item per entry of the daum_list value passed by the view. #}
  <ul>
    {% for i in daum_list %}
    <li>{{ i }}</li>
    {% endfor %}
  </ul>

  {# Build the download URL with url_for, as index.html does for index.css,
     instead of a hard-coded path relative to the current page. #}
  <ul>
    <li><a href="{{ url_for('static', filename='result.xlsx') }}">검색 결과 엑셀 파일로 다운로드</a></li>
  </ul>

</body>
</html>

-------------------
> app.py          <
-------------------

from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import requests
from openpyxl import Workbook

app = Flask(__name__)

# The workbook and its active worksheet are created once, at import time, and
# kept as module-level globals for the lifetime of the process.
# NOTE(review): nothing in this file clears cells written into this shared
# sheet, so a handler that reuses it can see rows left by an earlier request.
write_wb = Workbook()
write_ws = write_wb.active


@app.route('/')
def hello_world():
    """Render the ``index.html`` template for the root URL."""
    form_page = render_template("index.html")
    return form_page


@app.route('/result', methods=['POST'])
def result():
    """Search Daum news, save the headlines to Excel, and list them.

    Reads ``input1`` (keyword) and ``input2`` (number of result pages) from
    the posted form, requests each result page from Daum's news search,
    collects the text of every ``a.tit_main`` element, writes the headlines
    to column A of ``static/result.xlsx``, and renders ``result.html`` with
    the list as ``daum_list``.

    A fresh workbook is built for every request. Writing into a workbook
    shared across requests would leave rows from a previous, longer search in
    the file whenever the current search returns fewer headlines.

    A blank or non-numeric page count falls back to a single page. Each HTTP
    request uses a 10-second timeout, so an unresponsive server raises a
    ``requests`` exception rather than hanging the worker.
    """
    keyword = request.form['input1']
    try:
        page_count = int(request.form['input2'])
    except ValueError:
        # The number field may be submitted empty; fetch one page by default.
        page_count = 1

    daum_news_list = []

    for page_no in range(1, page_count + 1):
        # Pass the query through ``params`` so requests URL-encodes the
        # keyword; raw concatenation breaks on spaces, '&', '#', etc.
        resp = requests.get(
            'https://search.daum.net/search',
            params={'w': 'news', 'q': keyword, 'p': page_no},
            timeout=10,
        )
        soup = BeautifulSoup(resp.text, 'html.parser')
        daum_news_list.extend(
            anchor.text for anchor in soup.find_all("a", class_="tit_main")
        )

    # Per-request workbook: one headline per row in column A, starting at A1.
    workbook = Workbook()
    sheet = workbook.active
    for title in daum_news_list:
        sheet.append([title])

    # NOTE(review): the relative path resolves against the process working
    # directory, so the app is expected to be started from the project root.
    workbook.save('static/result.xlsx')

    return render_template('result.html', daum_list=daum_news_list)


if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    # host='0.0.0.0' binds every network interface; debug=True turns on
    # debug mode.
    # NOTE(review): Flask's documentation warns that the debugger permits
    # arbitrary code execution, so this combination should stay on a trusted
    # network — confirm before deploying.
    app.run(host='0.0.0.0', debug=True)


===============================================
 셀레니움을 활용해서 크롤링
===============================================

  c:\dev> tree /F Flask4Crawling2


  Web.
  │  app.py
  │
  ├─static
  │      index.css
  │
  └─templates
          index.html
          shopping_results.html   <- 추가하세요
          result.html

 

-------------------
> index.html      <
-------------------

<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"></script>
  <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}">
  <title>Document</title>
</head>

<body>

  <div id="wrap">
    <div class="news">
      <h3>다음 뉴스</h3>
      <p>키워드와 페이지수를 설정하고 확인 버튼 클릭하세요</p>
      <form action="/result" method="POST">
        <div class="col">
          <input type="text" class="form-control" placeholder="키워드 입력" name="input1">
          <input type="number" class="form-control" placeholder="페이지수 설정" name="input2">
          <button type="submit" class="btn btn-primary mt-3">확인</button>
        </div>
      </form>
    </div><br>

    <div class="naver">
      <h3>네이버 쇼핑</h3>
      <p>쇼핑할 물품을 입력하세요</p>
      <form action="/shopping_results" method="POST">
        <div class="col">
          <input type="text" class="form-control" name="input3">
          <button type="submit" class="btn btn-primary mt-3">확인</button>
        </div>
      </form>
    </div>

  </div>
</body>
</html>


-------------------
> index.css       <
-------------------

#wrap {
  width: 600px;
  margin: auto;
}

.news {
  margin-top: 30px;
}

.naver {
  margin-top: 30px;
}


---------------------------
> shopping_results.html     <
---------------------------

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Title</title>
</head>
<body>
  <ul>
    {% for i in search_list %}
    <li>{{ i }}</li>
    {% endfor %}
  </ul>
</body>
</html>


-------------------
> app.py          <
-------------------

from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import requests
from openpyxl import Workbook
from selenium import webdriver


app = Flask(__name__)

# The workbook and its active worksheet are created once, at import time, and
# kept as module-level globals for the lifetime of the process.
# NOTE(review): nothing in this file clears cells written into this shared
# sheet, so a handler that reuses it can see rows left by an earlier request.
write_wb = Workbook()
write_ws = write_wb.active


@app.route('/')
def hello_world():
    """Render the ``index.html`` template for the root URL."""
    form_page = render_template("index.html")
    return form_page


@app.route('/result', methods=['POST'])
def result():
    """Search Daum news, save the headlines to Excel, and list them.

    Reads ``input1`` (keyword) and ``input2`` (number of result pages) from
    the posted form, requests each result page from Daum's news search,
    collects the text of every ``a.f_link_b`` element, writes the headlines
    to column A of ``static/result.xlsx``, and renders ``result.html`` with
    the list as ``daum_list``.

    A fresh workbook is built for every request. Writing into a workbook
    shared across requests would leave rows from a previous, longer search in
    the file whenever the current search returns fewer headlines.

    A blank or non-numeric page count falls back to a single page. Each HTTP
    request uses a 10-second timeout, so an unresponsive server raises a
    ``requests`` exception rather than hanging the worker.
    """
    keyword = request.form['input1']
    try:
        page_count = int(request.form['input2'])
    except ValueError:
        # The number field may be submitted empty; fetch one page by default.
        page_count = 1

    daum_news_list = []

    for page_no in range(1, page_count + 1):
        # Pass the query through ``params`` so requests URL-encodes the
        # keyword; raw concatenation breaks on spaces, '&', '#', etc.
        resp = requests.get(
            'https://search.daum.net/search',
            params={'w': 'news', 'q': keyword, 'p': page_no},
            timeout=10,
        )
        soup = BeautifulSoup(resp.text, 'html.parser')
        daum_news_list.extend(
            anchor.text for anchor in soup.find_all("a", class_="f_link_b")
        )

    # Per-request workbook: one headline per row in column A, starting at A1.
    workbook = Workbook()
    sheet = workbook.active
    for title in daum_news_list:
        sheet.append([title])

    # NOTE(review): the relative path resolves against the process working
    # directory, so the app is expected to be started from the project root.
    workbook.save('static/result.xlsx')

    return render_template('result.html', daum_list=daum_news_list)


# @app.route('/shopping_results')
# def naver_shopping():
#     driver = webdriver.Chrome()
#     driver.implicitly_wait(3)
#     driver.get( "https://search.shopping.naver.com/search/all_search.nhn?query=과일"
#     soup = BeautifulSoup(driver.page_source, 'html.parser')
#     print(soup)
#     return render_template('shopping_results.html')

def _collect_link_texts(page_source, item_selector):
    """Return the text of the first ``a.link`` in each element matching *item_selector*.

    Items that contain no such anchor are skipped instead of raising
    ``AttributeError`` on ``None``.
    """
    soup = BeautifulSoup(page_source, 'html.parser')
    texts = []
    for item in soup.select(item_selector):
        link = item.find("a", class_="link")
        if link is not None:
            texts.append(link.text)
    return texts


@app.route('/shopping_results', methods=['POST'])
def naver_shopping():
    """Scrape Naver Shopping for the submitted term and list product names.

    Reads ``input3`` from the posted form, opens the Naver Shopping search
    page in a Chrome WebDriver session, and collects product link texts from
    three views in order: the initial results, the view shown after clicking
    the ``_productSet_hotdeal`` element, and the view shown after clicking
    the ``_productSet_overseas`` element. Renders ``shopping_results.html``
    with the combined list as ``search_list``.

    The browser is always closed, even when a step fails, so a failed request
    does not leave a Chrome process behind.
    """
    from urllib.parse import quote_plus

    # ``find_element(By..., value)`` replaces ``find_element_by_class_name``,
    # which Selenium 4 removed.
    from selenium.webdriver.common.by import By

    search = request.form['input3']
    search_list = []

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(3)

        # URL-encode the term so spaces, '&' or '#' cannot corrupt the query.
        driver.get(
            "https://search.shopping.naver.com/search/all_search.nhn?query="
            + quote_plus(search)
        )
        search_list += _collect_link_texts(driver.page_source, "li._itemSection")

        # NOTE(review): page_source is read right after each click, as in the
        # original; if the list loads asynchronously an explicit wait may be
        # needed — confirm against the live page.
        driver.find_element(By.CLASS_NAME, "_productSet_hotdeal").click()
        search_list += _collect_link_texts(driver.page_source, "li._itemSection")

        driver.find_element(By.CLASS_NAME, "_productSet_overseas").click()
        search_list += _collect_link_texts(
            driver.page_source, "div.search_list.basis > ul > li"
        )
    finally:
        # Release the browser process regardless of how scraping ended.
        driver.quit()

    return render_template('shopping_results.html', search_list=search_list)

if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    # host='0.0.0.0' binds every network interface; debug=True turns on
    # debug mode.
    # NOTE(review): Flask's documentation warns that the debugger permits
    # arbitrary code execution, so this combination should stay on a trusted
    # network — confirm before deploying.
    app.run(host='0.0.0.0', debug=True)

'인공지능 > PYTHON' 카테고리의 다른 글

Do it ! - Python (1)  (0) 2021.06.25
크롤링 (2)  (0) 2021.06.25
셀레늄  (0) 2021.06.25
Crawling and Flask  (0) 2021.06.24
크롤링 (2)  (0) 2021.06.23