リクエストする。天気予報アプリ

ターミナルで次のコマンドを実行します。

pip install requests

何をしてもらっているかわかりますか？

requestsという呪文集（モジュール）を、pipにインストールしてもらっています。

これで、次のファイルを実行すると…

import requests
# Fetch the page over HTTP, then print the body of the response as text.
rq = requests.get('http://www.yahoo.co.jp')
print(rq.text)

printされる情報は、'http://www.yahoo.co.jp' のページの情報（HTML）全てです。

Webにある情報を得ることをクローリングといいます。

クローリングした情報から特定の情報を抜き出すことをスクレイピングといいます。

スクレイピングをするための呪文集をインストール

呪文集（ライブラリ）の名前は

BeautifulSoup

です。

pip install bs4

あと、これを実行したいなぁとおもって実行してみると

import re
import requests
from bs4 import BeautifulSoup
import json

def main(url):
    """Fetch an hourly-forecast page, print its contents and return the data.

    Args:
        url: URL of a one-hour forecast page whose title contains
            "<place>の1時間天気" and which holds the elements
            ``forecast-point-1h-today``, ``forecast-point-1h-tomorrow`` and
            ``forecast-point-1h-dayaftertomorrow``.

    Returns:
        A dict with the key ``location`` (place name taken from the page
        title) and the keys ``today``, ``tomorrow`` and ``dayaftertomorrow``,
        each holding the dict that ``forecast2dict`` builds for that day.
        The result can be serialised with
        ``json.dumps(result, ensure_ascii=False)``.

    Raises:
        IndexError: if the page title does not match "<place>の1時間天気".
    """
    # Parse the page with bs4.
    page = soup(url)

    # Named ``result`` rather than ``dict`` so the builtin is not shadowed.
    result = {}

    # Forecast location: the part of the title in front of "の1時間天気".
    l_pattern = r"(.+)の1時間天気"
    result['location'] = re.findall(l_pattern, page.title.text)[0]
    print(result['location'] + "の天気")

    # Each day has its own element whose id ends with the day name, so one
    # loop replaces the three copy-pasted find/convert pairs.
    for day in ("today", "tomorrow", "dayaftertomorrow"):
        section = page.find(id='forecast-point-1h-' + day)
        result[day] = forecast2dict(section)

    # Return the collected data instead of discarding it, so callers can
    # post-process it or dump it as JSON.
    return result

def soup(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            server that never answers cannot hang the program forever.

    Returns:
        The BeautifulSoup tree built from the response body with the
        ``lxml`` parser.
    """
    r = requests.get(url, timeout=timeout)
    # Hand the raw bytes to BeautifulSoup rather than re-encoding ``r.text``.
    # ``r.text.encode(r.encoding)`` raises TypeError when ``r.encoding`` is
    # None (no charset reported by the server) and can raise
    # UnicodeEncodeError when decoding substituted replacement characters.
    # The bytes in ``r.content`` are what that round trip tried to recover.
    return BeautifulSoup(r.content, 'lxml')

def forecast2dict(soup):
    """Convert one day's hourly-forecast element into a dict and print it.

    Args:
        soup: Parsed element for a single day. Only ``select`` is called on
            it, and ``.text`` is read from the selected elements.

    Returns:
        A dict with two keys:
        ``date`` is the year, month and day found in the ``.head p`` element,
        joined with hyphens. ``forecasts`` is a list with one dict per table
        column (at most 24), with the keys ``hour``, ``weather``,
        ``temperature``, ``prob-precip``, ``precipitation``, ``humidity``,
        ``wind-blow`` and ``wind-speed``; every value is the stripped cell
        text.

    Raises:
        IndexError: if there is no ``.head p`` element or it holds no date of
            the form "<y>年<m>月<d>日".
    """
    data = {}

    # Date handling: pull year, month and day out of the header text.
    d_pattern = r"(\d+)年(\d+)月(\d+)日"
    header = soup.select('.head p')
    date = re.findall(d_pattern, header[0].text)[0]
    data["date"] = "-".join(date)
    print("=====" + data["date"] + "=====")

    # Hourly data. Each key doubles as the CSS class of the table row that
    # holds its cells, so the selectors are derived from the keys.
    keys = (
        "hour",
        "weather",
        "temperature",
        "prob-precip",
        "precipitation",
        "humidity",
        "wind-blow",
        "wind-speed",
    )
    columns = [soup.select('.' + key + ' > td') for key in keys]

    # zip() walks the rows in lockstep and stops at the shortest one, so a
    # table with fewer than 24 columns no longer raises IndexError. The
    # slice keeps the previous upper bound of 24 entries per day.
    data["forecasts"] = []
    for cells in list(zip(*columns))[:24]:
        forecast = {key: cell.text.strip() for key, cell in zip(keys, cells)}
        data["forecasts"].append(forecast)

        report = [
            "時刻         ： " + forecast["hour"] + "時",
            "天気         ： " + forecast["weather"],
            "気温(C)      ： " + forecast["temperature"],
            "降水確率(%)  ： " + forecast["prob-precip"],
            "降水量(mm/h) ： " + forecast["precipitation"],
            "湿度(%)      ： " + forecast["humidity"],
            "風向         ： " + forecast["wind-blow"],
            # Fixed: this line used to print the weather text again instead
            # of the wind speed.
            "風速(m/s)    ： " + forecast["wind-speed"],
        ]
        # The trailing "\n" keeps the blank line between hourly entries.
        print("\n".join(report) + "\n")

    return data

if __name__ == "__main__":
    # URL of the hour-by-hour weather page for Shinjuku ward.
    URL = (
        'https://tenki.jp/forecast/3/16/4410/13104/1hour.html'
    )
    main(URL)