CERTIFICATE_VERIFY_FAILED

Requesting an HTTPS page whose certificate chain cannot be validated locally fails with:

(Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1129)')))

The quickest workaround is to skip verification entirely (insecure, so only do this for throwaway scraping):

requests.get(url, verify=False)
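A safer sketch, assuming the failure comes from a stale or incomplete local CA store, is to point requests at the certifi bundle instead of disabling verification:

import certifi
import requests

# Verify against an up-to-date CA bundle instead of turning verification off.
# If the server uses a private CA, pass that CA's PEM path here instead.
response = requests.get(url, verify=certifi.where())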
DH_KEY_TOO_SMALL

Some older servers negotiate a Diffie-Hellman key shorter than modern OpenSSL allows, which fails with:

(Caused by SSLError(SSLError(1, '[SSL: DH_KEY_TOO_SMALL] dh key too small (_ssl.c:1007)')))
import requests
import urllib3

# Excluding DH ciphers (!DH) sidesteps the DH_KEY_TOO_SMALL handshake failure.
requests.packages.urllib3.disable_warnings()
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
try:
    requests.packages.urllib3.contrib.pyopenssl.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
except AttributeError:
    # no pyopenssl support used / needed / available
    pass
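Note that DEFAULT_CIPHERS only exists in urllib3 1.x; it was removed in urllib3 2.x. A rough equivalent for newer versions (a sketch; DHAdapter is a name chosen here, not a library class) mounts an HTTPAdapter that builds its pool with a custom SSL context:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.ssl_ import create_urllib3_context

class DHAdapter(HTTPAdapter):
    # Hypothetical helper: an adapter whose cipher list excludes DH key exchange.
    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = create_urllib3_context(ciphers='HIGH:!DH:!aNULL')
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('https://', DHAdapter())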
Collecting the HTML
import requests
import urllib3
from bs4 import BeautifulSoup

requests.packages.urllib3.disable_warnings()
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
try:
    requests.packages.urllib3.contrib.pyopenssl.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
except AttributeError:
    # no pyopenssl support used / needed / available
    pass

url = 'https://www.kweather.co.kr/forecast/forecast_living_jisu.html'
response = requests.get(url, verify=False)
if response.status_code == 200:
    text = response.text
Converting to a BeautifulSoup object and selecting the needed HTML region
soup = BeautifulSoup(text, 'html.parser')
navi = soup.select_one('ul#jisu_navi_content')
navi_a = navi('a')  # calling a Tag is shorthand for navi.find_all('a')
a = navi_a[0]       # first region link (서울/경기)
print(a)
Extracting the XML address values

Extracting a single one

xml = a.attrs['href'].split('U/')[1].split("',")[0]  # filename between 'JISU/' and the closing quote
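This split assumes the href embeds a path like '/data/JISU/<file>.xml' followed by a quote and comma. A made-up href illustrating the logic:

# Hypothetical href of the shape the split expects:
href = "javascript:goJisu('/data/JISU/seoul.xml','서울/경기');"
xml = href.split('U/')[1].split("',")[0]  # 'seoul.xml'
code = xml.split('.')[0]                  # 'seoul'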
Building the full XML address list
xml_list = []
code_list = []
for a in navi_a:
    xml = a.attrs['href'].split('U/')[1].split("',")[0]
    code = xml.split('.')[0]  # region code = filename without the .xml extension
    xml_list.append(xml)
    code_list.append(code)
Processing a single XML document
xml_path = xml_list[0]
url = f'https://www.kweather.co.kr/data/JISU/{xml_path}'
response = requests.get(url, verify=False)
if response.status_code == 200:
    response.encoding = 'utf-8'  # or response.encoding = None to let requests guess
    text = response.text

soup = BeautifulSoup(text, 'html.parser')  # html.parser matches tag names in lowercase
ndate = soup.find('ndate', num='0')
num = ndate('num')
name = ndate('name')
jnum = ndate('jnum')
jtext = ndate('jtext')

num_list = [n.text for n in num]
name_list = [n.text for n in name]
jnum_list = [n.text for n in jnum]
jtext_list = [n.text for n in jtext]
Zipping the lists together into one dict
ndate_obj = zip(num_list, name_list, jnum_list, jtext_list)
ndate_list = list(ndate_obj)
ndate_dict = {code_list[0]: ndate_list}
print(ndate_dict)
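For intuition, zip() pairs the i-th items of each list into 4-tuples, so the dict maps one region code to a list of (num, name, jnum, jtext) tuples. A tiny illustration with made-up values:

nums = ['1', '2']
names = ['index_a', 'index_b']  # hypothetical index names
jnums = ['3', '1']
jtexts = ['high', 'low']        # hypothetical level texts
print({'seoul': list(zip(nums, names, jnums, jtexts))})
# {'seoul': [('1', 'index_a', '3', 'high'), ('2', 'index_b', '1', 'low')]}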
Processing all the XML documents

Collecting all the XML data
raw_data = []
for xml_path in xml_list:
    url = f'https://www.kweather.co.kr/data/JISU/{xml_path}'
    #print(url)
    response = requests.get(url, verify=False)
    if response.status_code == 200:
        response.encoding = 'utf-8'
        text = response.text
        raw_data.append(text)
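An optional refinement (a sketch, not part of the original run): reuse one HTTPS connection via a Session and pause briefly between requests to be polite to the server.

import time
import requests

session = requests.Session()
raw_data = []
for xml_path in xml_list:
    response = session.get(f'https://www.kweather.co.kr/data/JISU/{xml_path}', verify=False)
    if response.status_code == 200:
        response.encoding = 'utf-8'
        raw_data.append(response.text)
    time.sleep(0.5)  # arbitrary delay; tune as needed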
Processing all the collected data
all_zip_dict = []
for i, text in enumerate(raw_data):
    soup = BeautifulSoup(text, 'lxml')
    ndate = soup.find('ndate', num='0')
    num = ndate('num')
    name = ndate('name')
    jnum = ndate('jnum')
    jtext = ndate('jtext')
    num_list = [n.text for n in num]
    name_list = [n.text for n in name]
    jnum_list = [n.text for n in jnum]
    jtext_list = [n.text for n in jtext]
    zip_obj = zip(num_list, name_list, jnum_list, jtext_list)
    zip_list = list(zip_obj)
    zip_dict = {code_list[i]: zip_list}
    all_zip_dict.append(zip_dict)
Building the final per-date data dict
date = soup.tm.text.replace('/', '')  # <tm> date of the last parsed document, with '/' stripped
final_data = {date: all_zip_dict}
print(len(final_data))  # a single key: the date
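A possible next step (a sketch; the filename is arbitrary): since final_data is plain dicts, lists, and tuples of strings, it can be persisted as JSON.

import json

with open('jisu_data.json', 'w', encoding='utf-8') as f:  # hypothetical filename
    json.dump(final_data, f, ensure_ascii=False, indent=2)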