data:image/s3,"s3://crabby-images/cee00/cee00d1c9a36a29ff19e816d894a07643e34b877" alt=""
ipythonとpandasとlxmlをインストールしておいてipython notebookを起動
pip install ipython[all]
pip install pandas
pip install lxml
ipython notebook
でnotebook上で
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notebook cell: scrape a race card plus each runner's recent races and show
# them together as one transposed HTML table (one column per horse).
from io import StringIO

import pandas as pd
from IPython.display import HTML
from lxml import html

# Plain-text stand-in for <br>: the literal two characters backslash + "n".
# It is written into cell text before the DataFrames are built and swapped
# back to <br> only after to_html() has rendered the final table.
BR_MARK = '\\n'

# Race card (entry table).
denma_url = 'http://keiba.yahoo.co.jp/race/denma/1405040911/'
xpath = '//table'
denma_tree = html.parse(denma_url)
denma_table = denma_tree.xpath(xpath)[2]
denma_html = html.tostring(denma_table)
denma_html = denma_html.decode('utf-8').replace('<br>', BR_MARK)
# Wrap the markup in StringIO: recent pandas versions deprecate passing
# literal HTML strings to read_html, while file-like input is always accepted.
denma = pd.read_html(StringIO(denma_html), header=0)[0]
# Resolve relative hrefs so the per-horse links below can be fetched directly.
denma_table.make_links_absolute()

# Race history of each horse.
# One cell per past race, built from the columns of a history-table row.
# Lines are joined with BR_MARK (not real newlines) so that the final
# replace() turns them into <br> exactly like the race-card cells.
f = BR_MARK.join([
    '{0} {2}',
    '{1}',
    '{10} {5}頭 {7}番 {8}人',
    '{13}kg {11} {12}',
    '{3} {4} {14}',
    '{16} {15}F',
    '({17})',
])
hist_columns = ['前走', '前々走', '3走前', '4走前', '5走前']
hist = []
for uma_url in [x[2].find('a').get('href') for x in denma_table[1:]]:
    uma_tree = html.parse(uma_url)
    uma_table = uma_tree.xpath(xpath)[6]
    # Rows 1..5 of the history table: at most the five most recent entries.
    races = [f.format(*[col.text_content() for col in row])
             for row in uma_table[1:6]]
    # Pad horses with fewer than five listed races so every row has exactly
    # len(hist_columns) entries; otherwise the DataFrame constructor fails
    # when no horse has a full five-race history.
    races += [None] * (len(hist_columns) - len(races))
    hist.append(races)
hist = pd.DataFrame(hist, columns=hist_columns)

# Vertical form table: race card followed by the history columns in reversed
# order, transposed so each horse becomes one column.
bachu = pd.concat([denma, hist.iloc[:, ::-1]], axis=1).T

# Display in the IPython notebook. The wide max_colwidth keeps long cell
# text from being truncated; the last expression is the cell's output.
pd.set_option('display.max_colwidth', 1000)
HTML(bachu.to_html(classes='bachu').replace(BR_MARK, '<br>'))
to_htmlでclassも設定できるのでhtmlファイルに吐き出した後cssつければ見やすくできるかも.表示だけじゃなくてpandasでデータ処理して色々すればおもしろいか.勝馬予想するとかdoc2vec使って名前で配合するとか.