In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from bs4 import BeautifulSoup

このnotebookでやってみたこと(´・ω・`)

  1. みずほ銀行ヒストリカルデータ取得
  2. kuchartの計算
  3. oandaのレート取得
  4. ビットコインの板のグルーピング

1. みずほ銀行のヒストリカルデータ取得

qiitaで見かけた記事でみずほ銀行のデータを使ってたので自分も取得してみた

In [2]:
# Fetch rates: Mizuho Bank publishes historical FX quotes as a plain CSV
# (no login/auth required).  header=2 skips the title rows; column 0 is the
# quote date and becomes a DatetimeIndex via parse_dates.
url = 'https://www.mizuhobank.co.jp/rate/market/csv/quote.csv'
df = pd.read_csv(url, header=2, index_col=0, parse_dates=True)
print(df.shape)
df.head(3)
(3801, 37)
Out[2]:
USD GBP EUR CAD CHF SEK DKK NOK AUD NZD ... CZK PLN RUB TRY Unnamed: 32 IDR(100).1 CNY.1 MYR.1 KRW(100).1 TWD
2002-04-01 133.15 189.79 116.12 83.48 79.28 12.87 15.63 15.08 71.14 58.80 ... ***** ***** NaN NaN NaN 1.37 16.09 ***** ***** 3.82
2002-04-02 133.20 191.78 117.18 83.38 80.15 13.00 15.77 15.24 71.02 58.86 ... ***** ***** NaN NaN NaN 1.38 16.09 ***** ***** 3.82
2002-04-03 133.20 191.26 116.96 83.65 80.02 12.95 15.75 15.23 71.14 58.81 ... ***** ***** NaN NaN NaN 1.37 16.09 ***** ***** 3.82

3 rows × 37 columns

In [3]:
# Inspect dtypes — several columns came in as object because the CSV uses
# '*****' placeholders (visible in the head() output above).
df.dtypes
Out[3]:
USD            float64
GBP            float64
EUR            float64
CAD            float64
CHF            float64
SEK            float64
DKK            float64
NOK            float64
AUD            float64
NZD            float64
ZAR            float64
BHD            float64
IDR(100)        object
CNY             object
HKD            float64
INR            float64
MYR             object
PHP            float64
SGD            float64
KRW(100)        object
THB            float64
KWD            float64
SAR            float64
AED            float64
MXN            float64
PGK             object
HUF             object
CZK             object
PLN             object
RUB            float64
TRY            float64
Unnamed: 32    float64
IDR(100).1     float64
CNY.1           object
MYR.1           object
KRW(100).1      object
TWD            float64
dtype: object
In [4]:
# Keep only the float64 columns; the object-dtype columns (those containing
# '*****' placeholders) are dropped here.
df = df.loc[:, df.dtypes==np.float64]
print(df.shape)
df.head(3)
(3801, 26)
Out[4]:
USD GBP EUR CAD CHF SEK DKK NOK AUD NZD ... THB KWD SAR AED MXN RUB TRY Unnamed: 32 IDR(100).1 TWD
2002-04-01 133.15 189.79 116.12 83.48 79.28 12.87 15.63 15.08 71.14 58.80 ... 3.07 434.14 35.52 36.26 14.81 NaN NaN NaN 1.37 3.82
2002-04-02 133.20 191.78 117.18 83.38 80.15 13.00 15.77 15.24 71.02 58.86 ... 3.06 435.01 35.53 36.27 14.80 NaN NaN NaN 1.38 3.82
2002-04-03 133.20 191.26 116.96 83.65 80.02 12.95 15.75 15.23 71.14 58.81 ... 3.04 436.58 35.53 36.27 14.77 NaN NaN NaN 1.37 3.82

3 rows × 26 columns

In [5]:
# Drop any column that still contains NaN anywhere in its history,
# so every remaining series is complete.
df.dropna(axis=1, inplace=True)
print(df.shape)
df.head(3)
(3801, 23)
Out[5]:
USD GBP EUR CAD CHF SEK DKK NOK AUD NZD ... INR PHP SGD THB KWD SAR AED MXN IDR(100).1 TWD
2002-04-01 133.15 189.79 116.12 83.48 79.28 12.87 15.63 15.08 71.14 58.80 ... 2.73 2.61 72.21 3.07 434.14 35.52 36.26 14.81 1.37 3.82
2002-04-02 133.20 191.78 117.18 83.38 80.15 13.00 15.77 15.24 71.02 58.86 ... 2.73 2.62 72.18 3.06 435.01 35.53 36.27 14.80 1.38 3.82
2002-04-03 133.20 191.26 116.96 83.65 80.02 12.95 15.75 15.23 71.14 58.81 ... 2.74 2.62 72.12 3.04 436.58 35.53 36.27 14.77 1.37 3.82

3 rows × 23 columns

2. kuchart計算

上記のみずほ銀行のデータは対円レートなので(いや、よく確認してないけど)kuchart計算しやすいぞ
と思ってkuchartを計算
ここは遊び心をきかせてワンライナーで(´・ω・`)ノ
(lambda使いまくって無理やり感ありますが(^^;))

計算式はこちらの http://fxst24.blog.fc2.com/
「Ku-Chartの簡単な計算方法」のやり方がいい感じなので参考にさせてもらいました

In [6]:
# Ku-chart in one (deliberately convoluted) line.  Steps, inside-out:
#   1. log-returns of every column (quoted vs JPY), with a zero row prepended
#      so the result aligns with df's index
#   2. subtract the cross-sectional mean return per row; the divisor is
#      len(df.columns)+1 because JPY itself counts as one more currency
#   3. append -mean as the JPY column
ku = DataFrame((lambda arr:(lambda ret:(lambda ret,a:np.concatenate([np.subtract(ret,a),-a],axis=1))(ret,(ret.sum(axis=1)/(len(df.columns)+1))[:,np.newaxis]))(np.concatenate([[np.zeros(arr.shape[1])],np.log(arr[1:]/arr[:-1])])))(df.values),index=df.index,columns=list(df.columns)+['JPY'])
In [7]:
# Plot colors: a fixed palette for the 8 major currencies, black for the rest.
KU_CLR = {'EUR':'red', 'GBP':'green', 'USD':'orange', 'JPY':'turquoise',
          'AUD':'royalblue', 'NZD':'violet', 'CAD':'blueviolet', 'CHF':'gray'}

def ku2clr(df_or_col):
    """Map currency names to matplotlib color names.

    Args:
        df_or_col: a DataFrame (its columns are used) or an iterable of
            currency codes.

    Returns:
        list of color names from KU_CLR, 'k' (black) for unknown currencies.
    """
    col = df_or_col.columns if isinstance(df_or_col, DataFrame) else df_or_col
    # dict.get avoids the double lookup of `d[s] if s in d else default`
    return [KU_CLR.get(s, 'k') for s in col]

# Cumulative Ku values for all currencies (legend suppressed — too many lines).
ku.cumsum().plot(figsize=(15,10), lw=1, color=ku2clr(ku), legend=None)
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0xca2d3c8>
In [8]:
# The full plot above is hard to read, so show only the 8 major currencies.
major_symbols = list(KU_CLR.keys())
ku8 = ku[major_symbols]
ku8.cumsum().plot(figsize=(15,5), lw=1, color=ku2clr(major_symbols), legend=None)
plt.legend(major_symbols)
Out[8]:
<matplotlib.legend.Legend at 0xc9f0c18>
In [9]:
# Restart the cumulative sum at the start of each year (groupby on index.year,
# iterating the GroupBy directly); dotted vertical lines mark year boundaries.
gb = ku8.groupby(ku8.index.year)
plt.figure(figsize=(15,5))
ax = plt.subplot()
for k,v in gb:
    v.cumsum().plot(lw=1, color=ku2clr(v), ax=ax, legend=None)
    plt.axvline(v.index[0], lw=0.5, c='k', ls=':')
plt.legend(ku8)
Out[9]:
<matplotlib.legend.Legend at 0xe718358>

このgroupbyオブジェクトをそのままforで回せるって最近知りました 便利(*´・ω・`*)

ワンライナーはさておき、
たいていはドルストレートから計算すると思うのでこんな感じにしました
(一応、基準となる通貨が揃っていればドルストじゃなくても計算できるようにはなっている)
※さっきのみずほ銀行のデータだと引数を calc_ku(df, basis='JPY') て感じで指定

In [10]:
def calc_ku(df, basis='USD'):
    """Compute a Ku-chart from a frame of currency pairs.

    Args:
        df: DataFrame of pair prices; every column must involve `basis`
            (e.g. 'EURUSD', 'USDJPY' for basis='USD').
        basis: the common currency; it gets its own output column.

    Returns:
        DataFrame of per-currency relative log-returns (first row is zero),
        one column per non-basis currency plus a final `basis` column.
    """
    prices = df.values
    # +1: the column's first currency strengthens; -1: basis is the first
    # currency, so the pair's return must be inverted.
    signs = np.array([-1 if sym[:3] == basis else 1 for sym in df.columns])
    log_ret = np.concatenate([[np.zeros(prices.shape[1])],
                              np.log(prices[1:]/prices[:-1])])
    signed = log_ret * signs
    # Cross-sectional mean; divisor counts the basis currency as well.
    mean_ret = (signed.sum(axis=1) / (prices.shape[1]+1))[:, np.newaxis]
    ku_values = np.concatenate([np.subtract(signed, mean_ret), -mean_ret], axis=1)
    labels = [sym.replace(basis, '') for sym in df.columns] + [basis]
    return DataFrame(ku_values, index=df.index, columns=labels)
In [12]:
# Given a DataFrame of USD-quoted pairs like this (loaded from local MT4
# history).  NOTE(review): df2 is not defined anywhere in this notebook —
# this cell relies on kernel state (execution count In[11] is missing),
# so it will fail on Restart & Run All.
print(df2.shape)
df2.head()
(4022, 7)
Out[12]:
EURUSD GBPUSD USDJPY AUDUSD NZDUSD USDCAD USDCHF
Time
2002-04-01 0.8804 1.4408 133.38 0.5340 0.4417 1.5972 1.6620
2002-04-02 0.8795 1.4360 133.20 0.5342 0.4411 1.5930 1.6635
2002-04-03 0.8812 1.4358 132.72 0.5320 0.4395 1.5900 1.6598
2002-04-04 0.8776 1.4336 132.37 0.5318 0.4413 1.5941 1.6666
2002-04-05 0.8785 1.4321 131.60 0.5306 0.4375 1.5883 1.6670
In [13]:
# Ku chart from the 7 USD-quoted pairs; calc_ku appends the basis (USD)
# column, hence 8 output columns.
kuh1 = calc_ku(df2)
print(kuh1.shape)
kuh1.cumsum().plot(figsize=(15,5), lw=1, color=ku2clr(kuh1))
(4022, 8)
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0xf345d30>

OK(´・ω・`)

3. oandaのデータ取得

みずほ銀行のデータを取得してみて、
ほかにログインとか認証とかしなくても取得できる為替のデータってなにがあるかなぁと調べてたら、
oandaのヒストリカルデータはapiの認証しなくても取得できることに気づいた(´・ω・`)

In [14]:
# Scrape the list of tradable pairs from OANDA Japan's product page.
# NOTE(review): depends on the page's current HTML layout ('.tb-service'
# table) — brittle if the site changes.
soup = BeautifulSoup(requests.get('https://www.oanda.jp/service/fxtrade.php').content, 'lxml')
oanda_symbols = [tr.select('td')[0].text.replace('/','')
                 for tr in soup.select('.tb-service tbody tr')
                 if tr.select('td')]
print(oanda_symbols)
['USDJPY', 'EURJPY', 'AUDJPY', 'GBPJPY', 'NZDJPY', 'CADJPY', 'CHFJPY', 'ZARJPY', 'EURUSD', 'GBPUSD', 'NZDUSD', 'AUDUSD', 'USDCHF', 'EURCHF', 'GBPCHF', 'EURGBP', 'AUDNZD', 'AUDCAD', 'AUDCHF', 'CADCHF', 'EURAUD', 'EURCAD', 'EURDKK', 'EURNOK', 'EURNZD', 'EURSEK', 'GBPAUD', 'GBPCAD', 'GBPNZD', 'NZDCAD', 'NZDCHF', 'USDCAD', 'USDDKK', 'USDNOK', 'USDSEK', 'AUDHKD', 'AUDSGD', 'CADHKD', 'CADSGD', 'CHFHKD', 'CHFZAR', 'EURCZK', 'EURHKD', 'EURHUF', 'EURPLN', 'EURSGD', 'EURTRY', 'EURZAR', 'GBPHKD', 'GBPPLN', 'GBPSGD', 'GBPZAR', 'HKDJPY', 'NZDHKD', 'NZDSGD', 'SGDCHF', 'SGDHKD', 'SGDJPY', 'TRYJPY', 'USDCNH', 'USDCZK', 'USDHKD', 'USDHUF', 'USDINR', 'USDMXN', 'USDPLN', 'USDSAR', 'USDSGD', 'USDTHB', 'USDTRY', 'USDZAR']
In [15]:
# Fetch 3 one-minute USDJPY candles from the OANDA v1 practice API
# (this endpoint responds without an auth token).
res = requests.get('https://api-fxpractice.oanda.com/v1/candles',
                   params={'granularity':'M1', 'count':3, 'instrument':'USD_JPY'})
DataFrame(res.json()['candles']).head()
Out[15]:
closeAsk closeBid complete highAsk highBid lowAsk lowBid openAsk openBid time volume
0 112.007 111.986 True 112.011 111.986 112.007 111.983 112.010 111.983 2017-09-22T20:57:00.000000Z 3
1 112.007 111.977 True 112.007 111.984 112.005 111.977 112.005 111.984 2017-09-22T20:58:00.000000Z 2
2 112.023 111.961 True 112.023 111.961 112.023 111.961 112.023 111.961 2017-09-22T20:59:00.000000Z 1

ふむふむ(´・ω・`)

In [16]:
def oanda_ohlc(symbol, tf='M1', count=10, start=None, end=None,
               unixtime=True, alignmentTimezone='America/New_York',
               adjhour=9, timeout=10, **kw):
    """Fetch OHLC candles from the OANDA v1 REST API as a DataFrame.

    Args:
        symbol: pair like 'EURUSD' or 'EUR_USD'.
        tf: candle granularity ('M1', 'H1', ...).
        count: number of candles; removed when start/end is given.
        start, end: anything pd.to_datetime accepts, in local (JST) time.
        unixtime: request/parse timestamps in unix format instead of RFC3339.
        alignmentTimezone: accepted for API symmetry but not forwarded here;
            pass it via **kw if it should reach the API.
        adjhour: hours added to API times on the way out (9 = JST offset),
            subtracted from start/end on the way in.
        timeout: requests timeout in seconds.
        **kw: extra query parameters forwarded verbatim.

    Returns:
        DataFrame indexed by 'Date' with Open/High/Low/Close/Volume/Complete
        (bid-side prices), or None when the API returns a non-200 status.
    """
    # Insert the underscore OANDA expects ('EURUSD' -> 'EUR_USD').
    symbol = symbol if '_' in symbol else symbol[:3]+'_'+symbol[3:]
    headers = {'X-Accept-Datetime-Format': 'unix'} if unixtime else {}
    params = {'granularity': tf, 'count': count, 'instrument': symbol}
    params.update(**kw)
    # Shift user-supplied times from JST back to the API's clock.
    adj = lambda x: pd.to_datetime(x)-pd.offsets.Hour(adjhour)
    if unixtime:
        # NOTE(review): this sends nanoseconds (int64 of datetime64[ns]) for
        # start/end, while the response 'time' below is parsed with unit='us'
        # — confirm which resolution the v1 unix format actually expects.
        f = lambda x: adj(x).to_datetime64().astype(np.int64)
        to_dt = lambda df: pd.to_datetime(df['time'], unit='us')+pd.offsets.Hour(adjhour)
    else:
        f = lambda x: adj(x).strftime('%Y-%m-%dT%H:%MZ')
        to_dt = lambda df: pd.to_datetime(df['time'])+pd.offsets.Hour(adjhour)

    if start: params['start'] = f(start)
    if end: params['end'] = f(end)
    # 'count' may not be combined with an explicit time range.
    if start or end: params.pop('count')
    # url = 'https://api-fxtrade.oanda.com/v1/candles'
    url = 'https://api-fxpractice.oanda.com/v1/candles'
    res = requests.get(url, headers=headers, params=params, timeout=timeout)
    if res.status_code!=200:
        # Dump the failing request for debugging, then bail out with None.
        print('status:{}\nurl:{}\nheaders:{}\nparams:{}'.format(
              res.status_code, res.url, headers, params))
        return None
    df = DataFrame(res.json()['candles'])
    df = df.set_index(to_dt(df)).drop('time', axis=1)
    # Keep bid-side columns only, then strip the 'Bid' suffix and capitalize.
    df = df.loc[:, ['Ask' not in i for i in df.columns]]
    df.columns = df.columns.map(lambda x: x.replace('Bid', '').capitalize())
    df.index.name = 'Date'
    return df[['Open', 'High', 'Low', 'Close', 'Volume', 'Complete']]

oanda_df = oanda_ohlc('EURUSD', tf='H1', start='20170101', end='20170601')
In [17]:
# Inspect the fetched hourly candles (Jan–May 2017).
print(oanda_df.shape)
oanda_df.head(3)
(2561, 6)
Out[17]:
Open High Low Close Volume Complete
Date
2017-01-03 08:00:00 1.04656 1.04699 1.04552 1.04567 974 True
2017-01-03 09:00:00 1.04569 1.04705 1.04564 1.04656 481 True
2017-01-03 10:00:00 1.04657 1.04800 1.04638 1.04750 664 True
In [18]:
# Quick sanity plot of the fetched close prices.
oanda_df['Close'].plot(figsize=(15,3))
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x105e2550>

とれたヾ(´・ω・`)ノ゙

In [ ]:
 

4. ↑までとはぜんぜん関係ないけど、pandasで板のgrouping

In [19]:
# Fetch the bitFlyer order book and split it into ask/bid entry lists
# (each entry is a {'price': ..., 'size': ...} dict).
board = requests.get('https://api.bitflyer.jp/v1/board').json()
asks, bids = board['asks'], board['bids']
print('len', len(asks), len(bids))
print('asks', asks[:2])
print('bids', bids[:2])
len 1423 2221
asks [{'size': 0.0759924, 'price': 424763.0}, {'size': 29.90325023, 'price': 425000.0}]
bids [{'size': 1.9242076, 'price': 424756.0}, {'size': 0.48, 'price': 424503.0}]
In [20]:
# Convert the raw book entries into Series keyed by price (values = size);
# asks sorted cheapest-first, bids sorted best-bid-first.
def f(entries):
    pairs = [(e['price'], e['size']) for e in entries]
    return dict(zip(('index', 'data'), zip(*pairs)))

ask = Series(**f(asks)).sort_index(ascending=True)
bid = Series(**f(bids)).sort_index(ascending=False)
print('asks\n', ask.head(3))
print('\nbids\n', bid.head(3))
asks
 424763.0     0.075992
425000.0    29.903250
425110.0    28.000000
dtype: float64

bids
 424756.0    1.924208
424503.0    0.480000
424502.0    0.915442
dtype: float64

ふむふむ(´・ω・`)

これを

In [21]:
def grouping(book, is_ask=True, gv=100, bins=None, dropna=True):
    """Group an order book into price buckets of width `gv`.

    Args:
        book: pd.Series (data: 'size', index: 'price').
        is_ask: set False for the bid side; asks use right-closed buckets,
            bids left-closed, so the touch price lands in the right bucket.
        gv: bucket width in price units.
        bins: keep only the `bins` buckets nearest the touch (faster).
        dropna: drop buckets that contain no orders.

    Returns:
        pd.Series of summed sizes indexed by bucket edge, ascending for
        asks and descending for bids.
    """
    idx = book.index.values
    # Snap the outermost prices to bucket edges; for asks an exact multiple
    # of gv belongs to the bucket below it (right-closed intervals).
    max_ = (lambda v: v-v%gv+gv - (gv if is_ask and (v%gv==0) else 0))(idx.max())
    min_ = (lambda v: v-v%gv    - (gv if is_ask and (v%gv==0) else 0))(idx.min())
    bin_ = np.linspace(min_, max_, int((max_-min_)/gv+1))
    if bins:
        bin_ = bin_[:bins+1] if is_ask else bin_[-(bins+1):]
    cut = pd.cut(idx, bin_, right=is_ask)
    gb_idx = bin_[1:] if is_ask else bin_[:-1]
    # observed=False keeps every bucket in category order so the positional
    # reindex onto gb_idx is valid; min_count=1 makes empty buckets NaN
    # (newer pandas would otherwise sum them to 0 and defeat dropna below).
    sums = book.groupby(cut, observed=False).sum(min_count=1)
    gb_book = pd.Series(sums.values, index=gb_idx)
    gb_book = gb_book.sort_index(ascending=is_ask)
    if dropna:
        gb_book = gb_book.dropna()
    return gb_book
In [22]:
# Bucket both sides into 500-unit levels, keeping the 10 buckets nearest the touch.
g_ask = grouping(ask, True, 500, 10, True)
g_bid = grouping(bid, False, 500, 10, True)
print('asks\n', g_ask.head(3))
print('\nbids\n', g_bid.head(3))
asks
 425000.0    29.979243
425500.0    58.504000
426000.0    43.669790
dtype: float64

bids
 424500.0     5.504649
424000.0     1.898000
423500.0    52.982025
dtype: float64

そして思ったより時間かかる(´・ω・`)

In [23]:
%time g_ask = grouping(ask, True, 500, None, True)
%time g_bid = grouping(bid, False, 500, None, True)
Wall time: 65 ms
Wall time: 35 ms

numpyでやってみることにする(´・ω・`)
※これは今日書いた なんか思ったよりてこずった (^ω ^;)

In [24]:
def grouping_np(book, is_ask=True, gv=100, bins=None, dropna=True):
    idx = book.index.values
    size = book.values
    max_ = (lambda v: v-v%gv+gv - (gv if is_ask and (v%gv==0) else 0))(idx.max())
    min_ = (lambda v: v-v%gv    - (gv if is_ask and (v%gv==0) else 0))(idx.min())
    bin_ = np.linspace(min_, max_, int((max_-min_)/gv+1))
    if bins:
        bin_ = bin_[:bins+1] if is_ask else bin_[-(bins+1):]
        mask = (idx>=bin_.min()) & (idx<=bin_.max())
        idx = idx[mask]
        size = size[mask]
    bin_indice = np.digitize(idx, bin_, right=is_ask)
    gb_idx = bin_[1:] if is_ask else bin_[:-1]
    gb_book = Series(np.bincount(bin_indice-1, weights=size, minlength=len(gb_idx)), index=gb_idx)
    gb_book.sort_index(ascending=is_ask, inplace=True)
    if dropna:
        gb_book = gb_book.loc[gb_book.values>0]
    else:
        gb_book[gb_book.values==0] = np.nan # 一応さっきのと同じになるようにnanをいれとく
    return gb_book

%time g_ask_np = grouping_np(ask, True, 500, None, True)
%time g_bid_np = grouping_np(bid, False, 500, None, True)
Wall time: 0 ns
Wall time: 1e+03 µs

わーい!はやくなった!ヾ(´・ω・`)ノ゙

板ぽくしてみます

In [25]:
# Render a book-style table: top 5 grouped ask/bid levels around the touch.
# NOTE(review): this rebinds `df`, clobbering the Mizuho frame from earlier
# cells — a fresh name would be safer.
df = DataFrame({'asks': g_ask_np.iloc[:5], 'bids':g_bid_np.iloc[:5]})
df.index.name = 'price'
df = df.reset_index()[['asks', 'price', 'bids']].sort_values('price', ascending=False)
# Asks accumulate from the touch upward (hence the reversed cumsum),
# bids accumulate from the touch downward.
df['cum_asks'] = df['asks'][::-1].cumsum()
df['cum_bids'] = df['bids'].cumsum()
df = df['cum_asks asks price bids cum_bids'.split()]
# Blank out the empty side of each row for display.
df[df.isnull()] = ''
df
Out[25]:
cum_asks asks price bids cum_bids
9 251.156 71.7404 427000.0
8 179.416 47.2629 426500.0
7 132.153 43.6698 426000.0
6 88.4832 58.504 425500.0
5 29.9792 29.9792 425000.0
4 424500.0 5.50465 5.50465
3 424000.0 1.898 7.40265
2 423500.0 52.982 60.3847
1 423000.0 57.4 117.785
0 422500.0 58.7923 176.577

つかれたー(´・ω・`)ノ