%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import indicators as indi
# Hard-coded local path of the input CSV (the file name suggests 1-minute
# USDJPY bars for 2016 -- confirm against the actual data).
csvpath = 'R:/WORK/DAT_ASCII_USDJPY_M1_2016.csv'
# The file is read as ';'-separated with explicitly supplied column names.
# 'Time' is parsed as dates and becomes the index; the sixth column gets an
# empty name and is dropped by the Open/High/Low/Close selection that follows.
df = pd.read_csv(csvpath, sep=';', names=('Time','Open','High','Low','Close', ''),
index_col='Time', parse_dates=True)['Open High Low Close'.split()]
# Move every timestamp forward by 7 hours
# (presumably a timezone adjustment -- TODO confirm the intended offset).
df.index += pd.offsets.Hour(7)
# Notebook cell output: preview the first rows.
df.head()
Openのデータが1ティック目の価格のようなデータの場合は
Openを以下のようにしたほうがちょうど1分という意味では正しいか
# df['Open'] = df['Close'].shift()
# df.dropna(inplace=True)
# df.head()
これには簡単に改善できるのにちょっと遅い部分があった
それは、シグナル計算の部分で shift() を使ったときに
先頭にnanが入って型がObject型になってしまって
calc_result()の df['Hsig']&(df['ans']==1) のような、
boolならもっと高速に計算できる部分のせいのようだった
(たぶん おそらく (´・ω・`))
def calc_answer(df, draw_range=0.0):
    """Label every bar with its outcome and store it in ``df['ans']``.

    A bar is labelled 1 when Close is above Open by more than
    ``draw_range``, -1 when Close is below Open by more than
    ``draw_range``, and 0 otherwise.

    Args:
        df: DataFrame with 'Open' and 'Close' columns; modified in place.
        draw_range: Width of the band around Open that counts as 0.

    Returns:
        The same ``df`` object, with the 'ans' column added or replaced.
    """
    rose = df['Close'] > df['Open'] + draw_range
    fell = df['Close'] < df['Open'] - draw_range
    labels = pd.Series(0, index=df.index)
    # The -1 label is applied last, so it wins if both conditions ever hold.
    df['ans'] = labels.mask(rose, 1).mask(fell, -1)
    return df
def calc_rsi_signal(df, period=14, leveldiff=20):
    """Derive the 'Hsig' and 'Lsig' columns from RSI levels.

    'Hsig' flags RSI below ``50 - leveldiff`` and 'Lsig' flags RSI above
    ``50 + leveldiff``. Both are shifted by one row, so each row carries
    the comparison made on the previous row.

    Args:
        df: Price DataFrame passed to ``indi.iRSI``; modified in place.
        period: Period forwarded to ``indi.iRSI``.
        leveldiff: Distance of both thresholds from the 50 level.

    Returns:
        The same ``df`` object with 'Hsig' and 'Lsig' set.
    """
    # NOTE(review): assumes indi.iRSI returns a pandas object aligned with
    # df.index (the comparison result must support .shift()) -- confirm.
    rsi_values = indi.iRSI(df, period)
    lower_level = 50 - leveldiff
    upper_level = 50 + leveldiff
    below_lower = rsi_values < lower_level
    above_upper = rsi_values > upper_level
    # shift() leaves the first row without a value.
    df['Hsig'] = below_lower.shift()
    df['Lsig'] = above_upper.shift()
    return df
def calc_result(df):
    """Score every signalled bar and store the score in ``df['result']``.

    The score is 1 when the signal matches the answer ('Hsig' with
    ans == 1, 'Lsig' with ans == -1), -1 when a signal is present but does
    not match, and 0 when a signal is present and ans == 0. Rows without
    any signal keep a missing value.

    Args:
        df: DataFrame with 'Hsig', 'Lsig' and 'ans' columns; modified in
            place.

    Returns:
        The same ``df`` object with the 'result' column added or replaced.
    """
    high_sig, low_sig, answer = df['Hsig'], df['Lsig'], df['ans']

    matched = (high_sig & (answer == 1)) | (low_sig & (answer == -1))
    missed = (high_sig & (answer != 1)) | (low_sig & (answer != -1))
    flat = (high_sig | low_sig) & (answer == 0)

    df['result'] = None
    # Applied in this order on purpose: a later mask overrides an earlier one,
    # so ans == 0 ends up as 0 even though it is first marked as -1.
    df['result'] = df['result'].mask(matched, 1).mask(missed, -1).mask(flat, 0)
    return df
def summary(df):
    """Print bar count, entry count and win/loss/draw statistics.

    Reads ``df['result']``: non-missing rows are entries, 1 is a win,
    -1 a loss and 0 a draw. Win and loss are shown with two percentages:
    share of all entries, then share of wins plus losses.

    Args:
        df: DataFrame containing a 'result' column.
    """
    outcomes = df['result']
    bars = len(df)
    n = len(outcomes.dropna())
    win = (outcomes == 1).sum()
    loss = (outcomes == -1).sum()
    draw = (outcomes == 0).sum()

    print(f'bars : {bars}')
    print(f'N : {n}')
    print(f'win : {win}({win / n * 100:.2f}%)({win / (win + loss) * 100:.2f}%)')
    print(f'loss : {loss}({loss / n * 100:.2f}%)({loss / (win + loss) * 100:.2f}%)')
    print(f'draw : {draw}({draw / n * 100:.2f}%)')
    print(f'total: {win - loss}')
def plot(df, payout=1):
    """Plot the running total of ``df['result']``.

    Every result equal to 1 is replaced by ``payout`` before summing;
    rows with a missing result are dropped.

    Args:
        df: DataFrame containing a 'result' column.
        payout: Value credited for each result equal to 1.
    """
    outcomes = df['result']
    payoffs = outcomes.mask(outcomes == 1, payout)
    running_total = payoffs.dropna().cumsum()
    running_total.plot(figsize=(15, 5))
# Run the three pipeline stages on the loaded frame, timing each one with
# IPython's %time magic. Each stage adds its columns to df in place.
%time calc_answer(df)
%time calc_rsi_signal(df)
%time calc_result(df)
# Print the statistics, then plot the running total with 0.8 credited per win.
summary(df)
plot(df, 0.8)
# Show the dtypes of the two shifted signal columns.
print(df['Hsig'].dtypes)
print(df['Lsig'].dtypes)
print('↓こういうことなんですね')
# Minimal reproduction on a 3-element bool Series: print its dtype as created,
# after shift(), and after shift() followed by fillna(False).
s = pd.Series(np.ones(3, dtype=bool))
print(s.dtypes)
print(s.shift().dtypes)
print(s.shift().fillna(False).dtypes)
calc_result()で集計前にnanをFalseで埋めた
まえのコードだと350msくらいかかってた箇所が
80msくらいになったヾ(´・ω・`)ノ゙
それとついでにspread HighLowのルールを勘違いしていたようなので
calc_answerで答えの配列を計算する部分も直した
(とはいえルールそんなに細かく調べてない(´・ω・`))
def calc_answer(df, spread=0.000):
    """Compute the per-bar answers for both sides and store them in ``df``.

    'ansH' is 1 when Close is above ``Open + spread``, -1 when below it and
    0 when equal. 'ansL' is 1 when Close is below ``Open - spread``, -1 when
    above it and 0 when equal.

    Args:
        df: DataFrame with 'Open' and 'Close' columns; modified in place.
        spread: Offset added to Open for 'ansH' and subtracted for 'ansL'.

    Returns:
        The same ``df`` object with 'ansH' and 'ansL' added or replaced.
    """
    close = df['Close']
    high_line = df['Open'] + spread
    low_line = df['Open'] - spread

    df['ansH'] = 0
    df.loc[close > high_line, 'ansH'] = 1
    df.loc[close < high_line, 'ansH'] = -1

    df['ansL'] = 0
    df.loc[close < low_line, 'ansL'] = 1
    df.loc[close > low_line, 'ansL'] = -1
    return df
def calc_rsi_signal(df, period=14, leveldiff=20):
    """Write RSI threshold signals, delayed by one row, into ``df``.

    Args:
        df: Price DataFrame handed to ``indi.iRSI``; modified in place.
        period: Period forwarded to ``indi.iRSI``.
        leveldiff: Offset from 50 that defines the two thresholds.

    Returns:
        The same ``df`` object. 'Hsig' holds the previous row's
        ``rsi < 50 - leveldiff`` and 'Lsig' the previous row's
        ``rsi > 50 + leveldiff``; the first row has no value.
    """
    # NOTE(review): indi.iRSI is presumably returning a pandas Series aligned
    # with df.index, since .shift() is called on the comparison -- confirm.
    rsi = indi.iRSI(df, period)
    thresholds = {'Hsig': rsi < 50 - leveldiff, 'Lsig': rsi > 50 + leveldiff}
    for column, flagged in thresholds.items():
        df[column] = flagged.shift()
    return df
def calc_result(df):
    """Score every signalled bar and store the score in ``df['result']``.

    Missing values in 'Hsig'/'Lsig' are replaced by False and both columns
    are converted to bool before scoring. The score is 1 when
    ('Hsig' and ansH == 1) or ('Lsig' and ansL == 1), -1 for the same test
    against -1, and 0 for the same test against 0. Rows without a signal
    keep a missing value.

    Args:
        df: DataFrame with 'Hsig', 'Lsig', 'ansH' and 'ansL' columns;
            modified in place ('Hsig' and 'Lsig' are overwritten with their
            bool versions).

    Returns:
        The same ``df`` object with the 'result' column added or replaced.
    """
    # fillna(False) alone relies on pandas silently downcasting an object
    # column to bool, which is deprecated; without the downcast the columns
    # stay object and the boolean operations below lose their speed-up.
    # Casting explicitly keeps the result bool on every pandas version.
    df['Hsig'] = df['Hsig'].fillna(False).astype(bool)
    df['Lsig'] = df['Lsig'].fillna(False).astype(bool)
    hsig, lsig = df['Hsig'], df['Lsig']
    ans_h, ans_l = df['ansH'], df['ansL']

    df['result'] = None
    # Later masks override earlier ones, so the order win -> loss -> draw matters
    # whenever both signals are set on the same row.
    df['result'] = df['result'].mask((hsig & (ans_h == 1)) | (lsig & (ans_l == 1)), 1)
    df['result'] = df['result'].mask((hsig & (ans_h == -1)) | (lsig & (ans_l == -1)), -1)
    df['result'] = df['result'].mask((hsig & (ans_h == 0)) | (lsig & (ans_l == 0)), 0)
    return df
def summary(df):
    """Print the statistics of the 'result' column.

    Output lines, in order: number of bars, number of entries (non-missing
    results), wins, losses, draws and wins minus losses. Wins and losses are
    followed by their percentage of all entries and their percentage of
    wins plus losses; draws by their percentage of all entries.

    Args:
        df: DataFrame containing a 'result' column.
    """
    def percent(part, whole):
        # Same operation order as part/whole*100 so the rounding is unchanged.
        return part / whole * 100

    column = df['result']
    bars = len(df)
    n = len(column.dropna())
    win, loss, draw = ((column == value).sum() for value in (1, -1, 0))

    print('bars : {}'.format(bars))
    print('N : {}'.format(n))
    print('win : {}({:.2f}%)({:.2f}%)'.format(win, percent(win, n), percent(win, win + loss)))
    print('loss : {}({:.2f}%)({:.2f}%)'.format(loss, percent(loss, n), percent(loss, win + loss)))
    print('draw : {}({:.2f}%)'.format(draw, percent(draw, n)))
    print('total: {}'.format(win - loss))
def plot(df, payout=1):
    """Plot the running total of the non-missing results.

    Missing results are dropped and the rest converted to float; every
    result equal to 1 is then replaced by ``payout`` before the cumulative
    sum is plotted.

    Args:
        df: DataFrame containing a 'result' column.
        payout: Value credited for each result equal to 1.
    """
    is_win = df['result'] == 1
    entries = df['result'].dropna().astype(float)
    # is_win still carries the full index; mask() aligns it to the entries.
    running_total = entries.mask(is_win, payout).cumsum()
    running_total.plot(figsize=(15, 5))
# Re-run the pipeline with the revised functions (spread of 0.000), timing
# each stage with IPython's %time magic.
%time calc_answer(df, 0.000)
%time calc_rsi_signal(df)
%time calc_result(df)
# Print the statistics, then plot the running total with 0.8 credited per win.
summary(df)
plot(df, 0.8)
class BOTester(object):
    """Back-tester that keeps its intermediate data as NumPy arrays.

    Intended call order: ``calc_answer()``, ``calc_rsi_signal()`` (or assign
    ``Hsig``/``Lsig`` directly), ``calc_result()``, then ``summary()`` and/or
    ``plot()``.

    Attributes set by the methods:
        ansH, ansL: Per-bar answers (1, -1 or 0) from ``calc_answer``.
        Hsig, Lsig: Un-shifted boolean signals from ``calc_rsi_signal``.
        result: Series of 1 / -1 / 0 for bars with an entry, indexed like
            ``df``, from ``calc_result``.
    """

    # Marker used in the intermediate result array for bars without an entry.
    # Kept reachable as ``self.NO_ENTRY``.
    NO_ENTRY = -2

    def __init__(self, df, spread=0.000):
        """Store the frame and cache its 'Close'/'Open' columns as arrays.

        Args:
            df: DataFrame with 'Open' and 'Close' columns.
            spread: Offset added to Open for ``ansH`` and subtracted for
                ``ansL``.
        """
        self.df = df
        self.spread = spread
        self.Close = self.df['Close'].values
        self.Open = self.df['Open'].values

    def calc_answer(self):
        """Compute ``ansH`` and ``ansL`` for every bar.

        ``ansH`` is 1 when Close is above ``Open + spread``, -1 when below
        and 0 when equal; ``ansL`` is 1 when Close is below
        ``Open - spread``, -1 when above and 0 when equal. Both are float
        arrays.
        """
        high_line = self.Open + self.spread
        low_line = self.Open - self.spread
        self.ansH = np.where(self.Close > high_line, 1.0,
                             np.where(self.Close < high_line, -1.0, 0.0))
        self.ansL = np.where(self.Close < low_line, 1.0,
                             np.where(self.Close > low_line, -1.0, 0.0))

    def calc_rsi_signal(self, period=14, leveldiff=20):
        """Set ``Hsig``/``Lsig`` from RSI thresholds, without shifting.

        Args:
            period: Period forwarded to ``indi.iRSI``.
            leveldiff: Distance of both thresholds from the 50 level.
        """
        rsi = indi.iRSI(self.df, period)
        self.Hsig = (rsi < 50 - leveldiff)
        self.Lsig = (rsi > 50 + leveldiff)

    def calc_result(self):
        """Score every bar whose previous bar raised a signal.

        The signals are delayed by one bar (first bar gets False) and then
        compared with the answers: 1 where the answer of the signalled side
        is 1, -1 where it is -1, 0 where it is 0. Bars without a signal are
        dropped from ``self.result``.

        The delayed signals are kept in local variables, so ``self.Hsig`` and
        ``self.Lsig`` stay un-shifted and calling this method repeatedly
        yields the same result instead of delaying the signals once more on
        every call.
        """
        hsig = np.r_[[False], self.Hsig[:-1]]
        lsig = np.r_[[False], self.Lsig[:-1]]

        result = np.full(len(self.df), self.NO_ENTRY, dtype=np.int64)
        # Later assignments override earlier ones: win -> loss -> draw.
        result = np.where((hsig & (self.ansH == 1)) | (lsig & (self.ansL == 1)), 1, result)
        result = np.where((hsig & (self.ansH == -1)) | (lsig & (self.ansL == -1)), -1, result)
        result = np.where((hsig & (self.ansH == 0)) | (lsig & (self.ansL == 0)), 0, result)

        result = pd.Series(result, index=self.df.index)
        self.result = result[result != self.NO_ENTRY]

    def summary(self):
        """Print bar count, entry count and win/loss/draw statistics."""
        bars = len(self.df)
        n = len(self.result)
        win = (self.result == 1).sum()
        loss = (self.result == -1).sum()
        draw = (self.result == 0).sum()
        print('bars : {}'.format(bars))
        print('N : {}'.format(n))
        print('win : {}({:.2f}%)({:.2f}%)'.format(win, win/n*100, win/(win+loss)*100))
        print('loss : {}({:.2f}%)({:.2f}%)'.format(loss, loss/n*100, loss/(win+loss)*100))
        print('draw : {}({:.2f}%)'.format(draw, draw/n*100))
        print('total: {}'.format(win-loss))

    def plot(self, payout=1):
        """Plot the running total of ``result`` with wins credited as ``payout``.

        Args:
            payout: Value that replaces every result equal to 1.
        """
        self.result.mask(self.result == 1, payout).cumsum().plot(figsize=(15, 5))
# Build the NumPy-based tester on the same frame and time each stage with
# IPython's %time magic.
%time bt = BOTester(df, 0.000)
%time bt.calc_answer()
%time bt.calc_rsi_signal()
%time bt.calc_result()
# Print the statistics, then plot the running total with 0.8 credited per win.
bt.summary()
bt.plot(0.8)
calc_answerも少し速くなった
calc_rsi_signalはshiftしないぶん速くなった
calc_resultも速くなった(df.maskじゃなくてnp.whereにしたからか?)
# Micro-benchmark with IPython's %timeit: replace the even values of a
# 3,000,000-element integer range by -1 in three ways.
# 1) np.where on a NumPy array.
a = np.arange(3000000)
%timeit b = np.where(a%2==0, -1, a)
# 2) np.where on a pandas Series.
a = pd.Series(np.arange(3000000))
%timeit b = np.where(a%2==0, -1, a)
# 3) Series.mask on a pandas Series.
a = pd.Series(np.arange(3000000))
%timeit b = a.mask(a%2==0, -1)
numpy配列でもSeriesでもnp.whereを使ったときの速度差はない感じですが
maskは少し時間がかかるっぽいですね
indexを保持しなくていいならnp.whereが速くていいか
とはいえ繰り返してやる処理でないならそんなに気にしないですけどね(´・ω・`)