mirror of
https://github.com/c9s/bbgo.git
synced 2024-11-10 09:11:55 +00:00
Merge pull request #809 from zenixls2/feature/logistic_regression
feature: logistic regression
This commit is contained in:
commit
e633cedd3c
|
@ -94,6 +94,10 @@ type SeriesExtend interface {
|
|||
Covariance(b Series, length int) float64
|
||||
Correlation(b Series, length int, method ...CorrFunc) float64
|
||||
Rank(length int) SeriesExtend
|
||||
Sigmoid() SeriesExtend
|
||||
Softmax(window int) SeriesExtend
|
||||
Entropy(window int) float64
|
||||
CrossEntropy(b Series, window int) float64
|
||||
}
|
||||
|
||||
type SeriesBase struct {
|
||||
|
@ -524,7 +528,69 @@ var _ Series = &MulSeriesResult{}
|
|||
// if limit is given, will only calculate the first limit numbers (a.Index[0..limit])
|
||||
// otherwise will operate on all elements
|
||||
func Dot(a interface{}, b interface{}, limit ...int) float64 {
|
||||
return Sum(Mul(a, b), limit...)
|
||||
var aaf float64
|
||||
var aas Series
|
||||
var bbf float64
|
||||
var bbs Series
|
||||
var isaf, isbf bool
|
||||
|
||||
switch tp := a.(type) {
|
||||
case float64:
|
||||
aaf = tp
|
||||
isaf = true
|
||||
case Series:
|
||||
aas = tp
|
||||
isaf = false
|
||||
default:
|
||||
panic("input should be either Series or float64")
|
||||
}
|
||||
switch tp := b.(type) {
|
||||
case float64:
|
||||
bbf = tp
|
||||
isbf = true
|
||||
case Series:
|
||||
bbs = tp
|
||||
isbf = false
|
||||
default:
|
||||
panic("input should be either Series or float64")
|
||||
|
||||
}
|
||||
l := 1
|
||||
if len(limit) > 0 {
|
||||
l = limit[0]
|
||||
} else if isaf && isbf {
|
||||
l = 1
|
||||
} else {
|
||||
if !isaf {
|
||||
l = aas.Length()
|
||||
}
|
||||
if !isbf {
|
||||
if l > bbs.Length() {
|
||||
l = bbs.Length()
|
||||
}
|
||||
}
|
||||
}
|
||||
if isaf && isbf {
|
||||
return aaf * bbf * float64(l)
|
||||
} else if isaf && !isbf {
|
||||
sum := 0.
|
||||
for i := 0; i < l; i++ {
|
||||
sum += aaf * bbs.Index(i)
|
||||
}
|
||||
return sum
|
||||
} else if !isaf && isbf {
|
||||
sum := 0.
|
||||
for i := 0; i < l; i++ {
|
||||
sum += aas.Index(i) * bbf
|
||||
}
|
||||
return sum
|
||||
} else {
|
||||
sum := 0.
|
||||
for i := 0; i < l; i++ {
|
||||
sum += aas.Index(i) * bbs.Index(i)
|
||||
}
|
||||
return sum
|
||||
}
|
||||
}
|
||||
|
||||
// Extract elements from the Series to a float64 array, following the order of Index(0..limit)
|
||||
|
@ -881,4 +947,175 @@ func Rolling(a Series, window int) *RollingResult {
|
|||
return &RollingResult{a, window}
|
||||
}
|
||||
|
||||
type SigmoidResult struct {
|
||||
a Series
|
||||
}
|
||||
|
||||
func (s *SigmoidResult) Last() float64 {
|
||||
return 1. / (1. + math.Exp(-s.a.Last()))
|
||||
}
|
||||
|
||||
func (s *SigmoidResult) Index(i int) float64 {
|
||||
return 1. / (1. + math.Exp(-s.a.Index(i)))
|
||||
}
|
||||
|
||||
func (s *SigmoidResult) Length() int {
|
||||
return s.a.Length()
|
||||
}
|
||||
|
||||
// Sigmoid returns the input values in range of -1 to 1
|
||||
// along the sigmoid or s-shaped curve.
|
||||
// Commonly used in machine learning while training neural networks
|
||||
// as an activation function.
|
||||
func Sigmoid(a Series) SeriesExtend {
|
||||
return NewSeries(&SigmoidResult{a})
|
||||
}
|
||||
|
||||
// SoftMax returns the input value in the range of 0 to 1
|
||||
// with sum of all the probabilities being equal to one.
|
||||
// It is commonly used in machine learning neural networks.
|
||||
// Will return Softmax SeriesExtend result based in latest [window] numbers from [a] Series
|
||||
func Softmax(a Series, window int) SeriesExtend {
|
||||
s := 0.0
|
||||
max := Highest(a, window)
|
||||
for i := 0; i < window; i++ {
|
||||
s += math.Exp(a.Index(i) - max)
|
||||
}
|
||||
out := NewQueue(window)
|
||||
for i := window - 1; i >= 0; i-- {
|
||||
out.Update(math.Exp(a.Index(i)-max) / s)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Entropy computes the Shannon entropy of a distribution or the distance between
|
||||
// two distributions. The natural logarithm is used.
|
||||
// - sum(v * ln(v))
|
||||
func Entropy(a Series, window int) (e float64) {
|
||||
for i := 0; i < window; i++ {
|
||||
v := a.Index(i)
|
||||
if v != 0 {
|
||||
e -= v * math.Log(v)
|
||||
}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// CrossEntropy computes the cross-entropy between the two distributions
|
||||
func CrossEntropy(a, b Series, window int) (e float64) {
|
||||
for i := 0; i < window; i++ {
|
||||
v := a.Index(i)
|
||||
if v != 0 {
|
||||
e -= v * math.Log(b.Index(i))
|
||||
}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// sigmoid is the logistic function 1 / (1 + e^-z); it maps any real z into
// the interval [0, 1], with sigmoid(0) == 0.5.
func sigmoid(z float64) float64 {
	return 1. / (1. + math.Exp(-z))
}
|
||||
|
||||
func propagate(w []float64, gradient float64, x [][]float64, y []float64) (float64, []float64, float64) {
|
||||
logloss_epoch := 0.0
|
||||
var activations []float64
|
||||
var dw []float64
|
||||
m := len(y)
|
||||
db := 0.0
|
||||
for i, xx := range x {
|
||||
result := 0.0
|
||||
for j, ww := range w {
|
||||
result += ww * xx[j]
|
||||
}
|
||||
a := sigmoid(result + gradient)
|
||||
activations = append(activations, a)
|
||||
logloss := a*math.Log1p(y[i]) + (1.-a)*math.Log1p(1-y[i])
|
||||
logloss_epoch += logloss
|
||||
|
||||
db += a - y[i]
|
||||
}
|
||||
for j := range w {
|
||||
err := 0.0
|
||||
for i, xx := range x {
|
||||
err_i := activations[i] - y[i]
|
||||
err += err_i * xx[j]
|
||||
}
|
||||
err /= float64(m)
|
||||
dw = append(dw, err)
|
||||
}
|
||||
|
||||
cost := -(logloss_epoch / float64(len(x)))
|
||||
db /= float64(m)
|
||||
return cost, dw, db
|
||||
}
|
||||
|
||||
func LogisticRegression(x []Series, y Series, lookback, iterations int, learningRate float64) *LogisticRegressionModel {
|
||||
features := len(x)
|
||||
if features == 0 {
|
||||
panic("no feature to train")
|
||||
}
|
||||
w := make([]float64, features)
|
||||
if lookback > x[0].Length() {
|
||||
lookback = x[0].Length()
|
||||
}
|
||||
xx := make([][]float64, lookback)
|
||||
for i := 0; i < lookback; i++ {
|
||||
for j := 0; j < features; j++ {
|
||||
xx[i] = append(xx[i], x[j].Index(lookback-i-1))
|
||||
}
|
||||
}
|
||||
yy := Reverse(y, lookback)
|
||||
|
||||
b := 0.
|
||||
for i := 0; i < iterations; i++ {
|
||||
_, dw, db := propagate(w, b, xx, yy)
|
||||
for j := range w {
|
||||
w[j] = w[j] - (learningRate * dw[j])
|
||||
}
|
||||
b -= learningRate * db
|
||||
}
|
||||
return &LogisticRegressionModel{
|
||||
Weight: w,
|
||||
Gradient: b,
|
||||
LearningRate: learningRate,
|
||||
}
|
||||
}
|
||||
|
||||
// LogisticRegressionModel holds the parameters produced by LogisticRegression.
type LogisticRegressionModel struct {
	// Weight has one coefficient per input feature.
	Weight []float64
	// Gradient is the bias (intercept) term: Predict adds it to the weighted
	// sum of the features before applying the sigmoid.
	Gradient float64
	// LearningRate is the step size the model was trained with.
	LearningRate float64
}
|
||||
|
||||
/*
// Might not be correct.
// Please double-check before uncommenting this.
func (l *LogisticRegressionModel) Update(x []float64, y float64) {
	z := 0.0
	for i, w := range l.Weight {
		z += w * x[i]
	}
	a := sigmoid(z + l.Gradient)
	//logloss := a * math.Log1p(y) + (1.-a)*math.Log1p(1-y)
	db := a - y
	var dw []float64
	for j := range l.Weight {
		err := db * x[j]
		dw = append(dw, err)
	}
	for i := range l.Weight {
		l.Weight[i] -= l.LearningRate * dw[i]
	}
	l.Gradient -= l.LearningRate * db
}
*/
|
||||
|
||||
func (l *LogisticRegressionModel) Predict(x []float64) float64 {
|
||||
z := 0.0
|
||||
for i, w := range l.Weight {
|
||||
z += w * x[i]
|
||||
}
|
||||
return sigmoid(z + l.Gradient)
|
||||
}
|
||||
|
||||
// TODO: ta.linreg
|
||||
|
|
|
@ -2,6 +2,7 @@ package types
|
|||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"gonum.org/v1/gonum/stat"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -84,3 +85,62 @@ func TestSkew(t *testing.T) {
|
|||
sk := Skew(&a, 4)
|
||||
assert.InDelta(t, sk, 1.129338, 0.001)
|
||||
}
|
||||
|
||||
func TestEntropy(t *testing.T) {
|
||||
var a = Float64Slice{.2, .0, .6, .2}
|
||||
e := stat.Entropy(a)
|
||||
assert.InDelta(t, e, Entropy(&a, a.Length()), 0.0001)
|
||||
}
|
||||
|
||||
func TestCrossEntropy(t *testing.T) {
|
||||
var a = Float64Slice{.2, .0, .6, .2}
|
||||
var b = Float64Slice{.3, .6, .0, .1}
|
||||
e := stat.CrossEntropy(a, b)
|
||||
assert.InDelta(t, e, CrossEntropy(&a, &b, a.Length()), 0.0001)
|
||||
}
|
||||
|
||||
func TestSoftmax(t *testing.T) {
|
||||
var a = Float64Slice{3.0, 1.0, 0.2}
|
||||
out := Softmax(&a, a.Length())
|
||||
r := Float64Slice{0.8360188027814407, 0.11314284146556013, 0.05083835575299916}
|
||||
for i := 0; i < out.Length(); i++ {
|
||||
assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSigmoid(t *testing.T) {
|
||||
a := Float64Slice{3.0, 1.0, 2.1}
|
||||
out := Sigmoid(&a)
|
||||
r := Float64Slice{0.9525741268224334, 0.7310585786300049, 0.8909031788043871}
|
||||
for i := 0; i < out.Length(); i++ {
|
||||
assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
|
||||
}
|
||||
}
|
||||
|
||||
// from https://en.wikipedia.org/wiki/Logistic_regression
|
||||
func TestLogisticRegression(t *testing.T) {
|
||||
a := []Float64Slice{{0.5, 0.75, 1., 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3., 3.25, 3.5, 4., 4.25, 4.5, 4.75, 5., 5.5}}
|
||||
b := Float64Slice{0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1}
|
||||
var x []Series
|
||||
x = append(x, &a[0])
|
||||
|
||||
model := LogisticRegression(x, &b, a[0].Length(), 8000, 0.0009)
|
||||
inputs := []float64{1., 2., 2.7, 3., 4., 5.}
|
||||
results := []bool{false, false, true, true, true, true}
|
||||
for i, x := range inputs {
|
||||
input := []float64{x}
|
||||
pred := model.Predict(input)
|
||||
assert.Equal(t, pred > 0.5, results[i])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDot(t *testing.T) {
|
||||
a := Float64Slice{7, 6, 5, 4, 3, 2, 1, 0}
|
||||
b := Float64Slice{200., 201., 203., 204., 203., 199.}
|
||||
out1 := Dot(&a, &b, 3)
|
||||
assert.InDelta(t, out1, 611., 0.001)
|
||||
out2 := Dot(&a, 3., 2)
|
||||
assert.InDelta(t, out2, 3., 0.001)
|
||||
out3 := Dot(3., &a, 2)
|
||||
assert.InDelta(t, out2, out3, 0.001)
|
||||
}
|
||||
|
|
|
@ -124,3 +124,19 @@ func (s *SeriesBase) Correlation(b Series, length int, method ...CorrFunc) float
|
|||
func (s *SeriesBase) Rank(length int) SeriesExtend {
|
||||
return Rank(s, length)
|
||||
}
|
||||
|
||||
func (s *SeriesBase) Sigmoid() SeriesExtend {
|
||||
return Sigmoid(s)
|
||||
}
|
||||
|
||||
func (s *SeriesBase) Softmax(window int) SeriesExtend {
|
||||
return Softmax(s, window)
|
||||
}
|
||||
|
||||
func (s *SeriesBase) Entropy(window int) float64 {
|
||||
return Entropy(s, window)
|
||||
}
|
||||
|
||||
func (s *SeriesBase) CrossEntropy(b Series, window int) float64 {
|
||||
return CrossEntropy(s, b, window)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user