mirror of https://github.com/c9s/bbgo.git
feature: add entropy, cross entropy, sigmoid, softmax, and logistic regression
This commit is contained in:
parent
46d6ecc663
commit
0e64a14d7f
@@ -94,6 +94,10 @@ type SeriesExtend interface {
 	Covariance(b Series, length int) float64
 	Correlation(b Series, length int, method ...CorrFunc) float64
 	Rank(length int) SeriesExtend
+	Sigmoid() SeriesExtend
+	Softmax(window int) SeriesExtend
+	Entropy(window int) float64
+	CrossEntropy(b Series, window int) float64
 }

 type SeriesBase struct {
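
The four new methods hang off the fluent SeriesExtend interface, so they chain with the existing statistics. A minimal usage sketch, assuming a Float64Slice wrapped via NewSeries; the variable names are illustrative, not part of the commit:

	// hypothetical caller code
	closes := Float64Slice{101, 102, 101.5, 103, 104}
	s := NewSeries(&closes)
	probs := s.Softmax(5)   // latest 5 values as a probability distribution
	h := probs.Entropy(5)   // Shannon entropy of that distribution
	squashed := s.Sigmoid() // element-wise logistic squashing
	_, _ = h, squashed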
@@ -524,7 +528,69 @@ var _ Series = &MulSeriesResult{}
 // if limit is given, will only calculate the first limit numbers (a.Index[0..limit])
 // otherwise will operate on all elements
 func Dot(a interface{}, b interface{}, limit ...int) float64 {
-	return Sum(Mul(a, b), limit...)
+	var aaf float64
+	var aas Series
+	var bbf float64
+	var bbs Series
+	var isaf, isbf bool
+
+	switch tp := a.(type) {
+	case float64:
+		aaf = tp
+		isaf = true
+	case Series:
+		aas = tp
+		isaf = false
+	default:
+		panic("input should be either Series or float64")
+	}
+	switch tp := b.(type) {
+	case float64:
+		bbf = tp
+		isbf = true
+	case Series:
+		bbs = tp
+		isbf = false
+	default:
+		panic("input should be either Series or float64")
+	}
+	l := 1
+	if len(limit) > 0 {
+		l = limit[0]
+	} else if isaf && isbf {
+		l = 1
+	} else {
+		if !isaf {
+			l = aas.Length()
+		}
+		if !isbf {
+			if isaf || l > bbs.Length() {
+				l = bbs.Length()
+			}
+		}
+	}
+	if isaf && isbf {
+		return aaf * bbf * float64(l)
+	} else if isaf && !isbf {
+		sum := 0.
+		for i := 0; i < l; i++ {
+			sum += aaf * bbs.Index(i)
+		}
+		return sum
+	} else if !isaf && isbf {
+		sum := 0.
+		for i := 0; i < l; i++ {
+			sum += aas.Index(i) * bbf
+		}
+		return sum
+	} else {
+		sum := 0.
+		for i := 0; i < l; i++ {
+			sum += aas.Index(i) * bbs.Index(i)
+		}
+		return sum
+	}
 }

 // Extract elements from the Series to a float64 array, following the order of Index(0..limit)
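
Note that Index(0) is the most recent element of a Series, so Dot with a limit works backward from the latest values. The numbers below mirror TestDot further down:

	a := Float64Slice{7, 6, 5, 4, 3, 2, 1, 0}
	b := Float64Slice{200., 201., 203., 204., 203., 199.}
	Dot(&a, &b, 3) // 0*199 + 1*203 + 2*204 = 611
	Dot(&a, 3., 2) // (0 + 1) * 3 = 3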
@@ -881,4 +947,175 @@ func Rolling(a Series, window int) *RollingResult {
 	return &RollingResult{a, window}
 }
+
+type SigmoidResult struct {
+	a Series
+}
+
+func (s *SigmoidResult) Last() float64 {
+	return 1. / (1. + math.Exp(-s.a.Last()))
+}
+
+func (s *SigmoidResult) Index(i int) float64 {
+	return 1. / (1. + math.Exp(-s.a.Index(i)))
+}
+
+func (s *SigmoidResult) Length() int {
+	return s.a.Length()
+}
+
+// Sigmoid maps the input values into the range (0, 1)
+// along the sigmoid or s-shaped curve.
+// Commonly used in machine learning while training neural networks
+// as an activation function.
+func Sigmoid(a Series) SeriesExtend {
+	return NewSeries(&SigmoidResult{a})
+}
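
One property worth keeping in mind for the logistic-regression code further down: the logistic curve has a derivative expressible in terms of itself, which is why the gradient terms in propagate collapse to plain (a - y) factors:

	\sigma(z) = \frac{1}{1 + e^{-z}}, \qquad \sigma'(z) = \sigma(z)\bigl(1 - \sigma(z)\bigr)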
+
+// Softmax returns the input values in the range of 0 to 1,
+// with the sum of all the probabilities being equal to one.
+// It is commonly used in machine learning neural networks.
+// Returns a SeriesExtend result based on the latest [window] numbers from the [a] Series.
+func Softmax(a Series, window int) SeriesExtend {
+	s := 0.0
+	max := Highest(a, window)
+	for i := 0; i < window; i++ {
+		s += math.Exp(a.Index(i) - max)
+	}
+	out := NewQueue(window)
+	for i := window - 1; i >= 0; i-- {
+		out.Update(math.Exp(a.Index(i)-max) / s)
+	}
+	return out
+}
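
Subtracting the window maximum (via Highest) before exponentiating is the standard overflow guard for softmax; the shift cancels out and leaves the result unchanged:

	\mathrm{softmax}(x)_i = \frac{e^{x_i - m}}{\sum_j e^{x_j - m}} = \frac{e^{x_i}}{\sum_j e^{x_j}}, \qquad m = \max_j x_j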
+
+// Entropy computes the Shannon entropy of a distribution,
+// using the natural logarithm:
+//   -sum(v * ln(v))
+func Entropy(a Series, window int) (e float64) {
+	for i := 0; i < window; i++ {
+		v := a.Index(i)
+		if v != 0 {
+			e -= v * math.Log(v)
+		}
+	}
+	return e
+}
+
+// CrossEntropy computes the cross-entropy between the two distributions.
+func CrossEntropy(a, b Series, window int) (e float64) {
+	for i := 0; i < window; i++ {
+		v := a.Index(i)
+		if v != 0 {
+			e -= v * math.Log(b.Index(i))
+		}
+	}
+	return e
+}
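
A useful identity when sanity-checking these two against gonum's stat package: cross-entropy is entropy plus the Kullback-Leibler divergence, so H(p, q) >= H(p), with equality exactly when the two distributions match (and H(p, q) is +Inf whenever q is zero where p is not):

	H(p, q) = -\sum_i p_i \ln q_i = H(p) + D_{\mathrm{KL}}(p \,\|\, q)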
+
+func sigmoid(z float64) float64 {
+	return 1. / (1. + math.Exp(-z))
+}
+
+// propagate runs one forward/backward pass over all samples and returns
+// the mean log loss together with the gradients dw and db.
+func propagate(w []float64, gradient float64, x [][]float64, y []float64) (float64, []float64, float64) {
+	loglossEpoch := 0.0
+	var activations []float64
+	var dw []float64
+	m := len(y)
+	db := 0.0
+	for i, xx := range x {
+		result := 0.0
+		for j, ww := range w {
+			result += ww * xx[j]
+		}
+		a := sigmoid(result + gradient)
+		activations = append(activations, a)
+		// binary cross-entropy: y*ln(a) + (1-y)*ln(1-a)
+		logloss := y[i]*math.Log(a) + (1.-y[i])*math.Log(1.-a)
+		loglossEpoch += logloss
+
+		db += a - y[i]
+	}
+	for j := range w {
+		err := 0.0
+		for i, xx := range x {
+			errI := activations[i] - y[i]
+			err += errI * xx[j]
+		}
+		err /= float64(m)
+		dw = append(dw, err)
+	}
+
+	cost := -(loglossEpoch / float64(len(x)))
+	db /= float64(m)
+	return cost, dw, db
+}
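
For reference, with activations a_i = sigmoid(w·x_i + b), these are the standard batch-gradient-descent gradients for logistic regression:

	\frac{\partial J}{\partial w_j} = \frac{1}{m}\sum_{i=1}^{m} (a_i - y_i)\,x_{ij}, \qquad
	\frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m} (a_i - y_i)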
+
+// LogisticRegression trains a logistic model on the latest [lookback] values
+// of the feature series x against the labels y, using batch gradient descent.
+func LogisticRegression(x []Series, y Series, lookback, iterations int, learningRate float64) *LogisticRegressionModel {
+	features := len(x)
+	if features == 0 {
+		panic("no feature to train")
+	}
+	w := make([]float64, features)
+	if lookback > x[0].Length() {
+		lookback = x[0].Length()
+	}
+	xx := make([][]float64, lookback)
+	for i := 0; i < lookback; i++ {
+		for j := 0; j < features; j++ {
+			xx[i] = append(xx[i], x[j].Index(lookback-i-1))
+		}
+	}
+	yy := Reverse(y, lookback)
+
+	b := 0.
+	for i := 0; i < iterations; i++ {
+		_, dw, db := propagate(w, b, xx, yy)
+		for j := range w {
+			w[j] -= learningRate * dw[j]
+		}
+		b -= learningRate * db
+	}
+	return &LogisticRegressionModel{
+		Weight:       w,
+		Gradient:     b,
+		LearningRate: learningRate,
+	}
+}
+
+type LogisticRegressionModel struct {
+	Weight       []float64
+	Gradient     float64 // the learned bias (intercept) term
+	LearningRate float64
+}
+
+/*
+// Might not be correct.
+// Please double-check before uncommenting this.
+func (l *LogisticRegressionModel) Update(x []float64, y float64) {
+	z := 0.0
+	for i, w := range l.Weight {
+		z += w * x[i]
+	}
+	a := sigmoid(z + l.Gradient)
+	//logloss := y*math.Log(a) + (1.-y)*math.Log(1.-a)
+	db := a - y
+	var dw []float64
+	for j := range l.Weight {
+		dw = append(dw, db*x[j])
+	}
+	for i := range l.Weight {
+		l.Weight[i] -= l.LearningRate * dw[i]
+	}
+	l.Gradient -= l.LearningRate * db
+}
+*/
+
+// Predict returns the model's estimated probability that y = 1 for the
+// feature vector x.
+func (l *LogisticRegressionModel) Predict(x []float64) float64 {
+	z := 0.0
+	for i, w := range l.Weight {
+		z += w * x[i]
+	}
+	return sigmoid(z + l.Gradient)
+}
+
+// TODO: ta.linreg
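
Predict yields a probability, so callers threshold it (typically at 0.5) to get a class. A sketch along the lines of TestLogisticRegression below, with one made-up feature series of hours studied and pass/fail labels:

	hours := Float64Slice{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0}
	passed := Float64Slice{0, 0, 0, 0, 1, 1, 1, 1}
	model := LogisticRegression([]Series{&hours}, &passed, hours.Length(), 8000, 0.0009)
	if model.Predict([]float64{2.7}) > 0.5 {
		// classified as "pass"
	}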
@@ -2,6 +2,7 @@ package types

 import (
 	"github.com/stretchr/testify/assert"
+	"gonum.org/v1/gonum/stat"
 	"testing"
 )
@@ -84,3 +85,62 @@ func TestSkew(t *testing.T) {
 	sk := Skew(&a, 4)
 	assert.InDelta(t, sk, 1.129338, 0.001)
 }
+
+func TestEntropy(t *testing.T) {
+	var a = Float64Slice{.2, .0, .6, .2}
+	e := stat.Entropy(a)
+	assert.InDelta(t, e, Entropy(&a, a.Length()), 0.0001)
+}
+
+func TestCrossEntropy(t *testing.T) {
+	var a = Float64Slice{.2, .0, .6, .2}
+	var b = Float64Slice{.3, .6, .0, .1}
+	e := stat.CrossEntropy(a, b)
+	assert.InDelta(t, e, CrossEntropy(&a, &b, a.Length()), 0.0001)
+}
+
+func TestSoftmax(t *testing.T) {
+	var a = Float64Slice{3.0, 1.0, 0.2}
+	out := Softmax(&a, a.Length())
+	r := Float64Slice{0.8360188027814407, 0.11314284146556013, 0.05083835575299916}
+	for i := 0; i < out.Length(); i++ {
+		assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
+	}
+}
+
+func TestSigmoid(t *testing.T) {
+	a := Float64Slice{3.0, 1.0, 2.1}
+	out := Sigmoid(&a)
+	r := Float64Slice{0.9525741268224334, 0.7310585786300049, 0.8909031788043871}
+	for i := 0; i < out.Length(); i++ {
+		assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
+	}
+}
+
+// adapted from https://en.wikipedia.org/wiki/Logistic_regression
+func TestLogisticRegression(t *testing.T) {
+	a := []Float64Slice{{0.5, 0.75, 1., 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3., 3.25, 3.5, 4., 4.25, 4.5, 4.75, 5., 5.5}}
+	b := Float64Slice{0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1}
+	var x []Series
+	x = append(x, &a[0])
+
+	model := LogisticRegression(x, &b, a[0].Length(), 8000, 0.0009)
+	inputs := []float64{1., 2., 2.7, 3., 4., 5.}
+	results := []bool{false, false, true, true, true, true}
+	for i, x := range inputs {
+		input := []float64{x}
+		pred := model.Predict(input)
+		assert.Equal(t, pred > 0.5, results[i])
+	}
+}
+
+func TestDot(t *testing.T) {
+	a := Float64Slice{7, 6, 5, 4, 3, 2, 1, 0}
+	b := Float64Slice{200., 201., 203., 204., 203., 199.}
+	out1 := Dot(&a, &b, 3)
+	assert.InDelta(t, out1, 611., 0.001)
+	out2 := Dot(&a, 3., 2)
+	assert.InDelta(t, out2, 3., 0.001)
+	out3 := Dot(3., &a, 2)
+	assert.InDelta(t, out2, out3, 0.001)
+}
@@ -124,3 +124,19 @@ func (s *SeriesBase) Correlation(b Series, length int, method ...CorrFunc) float64 {
 func (s *SeriesBase) Rank(length int) SeriesExtend {
 	return Rank(s, length)
 }
+
+func (s *SeriesBase) Sigmoid() SeriesExtend {
+	return Sigmoid(s)
+}
+
+func (s *SeriesBase) Softmax(window int) SeriesExtend {
+	return Softmax(s, window)
+}
+
+func (s *SeriesBase) Entropy(window int) float64 {
+	return Entropy(s, window)
+}
+
+func (s *SeriesBase) CrossEntropy(b Series, window int) float64 {
+	return CrossEntropy(s, b, window)
+}