feature: add entropy, cross entropy, sigmoid, softmax, and logistic regression

This commit is contained in:
zenix 2022-07-08 16:58:59 +09:00
parent 46d6ecc663
commit 0e64a14d7f
3 changed files with 314 additions and 1 deletions

View File

@ -94,6 +94,10 @@ type SeriesExtend interface {
Covariance(b Series, length int) float64 Covariance(b Series, length int) float64
Correlation(b Series, length int, method ...CorrFunc) float64 Correlation(b Series, length int, method ...CorrFunc) float64
Rank(length int) SeriesExtend Rank(length int) SeriesExtend
Sigmoid() SeriesExtend
Softmax(window int) SeriesExtend
Entropy(window int) float64
CrossEntropy(b Series, window int) float64
} }
type SeriesBase struct { type SeriesBase struct {
@ -524,7 +528,69 @@ var _ Series = &MulSeriesResult{}
// if limit is given, will only calculate the first limit numbers (a.Index[0..limit]) // if limit is given, will only calculate the first limit numbers (a.Index[0..limit])
// otherwise will operate on all elements // otherwise will operate on all elements
func Dot(a interface{}, b interface{}, limit ...int) float64 { func Dot(a interface{}, b interface{}, limit ...int) float64 {
return Sum(Mul(a, b), limit...) var aaf float64
var aas Series
var bbf float64
var bbs Series
var isaf, isbf bool
switch tp := a.(type) {
case float64:
aaf = tp
isaf = true
case Series:
aas = tp
isaf = false
default:
panic("input should be either Series or float64")
}
switch tp := b.(type) {
case float64:
bbf = tp
isbf = true
case Series:
bbs = tp
isbf = false
default:
panic("input should be either Series or float64")
}
l := 1
if len(limit) > 0 {
l = limit[0]
} else if isaf && isbf {
l = 1
} else {
if !isaf {
l = aas.Length()
}
if !isbf {
if l > bbs.Length() {
l = bbs.Length()
}
}
}
if isaf && isbf {
return aaf * bbf * float64(l)
} else if isaf && !isbf {
sum := 0.
for i := 0; i < l; i++ {
sum += aaf * bbs.Index(i)
}
return sum
} else if !isaf && isbf {
sum := 0.
for i := 0; i < l; i++ {
sum += aas.Index(i) * bbf
}
return sum
} else {
sum := 0.
for i := 0; i < l; i++ {
sum += aas.Index(i) * bbs.Index(i)
}
return sum
}
} }
// Extract elements from the Series to a float64 array, following the order of Index(0..limit) // Extract elements from the Series to a float64 array, following the order of Index(0..limit)
@ -881,4 +947,175 @@ func Rolling(a Series, window int) *RollingResult {
return &RollingResult{a, window} return &RollingResult{a, window}
} }
type SigmoidResult struct {
a Series
}
func (s *SigmoidResult) Last() float64 {
return 1. / (1. + math.Exp(-s.a.Last()))
}
func (s *SigmoidResult) Index(i int) float64 {
return 1. / (1. + math.Exp(-s.a.Index(i)))
}
func (s *SigmoidResult) Length() int {
return s.a.Length()
}
// Sigmoid returns the input values in range of -1 to 1
// along the sigmoid or s-shaped curve.
// Commonly used in machine learning while training neural networks
// as an activation function.
func Sigmoid(a Series) SeriesExtend {
return NewSeries(&SigmoidResult{a})
}
// SoftMax returns the input value in the range of 0 to 1
// with sum of all the probabilities being equal to one.
// It is commonly used in machine learning neural networks.
// Will return Softmax SeriesExtend result based in latest [window] numbers from [a] Series
func Softmax(a Series, window int) SeriesExtend {
s := 0.0
max := Highest(a, window)
for i := 0; i < window; i++ {
s += math.Exp(a.Index(i) - max)
}
out := NewQueue(window)
for i := window - 1; i >= 0; i-- {
out.Update(math.Exp(a.Index(i)-max) / s)
}
return out
}
// Entropy computes the Shannon entropy of a distribution or the distance between
// two distributions. The natural logarithm is used.
// - sum(v * ln(v))
func Entropy(a Series, window int) (e float64) {
for i := 0; i < window; i++ {
v := a.Index(i)
if v != 0 {
e -= v * math.Log(v)
}
}
return e
}
// CrossEntropy computes the cross-entropy between the two distributions
func CrossEntropy(a, b Series, window int) (e float64) {
for i := 0; i < window; i++ {
v := a.Index(i)
if v != 0 {
e -= v * math.Log(b.Index(i))
}
}
return e
}
func sigmoid(z float64) float64 {
return 1. / (1. + math.Exp(-z))
}
func propagate(w []float64, gradient float64, x [][]float64, y []float64) (float64, []float64, float64) {
logloss_epoch := 0.0
var activations []float64
var dw []float64
m := len(y)
db := 0.0
for i, xx := range x {
result := 0.0
for j, ww := range w {
result += ww * xx[j]
}
a := sigmoid(result + gradient)
activations = append(activations, a)
logloss := a*math.Log1p(y[i]) + (1.-a)*math.Log1p(1-y[i])
logloss_epoch += logloss
db += a - y[i]
}
for j := range w {
err := 0.0
for i, xx := range x {
err_i := activations[i] - y[i]
err += err_i * xx[j]
}
err /= float64(m)
dw = append(dw, err)
}
cost := -(logloss_epoch / float64(len(x)))
db /= float64(m)
return cost, dw, db
}
func LogisticRegression(x []Series, y Series, lookback, iterations int, learningRate float64) *LogisticRegressionModel {
features := len(x)
if features == 0 {
panic("no feature to train")
}
w := make([]float64, features)
if lookback > x[0].Length() {
lookback = x[0].Length()
}
xx := make([][]float64, lookback)
for i := 0; i < lookback; i++ {
for j := 0; j < features; j++ {
xx[i] = append(xx[i], x[j].Index(lookback-i-1))
}
}
yy := Reverse(y, lookback)
b := 0.
for i := 0; i < iterations; i++ {
_, dw, db := propagate(w, b, xx, yy)
for j := range w {
w[j] = w[j] - (learningRate * dw[j])
}
b -= learningRate * db
}
return &LogisticRegressionModel{
Weight: w,
Gradient: b,
LearningRate: learningRate,
}
}
type LogisticRegressionModel struct {
Weight []float64
Gradient float64
LearningRate float64
}
/*
// Might not be correct.
// Please double check before uncomment this
func (l *LogisticRegressionModel) Update(x []float64, y float64) {
z := 0.0
for i, w := l.Weight {
z += w * x[i]
}
a := sigmoid(z + l.Gradient)
//logloss := a * math.Log1p(y) + (1.-a)*math.Log1p(1-y)
db = a - y
var dw []float64
for j, ww := range l.Weight {
err := db * x[j]
dw = append(dw, err)
}
for i := range l.Weight {
l.Weight[i] -= l.LearningRate * dw[i]
}
l.Gradient -= l.LearningRate * db
}
*/
func (l *LogisticRegressionModel) Predict(x []float64) float64 {
z := 0.0
for i, w := range l.Weight {
z += w * x[i]
}
return sigmoid(z + l.Gradient)
}
// TODO: ta.linreg // TODO: ta.linreg

View File

@ -2,6 +2,7 @@ package types
import ( import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"gonum.org/v1/gonum/stat"
"testing" "testing"
) )
@ -84,3 +85,62 @@ func TestSkew(t *testing.T) {
sk := Skew(&a, 4) sk := Skew(&a, 4)
assert.InDelta(t, sk, 1.129338, 0.001) assert.InDelta(t, sk, 1.129338, 0.001)
} }
func TestEntropy(t *testing.T) {
var a = Float64Slice{.2, .0, .6, .2}
e := stat.Entropy(a)
assert.InDelta(t, e, Entropy(&a, a.Length()), 0.0001)
}
func TestCrossEntropy(t *testing.T) {
var a = Float64Slice{.2, .0, .6, .2}
var b = Float64Slice{.3, .6, .0, .1}
e := stat.CrossEntropy(a, b)
assert.InDelta(t, e, CrossEntropy(&a, &b, a.Length()), 0.0001)
}
func TestSoftmax(t *testing.T) {
var a = Float64Slice{3.0, 1.0, 0.2}
out := Softmax(&a, a.Length())
r := Float64Slice{0.8360188027814407, 0.11314284146556013, 0.05083835575299916}
for i := 0; i < out.Length(); i++ {
assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
}
}
func TestSigmoid(t *testing.T) {
a := Float64Slice{3.0, 1.0, 2.1}
out := Sigmoid(&a)
r := Float64Slice{0.9525741268224334, 0.7310585786300049, 0.8909031788043871}
for i := 0; i < out.Length(); i++ {
assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
}
}
// from https://en.wikipedia.org/wiki/Logistic_regression
func TestLogisticRegression(t *testing.T) {
a := []Float64Slice{{0.5, 0.75, 1., 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3., 3.25, 3.5, 4., 4.25, 4.5, 4.75, 5., 5.5}}
b := Float64Slice{0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1}
var x []Series
x = append(x, &a[0])
model := LogisticRegression(x, &b, a[0].Length(), 8000, 0.0009)
inputs := []float64{1., 2., 2.7, 3., 4., 5.}
results := []bool{false, false, true, true, true, true}
for i, x := range inputs {
input := []float64{x}
pred := model.Predict(input)
assert.Equal(t, pred > 0.5, results[i])
}
}
func TestDot(t *testing.T) {
a := Float64Slice{7, 6, 5, 4, 3, 2, 1, 0}
b := Float64Slice{200., 201., 203., 204., 203., 199.}
out1 := Dot(&a, &b, 3)
assert.InDelta(t, out1, 611., 0.001)
out2 := Dot(&a, 3., 2)
assert.InDelta(t, out2, 3., 0.001)
out3 := Dot(3., &a, 2)
assert.InDelta(t, out2, out3, 0.001)
}

View File

@ -124,3 +124,19 @@ func (s *SeriesBase) Correlation(b Series, length int, method ...CorrFunc) float
func (s *SeriesBase) Rank(length int) SeriesExtend { func (s *SeriesBase) Rank(length int) SeriesExtend {
return Rank(s, length) return Rank(s, length)
} }
func (s *SeriesBase) Sigmoid() SeriesExtend {
return Sigmoid(s)
}
func (s *SeriesBase) Softmax(window int) SeriesExtend {
return Softmax(s, window)
}
func (s *SeriesBase) Entropy(window int) float64 {
return Entropy(s, window)
}
func (s *SeriesBase) CrossEntropy(b Series, window int) float64 {
return CrossEntropy(s, b, window)
}