Merge pull request #809 from zenixls2/feature/logistic_regression

feature: logistic regression
This commit is contained in:
Zenix 2022-07-09 17:27:20 +09:00 committed by GitHub
commit e633cedd3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 314 additions and 1 deletions

View File

@ -94,6 +94,10 @@ type SeriesExtend interface {
Covariance(b Series, length int) float64
Correlation(b Series, length int, method ...CorrFunc) float64
Rank(length int) SeriesExtend
Sigmoid() SeriesExtend
Softmax(window int) SeriesExtend
Entropy(window int) float64
CrossEntropy(b Series, window int) float64
}
type SeriesBase struct {
@ -524,7 +528,69 @@ var _ Series = &MulSeriesResult{}
// if limit is given, will only calculate the first limit numbers (a.Index[0..limit])
// otherwise will operate on all elements
func Dot(a interface{}, b interface{}, limit ...int) float64 {
return Sum(Mul(a, b), limit...)
var aaf float64
var aas Series
var bbf float64
var bbs Series
var isaf, isbf bool
switch tp := a.(type) {
case float64:
aaf = tp
isaf = true
case Series:
aas = tp
isaf = false
default:
panic("input should be either Series or float64")
}
switch tp := b.(type) {
case float64:
bbf = tp
isbf = true
case Series:
bbs = tp
isbf = false
default:
panic("input should be either Series or float64")
}
l := 1
if len(limit) > 0 {
l = limit[0]
} else if isaf && isbf {
l = 1
} else {
if !isaf {
l = aas.Length()
}
if !isbf {
if l > bbs.Length() {
l = bbs.Length()
}
}
}
if isaf && isbf {
return aaf * bbf * float64(l)
} else if isaf && !isbf {
sum := 0.
for i := 0; i < l; i++ {
sum += aaf * bbs.Index(i)
}
return sum
} else if !isaf && isbf {
sum := 0.
for i := 0; i < l; i++ {
sum += aas.Index(i) * bbf
}
return sum
} else {
sum := 0.
for i := 0; i < l; i++ {
sum += aas.Index(i) * bbs.Index(i)
}
return sum
}
}
// Extract elements from the Series to a float64 array, following the order of Index(0..limit)
@ -881,4 +947,175 @@ func Rolling(a Series, window int) *RollingResult {
return &RollingResult{a, window}
}
type SigmoidResult struct {
a Series
}
func (s *SigmoidResult) Last() float64 {
return 1. / (1. + math.Exp(-s.a.Last()))
}
func (s *SigmoidResult) Index(i int) float64 {
return 1. / (1. + math.Exp(-s.a.Index(i)))
}
func (s *SigmoidResult) Length() int {
return s.a.Length()
}
// Sigmoid returns the input values in range of -1 to 1
// along the sigmoid or s-shaped curve.
// Commonly used in machine learning while training neural networks
// as an activation function.
func Sigmoid(a Series) SeriesExtend {
return NewSeries(&SigmoidResult{a})
}
// SoftMax returns the input value in the range of 0 to 1
// with sum of all the probabilities being equal to one.
// It is commonly used in machine learning neural networks.
// Will return Softmax SeriesExtend result based in latest [window] numbers from [a] Series
func Softmax(a Series, window int) SeriesExtend {
s := 0.0
max := Highest(a, window)
for i := 0; i < window; i++ {
s += math.Exp(a.Index(i) - max)
}
out := NewQueue(window)
for i := window - 1; i >= 0; i-- {
out.Update(math.Exp(a.Index(i)-max) / s)
}
return out
}
// Entropy computes the Shannon entropy of a distribution or the distance between
// two distributions. The natural logarithm is used.
// - sum(v * ln(v))
func Entropy(a Series, window int) (e float64) {
for i := 0; i < window; i++ {
v := a.Index(i)
if v != 0 {
e -= v * math.Log(v)
}
}
return e
}
// CrossEntropy computes the cross-entropy between the two distributions
func CrossEntropy(a, b Series, window int) (e float64) {
for i := 0; i < window; i++ {
v := a.Index(i)
if v != 0 {
e -= v * math.Log(b.Index(i))
}
}
return e
}
func sigmoid(z float64) float64 {
return 1. / (1. + math.Exp(-z))
}
func propagate(w []float64, gradient float64, x [][]float64, y []float64) (float64, []float64, float64) {
logloss_epoch := 0.0
var activations []float64
var dw []float64
m := len(y)
db := 0.0
for i, xx := range x {
result := 0.0
for j, ww := range w {
result += ww * xx[j]
}
a := sigmoid(result + gradient)
activations = append(activations, a)
logloss := a*math.Log1p(y[i]) + (1.-a)*math.Log1p(1-y[i])
logloss_epoch += logloss
db += a - y[i]
}
for j := range w {
err := 0.0
for i, xx := range x {
err_i := activations[i] - y[i]
err += err_i * xx[j]
}
err /= float64(m)
dw = append(dw, err)
}
cost := -(logloss_epoch / float64(len(x)))
db /= float64(m)
return cost, dw, db
}
func LogisticRegression(x []Series, y Series, lookback, iterations int, learningRate float64) *LogisticRegressionModel {
features := len(x)
if features == 0 {
panic("no feature to train")
}
w := make([]float64, features)
if lookback > x[0].Length() {
lookback = x[0].Length()
}
xx := make([][]float64, lookback)
for i := 0; i < lookback; i++ {
for j := 0; j < features; j++ {
xx[i] = append(xx[i], x[j].Index(lookback-i-1))
}
}
yy := Reverse(y, lookback)
b := 0.
for i := 0; i < iterations; i++ {
_, dw, db := propagate(w, b, xx, yy)
for j := range w {
w[j] = w[j] - (learningRate * dw[j])
}
b -= learningRate * db
}
return &LogisticRegressionModel{
Weight: w,
Gradient: b,
LearningRate: learningRate,
}
}
type LogisticRegressionModel struct {
Weight []float64
Gradient float64
LearningRate float64
}
/*
// Might not be correct.
// Please double check before uncomment this
func (l *LogisticRegressionModel) Update(x []float64, y float64) {
z := 0.0
for i, w := l.Weight {
z += w * x[i]
}
a := sigmoid(z + l.Gradient)
//logloss := a * math.Log1p(y) + (1.-a)*math.Log1p(1-y)
db = a - y
var dw []float64
for j, ww := range l.Weight {
err := db * x[j]
dw = append(dw, err)
}
for i := range l.Weight {
l.Weight[i] -= l.LearningRate * dw[i]
}
l.Gradient -= l.LearningRate * db
}
*/
func (l *LogisticRegressionModel) Predict(x []float64) float64 {
z := 0.0
for i, w := range l.Weight {
z += w * x[i]
}
return sigmoid(z + l.Gradient)
}
// TODO: ta.linreg

View File

@ -2,6 +2,7 @@ package types
import (
"github.com/stretchr/testify/assert"
"gonum.org/v1/gonum/stat"
"testing"
)
@ -84,3 +85,62 @@ func TestSkew(t *testing.T) {
sk := Skew(&a, 4)
assert.InDelta(t, sk, 1.129338, 0.001)
}
func TestEntropy(t *testing.T) {
var a = Float64Slice{.2, .0, .6, .2}
e := stat.Entropy(a)
assert.InDelta(t, e, Entropy(&a, a.Length()), 0.0001)
}
func TestCrossEntropy(t *testing.T) {
var a = Float64Slice{.2, .0, .6, .2}
var b = Float64Slice{.3, .6, .0, .1}
e := stat.CrossEntropy(a, b)
assert.InDelta(t, e, CrossEntropy(&a, &b, a.Length()), 0.0001)
}
func TestSoftmax(t *testing.T) {
var a = Float64Slice{3.0, 1.0, 0.2}
out := Softmax(&a, a.Length())
r := Float64Slice{0.8360188027814407, 0.11314284146556013, 0.05083835575299916}
for i := 0; i < out.Length(); i++ {
assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
}
}
func TestSigmoid(t *testing.T) {
a := Float64Slice{3.0, 1.0, 2.1}
out := Sigmoid(&a)
r := Float64Slice{0.9525741268224334, 0.7310585786300049, 0.8909031788043871}
for i := 0; i < out.Length(); i++ {
assert.InDelta(t, r.Index(i), out.Index(i), 0.001)
}
}
// from https://en.wikipedia.org/wiki/Logistic_regression
func TestLogisticRegression(t *testing.T) {
a := []Float64Slice{{0.5, 0.75, 1., 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3., 3.25, 3.5, 4., 4.25, 4.5, 4.75, 5., 5.5}}
b := Float64Slice{0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1}
var x []Series
x = append(x, &a[0])
model := LogisticRegression(x, &b, a[0].Length(), 8000, 0.0009)
inputs := []float64{1., 2., 2.7, 3., 4., 5.}
results := []bool{false, false, true, true, true, true}
for i, x := range inputs {
input := []float64{x}
pred := model.Predict(input)
assert.Equal(t, pred > 0.5, results[i])
}
}
func TestDot(t *testing.T) {
a := Float64Slice{7, 6, 5, 4, 3, 2, 1, 0}
b := Float64Slice{200., 201., 203., 204., 203., 199.}
out1 := Dot(&a, &b, 3)
assert.InDelta(t, out1, 611., 0.001)
out2 := Dot(&a, 3., 2)
assert.InDelta(t, out2, 3., 0.001)
out3 := Dot(3., &a, 2)
assert.InDelta(t, out2, out3, 0.001)
}

View File

@ -124,3 +124,19 @@ func (s *SeriesBase) Correlation(b Series, length int, method ...CorrFunc) float
func (s *SeriesBase) Rank(length int) SeriesExtend {
return Rank(s, length)
}
func (s *SeriesBase) Sigmoid() SeriesExtend {
return Sigmoid(s)
}
func (s *SeriesBase) Softmax(window int) SeriesExtend {
return Softmax(s, window)
}
func (s *SeriesBase) Entropy(window int) float64 {
return Entropy(s, window)
}
func (s *SeriesBase) CrossEntropy(b Series, window int) float64 {
return CrossEntropy(s, b, window)
}