<!--
freqtrade_origin/en/2024.7/freqai-feature-engineering/index.html

2247 lines
90 KiB
HTML
-->

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="Freqtrade is a free and open source crypto trading bot written in Python, designed to support all major exchanges and be controlled via Telegram or builtin Web UI">
<link rel="canonical" href="https://www.freqtrade.io/en/2024.7/freqai-feature-engineering/">
<link rel="prev" href="../freqai-parameter-table/">
<link rel="next" href="../freqai-running/">
<link rel="icon" href="../images/logo.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.34">
<title>Feature engineering - Freqtrade</title>
<link rel="stylesheet" href="../assets/stylesheets/main.35f28582.min.css">
<link rel="stylesheet" href="../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" href="../stylesheets/ft.extra.css">
<script>
  // Base URL one level above the current page; its pathname namespaces every storage key below.
  __md_scope = new URL("..", location);

  // String hash: starting from 0, fold each character as acc = (acc << 5) - acc + char code.
  __md_hash = (text) =>
    [...text].reduce((acc, ch) => (acc << 5) - acc + ch.charCodeAt(0), 0);

  // Read the entry stored under "<scope pathname>.<key>" and JSON-decode it.
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // JSON-encode a value and store it under "<scope pathname>.<key>".
  // Any error raised while writing is deliberately swallowed.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
<script id="__analytics">
  // Wires up Google tag (gtag.js) reporting: initial config, search terms,
  // feedback-form clicks and page-path updates, then loads the gtag script.
  function __md_analytics() {
    // Queue the raw `arguments` object on the data layer (must not be copied into an array).
    function gtag() {
      dataLayer.push(arguments);
    }

    window.dataLayer = window.dataLayer || [];
    gtag("js", new Date());
    gtag("config", "G-VH170LG9M5");

    document.addEventListener("DOMContentLoaded", function () {
      // Report the search term whenever the search field loses focus with a non-empty value.
      if (document.forms.search) {
        document.forms.search.query.addEventListener("blur", function () {
          if (this.value) {
            gtag("event", "search", { search_term: this.value });
          }
        });
      }

      // `document$` and `location$` are expected to be provided by the theme bundle.
      document$.subscribe(function () {
        var feedbackForm = document.forms.feedback;
        if (typeof feedbackForm === "undefined") {
          return;
        }

        for (var submitControl of feedbackForm.querySelectorAll("[type=submit]")) {
          submitControl.addEventListener("click", function (event) {
            event.preventDefault();

            var page = document.location.pathname;
            var rating = this.getAttribute("data-md-value");
            gtag("event", "feedback", { page: page, data: rating });

            // Disable the form's first child, then reveal the note matching the chosen rating.
            feedbackForm.firstElementChild.disabled = true;
            var note = feedbackForm.querySelector(
              ".md-feedback__note [data-md-value='" + rating + "']"
            );
            if (note) {
              note.hidden = false;
            }
          });

          // Runs once per submit control, so the form is only revealed when it has at least one.
          feedbackForm.hidden = false;
        }
      });

      location$.subscribe(function (url) {
        gtag("config", "G-VH170LG9M5", { page_path: url.pathname });
      });
    });

    // Inject the gtag loader right after this <script id="__analytics"> element.
    var loader = document.createElement("script");
    loader.async = true;
    loader.src = "https://www.googletagmanager.com/gtag/js?id=G-VH170LG9M5";
    document.getElementById("__analytics").insertAdjacentElement("afterEnd", loader);
  }
</script>
<script>
  // Start analytics only if the function above was actually defined.
  if (typeof __md_analytics !== "undefined") {
    __md_analytics();
  }
</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="teal">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#feature-engineering" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<div data-md-color-scheme="default" data-md-component="outdated" hidden>
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href=".." title="Freqtrade" class="md-header__button md-logo" aria-label="Freqtrade" data-md-component="logo">
<img src="../images/logo.png" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
Freqtrade
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Feature engineering
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="teal" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 6H7c-3.31 0-6 2.69-6 6s2.69 6 6 6h10c3.31 0 6-2.69 6-6s-2.69-6-6-6m0 10H7c-2.21 0-4-1.79-4-4s1.79-4 4-4h10c2.21 0 4 1.79 4 4s-1.79 4-4 4M7 9c-1.66 0-3 1.34-3 3s1.34 3 3 3 3-1.34 3-3-1.34-3-3-3"/></svg>
</label>
<input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="blue-grey" data-md-color-accent="teal" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h10a5 5 0 0 0 5-5 5 5 0 0 0-5-5m0 8a3 3 0 0 1-3-3 3 3 0 0 1 3-3 3 3 0 0 1 3 3 3 3 0 0 1-3 3"/></svg>
</label>
</form>
<script>
  // Apply the palette stored under the "__palette" key to <body> as data-md-color-* attributes.
  // The top-level `var` names are kept unchanged because they become globals.
  var palette = __md_get("__palette");
  if (palette && palette.color) {
    // A stored "(prefers-color-scheme)" entry is resolved to the toggle input that matches
    // the system's current light/dark preference.
    if ("(prefers-color-scheme)" === palette.color.media) {
      var media = matchMedia("(prefers-color-scheme: light)"),
        input = document.querySelector(
          media.matches
            ? "[data-md-color-media='(prefers-color-scheme: light)']"
            : "[data-md-color-media='(prefers-color-scheme: dark)']"
        );
      // querySelector returns null when no toggle declares that media query (as in this page,
      // where both inputs use data-md-color-media=""). Without this guard the getAttribute
      // calls throw and the stored palette is never applied.
      if (input) {
        palette.color.media = input.getAttribute("data-md-color-media");
        palette.color.scheme = input.getAttribute("data-md-color-scheme");
        palette.color.primary = input.getAttribute("data-md-color-primary");
        palette.color.accent = input.getAttribute("data-md-color-accent");
      }
    }
    for (var [key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/freqtrade/freqtrade" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
GitHub
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<!-- Main navigation -->
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href=".." title="Freqtrade" class="md-nav__button md-logo" aria-label="Freqtrade" data-md-component="logo">
<img src="../images/logo.png" alt="logo">
</a>
Freqtrade
</label>
<div class="md-nav__source">
<a href="https://github.com/freqtrade/freqtrade" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
GitHub
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href=".." class="md-nav__link">
<span class="md-ellipsis">
Home
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../docker_quickstart/" class="md-nav__link">
<span class="md-ellipsis">
Quickstart with Docker
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
Installation
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Installation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../installation/" class="md-nav__link">
<span class="md-ellipsis">
Linux/MacOS/Raspberry
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../windows_installation/" class="md-nav__link">
<span class="md-ellipsis">
Windows
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../bot-basics/" class="md-nav__link">
<span class="md-ellipsis">
Freqtrade Basics
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../configuration/" class="md-nav__link">
<span class="md-ellipsis">
Configuration
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../strategy-customization/" class="md-nav__link">
<span class="md-ellipsis">
Strategy Customization
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../strategy-callbacks/" class="md-nav__link">
<span class="md-ellipsis">
Strategy Callbacks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../stoploss/" class="md-nav__link">
<span class="md-ellipsis">
Stoploss
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../plugins/" class="md-nav__link">
<span class="md-ellipsis">
Plugins
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../bot-usage/" class="md-nav__link">
<span class="md-ellipsis">
Start the bot
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_11" >
<label class="md-nav__link" for="__nav_11" id="__nav_11_label" tabindex="0">
<span class="md-ellipsis">
Control the bot
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_11_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_11">
<span class="md-nav__icon md-icon"></span>
Control the bot
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../telegram-usage/" class="md-nav__link">
<span class="md-ellipsis">
Telegram
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../freq-ui/" class="md-nav__link">
<span class="md-ellipsis">
freqUI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../rest-api/" class="md-nav__link">
<span class="md-ellipsis">
REST API
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../webhook-config/" class="md-nav__link">
<span class="md-ellipsis">
Web Hook
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../data-download/" class="md-nav__link">
<span class="md-ellipsis">
Data Downloading
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../backtesting/" class="md-nav__link">
<span class="md-ellipsis">
Backtesting
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../hyperopt/" class="md-nav__link">
<span class="md-ellipsis">
Hyperopt
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_15" checked>
<label class="md-nav__link" for="__nav_15" id="__nav_15_label" tabindex="0">
<span class="md-ellipsis">
FreqAI
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_15_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_15">
<span class="md-nav__icon md-icon"></span>
FreqAI
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../freqai/" class="md-nav__link">
<span class="md-ellipsis">
Introduction
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../freqai-configuration/" class="md-nav__link">
<span class="md-ellipsis">
Configuration
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../freqai-parameter-table/" class="md-nav__link">
<span class="md-ellipsis">
Parameter table
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
Feature engineering
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Feature engineering
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#defining-the-features" class="md-nav__link">
<span class="md-ellipsis">
Defining the features
</span>
</a>
<nav class="md-nav" aria-label="Defining the features">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#gain-finer-control-over-feature_engineering_-functions-with-metadata" class="md-nav__link">
<span class="md-ellipsis">
Gain finer control over feature_engineering_* functions with metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#returning-additional-info-from-training" class="md-nav__link">
<span class="md-ellipsis">
Returning additional info from training
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#weighting-features-for-temporal-importance" class="md-nav__link">
<span class="md-ellipsis">
Weighting features for temporal importance
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#building-the-data-pipeline" class="md-nav__link">
<span class="md-ellipsis">
Building the data pipeline
</span>
</a>
<nav class="md-nav" aria-label="Building the data pipeline">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#customizing-the-pipeline" class="md-nav__link">
<span class="md-ellipsis">
Customizing the pipeline
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#migrating-a-custom-ifreqaimodel-to-the-new-pipeline" class="md-nav__link">
<span class="md-ellipsis">
Migrating a custom IFreqaiModel to the new Pipeline
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#outlier-detection" class="md-nav__link">
<span class="md-ellipsis">
Outlier detection
</span>
</a>
<nav class="md-nav" aria-label="Outlier detection">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#identifying-outliers-with-the-dissimilarity-index-di" class="md-nav__link">
<span class="md-ellipsis">
Identifying outliers with the Dissimilarity Index (DI)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#identifying-outliers-using-a-support-vector-machine-svm" class="md-nav__link">
<span class="md-ellipsis">
Identifying outliers using a Support Vector Machine (SVM)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#identifying-outliers-with-dbscan" class="md-nav__link">
<span class="md-ellipsis">
Identifying outliers with DBSCAN
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#data-dimensionality-reduction-with-principal-component-analysis" class="md-nav__link">
<span class="md-ellipsis">
Data dimensionality reduction with Principal Component Analysis
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../freqai-running/" class="md-nav__link">
<span class="md-ellipsis">
Running FreqAI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../freqai-reinforcement-learning/" class="md-nav__link">
<span class="md-ellipsis">
Reinforcement Learning
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../freqai-developers/" class="md-nav__link">
<span class="md-ellipsis">
Developer guide
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../leverage/" class="md-nav__link">
<span class="md-ellipsis">
Short / Leverage
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../utils/" class="md-nav__link">
<span class="md-ellipsis">
Utility Sub-commands
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../plotting/" class="md-nav__link">
<span class="md-ellipsis">
Plotting
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../exchanges/" class="md-nav__link">
<span class="md-ellipsis">
Exchange-specific Notes
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_20" >
<label class="md-nav__link" for="__nav_20" id="__nav_20_label" tabindex="0">
<span class="md-ellipsis">
Data Analysis
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_20_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_20">
<span class="md-nav__icon md-icon"></span>
Data Analysis
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../data-analysis/" class="md-nav__link">
<span class="md-ellipsis">
Jupyter Notebooks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../strategy_analysis_example/" class="md-nav__link">
<span class="md-ellipsis">
Strategy analysis
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../advanced-backtesting/" class="md-nav__link">
<span class="md-ellipsis">
Backtest analysis
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_21" >
<label class="md-nav__link" for="__nav_21" id="__nav_21_label" tabindex="0">
<span class="md-ellipsis">
Advanced Topics
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_21_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_21">
<span class="md-nav__icon md-icon"></span>
Advanced Topics
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../advanced-setup/" class="md-nav__link">
<span class="md-ellipsis">
Advanced Post-installation Tasks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../trade-object/" class="md-nav__link">
<span class="md-ellipsis">
Trade Object
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../lookahead-analysis/" class="md-nav__link">
<span class="md-ellipsis">
Lookahead analysis
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../recursive-analysis/" class="md-nav__link">
<span class="md-ellipsis">
Recursive analysis
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../strategy-advanced/" class="md-nav__link">
<span class="md-ellipsis">
Advanced Strategy
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../advanced-hyperopt/" class="md-nav__link">
<span class="md-ellipsis">
Advanced Hyperopt
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../advanced-orderflow/" class="md-nav__link">
<span class="md-ellipsis">
Orderflow
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../producer-consumer/" class="md-nav__link">
<span class="md-ellipsis">
Producer/Consumer mode
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../sql_cheatsheet/" class="md-nav__link">
<span class="md-ellipsis">
SQL Cheat-sheet
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../edge/" class="md-nav__link">
<span class="md-ellipsis">
Edge Positioning
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../faq/" class="md-nav__link">
<span class="md-ellipsis">
FAQ
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../strategy_migration/" class="md-nav__link">
<span class="md-ellipsis">
Strategy migration
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../updating/" class="md-nav__link">
<span class="md-ellipsis">
Updating Freqtrade
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../deprecated/" class="md-nav__link">
<span class="md-ellipsis">
Deprecated Features
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../developer/" class="md-nav__link">
<span class="md-ellipsis">
Contributors Guide
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<!-- Table of contents -->
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#defining-the-features" class="md-nav__link">
<span class="md-ellipsis">
Defining the features
</span>
</a>
<nav class="md-nav" aria-label="Defining the features">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#gain-finer-control-over-feature_engineering_-functions-with-metadata" class="md-nav__link">
<span class="md-ellipsis">
Gain finer control over feature_engineering_* functions with metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#returning-additional-info-from-training" class="md-nav__link">
<span class="md-ellipsis">
Returning additional info from training
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#weighting-features-for-temporal-importance" class="md-nav__link">
<span class="md-ellipsis">
Weighting features for temporal importance
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#building-the-data-pipeline" class="md-nav__link">
<span class="md-ellipsis">
Building the data pipeline
</span>
</a>
<nav class="md-nav" aria-label="Building the data pipeline">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#customizing-the-pipeline" class="md-nav__link">
<span class="md-ellipsis">
Customizing the pipeline
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#migrating-a-custom-ifreqaimodel-to-the-new-pipeline" class="md-nav__link">
<span class="md-ellipsis">
Migrating a custom IFreqaiModel to the new Pipeline
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#outlier-detection" class="md-nav__link">
<span class="md-ellipsis">
Outlier detection
</span>
</a>
<nav class="md-nav" aria-label="Outlier detection">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#identifying-outliers-with-the-dissimilarity-index-di" class="md-nav__link">
<span class="md-ellipsis">
Identifying outliers with the Dissimilarity Index (DI)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#identifying-outliers-using-a-support-vector-machine-svm" class="md-nav__link">
<span class="md-ellipsis">
Identifying outliers using a Support Vector Machine (SVM)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#identifying-outliers-with-dbscan" class="md-nav__link">
<span class="md-ellipsis">
Identifying outliers with DBSCAN
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#data-dimensionality-reduction-with-principal-component-analysis" class="md-nav__link">
<span class="md-ellipsis">
Data dimensionality reduction with Principal Component Analysis
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="feature-engineering">Feature engineering<a class="headerlink" href="#feature-engineering" title="Permanent link">&para;</a></h1>
<h2 id="defining-the-features">Defining the features<a class="headerlink" href="#defining-the-features" title="Permanent link">&para;</a></h2>
<p>Low-level feature engineering is performed in the user strategy within a set of functions called <code>feature_engineering_*</code>. These functions set the <code>base features</code>, such as <code>RSI</code>, <code>MFI</code>, <code>EMA</code>, <code>SMA</code>, time of day, volume, etc. The <code>base features</code> can be custom indicators or they can be imported from any technical-analysis library that you can find. FreqAI is equipped with a set of functions to simplify rapid large-scale feature engineering:</p>
<table>
<thead>
<tr>
<th>Function</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>feature_engineering_expand_all()</code></td>
<td>This optional function will automatically expand the defined features on the config defined <code>indicator_periods_candles</code>, <code>include_timeframes</code>, <code>include_shifted_candles</code>, and <code>include_corr_pairs</code>.</td>
</tr>
<tr>
<td><code>feature_engineering_expand_basic()</code></td>
<td>This optional function will automatically expand the defined features on the config defined <code>include_timeframes</code>, <code>include_shifted_candles</code>, and <code>include_corr_pairs</code>. Note: this function does <em>not</em> expand across <code>indicator_periods_candles</code>.</td>
</tr>
<tr>
<td><code>feature_engineering_standard()</code></td>
<td>This optional function will be called once with the dataframe of the base timeframe. This is the final function to be called, which means that the dataframe entering this function will contain all the features and columns from the base asset created by the other <code>feature_engineering_expand</code> functions. This function is a good place to do custom exotic feature extractions (e.g. tsfresh). This function is also a good place for any feature that should not be auto-expanded upon (e.g., day of the week).</td>
</tr>
<tr>
<td><code>set_freqai_targets()</code></td>
<td>Required function to set the targets for the model. All targets must be prepended with <code>&amp;</code> to be recognized by the FreqAI internals.</td>
</tr>
</tbody>
</table>
<p>Meanwhile, high-level feature engineering is handled within <code>"feature_parameters":{}</code> in the FreqAI config. Within this config, it is possible to define large-scale feature expansions on top of the <code>base features</code>, such as "including correlated pairs", "including informative timeframes", or even "including recent candles".</p>
<p>It is advisable to start from the template <code>feature_engineering_*</code> functions in the example strategy provided with the source code (found in <code>templates/FreqaiExampleStrategy.py</code>) to ensure that the feature definitions follow the correct conventions. Here is an example of how to set the indicators and labels in the strategy:</p>
<div class="highlight"><pre><span></span><code> <span class="k">def</span> <span class="nf">feature_engineering_expand_all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataframe</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">period</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> *Only functional with FreqAI enabled strategies*</span>
<span class="sd"> This function will automatically expand the defined features on the config defined</span>
<span class="sd"> `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and</span>
<span class="sd"> `include_corr_pairs`. In other words, a single feature defined in this function</span>
<span class="sd"> will automatically expand to a total of</span>
<span class="sd"> `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` *</span>
<span class="sd"> `include_corr_pairs` numbers of features added to the model.</span>
<span class="sd"> All features must be prepended with `%` to be recognized by FreqAI internals.</span>
<span class="sd"> Access metadata such as the current pair/timeframe/period with:</span>
<span class="sd"> `metadata[&quot;pair&quot;]` `metadata[&quot;tf&quot;]` `metadata[&quot;period&quot;]`</span>
<span class="sd"> :param dataframe: strategy dataframe which will receive the features</span>
<span class="sd"> :param period: period of the indicator - usage example:</span>
<span class="sd"> :param metadata: metadata of current pair</span>
<span class="sd"> dataframe[&quot;%-ema-period&quot;] = ta.EMA(dataframe, timeperiod=period)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-r</span><span class="s2">si-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">RSI</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;%-mfi-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">MFI</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-a</span><span class="s2">dx-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">ADX</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-s</span><span class="s2">ma-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">SMA</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-e</span><span class="s2">ma-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">EMA</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
<span class="n">bollinger</span> <span class="o">=</span> <span class="n">qtpylib</span><span class="o">.</span><span class="n">bollinger_bands</span><span class="p">(</span>
<span class="n">qtpylib</span><span class="o">.</span><span class="n">typical_price</span><span class="p">(</span><span class="n">dataframe</span><span class="p">),</span> <span class="n">window</span><span class="o">=</span><span class="n">period</span><span class="p">,</span> <span class="n">stds</span><span class="o">=</span><span class="mf">2.2</span>
<span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_lowerband-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">bollinger</span><span class="p">[</span><span class="s2">&quot;lower&quot;</span><span class="p">]</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_middleband-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">bollinger</span><span class="p">[</span><span class="s2">&quot;mid&quot;</span><span class="p">]</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_upperband-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">bollinger</span><span class="p">[</span><span class="s2">&quot;upper&quot;</span><span class="p">]</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;%-bb_width-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_upperband-period&quot;</span><span class="p">]</span>
<span class="o">-</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_lowerband-period&quot;</span><span class="p">]</span>
<span class="p">)</span> <span class="o">/</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_middleband-period&quot;</span><span class="p">]</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-c</span><span class="s2">lose-bb_lower-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;close&quot;</span><span class="p">]</span> <span class="o">/</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;bb_lowerband-period&quot;</span><span class="p">]</span>
<span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-r</span><span class="s2">oc-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">ROC</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-r</span><span class="s2">elative_volume-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;volume&quot;</span><span class="p">]</span> <span class="o">/</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;volume&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">rolling</span><span class="p">(</span><span class="n">period</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">dataframe</span>
<span class="k">def</span> <span class="nf">feature_engineering_expand_basic</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataframe</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> *Only functional with FreqAI enabled strategies*</span>
<span class="sd"> This function will automatically expand the defined features on the config defined</span>
<span class="sd"> `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`.</span>
<span class="sd"> In other words, a single feature defined in this function</span>
<span class="sd"> will automatically expand to a total of</span>
<span class="sd"> `include_timeframes` * `include_shifted_candles` * `include_corr_pairs`</span>
<span class="sd"> numbers of features added to the model.</span>
<span class="sd"> Features defined here will *not* be automatically duplicated on user defined</span>
<span class="sd"> `indicator_periods_candles`</span>
<span class="sd"> Access metadata such as the current pair/timeframe with:</span>
<span class="sd"> `metadata[&quot;pair&quot;]` `metadata[&quot;tf&quot;]`</span>
<span class="sd"> All features must be prepended with `%` to be recognized by FreqAI internals.</span>
<span class="sd"> :param dataframe: strategy dataframe which will receive the features</span>
<span class="sd"> :param metadata: metadata of current pair</span>
<span class="sd"> dataframe[&quot;%-pct-change&quot;] = dataframe[&quot;close&quot;].pct_change()</span>
<span class="sd"> dataframe[&quot;%-ema-200&quot;] = ta.EMA(dataframe, timeperiod=200)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;%-pct-change&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;close&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-r</span><span class="s2">aw_volume&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;volume&quot;</span><span class="p">]</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-r</span><span class="s2">aw_price&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;close&quot;</span><span class="p">]</span>
<span class="k">return</span> <span class="n">dataframe</span>
<span class="k">def</span> <span class="nf">feature_engineering_standard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataframe</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> *Only functional with FreqAI enabled strategies*</span>
<span class="sd"> This optional function will be called once with the dataframe of the base timeframe.</span>
<span class="sd"> This is the final function to be called, which means that the dataframe entering this</span>
<span class="sd"> function will contain all the features and columns created by all other</span>
<span class="sd"> feature_engineering_* functions.</span>
<span class="sd"> This function is a good place to do custom exotic feature extractions (e.g. tsfresh).</span>
<span class="sd"> This function is a good place for any feature that should not be auto-expanded upon</span>
<span class="sd"> (e.g. day of the week).</span>
<span class="sd"> Access metadata such as the current pair with:</span>
<span class="sd"> `metadata[&quot;pair&quot;]`</span>
<span class="sd"> All features must be prepended with `%` to be recognized by FreqAI internals.</span>
<span class="sd"> :param dataframe: strategy dataframe which will receive the features</span>
<span class="sd"> :param metadata: metadata of current pair</span>
<span class="sd"> usage example: dataframe[&quot;%-day_of_week&quot;] = (dataframe[&quot;date&quot;].dt.dayofweek + 1) / 7</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-d</span><span class="s2">ay_of_week&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">dayofweek</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">/</span> <span class="mi">7</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-ho</span><span class="s2">ur_of_day&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">hour</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">/</span> <span class="mi">25</span>
<span class="k">return</span> <span class="n">dataframe</span>
<span class="k">def</span> <span class="nf">set_freqai_targets</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataframe</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> *Only functional with FreqAI enabled strategies*</span>
<span class="sd"> Required function to set the targets for the model.</span>
<span class="sd"> All targets must be prepended with `&amp;` to be recognized by the FreqAI internals.</span>
<span class="sd"> Access metadata such as the current pair with:</span>
<span class="sd"> `metadata[&quot;pair&quot;]`</span>
<span class="sd"> :param dataframe: strategy dataframe which will receive the targets</span>
<span class="sd"> :param metadata: metadata of current pair</span>
<span class="sd"> usage example: dataframe[&quot;&amp;-target&quot;] = dataframe[&quot;close&quot;].shift(-1) / dataframe[&quot;close&quot;]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;&amp;-s_close&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;close&quot;</span><span class="p">]</span>
<span class="o">.</span><span class="n">shift</span><span class="p">(</span><span class="o">-</span><span class="bp">self</span><span class="o">.</span><span class="n">freqai_info</span><span class="p">[</span><span class="s2">&quot;feature_parameters&quot;</span><span class="p">][</span><span class="s2">&quot;label_period_candles&quot;</span><span class="p">])</span>
<span class="o">.</span><span class="n">rolling</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">freqai_info</span><span class="p">[</span><span class="s2">&quot;feature_parameters&quot;</span><span class="p">][</span><span class="s2">&quot;label_period_candles&quot;</span><span class="p">])</span>
<span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="o">/</span> <span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;close&quot;</span><span class="p">]</span>
<span class="o">-</span> <span class="mi">1</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">dataframe</span>
</code></pre></div>
<p>In the presented example, the user does not wish to pass the <code>bb_lowerband</code> as a feature to the model,
and has therefore not prepended it with <code>%</code>. The user does, however, wish to pass <code>bb_width</code> to the
model for training/prediction and has therefore prepended it with <code>%</code>.</p>
<p>After having defined the <code>base features</code>, the next step is to expand upon them using the powerful <code>feature_parameters</code> in the configuration file:</p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nt">&quot;freqai&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">//...</span>
<span class="w"> </span><span class="nt">&quot;feature_parameters&quot;</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;include_timeframes&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">&quot;5m&quot;</span><span class="p">,</span><span class="s2">&quot;15m&quot;</span><span class="p">,</span><span class="s2">&quot;4h&quot;</span><span class="p">],</span>
<span class="w"> </span><span class="nt">&quot;include_corr_pairlist&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="s2">&quot;ETH/USD&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s2">&quot;LINK/USD&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s2">&quot;BNB/USD&quot;</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="nt">&quot;label_period_candles&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">24</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;include_shifted_candles&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;indicator_periods_candles&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">]</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="c1">//...</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>The <code>include_timeframes</code> in the config above are the timeframes (<code>tf</code>) of each call to <code>feature_engineering_expand_*()</code> in the strategy. In the presented case, the user is asking for the <code>5m</code>, <code>15m</code>, and <code>4h</code> timeframes of the <code>rsi</code>, <code>mfi</code>, <code>roc</code>, and <code>bb_width</code> to be included in the feature set.</p>
<p>You can ask for each of the defined features to be included also for informative pairs using the <code>include_corr_pairlist</code>. This means that the feature set will include all the features from <code>feature_engineering_expand_*()</code> on all the <code>include_timeframes</code> for each of the correlated pairs defined in the config (<code>ETH/USD</code>, <code>LINK/USD</code>, and <code>BNB/USD</code> in the presented example).</p>
<p><code>include_shifted_candles</code> indicates the number of previous candles to include in the feature set. For example, <code>include_shifted_candles: 2</code> tells FreqAI to include the past 2 candles for each of the features in the feature set.</p>
<p>In total, the number of features the user of the presented example strategy has created is: length of <code>include_timeframes</code> * no. features in <code>feature_engineering_expand_*()</code> * length of <code>include_corr_pairlist</code> * no. <code>include_shifted_candles</code> * length of <code>indicator_periods_candles</code>
<span class="arithmatex">\(= 3 * 3 * 3 * 2 * 2 = 108\)</span>.</p>
<div class="admonition note">
<p class="admonition-title">Learn more about creative feature engineering</p>
<p>Check out our <a href="https://emergentmethods.medium.com/freqai-from-price-to-prediction-6fadac18b665">Medium article</a> geared toward helping users learn how to creatively engineer features.</p>
</div>
<h3 id="gain-finer-control-over-feature_engineering_-functions-with-metadata">Gain finer control over <code>feature_engineering_*</code> functions with <code>metadata</code><a class="headerlink" href="#gain-finer-control-over-feature_engineering_-functions-with-metadata" title="Permanent link">&para;</a></h3>
<p>All <code>feature_engineering_*</code> and <code>set_freqai_targets()</code> functions are passed a <code>metadata</code> dictionary which contains information about the <code>pair</code>, <code>tf</code> (timeframe), and <code>period</code> that FreqAI is automating for feature building. As such, a user can use <code>metadata</code> inside <code>feature_engineering_*</code> functions as criteria for blocking/reserving features for certain timeframes, periods, pairs etc.</p>
<div class="highlight"><pre><span></span><code><span class="k">def</span> <span class="nf">feature_engineering_expand_all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataframe</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">period</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="k">if</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;tf&quot;</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;1h&quot;</span><span class="p">:</span>
<span class="n">dataframe</span><span class="p">[</span><span class="s2">&quot;</span><span class="si">%-r</span><span class="s2">oc-period&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ta</span><span class="o">.</span><span class="n">ROC</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">timeperiod</span><span class="o">=</span><span class="n">period</span><span class="p">)</span>
</code></pre></div>
<p>This will block <code>ta.ROC()</code> from being added to any timeframes other than <code>"1h"</code>.</p>
<h3 id="returning-additional-info-from-training">Returning additional info from training<a class="headerlink" href="#returning-additional-info-from-training" title="Permanent link">&para;</a></h3>
<p>Important metrics can be returned to the strategy at the end of each model training by assigning them to <code>dk.data['extra_returns_per_train']['my_new_value'] = XYZ</code> inside the custom prediction model class. </p>
<p>FreqAI takes the <code>my_new_value</code> assigned in this dictionary and expands it to fit the dataframe that is returned to the strategy. You can then use the returned metrics in your strategy through <code>dataframe['my_new_value']</code>. One example of how return values are used in FreqAI is the <code>&amp;*_mean</code> and <code>&amp;*_std</code> values, which are used to <a href="../freqai-configuration/#creating-a-dynamic-target-threshold">create a dynamic target threshold</a>.</p>
<p>Another example, where the user wants to use live metrics from the trade database, is shown below:</p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nt">&quot;freqai&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;extra_returns_per_train&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="nt">&quot;total_profit&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">4</span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>You need to set the standard dictionary in the config so that FreqAI can return proper dataframe shapes. These values will likely be overridden by the prediction model, but in the case where the model has yet to set them, or needs a default initial value, the pre-set values are what will be returned.</p>
<h3 id="weighting-features-for-temporal-importance">Weighting features for temporal importance<a class="headerlink" href="#weighting-features-for-temporal-importance" title="Permanent link">&para;</a></h3>
<p>FreqAI allows you to set a <code>weight_factor</code> to weight recent data more strongly than past data via an exponential function:</p>
<div class="arithmatex">\[ W_i = \exp(\frac{-i}{\alpha*n}) \]</div>
<p>where <span class="arithmatex">\(W_i\)</span> is the weight of data point <span class="arithmatex">\(i\)</span> in a total set of <span class="arithmatex">\(n\)</span> data points, and <span class="arithmatex">\(\alpha\)</span> is the <code>weight_factor</code>. Below is a figure showing the effect of different weight factors on the data points in a feature set.</p>
<p><img alt="weight-factor" src="../assets/freqai_weight-factor.jpg" /></p>
<h2 id="building-the-data-pipeline">Building the data pipeline<a class="headerlink" href="#building-the-data-pipeline" title="Permanent link">&para;</a></h2>
<p>By default, FreqAI builds a dynamic pipeline based on user configuration settings. The default settings are robust and designed to work with a variety of methods. The two default steps are a <code>MinMaxScaler(-1,1)</code> and a <code>VarianceThreshold</code> which removes any column that has 0 variance. Users can activate other steps with more configuration parameters. For example, if users add <code>use_SVM_to_remove_outliers: true</code> to the <code>freqai</code> config, then FreqAI will automatically add the <a href="#identifying-outliers-using-a-support-vector-machine-svm"><code>SVMOutlierExtractor</code></a> to the pipeline. Likewise, users can add <code>principal_component_analysis: true</code> to the <code>freqai</code> config to activate PCA. The <a href="#identifying-outliers-with-the-dissimilarity-index-di">DissimilarityIndex</a> is activated with <code>DI_threshold: 1</code>. Noise can also be added to the data with <code>noise_standard_deviation: 0.1</code>. Finally, users can add <a href="#identifying-outliers-with-dbscan">DBSCAN</a> outlier removal with <code>use_DBSCAN_to_remove_outliers: true</code>.</p>
<div class="admonition note">
<p class="admonition-title">More information available</p>
<p>Please review the <a href="../freqai-parameter-table/">parameter table</a> for more information on these parameters.</p>
</div>
<h3 id="customizing-the-pipeline">Customizing the pipeline<a class="headerlink" href="#customizing-the-pipeline" title="Permanent link">&para;</a></h3>
<p>Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by simply setting <code>dk.feature_pipeline</code> to their desired <code>Pipeline</code> object inside their <code>IFreqaiModel</code> <code>train()</code> function, or if they prefer not to touch the <code>train()</code> function, they can override <code>define_data_pipeline</code>/<code>define_label_pipeline</code> functions in their <code>IFreqaiModel</code>:</p>
<div class="admonition note">
<p class="admonition-title">More information available</p>
<p>FreqAI uses the <a href="https://github.com/emergentmethods/datasieve"><code>DataSieve</code></a> pipeline, which follows the SKlearn pipeline API, but adds, among other features, coherence between the X, y, and sample_weight vector point removals, feature removal, and feature name following.</p>
</div>
<div class="highlight"><pre><span></span><code><span class="kn">from</span> <span class="nn">datasieve.transforms</span> <span class="kn">import</span> <span class="n">SKLearnWrapper</span><span class="p">,</span> <span class="n">DissimilarityIndex</span>
<span class="kn">from</span> <span class="nn">datasieve.pipeline</span> <span class="kn">import</span> <span class="n">Pipeline</span>
<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="kn">import</span> <span class="n">QuantileTransformer</span><span class="p">,</span> <span class="n">StandardScaler</span>
<span class="kn">from</span> <span class="nn">freqai.base_models</span> <span class="kn">import</span> <span class="n">BaseRegressionModel</span>
<span class="k">class</span> <span class="nc">MyFreqaiModel</span><span class="p">(</span><span class="n">BaseRegressionModel</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Some cool custom model</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data_dictionary</span><span class="p">:</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">dk</span><span class="p">:</span> <span class="n">FreqaiDataKitchen</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> My custom fit function</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">cool_model</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">def</span> <span class="nf">define_data_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Pipeline</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> User defines their custom feature pipeline here (if they wish)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">feature_pipeline</span> <span class="o">=</span> <span class="n">Pipeline</span><span class="p">([</span>
<span class="p">(</span><span class="s1">&#39;qt&#39;</span><span class="p">,</span> <span class="n">SKLearnWrapper</span><span class="p">(</span><span class="n">QuantileTransformer</span><span class="p">(</span><span class="n">output_distribution</span><span class="o">=</span><span class="s1">&#39;normal&#39;</span><span class="p">))),</span>
<span class="p">(</span><span class="s1">&#39;di&#39;</span><span class="p">,</span> <span class="n">ds</span><span class="o">.</span><span class="n">DissimilarityIndex</span><span class="p">(</span><span class="n">di_threshold</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span>
<span class="p">])</span>
<span class="k">return</span> <span class="n">feature_pipeline</span>
<span class="k">def</span> <span class="nf">define_label_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Pipeline</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> User defines their custom label pipeline here (if they wish)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">label_pipeline</span> <span class="o">=</span> <span class="n">Pipeline</span><span class="p">([</span>
<span class="p">(</span><span class="s1">&#39;qt&#39;</span><span class="p">,</span> <span class="n">SKLearnWrapper</span><span class="p">(</span><span class="n">StandardScaler</span><span class="p">())),</span>
<span class="p">])</span>
<span class="k">return</span> <span class="n">label_pipeline</span>
</code></pre></div>
<p>Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. You can use <em>most</em> SKLearn transformation steps by wrapping them in the <code>SKLearnWrapper</code> class as shown above. In addition, you can use any of the transformations available in the <a href="https://github.com/emergentmethods/datasieve"><code>DataSieve</code> library</a>. </p>
<p>You can easily add your own transformation by creating a class that inherits from the datasieve <code>BaseTransform</code> and implementing your <code>fit()</code>, <code>transform()</code> and <code>inverse_transform()</code> methods:</p>
<div class="highlight"><pre><span></span><code><span class="kn">from</span> <span class="nn">datasieve.transforms.base_transform</span> <span class="kn">import</span> <span class="n">BaseTransform</span>
<span class="c1"># import whatever else you need</span>
<span class="k">class</span> <span class="nc">MyCoolTransform</span><span class="p">(</span><span class="n">BaseTransform</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param1</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;param1&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">feature_list</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="c1"># do something with X, y, sample_weight, or/and feature_list</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">,</span> <span class="n">feature_list</span>
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">feature_list</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">outlier_check</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="c1"># do something with X, y, sample_weight, or/and feature_list</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">,</span> <span class="n">feature_list</span>
<span class="k">def</span> <span class="nf">inverse_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">feature_list</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="c1"># do/don&#39;t do something with X, y, sample_weight, or/and feature_list</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">,</span> <span class="n">feature_list</span>
</code></pre></div>
<div class="admonition note">
<p class="admonition-title">Hint</p>
<p>You can define this custom class in the same file as your <code>IFreqaiModel</code>.</p>
</div>
<h3 id="migrating-a-custom-ifreqaimodel-to-the-new-pipeline">Migrating a custom <code>IFreqaiModel</code> to the new Pipeline<a class="headerlink" href="#migrating-a-custom-ifreqaimodel-to-the-new-pipeline" title="Permanent link">&para;</a></h3>
<p>If you have created your own custom <code>IFreqaiModel</code> with a custom <code>train()</code>/<code>predict()</code> function, <em>and</em> you still rely on <code>data_cleaning_train/predict()</code>, then you will need to migrate to the new pipeline. If your model does <em>not</em> rely on <code>data_cleaning_train/predict()</code>, then you do not need to worry about this migration.</p>
<p>More details about the migration can be found <a href="../strategy_migration/#freqai---new-data-pipeline">here</a>.</p>
<h2 id="outlier-detection">Outlier detection<a class="headerlink" href="#outlier-detection" title="Permanent link">&para;</a></h2>
<p>Equity and crypto markets suffer from a high level of non-patterned noise in the form of outlier data points. FreqAI implements a variety of methods to identify such outliers and hence mitigate risk.</p>
<h3 id="identifying-outliers-with-the-dissimilarity-index-di">Identifying outliers with the Dissimilarity Index (DI)<a class="headerlink" href="#identifying-outliers-with-the-dissimilarity-index-di" title="Permanent link">&para;</a></h3>
<p>The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each prediction made by the model. </p>
<p>You can tell FreqAI to remove outlier data points from the training/test data sets using the DI by including the following statement in the config:</p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nt">&quot;freqai&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;feature_parameters&quot;</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;DI_threshold&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>This will add the <code>DissimilarityIndex</code> step to your <code>feature_pipeline</code> and set the threshold to 1. The DI allows predictions which are outliers (not existent in the model feature space) to be thrown out due to low levels of certainty. To do so, FreqAI measures the distance between each training data point (feature vector), <span class="arithmatex">\(X_{a}\)</span>, and all other training data points:</p>
<div class="arithmatex">\[ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} \]</div>
<p>where <span class="arithmatex">\(d_{ab}\)</span> is the distance between the normalized points <span class="arithmatex">\(a\)</span> and <span class="arithmatex">\(b\)</span>, and <span class="arithmatex">\(p\)</span> is the number of features, i.e., the length of the vector <span class="arithmatex">\(X\)</span>. The characteristic distance, <span class="arithmatex">\(\overline{d}\)</span>, for a set of training data points is simply the mean of the average distances:</p>
<div class="arithmatex">\[ \overline{d} = \sum_{a=1}^n(\sum_{b=1}^n(d_{ab}/n)/n) \]</div>
<p><span class="arithmatex">\(\overline{d}\)</span> quantifies the spread of the training data, which is compared to the distance between a new prediction feature vector, <span class="arithmatex">\(X_k\)</span>, and all the training data:</p>
<div class="arithmatex">\[ d_k = \min_{i} d_{k,i} \]</div>
<p>This enables the estimation of the Dissimilarity Index as:</p>
<div class="arithmatex">\[ DI_k = d_k/\overline{d} \]</div>
<p>You can tweak the DI through the <code>DI_threshold</code> to increase or decrease the extrapolation of the trained model. A higher <code>DI_threshold</code> means that the DI is more lenient and allows predictions further away from the training data to be used whilst a lower <code>DI_threshold</code> has the opposite effect and hence discards more predictions.</p>
<p>Below is a figure that describes the DI for a 3D data set.</p>
<p><img alt="Illustration of the Dissimilarity Index (DI) for a 3D data set" src="../assets/freqai_DI.jpg" /></p>
<h3 id="identifying-outliers-using-a-support-vector-machine-svm">Identifying outliers using a Support Vector Machine (SVM)<a class="headerlink" href="#identifying-outliers-using-a-support-vector-machine-svm" title="Permanent link">&para;</a></h3>
<p>You can tell FreqAI to remove outlier data points from the training/test data sets using a Support Vector Machine (SVM) by including the following statement in the config:</p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nt">&quot;freqai&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;feature_parameters&quot;</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;use_SVM_to_remove_outliers&quot;</span><span class="p">:</span><span class="w"> </span><span class="kc">true</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>This will add the <code>SVMOutlierExtractor</code> step to your <code>feature_pipeline</code>. The SVM will be trained on the training data and any data point that the SVM deems to be beyond the feature space will be removed.</p>
<p>You can elect to provide additional parameters for the SVM, such as <code>shuffle</code> and <code>nu</code>, via the <code>feature_parameters.svm_params</code> dictionary in the config.</p>
<p>The parameter <code>shuffle</code> is by default set to <code>False</code> to ensure consistent results. If it is set to <code>True</code>, running the SVM multiple times on the same data set might result in different outcomes due to <code>max_iter</code> being too low for the algorithm to reach the demanded <code>tol</code>. Increasing <code>max_iter</code> solves this issue but causes the procedure to take longer.</p>
<p>The parameter <code>nu</code>, <em>very</em> broadly, is the proportion of data points that should be considered outliers and should be between 0 and 1.</p>
<h3 id="identifying-outliers-with-dbscan">Identifying outliers with DBSCAN<a class="headerlink" href="#identifying-outliers-with-dbscan" title="Permanent link">&para;</a></h3>
<p>You can configure FreqAI to use DBSCAN to cluster and remove outliers from the training/test data set or incoming outliers from predictions, by activating <code>use_DBSCAN_to_remove_outliers</code> in the config:</p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nt">&quot;freqai&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;feature_parameters&quot;</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;use_DBSCAN_to_remove_outliers&quot;</span><span class="p">:</span><span class="w"> </span><span class="kc">true</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>Which will add the <code>DataSieveDBSCAN</code> step to your <code>feature_pipeline</code>. This is an unsupervised machine learning algorithm that clusters data without needing to know how many clusters there should be.</p>
<p>Given a number of data points <span class="arithmatex">\(N\)</span>, and a distance <span class="arithmatex">\(\varepsilon\)</span>, DBSCAN clusters the data set by setting all data points that have <span class="arithmatex">\(N-1\)</span> other data points within a distance of <span class="arithmatex">\(\varepsilon\)</span> as <em>core points</em>. A data point that is within a distance of <span class="arithmatex">\(\varepsilon\)</span> from a <em>core point</em> but that does not have <span class="arithmatex">\(N-1\)</span> other data points within a distance of <span class="arithmatex">\(\varepsilon\)</span> from itself is considered an <em>edge point</em>. A cluster is then the collection of <em>core points</em> and <em>edge points</em>. Data points that have no other data points at a distance <span class="arithmatex">\(&lt;\varepsilon\)</span> are considered outliers. The figure below shows a cluster with <span class="arithmatex">\(N = 3\)</span>.</p>
<p><img alt="Illustration of a DBSCAN cluster with N = 3" src="../assets/freqai_dbscan.jpg" /></p>
<p>FreqAI uses <code>sklearn.cluster.DBSCAN</code> (details are available on scikit-learn's webpage <a href="https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html">here</a> (external website)) with <code>min_samples</code> (<span class="arithmatex">\(N\)</span>) taken as &frac14; of the no. of time points (candles) in the feature set. <code>eps</code> (<span class="arithmatex">\(\varepsilon\)</span>) is computed automatically as the elbow point in the <em>k-distance graph</em> computed from the nearest neighbors in the pairwise distances of all data points in the feature set.</p>
<h3 id="data-dimensionality-reduction-with-principal-component-analysis">Data dimensionality reduction with Principal Component Analysis<a class="headerlink" href="#data-dimensionality-reduction-with-principal-component-analysis" title="Permanent link">&para;</a></h3>
<p>You can reduce the dimensionality of your features by activating <code>principal_component_analysis</code> in the config:</p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nt">&quot;freqai&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;feature_parameters&quot;</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;principal_component_analysis&quot;</span><span class="p">:</span><span class="w"> </span><span class="kc">true</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>This will perform PCA on the features and reduce their dimensionality so that the explained variance of the data set is &gt;= 0.999. Reducing data dimensionality makes training the model faster and hence allows for more up-to-date models.</p>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<nav class="md-footer__inner md-grid" aria-label="Footer" >
<a href="../freqai-parameter-table/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Parameter table">
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</div>
<div class="md-footer__title">
<span class="md-footer__direction">
Previous
</span>
<div class="md-ellipsis">
Parameter table
</div>
</div>
</a>
<a href="../freqai-running/" class="md-footer__link md-footer__link--next" aria-label="Next: Running FreqAI">
<div class="md-footer__title">
<span class="md-footer__direction">
Next
</span>
<div class="md-ellipsis">
Running FreqAI
</div>
</div>
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
</div>
</a>
</nav>
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
<!-- Place this tag in your head or just before your close body tag. -->
<script async defer src="https://buttons.github.io/buttons.js"></script>
<script src="https://code.jquery.com/jquery-3.4.1.min.js"
integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "..", "features": ["content.code.annotate", "search.share", "content.code.copy", "navigation.top", "navigation.footer"], "search": "../assets/javascripts/workers/search.07f07601.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": {"alias": true, "provider": "mike"}}</script>
<script src="../assets/javascripts/bundle.56dfad97.min.js"></script>
<script src="../javascripts/config.js"></script>
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</body>
</html>