AutoFarm - Time Series of Temperature and Humidity
In my other notebook, we did some really great analysis and data wrangling on the AutoFarm soil moisture dataset. We are going to pick up that dataset and try a different type of analysis: time series. Just like regression models, there is a ladder of increasing complexity that can be used for prediction, with one of the simplest models being ARIMA (AutoRegressive Integrated Moving Average) and the more complex being neural network approaches like the LSTM (Long Short-Term Memory).
In this notebook, I'm going to explore and compare different time series models to predict the temperature in the AutoFarm dataset. So much has been written about these models, and I will provide links to the resources I used to learn about them. No sense in rewriting what has already been written by more capable authors.
import sys
import warnings
warnings.filterwarnings("ignore")
class no_print():
    """Context manager that temporarily redirects stdout to /dev/null."""
    def __enter__(self):
        # Save the real stdout on the instance instead of clobbering sys.__stdout__.
        self._stdout = sys.stdout
        sys.stdout = open('/dev/null', 'w')
        return self

    def __exit__(self, type, value, traceback):
        sys.stdout.close()
        sys.stdout = self._stdout
import pandas as pd
df = pd.read_parquet('auto_farm_metrics.parquet')
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
df['watering_start'] = pd.to_datetime(df['watering'].apply(lambda x: x.split(' - ')[0]))
df['watering_end'] = pd.to_datetime(df['watering'].apply(lambda x: x.split(' - ')[1]))
df = df.drop(columns=['watering'])
df
 | timestamp | dht11_air_temp | dht11_air_humid | ds18b20_air_temp_0 | ds18b20_air_temp_1 | soil_moisture_0 | soil_moisture_1 | soil_moisture_2 | soil_moisture_3 | avg_soil_moisture | brightness | watering_start | watering_end |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-10-10 07:00:00 | 32.000000 | 63.754826 | 27.704363 | 27.293797 | 417.498069 | 463.480695 | 481.974903 | 326.843629 | 422.449324 | 0.000000 | 2020-10-10 06:57:00 | 2020-10-11 23:53:00 |
1 | 2020-10-10 07:10:00 | 32.000000 | 63.632296 | 27.633689 | 27.578280 | 434.618677 | 475.237354 | 453.719844 | 322.889105 | 421.616245 | 0.000000 | 2020-10-10 06:57:00 | 2020-10-11 23:53:00 |
2 | 2020-10-10 07:20:00 | 32.000000 | 63.215116 | 27.556795 | 27.496446 | 438.647287 | 475.158915 | 450.346899 | 322.496124 | 421.662306 | 0.000000 | 2020-10-10 06:57:00 | 2020-10-11 23:53:00 |
3 | 2020-10-10 07:30:00 | 32.000000 | 63.022814 | 27.440209 | 27.397051 | 441.794677 | 475.442966 | 451.977186 | 323.192015 | 423.101711 | 0.000000 | 2020-10-10 06:57:00 | 2020-10-11 23:53:00 |
4 | 2020-10-10 07:40:00 | 32.000000 | 63.118447 | 27.238981 | 27.232012 | 445.568932 | 476.543689 | 454.366990 | 324.236893 | 425.179126 | 0.000000 | 2020-10-10 06:57:00 | 2020-10-11 23:53:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4015 | 2020-11-08 01:10:00 | 32.274662 | 67.290135 | 27.719818 | 27.958708 | 487.388781 | 463.040619 | 412.077369 | 322.647969 | 421.288685 | 124.100862 | 2020-11-04 20:34:00 | 2020-11-08 03:33:00 |
4016 | 2020-11-08 01:20:00 | 32.013436 | 67.911708 | 27.760113 | 28.013246 | 488.570058 | 464.155470 | 412.541267 | 322.827255 | 422.023512 | 123.418997 | 2020-11-04 20:34:00 | 2020-11-08 03:33:00 |
4017 | 2020-11-08 01:30:00 | 32.000000 | 68.000000 | 27.834587 | 28.057213 | 489.788462 | 465.150000 | 413.225000 | 323.507692 | 422.917788 | 122.864865 | 2020-11-04 20:34:00 | 2020-11-08 03:33:00 |
4018 | 2020-11-08 01:40:00 | 32.000000 | 68.000000 | 27.856563 | 28.067134 | 490.801942 | 465.866019 | 413.541748 | 323.900971 | 423.527670 | 122.810977 | 2020-11-04 20:34:00 | 2020-11-08 03:33:00 |
4019 | 2020-11-08 01:50:00 | 32.000000 | 68.000000 | 27.861185 | 28.077753 | 491.806818 | 466.653409 | 414.045455 | 324.261364 | 424.191761 | 123.305455 | 2020-11-04 20:34:00 | 2020-11-08 03:33:00 |
4020 rows × 13 columns
To start, let's begin with a basic train/test split. In time series, splitting the data randomly wouldn't make any sense: since we are predicting into the future, we need each split to be a contiguous block of time. That's simple for us, since we can just sort by time and split the data at a cutoff.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")
df = df.sort_values('timestamp').reset_index(drop=True)
train_indices = int(len(df) * 0.8)
df['train_set'] = df.index < train_indices
df['test_set'] = df.index >= train_indices
train = df[df['train_set']]['ds18b20_air_temp_0']
test = df[df['test_set']]['ds18b20_air_temp_0']
plt.figure(figsize=(15, 5))
sns.lineplot(data=df, x='timestamp', y='ds18b20_air_temp_0', hue='train_set')
plt.title('Temperature Data Set')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend([],[], frameon=False)
plt.show()
ARIMA Model
To start, as a baseline, the ARIMA model will be used. Some great resources that I used to learn about the ARIMA model are:
ARIMA Model – Complete Guide to Time Series Forecasting in Python
statsmodels.tsa.arima.model.ARIMA
Even though it's quite simple, it can be intimidating at first. There are many parameters that can be tuned, and knowing what each one does takes some time to learn. Because there are so many hyperparameters, the pmdarima library has a function called auto_arima that will automatically tune them for you. It does more than just cross validation: it also estimates one of the core hyperparameters (d), which is used to make the time series stationary.
import pmdarima as pm
model = pm.auto_arima(
train,
start_p=1,
start_q=1,
max_p=10,
max_q=10,
stepwise=True,
suppress_warnings=True,
error_action='ignore',
seasonal=False,
maxiter=10
)
forecast = model.predict(n_periods=len(test))
plt.figure(figsize=(15, 5))
plt.plot(df[df['train_set']]['timestamp'], train, label='Train', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], test, label='Test', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], forecast, label='Forecast')
plt.title('Temperature Data Set with ARIMA Forecast')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend([],[], frameon=False)
plt.show()
Unfortunately, I could never get this model to perform well at all. I tried many different parameters and it always performed poorly. I'm not sure if it's because the data is too noisy or if I'm not using the model correctly. In addition, using auto_arima to find the right hyperparameters took an enormous amount of CPU and time, forcing me to move on. A disappointing pass at this; I'm sure to come back to it in the future.
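If I do come back to it, one cheaper path might be to skip the search entirely and fit a small, fixed-order ARIMA directly with statsmodels. A minimal sketch (the order below is a placeholder guess, not a tuned value, and I did not run this for the post):
# Hypothetical follow-up: fit a fixed-order ARIMA and score it on the test split.
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error

arima_model = ARIMA(train.values, order=(2, 1, 2)).fit()  # (p, d, q) is just a guess
arima_forecast = arima_model.forecast(steps=len(test))
print(f'Fixed-order ARIMA MAE: {mean_absolute_error(test, arima_forecast):.2f}')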
Fortunately, we can try many different models from the sklearn library with the skforecast library. It's a plug-and-play library for time series forecasting where you can plug in any regression model from sklearn and it will automatically fit the model to the time series data. To start, we can use a simple linear regression model to predict the temperature.
A great introduction to the skforecast library can be found here: Skforecast: time series forecasting with Python, Machine Learning and Scikit-learn
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster
lags_grid = np.arange(100, 1100, 100).tolist()
forecaster = ForecasterAutoreg(
regressor=LinearRegression(),
lags=100
)
error_metric = lambda y_true, y_pred, y_train: mean_absolute_error(y_true, y_pred)
# Redirect stdout to /dev/null to suppress output.
# I'm not sure why it prints at all; I think this is a bug in skforecast.
with no_print():
    fold_results = grid_search_forecaster(
        forecaster=forecaster,
        y=train,
        param_grid={},
        lags_grid=lags_grid,
        steps=(24 * 60) // 10,  # forecast one day (144 ten-minute steps) per fold
        refit=False,
        metric=error_metric,
        initial_train_size=len(train) // 2,
        fixed_train_size=False,
        return_best=True,
        n_jobs='auto',
        verbose=False,
        show_progress=False
    )
forecast = forecaster.predict(steps=len(test))
lags = len(forecaster.lags)
print(f'Linear Regression: MAE: {mean_absolute_error(test, forecast):.2f}, Lags: {lags}')
plt.figure(figsize=(15, 5))
plt.plot(df[df['train_set']]['timestamp'], train, label='Train', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], test, label='Test', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], forecast, label='Forecast')
plt.title('Temperature Data Set with Linear Regression Forecast')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend([],[], frameon=False)
plt.show()
Linear Regression: MAE: 1.05, Lags: 600
With a mean absolute error hovering around 1, the linear regression model doesn't seem to be complex enough. But peering into the graph, we do see that the model captures the general trend of the temperature: once a day it rises and falls with the sun. I believe the complexity of this data lies in the noise that sometimes shows up at the beginning of each day.
For context, this noise isn't actually sensor malfunction. It was mostly due to direct sunlight briefly coming in as the sun rises over the mountain. We can actually see that in the image dataset, where around the same time we see intense sun beams coming in. Of course, it doesn't happen every day due to cloud coverage and other factors (maybe I didn't open the blinds that day). But if we want any hope of predicting this noise, we need a more complex model.
We also have to keep in mind that a mean absolute error of ~1 means the model is off, on average, by 1 degree. That really isn't bad (our sensors are just really precise). Interpreting the errors can be tricky, but there are plenty of resources on the topic.
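One quick way to ground a number like this is to compare it against a naive baseline, for example pretending every future day looks exactly like the last observed day. A small sanity check (not part of the original analysis above):
# Naive seasonal baseline: repeat the last observed day (144 ten-minute steps)
# across the test horizon and score it with the same metric as the models.
period = (24 * 60) // 10
last_day = train.values[-period:]
naive_forecast = np.tile(last_day, int(np.ceil(len(test) / period)))[:len(test)]
print(f'Naive seasonal baseline MAE: {mean_absolute_error(test, naive_forecast):.2f}')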
def grid_fit_forecaster(train, forecaster, lags_grid, param_grid):
    error_metric = lambda y_true, y_pred, y_train: mean_absolute_error(y_true, y_pred)
    with no_print():
        fold_results = grid_search_forecaster(
            forecaster=forecaster,
            y=train,
            param_grid=param_grid,
            lags_grid=lags_grid,
            steps=(24 * 60) // 10,
            refit=False,
            metric=error_metric,
            initial_train_size=len(train) // 2,
            fixed_train_size=True,
            return_best=True,
            n_jobs='auto',
            verbose=False,
            show_progress=False
        )
    return fold_results
from sklearn.tree import DecisionTreeRegressor
lags_grid = np.arange(100, 1100, 100).tolist()
forecaster = ForecasterAutoreg(
regressor=DecisionTreeRegressor(),
lags=100
)
grid_fit_forecaster(train, forecaster, lags_grid, {
'max_depth': np.arange(1, 100, 10).tolist()
})
forecast = forecaster.predict(steps=len(test))
test_error = mean_absolute_error(test, forecast)
plt.figure(figsize=(15, 5))
plt.plot(df[df['train_set']]['timestamp'], train, label='Train', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], test, label='Test', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], forecast, label='Forecast')
plt.title(f'Temperature Data Set with Decision Tree Forecast | MAE: {test_error:.2f}')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend([],[], frameon=False)
plt.show()
Admittedly, I had some bias going in, assuming the decision tree would have a better chance at capturing the dataset's variance. But it seems to have performed worse than the linear regression model. The seasonality is blocky and it doesn't really learn the true pattern. The decision tree, while very robust and useful, simply isn't the right model for this dataset.
A couple of models I want to see are K-Nearest Neighbors, ElasticNet, and the most complex: XGBoost. Let's compare them below. Due to the extremely long training time required for XGBoost, I'm manually tuning its hyperparameters for now. I could experiment with Bayesian optimization, or just reduce the grid search space, but after the training took a few hours I'm not prepared to do that at the moment. I need to parallelize this grid search with Dask sometime! Perhaps in the future.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import ElasticNet
from xgboost import XGBRegressor
lags_grid = np.arange(100, 1100, 100).tolist()
net_forecaster = ForecasterAutoreg(regressor=ElasticNet(), lags=100)
knn_forecaster = ForecasterAutoreg(regressor=KNeighborsRegressor(n_jobs=-1), lags=100)
xgb_forecaster = ForecasterAutoreg(
regressor=XGBRegressor(
n_jobs=-1,
max_depth=20,
n_estimators=200,
enable_categorical=True
),
lags=300
)
xgb_forecaster.fit(y=train)
grid_fit_forecaster(train, net_forecaster, lags_grid, {
'alpha': np.arange(0.1, 1.0, 0.1).tolist()
})
grid_fit_forecaster(train, knn_forecaster, lags_grid, {
'n_neighbors': np.arange(1, 50, 10).tolist()
})
net_forecast = net_forecaster.predict(steps=len(test))
knn_forecast = knn_forecaster.predict(steps=len(test))
xgb_forecast = xgb_forecaster.predict(steps=len(test))
net_test_error = mean_absolute_error(test, net_forecast)
knn_test_error = mean_absolute_error(test, knn_forecast)
xgb_test_error = mean_absolute_error(test, xgb_forecast)
print(f'Elastic Net MAE: {net_test_error:.2f}')
print(f'K-Nearest Neighbors MAE: {knn_test_error:.2f}')
print(f'XGBoost MAE: {xgb_test_error:.2f}')
plt.figure(figsize=(15, 5))
plt.plot(df[df['train_set']]['timestamp'], train, label='Train', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], test, label='Test', linewidth=0.7, alpha=0.5)
plt.plot(df[df['test_set']]['timestamp'], net_forecast, label=f'Elastic Net')
plt.plot(df[df['test_set']]['timestamp'], knn_forecast, label=f'K-Nearest Neighbors')
plt.plot(df[df['test_set']]['timestamp'], xgb_forecast, label=f'XGBoost')
plt.title('Temperature Data Set Forecast Comparison')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend()
plt.show()
Elastic Net MAE: 1.00
K-Nearest Neighbors MAE: 1.24
XGBoost MAE: 1.04
Very oddly, the lowest MAE did not come from KNN, even though KNN appears to better capture the seasonality of the data. XGBoost flatlines after a certain point, and ElasticNet predicts a smoothed sinusoidal pattern (but still ends up with the lower MAE). Very strange indeed! Regardless, we want a model that detects the seasonality in the dataset, however small the fluctuations. The k-nearest neighbors model is able to do this, but needs a large lag window to do so.
len(knn_forecaster.lags), len(xgb_forecaster.lags), len(net_forecaster.lags)
(900, 300, 900)
For reference, each lag in my dataset is 10 minutes, so 1000 lags is roughly a week. Further investigation into the folds of the grid search would be needed to determine whether increasing the lag reaches a point of diminishing returns.
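As a sketch of what that follow-up could look like (I have not run this for the post), we could keep the fold results that grid_fit_forecaster returns and plot validation error against the number of lags:
# Sketch: re-run the linear-regression search, keep the returned fold results,
# and look for an elbow in validation error as the number of lags grows.
lag_results = grid_fit_forecaster(
    train,
    ForecasterAutoreg(regressor=LinearRegression(), lags=100),
    lags_grid,
    {}
)
lag_results['n_lags'] = lag_results['lags'].apply(len)
# The metric column is named after our custom metric function, so pick out the
# remaining numeric column rather than hard-coding its name.
metric_col = [c for c in lag_results.columns
              if c not in ('lags', 'params', 'n_lags') and pd.api.types.is_numeric_dtype(lag_results[c])][0]
lag_results = lag_results.sort_values('n_lags')
plt.figure(figsize=(10, 3))
plt.plot(lag_results['n_lags'], lag_results[metric_col], marker='o')
plt.title('Validation MAE vs. number of lags (linear regression)')
plt.xlabel('Number of lags')
plt.ylabel('Validation MAE')
plt.show()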
With all the hope that XGBoost would be our top contender, here are some great resources that I used to learn about fitting XGBoost models in a time series setting:
Forecasting time series with XGBoost
Forecaster explainability: Feature importance, SHAP Values and Partial Dependence Plots
For now, it would appear that the linear regression model is the best overall fit for this data: a relatively small lag window, seasonality detection, and nearly the lowest MAE. But I suspect we can do better. There are other model frameworks we can use to predict time series data. Prophet, from Facebook research, is an amazing and easy-to-use library for time series forecasting. Sure, it's fun to build our own models, but in a production setting with deadlines to meet, the lowest score with the fastest deployment sometimes wins.
from prophet import Prophet
pdf = df[['timestamp', 'ds18b20_air_temp_0']].copy()
pdf = pdf.rename(columns={'timestamp': 'ds', 'ds18b20_air_temp_0': 'y'})
train_indices = int(len(pdf) * 0.8)
pdf['train_set'] = pdf.index < train_indices
pdf['test_set'] = pdf.index >= train_indices
train = pdf[pdf['train_set']]
test = pdf[pdf['test_set']]
model = Prophet()
model.fit(train)
forecast = model.predict(test)
test_error = mean_absolute_error(test['y'], forecast['yhat'])
print(f'Prophet MAE: {test_error:.2f}')
plt.figure(figsize=(15, 5))
sns.lineplot(data=pdf, x='ds', y='y', hue='train_set', linewidth=0.7, alpha=0.5)
plt.plot(test['ds'], forecast['yhat'], label='Forecast')
plt.fill_between(test['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='gray', alpha=0.2)
plt.title('Temperature Data Set with Prophet Forecast')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend([],[], frameon=False)
plt.show()
prophet INFO: Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
Prophet MAE: 1.00
Incredible! Extremely fast fitting, and it gives us confidence bands to go with our predictions. The fit looks very similar to the linear regression, implying that something linear is probably being used under the hood. And with an MAE of 1.00, it's not far off from our original model. This library is quite extensive, and there are so many features that I haven't even begun to touch on. The documentation isn't the best, and they also recommend another, similar library written with PyTorch called NeuralProphet. I won't dive into that library yet, because I've been itching to try out an LSTM model.
What we have not yet tried is any neural network. The LSTM model is a type of recurrent neural network that is able to remember past information. Obviously, as you might have guessed, that's useful for time series data. I find it redundant to write about what this model is and how it works because it has been covered extensively by other writers. Here are some really awesome links that I recommend:
LSTM for Time Series Prediction in PyTorch
Multi-Step LSTM Time Series Forecasting Models for Power Usage
On the Suitability of Long Short-Term Memory Networks for Time Series Forecasting
Building a LSTM by hand on PyTorch
Sequence Models and Long Short-Term Memory Networks
But before I continue, there are a few concepts that should be understood.
First, the dataset we are working with is stationary (or at least pretty close to stationary). In simple terms, this just means that the mean and variance of the dataset are constant over time. Or, in even simpler terms, the dataset doesn't have some sort of trend or seasonality that changes over time. There are actual statistical tests that can be performed to determine if a dataset is stationary (like the Augmented Dickey-Fuller test).
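I didn't run that test in this notebook, but for reference, a quick check with statsmodels would look roughly like this:
# Augmented Dickey-Fuller test: a small p-value (e.g. < 0.05) rejects the null
# hypothesis of a unit root, i.e. it suggests the series is stationary.
from statsmodels.tsa.stattools import adfuller

adf_stat, p_value, *_ = adfuller(df['ds18b20_air_temp_0'].values)
print(f'ADF statistic: {adf_stat:.3f}, p-value: {p_value:.4f}')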
Why is this important? Many auto-regressive models, like ARIMA, require the dataset to be stationary. This is because the model is trying to predict the future based on the past using linear models. If there is some sort of overall trend or seasonality, the model is going to find it difficult to predict into the future unless explicitly told how to handle it (this is why you see seasonality parameters in all the other models in skforecast, ARIMA, and Prophet).
For the LSTM model, this isn't as important: the model is able to learn any sequence of data regardless of the trend or seasonality, as long as a large enough window of data is provided that encapsulates it. But it is generally recommended that if a dataset is stationary, you attempt auto-regressive models first (which is exactly what we did).
import torch as t
from torch.utils.data import DataLoader, TensorDataset
timesteps = lambda hours: (hours * 60) // 10
def create_dataset(data, lags, future):
    """
    `lags` describes the number of timesteps to look back. This is our `window`.
    `future` describes the number of timesteps to look forward.
    This is how far we want to predict into the future.
    """
    x, y = [], []
    for i in range(len(data) - lags - future):
        x.append(data[i:i+lags])
        y.append(data[i+lags:i+lags+future])
    return t.tensor(x).float().unsqueeze(-1), t.tensor(y).float()
ldf = df[['timestamp', 'ds18b20_air_temp_0']].copy()
train_indices = int(len(ldf) * 0.8)
ldf['train_set'] = ldf.index < train_indices
ldf['test_set'] = ldf.index >= train_indices
train = ldf[ldf['train_set']]
test = ldf[ldf['test_set']]
x_train, y_train = create_dataset(train['ds18b20_air_temp_0'].values, timesteps(48), timesteps(24))
x_test, y_test = create_dataset(test['ds18b20_air_temp_0'].values, timesteps(48), timesteps(24))
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
train_loader = DataLoader(TensorDataset(x_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(x_test, y_test), batch_size=32, shuffle=False)
torch.Size([2784, 288, 1]) torch.Size([2784, 144]) torch.Size([372, 288, 1]) torch.Size([372, 144])
We create the same dataset as before, but this time we have to build the lagged dataset manually. To start, we are choosing to look at the last 48 hours to make a prediction for the next 24 hours. From the manually created arrays, which take on the same shape you would expect from scikit-learn, we create a PyTorch dataset and dataloader. This makes it easier to batch up the data and feed it into the model, and it is a necessary step if we ever want to move the data to the GPU.
One thing I will touch on is the shape of the data and what it actually means. The shape of the data, in plain English, is (batch_size, sequence_length, features). What does this mean?
batch_size refers to how many samples we have, which is what the DataLoader class splits up (into batches of 32). The more data we have, the larger this number will be. But I only have around a month of data to work with: 30 days at 10-minute intervals is about 4320 rows (our dataset has 4020), and at an 80% split that's roughly 3216 training rows. After subtracting the 288-step window and 144-step horizon from the usable range, we end up with the 2784 training samples shown above.
sequence_length is the number of time steps we are feeding into the model, also called lags or window_size. Even simpler, this is the total amount of past observations we want the model to learn from in order to predict the next observations. In our case, we are feeding in 48 hours of data (the x data is 10 minutes apart, so 48 hours is 288 time steps) to predict the next 24 hours (the y data is 10 minutes apart, so 24 hours is 144 time steps). Simple!
features is the number of features we are feeding into the model. To keep it simple, we are only feeding one feature into the model: temperature. But we could feed in multiple features if we wanted to. For example, if we included temperature and humidity, the shape of the data would be (batch_size, sequence_length, 2).
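As a quick illustration of that last point (a sketch only; the model below still uses a single feature), stacking two sensor columns gives each timestep 2 features:
# Sketch: a multivariate version of the windowing, giving (batch, sequence_length, 2).
multi = df[['ds18b20_air_temp_0', 'dht11_air_humid']].values  # shape (n_rows, 2)
window, horizon = timesteps(48), timesteps(24)
x_multi = t.tensor(np.stack([multi[i:i + window] for i in range(len(multi) - window - horizon)])).float()
print(x_multi.shape)  # -> torch.Size([n_samples, 288, 2])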
Now, we can start to define our model.
from torch import nn
from torch.optim import Adam
from tqdm import tqdm
import time
class AutoFarmModel(nn.Module):
    def __init__(self, hidden_size=50, num_layers=1):
        super(AutoFarmModel, self).__init__()
        # Our model is a simple LSTM with a linear layer on top. We could use
        # just the LSTM, but the linear layer helps to reduce the output to a single value.
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

    def validate_step(self, test_loader):
        loss_metric = nn.L1Loss()
        loss = 0
        count = 0
        self.eval()
        with t.no_grad():
            for x, y in test_loader:
                y_pred = self.forward(x)
                y_pred = y_pred[:, -y.shape[1]:]
                loss += loss_metric(y_pred, y.unsqueeze(-1))
                count += 1
        return loss / count

    def train_step(self, train_loader, epochs=10, learning_rate=0.001):
        loss_metric = nn.L1Loss()
        optimizer = Adam(self.parameters(), lr=learning_rate)
        history = []
        progress = tqdm(total=epochs, desc='Training', position=0, leave=True)
        self.train()
        with t.autograd.set_detect_anomaly(False):
            for epoch in range(epochs):
                epoch_start = time.time()
                for x, y in train_loader:
                    optimizer.zero_grad()
                    y_pred = self.forward(x)
                    # Again, we feed the entire 48 hours into the model.
                    # But we don't get only 24 hours back, we get the entire sequence!
                    # So we trim the prediction down to the last 24 hours
                    # in order to compute the loss.
                    y_pred = y_pred[:, -y.shape[1]:]
                    loss = loss_metric(y_pred, y.unsqueeze(-1))
                    loss.backward()
                    optimizer.step()
                epoch_time = time.time() - epoch_start
                history.append({
                    'epoch': epoch,
                    'time': epoch_time,
                    'loss': loss.item()
                })
                progress.update(1)
                progress.set_postfix(epoch=f'{epoch}/{epochs}', time=epoch_time, loss=f'{loss:.6f}')
        progress.close()
        return history
There isn't much to it! Obviously, a single LSTM cell is more complex than what the PyTorch interface exposes, and the links above go into that detail. But our super simple wrapper sets us up to train and validate the model and capture the loss history. Using an Adam optimizer and mean absolute error (called L1Loss in PyTorch) as our metric, let's see how the model performs.
model = AutoFarmModel(hidden_size=100, num_layers=1)
history = model.train_step(train_loader, epochs=30, learning_rate=0.001)
lstm_test_error = model.validate_step(test_loader)
train_pred = model(x_train)[:, -1, :].detach().numpy()
test_pred = model(x_test)[:, -1, :].detach().numpy()
history = pd.DataFrame(history)
history['smoothed_loss'] = history['loss'].rolling(10, min_periods=1).mean()
plt.figure(figsize=(15, 2))
plt.plot(history['epoch'], history['loss'], label='Loss')
plt.plot(history['epoch'], history['smoothed_loss'], label='Smoothed Loss')
plt.title(f'Loss | Test MAE: {lstm_test_error:.2f}')
plt.xlabel('Epoch')
plt.ylabel('MAE Loss')
plt.legend()
plt.show()
plt.figure(figsize=(15, 5))
plt.plot(train['timestamp'], train['ds18b20_air_temp_0'], label='Train', linewidth=0.7, alpha=0.5)
plt.plot(test['timestamp'], test['ds18b20_air_temp_0'], label='Test', linewidth=0.7, alpha=0.5)
plt.plot(train['timestamp'][-len(train_pred[:, -1]):], train_pred[:, -1], label='Train Prediction')
plt.plot(test['timestamp'][-len(test_pred[:, -1]):], test_pred[:, -1], label='Test Prediction')
plt.title('Temperature Data Set with LSTM Forecast')
plt.xlabel('Time')
plt.ylabel('Temperature (C)')
plt.legend()
plt.show()
Training: 100%|██████████| 30/30 [01:45<00:00, 3.50s/it, epoch=29/30, loss=0.470878, time=3.5]
The LSTM network appears to have done a great job, giving the best test MAE we have seen so far. How well does the model predict into the future (the ultimate goal of forecasting)? From the output, we can see exactly how well it performs: with just 48 hours of lag, the model is able to predict the temperature to within about half a degree 24 hours into the future. This is quite impressive!
Clearly, with no real tuning required, the LSTM network is a powerful model.
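For completeness, here is one way to use the trained model for the actual forecasting task (a sketch, not one of the plots above): feed it the most recent 48 hours and read off the last 144 outputs as the next 24 hours, matching how the loss was computed during training.
# Sketch: a single 24-hour forecast from the last 48 hours of observations.
model.eval()
recent = ldf['ds18b20_air_temp_0'].values[-timesteps(48):]
with t.no_grad():
    next_24h = model(t.tensor(recent).float().reshape(1, -1, 1))[0, -timesteps(24):, 0]
print(next_24h.shape)  # torch.Size([144])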
Conclusion
There is an extraordinary amount of detail underneath all the libraries and methodologies I was playing with here. I barely scratched the surface of what is possible. Just for reference, some things I didn't do:
- Backtesting methodologies and how auto-regressive models are trained by slicing the data in different ways
- Seasonality exploration in auto-regressive models. Even non-time based seasonalities like holidays or events.
- Multi-series forecasting given multiple features (like temperature and humidity)
- The importance and implications of scaling the data! I didn't even touch this, but it is very important for neural networks (see the quick sketch after this list).
- Combining CNNs and LSTMs for time series forecasting
- Confidence intervals and how to calculate them.
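Picking up the scaling point from the list above, a minimal sketch of the idea (an assumed workflow, not something done in this notebook) is to fit the scaler on the training portion only, transform both splits, and inverse-transform predictions back to degrees before scoring.
# Hypothetical sketch of scaling for the LSTM; the model above was trained on raw degrees.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train['ds18b20_air_temp_0'].values.reshape(-1, 1))
test_scaled = scaler.transform(test['ds18b20_air_temp_0'].values.reshape(-1, 1))
# ... build the lagged tensors and train on the scaled series, then:
# forecast_degrees = scaler.inverse_transform(forecast_scaled)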
Time series is just as complex a task as any other, but it's a fascinating one: it's literally the exploration of predicting the future. Particularly in an agricultural setting (a setting I have a lot of interest in), there are so many applications for this: predicting the weather, the growth of plants, the health of the soil, harvesting and spraying schedules, and so much more.
There is so much more I want to explore, but this is it for now. I hope you enjoyed this notebook and learned something new.