|
MikeMills private msg quote post Address this user | |
This Python script generates CSV files that simulate electricity usage for three homes, along with the matching validation streams. It also generates humidity, temperature, and is_weekend import files. All are one-hour interval streams.
This script generates the files used in the Forecasting tutorial video.
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces the same synthetic data.
np.random.seed(42)

# Define the hourly, timezone-aware time index (US/Central).
start = '2023-01-01 00:00'
end = '2023-02-07 23:00'  # Extended to cover a 168-hour forecasting horizon
# Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is deprecated
# in recent pandas releases.
freq = 'h'
time_index = pd.date_range(start=start, end=end, freq=freq, tz='US/Central')
n = len(time_index)  # Number of hourly samples from start to end, inclusive

# Temperature: 24-hour cosine cycle around 5 (amplitude 5, peak at hour 15)
# plus Gaussian noise with standard deviation 1.
hours = time_index.hour
temp = 5 + 5 * np.cos(2 * np.pi * (hours - 15) / 24) + np.random.normal(0, 1, n)

# Humidity: 24-hour sine cycle around 60 (amplitude 10) plus Gaussian noise
# with standard deviation 2. Drawn after temperature, so the order of RNG
# calls matters for reproducibility.
humidity = 60 + 10 * np.sin(2 * np.pi * hours / 24) + np.random.normal(0, 2, n)

# is_weekend: 1 on Saturday/Sunday (weekday 5 or 6) of the local US/Central
# date, 0 otherwise.
is_weekend = (time_index.weekday >= 5).astype(int)
# Per-house static covariates. These are entered manually in the GroveStreams
# UI and are not written to any CSV; here they only scale the simulated usage.
houses = ['house1', 'house2', 'house3']
static_covariates = dict(
    house1=dict(building_area=100, num_occupants=2),
    house2=dict(building_area=150, num_occupants=4),
    house3=dict(building_area=200, num_occupants=3),
)
# Generate electricity usage for each house.
# Usage is the sum of a constant base, time-of-day, weekend, temperature,
# building-area and occupant terms plus per-house Gaussian noise, clipped at 0.
electricity_usage = {}

# Terms that do not depend on the house are computed once, outside the loop.
base = 1
hourly_effect = np.where((hours >= 8) & (hours <= 22), 1, 0.5)  # 1 for hours 8..22, else 0.5
weekend_effect = is_weekend * 0.5
temp_effect = -0.1 * (temp - 5)  # Usage increases as temperature decreases
# Same left-to-right summation order as adding every term in one expression,
# so the generated values are unchanged by the hoisting.
shared_load = base + hourly_effect + weekend_effect + temp_effect

for house in houses:
    building_area = static_covariates[house]['building_area']
    num_occupants = static_covariates[house]['num_occupants']
    area_effect = 0.01 * building_area
    occupant_effect = 0.2 * num_occupants
    # One fresh noise draw per house, taken in `houses` order from the
    # global NumPy RNG.
    noise = np.random.normal(0, 0.1, n)
    usage = shared_load + area_effect + occupant_effect + noise
    usage = np.maximum(0, usage)  # Ensure non-negative values
    electricity_usage[house] = usage
# Split into training and validation periods.
# Training is everything strictly before split_date; validation is everything
# from split_date onward (both compared in US/Central).
split_date = '2023-02-01 00:00'
split_timestamp = pd.Timestamp(split_date, tz='US/Central')
train_mask = time_index < split_timestamp
val_mask = time_index >= split_timestamp

# Create dataframes for covariates (full period).
df_temp = pd.DataFrame({'time': time_index, 'temperature': temp})
df_humidity = pd.DataFrame({'time': time_index, 'humidity': humidity})
df_is_weekend = pd.DataFrame({'time': time_index, 'is_weekend': is_weekend})

# Create dataframes for electricity usage (train and validation), keyed by
# house name.
df_usage_train = {}
df_usage_val = {}
for house in houses:
    df_usage_train[house] = pd.DataFrame({
        'time': time_index[train_mask],
        'electricity_usage': electricity_usage[house][train_mask],
    })
    df_usage_val[house] = pd.DataFrame({
        'time': time_index[val_mask],
        'electricity_usage': electricity_usage[house][val_mask],
    })
def save_formatted_csv(df, filename):
    """Format the 'time' column as text and write the frame to a CSV file.

    The 'time' column is rewritten as yyyy-MM-dd'T'HH:mm:ss strings without
    a timezone suffix, then the frame is saved without the index so 'time'
    keeps its position as a regular column.

    NOTE: ``df`` is modified in place -- after the call its 'time' column
    holds the formatted strings, not datetimes. The verification printout
    later in this script reads those strings back, so this side effect is
    intentional here.

    Args:
        df: DataFrame with a datetime-like 'time' column.
        filename: Destination path of the CSV file.
    """
    df['time'] = df['time'].dt.strftime('%Y-%m-%dT%H:%M:%S')
    df.to_csv(filename, index=False)
# Save covariates (full period).
save_formatted_csv(df_temp, 'temperature.csv')
save_formatted_csv(df_humidity, 'humidity.csv')
save_formatted_csv(df_is_weekend, 'is_weekend.csv')

# Save electricity usage for each house: one training file and one
# validation file per house, written to the current working directory.
for house in houses:
    save_formatted_csv(df_usage_train[house], f'electricity_usage_{house}_train.csv')
    save_formatted_csv(df_usage_val[house], f'electricity_usage_{house}_validation.csv')
# Confirm completion: list every file that was written.
print("CSV files generated with 'time' column in yyyy-MM-dd'T'HH:mm:ss format (US/Central):")
print("- temperature.csv (full period)")
print("- humidity.csv (full period)")
print("- is_weekend.csv (full period)")
for house in houses:
    print(f"- electricity_usage_{house}_train.csv (training period)")
    print(f"- electricity_usage_{house}_validation.csv (validation period)")

# Verify data ranges. The 'time' columns already hold formatted strings at
# this point because save_formatted_csv rewrote them in place.
# The leading "\n" puts a blank line before each verification section.
print("\nVerification: Data range in temperature.csv:")
print(f"Start: {df_temp['time'].iloc[0]}")
print(f"End: {df_temp['time'].iloc[-1]}")
print("\nVerification: Training and validation periods for house1:")
print(f"Training start: {df_usage_train['house1']['time'].iloc[0]}")
print(f"Training end: {df_usage_train['house1']['time'].iloc[-1]}")
print(f"Validation start: {df_usage_val['house1']['time'].iloc[0]}")
print(f"Validation end: {df_usage_val['house1']['time'].iloc[-1]}")
|
16 days ago |
Post 1 IP
flag post |