Vehicle Utilization Classification

It can be useful to automatically classify driving behavior to determine whether a vehicle is used primarily for commuting or is used consistently throughout the day as a vehicle for hire.

How often vehicles are used and when they are used

# Spatially join each record to the time-zone polygons so every row carries
# the name of the zone it falls in (column 'tz_name1st').
gdf_tz = gdf.sjoin(tz[['geometry', 'tz_name1st']])

# Convert row by row because the target zone can differ between rows.
# NOTE(review): assumes 'timestamp' holds tz-aware pandas Timestamps
# (tz_convert is not valid on naive values) -- confirm against the loader.
gdf_tz['timestamp_local'] = gdf_tz.apply(
    lambda row: row['timestamp'].tz_convert(row['tz_name1st']), axis=1
)

# pandas numbers days Monday=0 ... Sunday=6; the hour is the local clock hour.
gdf_tz['day_of_week'] = gdf_tz['timestamp_local'].map(lambda ts: ts.dayofweek)
gdf_tz['hour_of_day'] = gdf_tz['timestamp_local'].map(lambda ts: ts.hour)

# Only rows with a positive distance contribute to the aggregates.
moving = gdf_tz.loc[gdf_tz['haversine_dist_shift'] > 0]

# Total and mean distance / elapsed seconds per (day_of_week, hour_of_day).
# String aggregator names are used instead of np.sum / np.mean: they yield the
# same '<column>_sum' / '<column>_mean' labels below without the FutureWarning
# that newer pandas raises for NumPy callables.
gb = moving.groupby(['day_of_week', 'hour_of_day']).agg({
    'haversine_dist_shift': ['sum', 'mean'],
    'timestamp_diff_second': ['sum', 'mean'],
})

# Turn the group keys back into columns, then flatten the two-level column
# labels: ('haversine_dist_shift', 'sum') -> 'haversine_dist_shift_sum', while
# the key columns (empty second level) keep their plain names.
gb = gb.reset_index(drop=False)
gb.columns = [
    f'{name}_{stat}' if stat != '' else name
    for name, stat in gb.columns
]

# Expand the (day_of_week, hour_of_day) aggregates in `gb` into a complete
# 7 x 24 grid so that hours with no recorded driving appear as explicit zeros.
temp_lst = []
for day in range(7):
    temp = gb.loc[gb['day_of_week'] == day]

    # Template with one row per hour of the day. The right merge keeps every
    # template hour, leaving NaN aggregates where `gb` had no data for it.
    all_time = pd.DataFrame(index=np.arange(0, 24, 1))
    all_time['odometer_diff_fill'] = 0
    all_time['time_fill'] = 0
    temp = temp.merge(all_time, left_on='hour_of_day', right_index=True, how='right')

    # Rows that came only from the template have no day label yet.
    temp['day_of_week'] = day

    temp_lst.append(temp)

# ignore_index gives the combined frame a clean 0..167 row index instead of
# whatever labels the per-day merges produced.
weekly_dst = pd.concat(temp_lst, ignore_index=True)

# Replace the NaN aggregates of empty hours with zero. fillna works in place on
# each column, so it does not depend on the row labels lining up.
weekly_dst['filled_dist_sum'] = weekly_dst['haversine_dist_shift_sum'].fillna(0)
weekly_dst['filled_time_sum'] = weekly_dst['timestamp_diff_second_sum'].fillna(0)

weekly_dst['filled_dist_mean'] = weekly_dst['haversine_dist_shift_mean'].fillna(0)
weekly_dst['filled_time_mean'] = weekly_dst['timestamp_diff_second_mean'].fillna(0)

# Days 0-4 are Monday-Friday (pandas dayofweek numbering).
weekly_dst['weekday'] = weekly_dst['day_of_week'] < 5

With this setup, we can compute the percentage of weekday driving distance and driving time that takes place during commuting hours (7am to 9am and 4pm to 7pm). If that fraction is sufficiently high, the vehicle is probably used primarily for commuting.

# Fractions of weekday driving distance and driving time that fall inside the
# commuting windows (7am-9am and 4pm-7pm).
weekday_rows = weekly_dst.loc[weekly_dst['weekday']]

# hour_of_day is the clock hour of each record, so bucket h covers h:00-h:59.
# The upper bounds are therefore exclusive: 7am-9am is hours 7 and 8, and
# 4pm-7pm is hours 16, 17 and 18. Including hour 9 / hour 19 would stretch the
# windows to 9:59am and 7:59pm.
morning_rows = weekday_rows.loc[
    (weekday_rows['hour_of_day'] >= 7) & (weekday_rows['hour_of_day'] < 9)
]
evening_rows = weekday_rows.loc[
    (weekday_rows['hour_of_day'] >= 4 + 12) & (weekday_rows['hour_of_day'] < 7 + 12)
]

# Denominators: all weekday driving. If there is no weekday driving at all
# these are zero and the fractions below come out as NaN.
weekday_dist_total = weekday_rows['filled_dist_sum'].sum()
weekday_time_total = weekday_rows['filled_time_sum'].sum()

morning_commute_dist_frac = morning_rows['filled_dist_sum'].sum() / weekday_dist_total

evening_commute_dist_frac = evening_rows['filled_dist_sum'].sum() / weekday_dist_total

commuting_time_dist_frac = morning_commute_dist_frac + evening_commute_dist_frac

morning_commute_time_frac = morning_rows['filled_time_sum'].sum() / weekday_time_total

evening_commute_time_frac = evening_rows['filled_time_sum'].sum() / weekday_time_total

commuting_time_time_frac = morning_commute_time_frac + evening_commute_time_frac

Using the above code, the averaged distribution of driving during a week can be plotted as a heatmap (first example below) or as a bar graph (second example below).

# Clear and close whatever figure pyplot currently has active before drawing.
plt.cla()
plt.clf()
plt.close()

fig, ax = plt.subplots(figsize=(20, 6))

# Lay the 168 hourly distance totals out as 7 rows (days) x 24 columns (hours).
# This relies on weekly_dst being ordered by day, then by hour within each day.
dist_cells = weekly_dst[['day_of_week', 'hour_of_day', 'filled_dist_sum']].reset_index(drop=True)
data = dist_cells['filled_dist_sum'].values.reshape(7, 24)
im = ax.imshow(data, cmap='coolwarm')

# Label every third hour along the x axis.
hour_ticks = range(0, 24, 3)
ax.set_xticks(hour_ticks)
ax.set_xticklabels([f'{hour}:00' for hour in hour_ticks], fontsize=20)

# One row per day, Monday (row 0) at the top.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
ax.set_yticks(range(0, 7))
ax.set_yticklabels(day_names, fontsize=20)

ax.set_xlabel('Local Time', fontsize=20)
ax.set_ylabel('Day of Week', fontsize=20)
plt.title('Weekly User Activity Heatmap', fontsize=30)

# Colorbar with minor ticks so values are easier to read off the scale.
cbar = fig.colorbar(im, ax=ax, extend='both')
cbar.set_label(label='Kilometers Driven', size=20)
cbar.minorticks_on()
cbar.ax.tick_params(labelsize=20)

plt.tight_layout()
plt.show()

And here is an example heatmap

Note that this type of heatmap (and the below bar graph) can easily be created for fleet-level statistics as well. If weekly_dst contains data for multiple vehicles, the data to be plotted can be obtained using a groupby as in:

# Sum the distance of all vehicles for each (day, hour) cell. groupby returns
# the cells sorted by day and then hour, which is the order the reshape into
# 7 rows x 24 columns expects.
fleet_totals = weekly_dst.groupby(['day_of_week', 'hour_of_day'])['filled_dist_sum'].sum()
weekly_dst = fleet_totals.reset_index()
fleet_cells = weekly_dst[['day_of_week', 'hour_of_day', 'filled_dist_sum']].reset_index(drop=True)
data = fleet_cells['filled_dist_sum'].values.reshape(7, 24)

For 100 example vehicles, the activity heatmap looks like this:

# Bar graph of the share of total driving time that falls in each hour of the
# week (hour 0 = Monday at midnight, 168 bars in total).

# Clear and close whatever figure pyplot currently has active before drawing.
plt.cla()
plt.clf()
plt.close()

fig, ax = plt.subplots(figsize=(20, 15))

# Day order matches weekly_dst, where day_of_week 0 is Monday.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_colors = ['#25AA54', '#DC504F', '#0094CA', '#38C8BD', '#25AA54', '#DC504F', '#0094CA']

# Driving seconds per hour of the week, normalised by the weekly total. The
# column is 'filled_time_sum'; empty hours are already zero in it, so its sum
# is the total driving time.
hourly_time = weekly_dst['filled_time_sum'].values
time_share = hourly_time / hourly_time.sum()

# Draw one labelled group of 24 bars per day so the legend has entries to show.
for day, (day_name, day_color) in enumerate(zip(day_names, day_colors)):
    first_hour = 24 * day
    last_hour = first_hour + 24
    ax.bar(
        np.arange(first_hour, last_hour, 1),
        time_share[first_hour:last_hour],
        color=day_color,
        label=day_name,
    )

ax.tick_params(axis="x", labelsize=30)
ax.tick_params(axis="y", labelsize=30)
ax.set_title('Driving Profile', fontsize=30)

# Ticks every 12 hours: day boundaries show the hour count, midday ticks show
# the name of the day they sit in.
ax.set_xticks(np.arange(0, 24 * 7 + 1, 12))
ax.set_xticklabels([0, 'Monday', 24, 'Tuesday', 48, 'Wednesday', 72, 'Thursday', 96, 'Friday', 120, 'Saturday', 144, 'Sunday', 168])
plt.xticks(rotation=90)

# time_share is a fraction of 1, so format the y axis as percentages of 1.
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

ax.set_xlabel('Hours Since Monday at Midnight', fontsize=30)
ax.set_ylabel('Percentage of Total Driving Time', fontsize=30)

handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, fontsize=30)

plt.tight_layout()

plt.show()

And here is an example bar graph of the share of total driving time that falls in each hour of the week.

Previous: Vehicle Depreciation | Next: Mac Instructions

Last updated 2 years ago