Code
import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.formula.api as smf
from itertools import product
# import xlwings as xwIn the first and second lecture block for “Finanz und Versicherungstechnik” (Finance & Insurance), we learnt about price elasticity. To solidify our understanding about it, the Professor provided us a dataset, from which we need to calculate the price elasticity and visualize our findings.
The goals are:
-> You can directly check my findings by clicking Section 10.
For theory about price elasticity, check this wikipedia page.
import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.formula.api as smf
from itertools import product
# import xlwings as xwraw_data = pd.read_csv("/Users/gunardiali/Documents/Master_Data_Science/Semester_2/Finanz-_&_Versicherungstechnik/Preiselastizität.csv", sep=";")
raw_data.shape(35, 13)
Seems like there are lots of columns. Let’s visualize the transposed version to show all 13 columns as rows:
raw_data.T.iloc[:,0:5]| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| date | mag-06 | giu-06 | lug-06 | ago-06 | set-06 |
| c.class.average.price | 38917 | 38917 | 38917 | 38917 | 38917 |
| unit.c | 4329.0 | 4250.0 | 5285.0 | 4652.0 | 4238.0 |
| E.class.average.price | 61420 | 61420 | 61420 | 61420 | 61420 |
| unit.e | 4070.0 | 4213.0 | 4479.0 | 3955.0 | 4112.0 |
| s.class.average.price | 101100 | 101100 | 101100 | 101100 | 101100 |
| unit.s | 2526.0 | 2407.0 | 2233.0 | 2427.0 | 2357.0 |
| cl.class.average.price | 132050 | 132050 | 132050 | 132050 | 132050 |
| unit.cl | 46.0 | 63.0 | 52.0 | 54.0 | 41.0 |
| sl.class.average.price | 134225 | 134225 | 134225 | 134225 | 134225 |
| unit.sl | 1056.0 | 866.0 | 768.0 | 713.0 | 557.0 |
| m.class.average.price | 44900 | 44900 | 44900 | 44900 | 44900 |
| unit.m | 2278.0 | 2601.0 | 2446.0 | 2312.0 | 2706.0 |
Let’s clean up month and year:
modified_data = pd.DataFrame()
modified_data.loc[:,"month"] = raw_data.loc[:,"date"].str[:3]
modified_data.loc[:,"year"] = "20" + raw_data.loc[:,"date"].str[-2:]
modified_data["year"] = modified_data.loc[:,"year"].astype('int32')
modified_data.T| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| month | mag | giu | lug | ago | set | ott | nov | dic | gen | feb | ... | giu | lug | ago | set | ott | nov | dic | gen | feb | mar |
| year | 2006 | 2006 | 2006 | 2006 | 2006 | 2006 | 2006 | 2006 | 2007 | 2007 | ... | 2008 | 2008 | 2008 | 2008 | 2008 | 2008 | 2008 | 2009 | 2009 | 2009 |
2 rows × 35 columns
Replace italian month to integer for simplicity:
replace_dict = {"gen":1, "feb":2, "mar":3, "apr":4, "mag":5, "giu":6, "lug":7, "ago":8, "set":9, "ott":10, "nov":11, "dic":12}
modified_data.replace(replace_dict, inplace=True)
modified_data.T| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| month | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 1 | 2 | ... | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 1 | 2 | 3 |
| year | 2006 | 2006 | 2006 | 2006 | 2006 | 2006 | 2006 | 2006 | 2007 | 2007 | ... | 2008 | 2008 | 2008 | 2008 | 2008 | 2008 | 2008 | 2009 | 2009 | 2009 |
2 rows × 35 columns
Transform into standard datetime:
modified_data.loc[:,"date"] = pd.to_datetime(modified_data[["year", "month"]].assign(Day=1))
modified_data.drop(["month", "year"], axis=1, inplace=True)
modified_data.head(5)| date | |
|---|---|
| 0 | 2006-05-01 |
| 1 | 2006-06-01 |
| 2 | 2006-07-01 |
| 3 | 2006-08-01 |
| 4 | 2006-09-01 |
Transform table using class:
cleaned_data = pd.concat([modified_data, raw_data.drop(["date"], axis=1)], axis=1)
cleaned_data.set_index("date", inplace=True)
class_list = ["c", "e", "s", "cl", "sl", "m"]
iterables = [class_list, ["price", "unit_sold"]]
cleaned_data.columns = pd.MultiIndex.from_product(iterables, names=["class", "unit"])
cleaned_data.head(5)
#xw.view(cleaned_data)| class | c | e | s | cl | sl | m | ||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unit | price | unit_sold | price | unit_sold | price | unit_sold | price | unit_sold | price | unit_sold | price | unit_sold |
| date | ||||||||||||
| 2006-05-01 | 38917 | 4329.0 | 61420 | 4070.0 | 101100 | 2526.0 | 132050 | 46.0 | 134225 | 1056.0 | 44900 | 2278.0 |
| 2006-06-01 | 38917 | 4250.0 | 61420 | 4213.0 | 101100 | 2407.0 | 132050 | 63.0 | 134225 | 866.0 | 44900 | 2601.0 |
| 2006-07-01 | 38917 | 5285.0 | 61420 | 4479.0 | 101100 | 2233.0 | 132050 | 52.0 | 134225 | 768.0 | 44900 | 2446.0 |
| 2006-08-01 | 38917 | 4652.0 | 61420 | 3955.0 | 101100 | 2427.0 | 132050 | 54.0 | 134225 | 713.0 | 44900 | 2312.0 |
| 2006-09-01 | 38917 | 4238.0 | 61420 | 4112.0 | 101100 | 2357.0 | 132050 | 41.0 | 134225 | 557.0 | 44900 | 2706.0 |
Apply new column:
iterables = [class_list, ["price", "unit_sold"]]
cleaned_data.columns = pd.MultiIndex.from_product(iterables, names=["class", "unit"])
cleaned_data.head()| class | c | e | s | cl | sl | m | ||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unit | price | unit_sold | price | unit_sold | price | unit_sold | price | unit_sold | price | unit_sold | price | unit_sold |
| date | ||||||||||||
| 2006-05-01 | 38917 | 4329.0 | 61420 | 4070.0 | 101100 | 2526.0 | 132050 | 46.0 | 134225 | 1056.0 | 44900 | 2278.0 |
| 2006-06-01 | 38917 | 4250.0 | 61420 | 4213.0 | 101100 | 2407.0 | 132050 | 63.0 | 134225 | 866.0 | 44900 | 2601.0 |
| 2006-07-01 | 38917 | 5285.0 | 61420 | 4479.0 | 101100 | 2233.0 | 132050 | 52.0 | 134225 | 768.0 | 44900 | 2446.0 |
| 2006-08-01 | 38917 | 4652.0 | 61420 | 3955.0 | 101100 | 2427.0 | 132050 | 54.0 | 134225 | 713.0 | 44900 | 2312.0 |
| 2006-09-01 | 38917 | 4238.0 | 61420 | 4112.0 | 101100 | 2357.0 | 132050 | 41.0 | 134225 | 557.0 | 44900 | 2706.0 |
Remove multilevel column:
cleaned_data = cleaned_data.stack(level=0).reset_index("class")
cleaned_data.head()| unit | class | price | unit_sold |
|---|---|---|---|
| date | |||
| 2006-05-01 | c | 38917 | 4329.0 |
| 2006-05-01 | cl | 132050 | 46.0 |
| 2006-05-01 | e | 61420 | 4070.0 |
| 2006-05-01 | m | 44900 | 2278.0 |
| 2006-05-01 | s | 101100 | 2526.0 |
Visualize price and unit sold for different class:
fig = px.scatter(cleaned_data, y="price", x="unit_sold", color="class", log_x=True, trendline="ols", trendline_options=dict(log_y=True))
fig.show()Extract trendline coefficients:
trendline_params = []
for i in (class_list):
current_class = cleaned_data.loc[(cleaned_data['class'] == i)]
model = smf.ols(formula="price ~ unit_sold", data=current_class).fit()
intercept = model.params.Intercept
slope = model.params.unit_sold
trendline_params.append((intercept, slope))
trendlines = pd.DataFrame(trendline_params, columns=["intercept", "slope"])
trendlines.loc[:, "class"] = class_list
trendlines.set_index("class", inplace=True)
trendlines| intercept | slope | |
|---|---|---|
| class | ||
| c | 38676.549355 | -0.221809 |
| e | 64882.396937 | -0.570804 |
| s | 147041.433666 | -14.185744 |
| cl | 152113.548976 | -20.676529 |
| sl | 137453.325316 | -2.907888 |
| m | 58356.218353 | -1.915135 |
Visualise trendline for S class:
s_class = cleaned_data.loc[(cleaned_data['class'] == "s")]
fig = px.scatter(s_class, y="price", x="unit_sold", log_x=True, trendline="ols", trendline_options=dict(log_y=True), title="Price vs. Unit Sold for S Class")
fig.show()Get intercept and slope:
trendlines.loc["s"]intercept 147041.433666
slope -14.185744
Name: s, dtype: float64
Create elasticity function:
def calculate_elasticity(car_class:str, x_axis:int=0, x_input:int=0, show_vis=True, return_elasticity=False):
axis_list = ["price", "unit_sold"]
x = axis_list[x_axis]
del axis_list[x_axis]
y = axis_list[0]
current_class = cleaned_data.loc[(cleaned_data['class'] == car_class)]
formula_str = "{} ~ {}".format(y, x)
model = smf.ols(formula=formula_str, data=current_class).fit()
if show_vis:
fig = px.scatter(current_class, y=x, x=y, trendline="ols", title="Price vs. Unit Sold for {} Class".format(car_class))
fig.show()
intercept = model.params.Intercept
slope = model.params[x]
elasticity = slope * x_input / ((slope * x_input) + intercept)
if not return_elasticity:
print("Intercept, Slope (transposed): {}, {}".format(intercept, slope))
print("Elasticity at {} {} = {}".format(x, x_input, elasticity))
return
else:
return elasticityGet elasticity for S class at price 125k:
calculate_elasticity("s", 0, 125000)Intercept, Slope (transposed): 6553.746326543564, -0.03885217994413569
Elasticity at price 125000 = -2.861450798110559
Get elasticity for s class at price 105k:
calculate_elasticity("s", 0, 105000, False)Intercept, Slope (transposed): 6553.746326543564, -0.03885217994413569
Elasticity at price 105000 = -1.6487623127148616
The elasticity for S class at 105k and 125k are different, because they represent local elasticity for specific price. Let’s calculate the global elasticity for S class. To calculate it, the mean or median value can be used. In this example, the mean value is used.
Calculate mean and median price for each class:
mean_median_df = pd.DataFrame(index=class_list, columns=["mean_price", "median_price"])
for i in class_list:
current_class_df = cleaned_data.loc[(cleaned_data['class'] == i)].dropna(subset=["class", "price", "unit_sold"])
current_class_df["unit_sold"] = current_class_df["unit_sold"].astype(int)
current_class_df.loc[:, "revenue"] = current_class_df["price"] * current_class_df["unit_sold"]
current_total_unitsold = current_class_df.loc[:, "unit_sold"].sum()
current_total_revenue = current_class_df.loc[:, "revenue"].sum()
mean_price = int(current_total_revenue/current_total_unitsold)
current_class_df["price"] = current_class_df.apply(lambda s: [s['price']] * s['unit_sold'], axis=1)
current_class_df = current_class_df.explode('price')
current_class_df["price"] = current_class_df["price"].astype(int)
median_price = int(current_class_df.price.median())
mean_median_df.loc[i, ["mean_price", "median_price"]] = [mean_price, median_price]
mean_median_df| mean_price | median_price | |
|---|---|---|
| c | 37481 | 37256 |
| e | 62618 | 62818 |
| s | 117550 | 124750 |
| cl | 146585 | 132050 |
| sl | 135658 | 136175 |
| m | 53066 | 56040 |
Calculate global elasticity using mean price:
for i in mean_median_df.index:
mean_median_df.loc[i, "elasticity"] = calculate_elasticity(i, 0, mean_median_df.loc[i, "mean_price"], show_vis=False, return_elasticity=True)
fig = px.scatter(cleaned_data, y="price", x="unit_sold", color="class", log_x=True, trendline="ols", trendline_options=dict(log_y=True))
fig.show()
mean_median_df.sort_values(by=['elasticity'])| mean_price | median_price | elasticity | |
|---|---|---|---|
| sl | 135658 | 136175 | -22.419657 |
| e | 62618 | 62818 | -11.428949 |
| c | 37481 | 37256 | -2.750339 |
| s | 117550 | 124750 | -2.298856 |
| m | 53066 | 56040 | -0.410138 |
| cl | 146585 | 132050 | -0.297840 |
Based on price elasticity model, the optimal prices are:
# Calculating trendline parameter using price as x axis
trendline_params = []
for i in (class_list):
current_class = cleaned_data.loc[(cleaned_data['class'] == i)]
model = smf.ols(formula="unit_sold ~ price", data=current_class).fit()
intercept = model.params.Intercept
slope = model.params.price
trendline_params.append((intercept, slope))
trendlines = pd.DataFrame(trendline_params, columns=["intercept", "slope"])
trendlines.loc[:, "class"] = class_list
trendlines.set_index("class", inplace=True)
price = np.arange(0, 500000, 1000)
trendlines_df = pd.DataFrame(list(product(class_list, price)), columns=['class', 'price'])
trendlines_df = pd.merge(trendlines_df, trendlines, on="class")
trendlines_df.loc[:,"unit_sold"] = trendlines_df.slope * trendlines_df.price + trendlines_df.intercept
trendlines_df["unit_sold"] = trendlines_df.loc[:,"unit_sold"].astype(int)
trendlines_df.loc[:,"revenue"] = trendlines_df.price * trendlines_df.unit_sold
fig = px.line(trendlines_df[trendlines_df.revenue > 0], y="revenue", x="price", color="class", hover_data=['unit_sold'])
fig.show()mean_median_df.loc[:, "optimal_price"] = 0
for i in mean_median_df.index:
id = trendlines_df.loc[trendlines_df.loc[:,"class"] == i, "revenue"].idxmax()
mean_median_df.loc[i, "optimal_price"] = trendlines_df.iloc[id].price
mean_median_df.sort_values(by=['elasticity'])| mean_price | median_price | elasticity | optimal_price | |
|---|---|---|---|---|
| sl | 135658 | 136175 | -22.419657 | 71000 |
| e | 62618 | 62818 | -11.428949 | 34000 |
| c | 37481 | 37256 | -2.750339 | 26000 |
| s | 117550 | 124750 | -2.298856 | 84000 |
| m | 53066 | 56040 | -0.410138 | 92000 |
| cl | 146585 | 132050 | -0.297840 | 313000 |