import pandas as pd
import matplotlib
# load sales_calls
calls = pd.read_csv("data/sales_calls.csv")
calls
| Team member | Territory | Month | Calls | |
|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 |
| 1 | Jorge | 3 | 2 | 88 |
| 2 | Jorge | 3 | 3 | 84 |
| 3 | Jorge | 3 | 4 | 113 |
| 4 | Ana | 1 | 1 | 91 |
| 5 | Ana | 1 | 2 | 129 |
| 6 | Ana | 1 | 3 | 96 |
| 7 | Ana | 1 | 4 | 128 |
| 8 | Ali | 2 | 1 | 120 |
| 9 | Ali | 2 | 2 | 85 |
| 10 | Ali | 2 | 3 | 87 |
| 11 | Ali | 2 | 4 | 87 |
# load sales_revenue
revenue = pd.read_csv("data/sales_revenue.csv")
revenue
| Territory | Month | Amount | |
|---|---|---|---|
| 0 | 1 | 1 | 54228 |
| 1 | 1 | 2 | 61640 |
| 2 | 1 | 3 | 43491 |
| 3 | 1 | 4 | 52173 |
| 4 | 2 | 1 | 36061 |
| 5 | 2 | 2 | 44957 |
| 6 | 2 | 3 | 35058 |
| 7 | 2 | 4 | 33855 |
| 8 | 3 | 1 | 50876 |
| 9 | 3 | 2 | 57682 |
| 10 | 3 | 3 | 53689 |
| 11 | 3 | 4 | 49173 |
revenue.Territory
0 1 1 1 2 1 3 1 4 2 5 2 6 2 7 2 8 3 9 3 10 3 11 3 Name: Territory, dtype: int64
list(revenue.columns)
['Territory', 'Month', 'Amount']
# merge two dataframes
calls_revenue = pd.merge(calls, revenue, on=['Territory', 'Month'])
calls_revenue
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 | 50876 |
| 1 | Jorge | 3 | 2 | 88 | 57682 |
| 2 | Jorge | 3 | 3 | 84 | 53689 |
| 3 | Jorge | 3 | 4 | 113 | 49173 |
| 4 | Ana | 1 | 1 | 91 | 54228 |
| 5 | Ana | 1 | 2 | 129 | 61640 |
| 6 | Ana | 1 | 3 | 96 | 43491 |
| 7 | Ana | 1 | 4 | 128 | 52173 |
| 8 | Ali | 2 | 1 | 120 | 36061 |
| 9 | Ali | 2 | 2 | 85 | 44957 |
| 10 | Ali | 2 | 3 | 87 | 35058 |
| 11 | Ali | 2 | 4 | 87 | 33855 |
calls_revenue.Month
0 1 1 2 2 3 3 4 4 1 5 2 6 3 7 4 8 1 9 2 10 3 11 4 Name: Month, dtype: int64
# display revenue for territory 3 - filter records
calls_revenue[calls_revenue.Territory == 3]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 | 50876 |
| 1 | Jorge | 3 | 2 | 88 | 57682 |
| 2 | Jorge | 3 | 3 | 84 | 53689 |
| 3 | Jorge | 3 | 4 | 113 | 49173 |
calls_revenue[calls_revenue['Territory'] == 3]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 | 50876 |
| 1 | Jorge | 3 | 2 | 88 | 57682 |
| 2 | Jorge | 3 | 3 | 84 | 53689 |
| 3 | Jorge | 3 | 4 | 113 | 49173 |
calls_revenue[calls_revenue.Territory == 1]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 4 | Ana | 1 | 1 | 91 | 54228 |
| 5 | Ana | 1 | 2 | 129 | 61640 |
| 6 | Ana | 1 | 3 | 96 | 43491 |
| 7 | Ana | 1 | 4 | 128 | 52173 |
calls_revenue[calls_revenue.Territory == 2]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 8 | Ali | 2 | 1 | 120 | 36061 |
| 9 | Ali | 2 | 2 | 85 | 44957 |
| 10 | Ali | 2 | 3 | 87 | 35058 |
| 11 | Ali | 2 | 4 | 87 | 33855 |
calls_revenue[calls_revenue.Month == 1]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 | 50876 |
| 4 | Ana | 1 | 1 | 91 | 54228 |
| 8 | Ali | 2 | 1 | 120 | 36061 |
calls_revenue[calls_revenue.Month == 2]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 1 | Jorge | 3 | 2 | 88 | 57682 |
| 5 | Ana | 1 | 2 | 129 | 61640 |
| 9 | Ali | 2 | 2 | 85 | 44957 |
calls_revenue[calls_revenue.Month == 3]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 2 | Jorge | 3 | 3 | 84 | 53689 |
| 6 | Ana | 1 | 3 | 96 | 43491 |
| 10 | Ali | 2 | 3 | 87 | 35058 |
# select rows in which amount per call is greater than 500
calls_revenue[calls_revenue.Amount / calls_revenue.Calls > 500]
| Team member | Territory | Month | Calls | Amount | |
|---|---|---|---|---|---|
| 1 | Jorge | 3 | 2 | 88 | 57682 |
| 2 | Jorge | 3 | 3 | 84 | 53689 |
| 4 | Ana | 1 | 1 | 91 | 54228 |
| 9 | Ali | 2 | 2 | 85 | 44957 |
# calculate and add amount per calls column to dataframe
calls_revenue['Calls_Amount'] = calls_revenue.Amount / calls_revenue.Calls
calls_revenue
| Team member | Territory | Month | Calls | Amount | Calls_Amount | |
|---|---|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 | 50876 | 475.476636 |
| 1 | Jorge | 3 | 2 | 88 | 57682 | 655.477273 |
| 2 | Jorge | 3 | 3 | 84 | 53689 | 639.154762 |
| 3 | Jorge | 3 | 4 | 113 | 49173 | 435.159292 |
| 4 | Ana | 1 | 1 | 91 | 54228 | 595.912088 |
| 5 | Ana | 1 | 2 | 129 | 61640 | 477.829457 |
| 6 | Ana | 1 | 3 | 96 | 43491 | 453.031250 |
| 7 | Ana | 1 | 4 | 128 | 52173 | 407.601562 |
| 8 | Ali | 2 | 1 | 120 | 36061 | 300.508333 |
| 9 | Ali | 2 | 2 | 85 | 44957 | 528.905882 |
| 10 | Ali | 2 | 3 | 87 | 35058 | 402.965517 |
| 11 | Ali | 2 | 4 | 87 | 33855 | 389.137931 |
calls_revenue[calls_revenue.Amount / calls_revenue.Calls > 500]
| Team member | Territory | Month | Calls | Amount | Calls_Amount | |
|---|---|---|---|---|---|---|
| 1 | Jorge | 3 | 2 | 88 | 57682 | 655.477273 |
| 2 | Jorge | 3 | 3 | 84 | 53689 | 639.154762 |
| 4 | Ana | 1 | 1 | 91 | 54228 | 595.912088 |
| 9 | Ali | 2 | 2 | 85 | 44957 | 528.905882 |
calls_revenue.describe()
| Territory | Month | Calls | Amount | Calls_Amount | |
|---|---|---|---|---|---|
| count | 12.000000 | 12.000000 | 12.000000 | 12.000000 | 12.000000 |
| mean | 2.000000 | 2.500000 | 101.250000 | 47740.250000 | 480.096665 |
| std | 0.852803 | 1.167748 | 17.279126 | 9129.881321 | 107.179540 |
| min | 1.000000 | 1.000000 | 84.000000 | 33855.000000 | 300.508333 |
| 25% | 1.000000 | 1.750000 | 87.000000 | 41633.500000 | 406.442551 |
| 50% | 2.000000 | 2.500000 | 93.500000 | 50024.500000 | 464.253943 |
| 75% | 3.000000 | 3.250000 | 114.750000 | 53823.750000 | 545.657434 |
| max | 3.000000 | 4.000000 | 129.000000 | 61640.000000 | 655.477273 |
# get all rows in which amount per call is above mean/avg
calls_revenue.Calls_Amount.mean()
480.0966653266681
calls_revenue[calls_revenue.Calls_Amount > calls_revenue.Calls_Amount.mean()]
| Team member | Territory | Month | Calls | Amount | Calls_Amount | |
|---|---|---|---|---|---|---|
| 1 | Jorge | 3 | 2 | 88 | 57682 | 655.477273 |
| 2 | Jorge | 3 | 3 | 84 | 53689 | 639.154762 |
| 4 | Ana | 1 | 1 | 91 | 54228 | 595.912088 |
| 9 | Ali | 2 | 2 | 85 | 44957 | 528.905882 |
calls_revenue[calls_revenue.Calls_Amount > calls_revenue.Calls_Amount.median()]
| Team member | Territory | Month | Calls | Amount | Calls_Amount | |
|---|---|---|---|---|---|---|
| 0 | Jorge | 3 | 1 | 107 | 50876 | 475.476636 |
| 1 | Jorge | 3 | 2 | 88 | 57682 | 655.477273 |
| 2 | Jorge | 3 | 3 | 84 | 53689 | 639.154762 |
| 4 | Ana | 1 | 1 | 91 | 54228 | 595.912088 |
| 5 | Ana | 1 | 2 | 129 | 61640 | 477.829457 |
| 9 | Ali | 2 | 2 | 85 | 44957 | 528.905882 |
# total calls and amounts by month or territory
calls_revenue[['Month', 'Calls', 'Amount']].groupby(['Month']).sum()
| Calls | Amount | |
|---|---|---|
| Month | ||
| 1 | 318 | 141165 |
| 2 | 302 | 164279 |
| 3 | 267 | 132238 |
| 4 | 328 | 135201 |
calls_revenue[['Territory', 'Calls', 'Amount']].groupby(['Territory']).sum()
| Calls | Amount | |
|---|---|---|
| Territory | ||
| 1 | 444 | 211532 |
| 2 | 379 | 149931 |
| 3 | 392 | 211420 |
# plotting - Calls vs Territory
calls_revenue[['Territory', 'Calls']].groupby(['Territory']).sum().plot.bar()
<AxesSubplot:xlabel='Territory'>