import pandas as pd
import numpy as np
# Toy dataset: four parallel columns of ten entries each.
# Two of the ages are missing and are recorded as NaN.
data = {
    'Animal': [
        'cat', 'cat', 'snake', 'dog', 'dog',
        'cat', 'snake', 'cat', 'dog', 'dog',
    ],
    'Age': [2.5, 3.0, 0.5, np.nan, 5.0, 2.0, 4.5, np.nan, 7.0, 3.0],
    'Priority': [
        'yes', 'yes', 'no', 'yes', 'no',
        'no', 'no', 'yes', 'no', 'no',
    ],
    'Visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
}
# Echo the raw dictionary so its contents are shown as the cell output.
data
{'Animal': ['cat',
'cat',
'snake',
'dog',
'dog',
'cat',
'snake',
'cat',
'dog',
'dog'],
'Age': [2.5, 3.0, 0.5, nan, 5.0, 2.0, 4.5, nan, 7.0, 3.0],
'Priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no'],
'Visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1]}
# Label the ten rows 'a' through 'j' rather than using the default integer index.
index = list('abcdefghij')
df = pd.DataFrame(data=data, index=index)
# Display the constructed frame.
df
| | Animal | Age | Priority | Visits |
|---|---|---|---|---|
| a | cat | 2.5 | yes | 1 |
| b | cat | 3.0 | yes | 3 |
| c | snake | 0.5 | no | 2 |
| d | dog | NaN | yes | 3 |
| e | dog | 5.0 | no | 2 |
| f | cat | 2.0 | no | 3 |
| g | snake | 4.5 | no | 1 |
| h | cat | NaN | yes | 1 |
| i | dog | 7.0 | no | 2 |
| j | dog | 3.0 | no | 1 |
# Print a concise summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   Animal    10 non-null     object
 1   Age       8 non-null      float64
 2   Priority  10 non-null     object
 3   Visits    10 non-null     int64
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. The count excludes NaN, which is why Age reports 8 rather than 10.
df.describe()
| | Age | Visits |
|---|---|---|
| count | 8.000000 | 10.000000 |
| mean | 3.437500 | 1.900000 |
| std | 2.007797 | 0.875595 |
| min | 0.500000 | 1.000000 |
| 25% | 2.375000 | 1.000000 |
| 50% | 3.000000 | 2.000000 |
| 75% | 4.625000 | 2.750000 |
| max | 7.000000 | 3.000000 |
# First three rows of the frame.
df.head(3)
| | Animal | Age | Priority | Visits |
|---|---|---|---|---|
| a | cat | 2.5 | yes | 1 |
| b | cat | 3.0 | yes | 3 |
| c | snake | 0.5 | no | 2 |
# First three rows, selected by integer position rather than with head().
df.iloc[:3]
| | Animal | Age | Priority | Visits |
|---|---|---|---|---|
| a | cat | 2.5 | yes | 1 |
| b | cat | 3.0 | yes | 3 |
| c | snake | 0.5 | no | 2 |
# Select a subset of columns by passing a list of column labels.
df[['Animal', 'Age']]
| | Animal | Age |
|---|---|---|
| a | cat | 2.5 |
| b | cat | 3.0 |
| c | snake | 0.5 |
| d | dog | NaN |
| e | dog | 5.0 |
| f | cat | 2.0 |
| g | snake | 4.5 |
| h | cat | NaN |
| i | dog | 7.0 |
| j | dog | 3.0 |
# Label-based selection with .loc: every row (:) and the listed columns.
df.loc[:, ['Animal', 'Age']]
| | Animal | Age |
|---|---|---|
| a | cat | 2.5 |
| b | cat | 3.0 |
| c | snake | 0.5 |
| d | dog | NaN |
| e | dog | 5.0 |
| f | cat | 2.0 |
| g | snake | 4.5 |
| h | cat | NaN |
| i | dog | 7.0 |
| j | dog | 3.0 |
# Display the full frame.
df
| | Animal | Age | Priority | Visits |
|---|---|---|---|---|
| a | cat | 2.5 | yes | 1 |
| b | cat | 3.0 | yes | 3 |
| c | snake | 0.5 | no | 2 |
| d | dog | NaN | yes | 3 |
| e | dog | 5.0 | no | 2 |
| f | cat | 2.0 | no | 3 |
| g | snake | 4.5 | no | 1 |
| h | cat | NaN | yes | 1 |
| i | dog | 7.0 | no | 2 |
| j | dog | 3.0 | no | 1 |
# Number of non-null 'Visits' values for each (Animal, Priority) combination.
# Combinations with no rows (e.g. snake / yes) do not appear in the result.
df['Visits'].groupby([df['Animal'], df['Priority']]).count()
Animal  Priority
cat     no          1
        yes         3
dog     no          3
        yes         1
snake   no          2
Name: Visits, dtype: int64
# Mean age per animal; NaN ages are skipped when averaging.
df.groupby('Animal')['Age'].mean()
Animal
cat      2.5
dog      5.0
snake    2.5
Name: Age, dtype: float64
# Per-animal mean age, computed by grouping the 'Age' Series by the 'Animal' column.
df['Age'].groupby(df['Animal']).mean()
Animal
cat      2.5
dog      5.0
snake    2.5
Name: Age, dtype: float64
# Per-animal descriptive statistics of 'Age': count, mean, std, min,
# quartiles and max, one row per animal.
df.groupby('Animal')['Age'].describe()
| Animal | count | mean | std | min | 25% | 50% | 75% | max |
|---|---|---|---|---|---|---|---|---|
| cat | 3.0 | 2.5 | 0.500000 | 2.0 | 2.25 | 2.5 | 2.75 | 3.0 |
| dog | 3.0 | 5.0 | 2.000000 | 3.0 | 4.00 | 5.0 | 6.00 | 7.0 |
| snake | 2.0 | 2.5 | 2.828427 | 0.5 | 1.50 | 2.5 | 3.50 | 4.5 |
# Summary statistics for the numeric columns (Age and Visits) of the whole frame.
df.describe()
| | Age | Visits |
|---|---|---|
| count | 8.000000 | 10.000000 |
| mean | 3.437500 | 1.900000 |
| std | 2.007797 | 0.875595 |
| min | 0.500000 | 1.000000 |
| 25% | 2.375000 | 1.000000 |
| 50% | 3.000000 | 2.000000 |
| 75% | 4.625000 | 2.750000 |
| max | 7.000000 | 3.000000 |