Tasks
import pandas as pd
# Load the passenger records from the CSV file (the path is relative to the
# current working directory).
data = pd.read_csv("data/Titanic.csv")
# Report the (rows, columns) dimensions of the loaded frame.
data.shape
(434, 4)
# List the column labels that the groupings below refer to.
data.columns
Index(['Survived', 'Gender', 'Passenger Class', 'Age'], dtype='object')
# 1. Find out how many passengers died/survived.
# count() tallies non-null values per column, so a column with missing
# entries reports fewer than the number of rows in its group.
data.groupby('Survived').count()
| Gender | Passenger Class | Age | |
|---|---|---|---|
| Survived | |||
| died | 227 | 227 | 222 |
| survived | 207 | 207 | 201 |
# size() gives the number of rows per group, missing values included, so it
# answers "how many passengers" directly.
data.groupby('Survived').size()
Survived died 227 survived 207 dtype: int64
# 2. How many female/male passengers died/survived?
# Per-column non-null counts for each (Survived, Gender) combination.
data.groupby(['Survived', 'Gender']).count()
| Passenger Class | Age | ||
|---|---|---|---|
| Survived | Gender | ||
| died | female | 58 | 56 |
| male | 169 | 166 | |
| survived | female | 122 | 119 |
| male | 85 | 82 |
# Row count for each (Survived, Gender) combination.
data.groupby(['Survived', 'Gender']).size()
Survived Gender
died female 58
male 169
survived female 122
male 85
dtype: int64
# 3. How many passengers died/survived in each passenger class?
data.groupby(['Survived', 'Passenger Class']).size()
Survived Passenger Class
died 1st 52
2nd 61
3rd 114
survived 1st 84
2nd 62
3rd 61
dtype: int64
# 4. For each passenger class, how many female/male passengers are there?
data.groupby(['Passenger Class', 'Gender']).size()
Passenger Class Gender
1st female 53
male 83
2nd female 55
male 68
3rd female 72
male 103
dtype: int64
import numpy as np
# NOTE(review): the output recorded for this cell is smaller than the row
# count reported right after loading, which suggests it was captured after
# the missing-Age rows were dropped - re-run the cells in order to confirm.
data.shape
(423, 4)
# Drop rows with an empty Age.
# Assign the replaced column back instead of calling replace(..., inplace=True)
# on the `data['Age']` selection: an in-place call on a selected column is
# chained assignment, which recent pandas warns about and which does not
# update `data` under Copy-on-Write.
data['Age'] = data['Age'].replace('', np.nan)
data.dropna(subset=['Age'], inplace=True)
# Check the dtype of each column.
# NOTE(review): the recorded output already lists Age as int32, so it was
# presumably captured after the int conversion had run - confirm by
# re-running the cells in order.
data.dtypes
Survived object Gender object Passenger Class object Age int32 dtype: object
# Convert the Age column to int.
# NOTE(review): astype(int) truncates toward zero, so if the CSV stores
# fractional ages (e.g. 0.5) they become 0 - confirm against the raw data.
data['Age'] = data['Age'].astype(int)
# 5. How many infants died/survived?
# "Infant" is taken here to mean an Age of 2 or less.
infants = data[data['Age'] <= 2]
infants.groupby('Survived').size()
Survived died 7 survived 15 dtype: int64
# Per-column non-null counts for the infant subset, grouped by outcome.
infants.groupby('Survived').count()
| Gender | Passenger Class | Age | |
|---|---|---|---|
| Survived | |||
| died | 7 | 7 | 7 |
| survived | 15 | 15 | 15 |
# Break the infant outcomes down by passenger class.
infants.groupby(['Survived', 'Passenger Class']).size()
Survived Passenger Class
died 1st 1
3rd 6
survived 1st 1
2nd 7
3rd 7
dtype: int64
# Break the infant outcomes down by gender.
infants.groupby(['Survived', 'Gender']).size()
Survived Gender
died female 3
male 4
survived female 7
male 8
dtype: int64
# Break the infant outcomes down by both passenger class and gender.
infants.groupby(['Survived', 'Passenger Class', 'Gender']).size()
Survived Passenger Class Gender
died 1st female 1
3rd female 2
male 4
survived 1st male 1
2nd female 3
male 4
3rd female 4
male 3
dtype: int64
# 6. Oldest passenger? Female/Male? Which class?
# Keep every row whose Age equals the maximum, so ties are all shown.
data[data['Age'] == data['Age'].max()]
| Survived | Gender | Passenger Class | Age | |
|---|---|---|---|---|
| 384 | survived | male | 1st | 80 |
# 7. Youngest passenger? Female/Male? Which class?
# Keep every row whose Age equals the minimum, so ties are all shown.
data[data['Age'] == data['Age'].min()]
| Survived | Gender | Passenger Class | Age | |
|---|---|---|---|---|
| 155 | died | male | 3rd | 0 |
| 156 | died | male | 3rd | 0 |
| 276 | survived | female | 2nd | 0 |
| 319 | survived | female | 3rd | 0 |
| 320 | survived | female | 3rd | 0 |
| 350 | survived | male | 1st | 0 |
| 386 | survived | male | 2nd | 0 |
| 387 | survived | male | 2nd | 0 |
| 405 | survived | male | 3rd | 0 |
| 406 | survived | male | 3rd | 0 |
# Summarize the youngest passengers (rows at the minimum Age) by outcome,
# passenger class and gender.
data[data['Age'] == data['Age'].min()].groupby(['Survived', 'Passenger Class', 'Gender']).size()
Survived Passenger Class Gender
died 3rd male 2
survived 1st male 1
2nd female 1
male 2
3rd female 2
male 2
dtype: int64
# 8. Oldest passenger who died? Female/Male? Which class?
# Subset of passengers whose Survived value is 'died'.
died_passengers = data[data['Survived'] == 'died']
died_passengers.shape
(222, 4)
# Rows of the died subset whose Age equals that subset's maximum.
died_passengers[died_passengers['Age'] == died_passengers['Age'].max()]
| Survived | Gender | Passenger Class | Age | |
|---|---|---|---|---|
| 226 | died | male | 3rd | 74 |
# 9. Oldest passenger who survived? Female/Male? Which class?
# Select survivors by their own label (mirroring the 'died' filter) instead of
# "anything that is not 'died'", which would also pull in rows whose Survived
# value is missing or unexpected.
survived = data[data['Survived'] == 'survived']
# Rows of the survived subset whose Age equals that subset's maximum.
survived[survived['Age'] == survived['Age'].max()]
| Survived | Gender | Passenger Class | Age | |
|---|---|---|---|---|
| 384 | survived | male | 1st | 80 |
# 10. Youngest passenger who died? Female/Male? Which class?
# Rows of the died subset whose Age equals that subset's minimum.
died_passengers[died_passengers['Age'] == died_passengers['Age'].min()]
| Survived | Gender | Passenger Class | Age | |
|---|---|---|---|---|
| 155 | died | male | 3rd | 0 |
| 156 | died | male | 3rd | 0 |
# 11. Youngest passenger who survived? Female/Male? Which class?
# Rows of the survived subset whose Age equals that subset's minimum.
survived[survived['Age'] == survived['Age'].min()]
| Survived | Gender | Passenger Class | Age | |
|---|---|---|---|---|
| 276 | survived | female | 2nd | 0 |
| 319 | survived | female | 3rd | 0 |
| 320 | survived | female | 3rd | 0 |
| 350 | survived | male | 1st | 0 |
| 386 | survived | male | 2nd | 0 |
| 387 | survived | male | 2nd | 0 |
| 405 | survived | male | 3rd | 0 |
| 406 | survived | male | 3rd | 0 |
# 12. Find out how many passengers there are in each class.
# NOTE: `data` has already had its missing-Age rows dropped by this point, so
# these counts cover only passengers with a known Age.
data.groupby(['Passenger Class']).size()
Passenger Class 1st 133 2nd 119 3rd 171 dtype: int64