In [2]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the body-fat measurements from a CSV file in the notebook's working directory.
df = pd.read_csv(r'bodyfat.csv')
EDA (Exploratory Data Analysis)¶
In [4]:
# Preview the first five rows (the default for head()) to see the columns and typical values.
df.head()
Out[4]:
Density | BodyFat | Age | Weight | Height | Neck | Chest | Abdomen | Hip | Thigh | Knee | Ankle | Biceps | Forearm | Wrist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0708 | 12.3 | 23 | 154.25 | 67.75 | 36.2 | 93.1 | 85.2 | 94.5 | 59.0 | 37.3 | 21.9 | 32.0 | 27.4 | 17.1 |
1 | 1.0853 | 6.1 | 22 | 173.25 | 72.25 | 38.5 | 93.6 | 83.0 | 98.7 | 58.7 | 37.3 | 23.4 | 30.5 | 28.9 | 18.2 |
2 | 1.0414 | 25.3 | 22 | 154.00 | 66.25 | 34.0 | 95.8 | 87.9 | 99.2 | 59.6 | 38.9 | 24.0 | 28.8 | 25.2 | 16.6 |
3 | 1.0751 | 10.4 | 26 | 184.75 | 72.25 | 37.4 | 101.8 | 86.4 | 101.2 | 60.1 | 37.3 | 22.8 | 32.4 | 29.4 | 18.2 |
4 | 1.0340 | 28.7 | 24 | 184.25 | 71.25 | 34.4 | 97.3 | 100.0 | 101.9 | 63.2 | 42.2 | 24.0 | 32.2 | 27.7 | 17.7 |
In [5]:
# List the column labels available for analysis.
df.columns
Out[5]:
Index(['Density', 'BodyFat', 'Age', 'Weight', 'Height', 'Neck', 'Chest', 'Abdomen', 'Hip', 'Thigh', 'Knee', 'Ankle', 'Biceps', 'Forearm', 'Wrist'], dtype='object')
In [7]:
# Confirm that read_csv produced a DataFrame.
type(df)
Out[7]:
pandas.core.frame.DataFrame
In [6]:
# A bracketed sequence of column names is just a plain Python list.
type(['BodyFat', 'Age'])
Out[6]:
list
In [8]:
# Indexing a DataFrame with a list of column names returns a DataFrame
# restricted to those columns.
cols = ['BodyFat', 'Age']
data = df[cols]
In [9]:
# A list-of-columns selection is still a DataFrame, not a Series.
type(data)
Out[9]:
pandas.core.frame.DataFrame
In [10]:
# Indexing with a single column name (a string, not a list) pulls out just that column.
d = df['Age']
In [12]:
# A single-column selection is a Series, which the pandas docs describe as a
# one-dimensional ndarray with axis labels (including time series).
type(d)
Out[12]:
pandas.core.series.Series
In [13]:
# Preview the first five rows of the two-column subset.
data.head()
Out[13]:
BodyFat | Age | |
---|---|---|
0 | 12.3 | 23 |
1 | 6.1 | 22 |
2 | 25.3 | 22 |
3 | 10.4 | 26 |
4 | 28.7 | 24 |
In [14]:
# Pairwise correlation matrix of the selected columns; DataFrame.corr() uses
# Pearson's method unless another one is requested.
data.corr()
Out[14]:
BodyFat | Age | |
---|---|---|
BodyFat | 1.000000 | 0.291458 |
Age | 0.291458 | 1.000000 |
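Pearson's correlation coefficient: $r = \dfrac{\sum_{i=1}^{N} (x_i - \bar{x})(y_i - \bar{y})}{N \, \sigma_x \, \sigma_y}$, where $\bar{x}$ and $\bar{y}$ are the means and $\sigma_x$ and $\sigma_y$ are the (population) standard deviations of the two variables. It ranges from -1 to 1. Negative: as one value increases, the other decreases. Positive: both values rise and fall together.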
In [16]:
# Box plot of the selected columns, one box per column of `data`.
# Uses an explicit Figure/Axes pair instead of the implicit pyplot state so the
# cell is self-contained and safe to re-run.
fig, ax = plt.subplots()

# Each column of the 2-D array becomes its own box; patch_artist=True draws
# the boxes as filled patches rather than plain outlines.
ax.boxplot(data.to_numpy(), patch_artist=True)

# boxplot places the boxes at x = 1, 2, ..., so label those positions with the
# column names taken from `data` itself. Deriving both from `data` keeps the
# tick labels correct if the column selection changes.
ax.set_xticks(range(1, len(data.columns) + 1))
ax.set_xticklabels(data.columns)

ax.set_title("Box Plot using matplotlib")

# Render the figure.
plt.show()
In [17]:
# Scatter plot of Age against BodyFat.
# pandas, matplotlib and `df` are already provided by the first cell of the
# notebook, so the imports and the CSV read are not repeated here.
fig, ax = plt.subplots()

# alpha < 1 makes the markers slightly transparent so overlapping points stay visible.
ax.scatter(df['BodyFat'], df['Age'], alpha=0.8)

ax.set_xlabel('BodyFat')
ax.set_ylabel('Age')
ax.set_title("Scatter Plot using matplotlib")

# Render the figure.
plt.show()
In [ ]: