Functionalities of Dataframes in Pandas

Functionalities of Dataframes in Pandas - Python

Functionalities of Dataframes in Pandas
Run the code here: https://repl.it/@VinitKhandelwal/pandas-dataframes

import numpy as np

import pandas as pd

print(np.random.seed(101))

df = pd.DataFrame(np.random.randn(5,4),['A','B','C','D','E'],['W','X','Y','Z']) # defining data frame - 1st arg: matrix data, 2nd arg: rows, 3rd arg: columns

print(df) # print data frame

print(type(df)) # Type of data frame

print(df['W']) # Python style, recommended

print(type(df['W'])) # Type of a column

print(df.W) # SQL style, not recommended

print(df[['W','Z']]) # pass list of column names for multipe columns but not all columns

print("ADD NEW COLUMN")

df['V'] = df['W']+df['Z']

print(df['V'])

print(df)

print("DROP COLUMN")

df.drop('V', axis=1)

print(df)

df.drop('V', axis=1, inplace=True)

print(df)

print("DROP ROW WITH INDEX")

df.drop('E')

print(df)

print("DROP LIST OF ROWS")

df.drop(['E','D'], inplace=True)

print(df)

print("FIND SHAPE OF DATA FRAME")

print(df.shape)

print("GET ROW BY INDEX NAME")

print(df.loc['C'])

print("GET ROW BY INDEX ID")

print(df.iloc[2])

print("GET CELL")

print(df.loc['C','W'])

print("ADD NEW ROW")

df.loc['D'] = df.loc['A']+df.loc['B']

print(df.loc['D'])

print(df)

df.loc['E'] = [0.1,0.2,0.3,0.4]

print(df.loc['E'])

print(df)

print("GET MULTIPLE ROWS")

print(df.loc[['A', 'E'],['W','Y']])

print(df > 0)

print("COMPARE AND RETURN BOOLEAN")

booldf = df > 0

print(df[booldf])

print(df[df > 0])

print(df[df['W'] > 0])

print(df[df['Z'] < 0])

print("COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS")

resultdf = df[df['W'] > 0]

print(resultdf)

print(resultdf['X'])

print(df[df['Z'] > 0]['X'])

print("COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS AND GET MULTIPLE COLUMNS")

print(df[df['Z'] > 0][['X', 'Z']])

print("OR - LONG METHOD")

boolser = df['W'] > 0

colser = ['X', 'Z']

print(df[boolser][colser])

print("APPLY TWO CONDITIONS USING &")

print((df > 0) & (df < 1))

print("APPLY TWO CONDITIONS USING |")

print((df > 0) | (df < 1))

print("RESET INDEX")

print(df.reset_index())

print(df)

print("RESET INDEX INPLACE")

df.reset_index(inplace=True)

print(df)

print("SET INDEX")

df['States'] = 'MH GJ RJ DL HR'.split()

print(df)

print(df.set_index('States'))

print(df)

print("SET INDEX INPLACE")

df.set_index('States', inplace=True)

print(df)

outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

inside = [1,2,3,1,2,3]

print(zip(outside, inside))

hier_index = list(zip(outside, inside))

print(hier_index)

hier_index = pd.MultiIndex.from_tuples(hier_index)

print(hier_index)

print("Multi-level Data Frame")

df = pd.DataFrame(np.random.randn(6,2), hier_index, ['A', 'B'])

print(df)

print("GET DATA")

print(df.loc['G1'])

print(df.loc['G1'].loc[1])

print(df.loc['G1'].loc[1]['A'])

print(df.index.names)

df.index.names = ['Groups', 'Numbers']

print(df.index.names)

print(df)

print("CROSS SECTION")

print(df.xs)

print(df.xs('G1'))

print(df.xs(1, level='Numbers'))

OUTPUT

None
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001
D 0.188695 ... 0.955057
E 0.190794 ... 0.683509

[5 rows x 4 columns]
<class 'pandas.core.frame.DataFrame'>
A 2.706850
B 0.651118
C -2.018168
D 0.188695
E 0.190794
Name: W, dtype: float64
<class 'pandas.core.series.Series'>
A 2.706850
B 0.651118
C -2.018168
D 0.188695
E 0.190794
Name: W, dtype: float64
W Z
A 2.706850 0.503826
B 0.651118 0.605965
C -2.018168 -0.589001
D 0.188695 0.955057
E 0.190794 0.683509
ADD NEW COLUMN
A 3.210676
B 1.257083
C -2.607169
D 1.143752
E 0.874303
Name: V, dtype: float64
W ... V
A 2.706850 ... 3.210676
B 0.651118 ... 1.257083
C -2.018168 ... -2.607169
D 0.188695 ... 1.143752
E 0.190794 ... 0.874303

[5 rows x 5 columns]
DROP COLUMN
W ... V
A 2.706850 ... 3.210676
B 0.651118 ... 1.257083
C -2.018168 ... -2.607169
D 0.188695 ... 1.143752
E 0.190794 ... 0.874303

[5 rows x 5 columns]
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001
D 0.188695 ... 0.955057
E 0.190794 ... 0.683509

[5 rows x 4 columns]
DROP ROW WITH INDEX
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001
D 0.188695 ... 0.955057
E 0.190794 ... 0.683509

[5 rows x 4 columns]
DROP LIST OF ROWS
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001

[3 rows x 4 columns]
FIND SHAPE OF DATA FRAME
(3, 4)
GET ROW BY INDEX NAME
W -2.018168
X 0.740122
Y 0.528813
Z -0.589001
Name: C, dtype: float64
GET ROW BY INDEX ID
W -2.018168
X 0.740122
Y 0.528813
Z -0.589001
Name: C, dtype: float64
GET CELL
-2.018168244037392
ADD NEW ROW
W 3.357968
X 0.308815
Y 0.059892
Z 1.109791
Name: D, dtype: float64
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001
D 3.357968 ... 1.109791

[4 rows x 4 columns]
W 0.1
X 0.2
Y 0.3
Z 0.4
Name: E, dtype: float64
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001
D 3.357968 ... 1.109791
E 0.100000 ... 0.400000

[5 rows x 4 columns]
GET MULTIPLE ROWS
W Y
A 2.70685 0.907969
E 0.10000 0.300000
W ... Z
A True ... True
B True ... True
C False ... False
D True ... True
E True ... True

[5 rows x 4 columns]
COMPARE AND RETURN BOOLEAN
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C NaN ... NaN
D 3.357968 ... 1.109791
E 0.100000 ... 0.400000

[5 rows x 4 columns]
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C NaN ... NaN
D 3.357968 ... 1.109791
E 0.100000 ... 0.400000

[5 rows x 4 columns]
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
D 3.357968 ... 1.109791
E 0.100000 ... 0.400000

[4 rows x 4 columns]
W ... Z
C -2.018168 ... -0.589001

[1 rows x 4 columns]
COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
D 3.357968 ... 1.109791
E 0.100000 ... 0.400000

[4 rows x 4 columns]
A 0.628133
B -0.319318
D 0.308815
E 0.200000
Name: X, dtype: float64
A 0.628133
B -0.319318
D 0.308815
E 0.200000
Name: X, dtype: float64
COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS AND GET MULTIPLECOLUMNS
X Z
A 0.628133 0.503826
B -0.319318 0.605965
D 0.308815 1.109791
E 0.200000 0.400000
OR - LONG METHOD
X Z
A 0.628133 0.503826
B -0.319318 0.605965
D 0.308815 1.109791
E 0.200000 0.400000
APPLY TWO CONDITIONS USING &
W ... Z
A False ... True
B True ... True
C False ... False
D False ... False
E True ... True

[5 rows x 4 columns]
APPLY TWO CONDITIONS USING |
W ... Z
A True ... True
B True ... True
C True ... True
D True ... True
E True ... True

[5 rows x 4 columns]
RESET INDEX
index ... Z
0 A ... 0.503826
1 B ... 0.605965
2 C ... -0.589001
3 D ... 1.109791
4 E ... 0.400000

[5 rows x 5 columns]
W ... Z
A 2.706850 ... 0.503826
B 0.651118 ... 0.605965
C -2.018168 ... -0.589001
D 3.357968 ... 1.109791
E 0.100000 ... 0.400000

[5 rows x 4 columns]
RESET INDEX INPLACE
index ... Z
0 A ... 0.503826
1 B ... 0.605965
2 C ... -0.589001
3 D ... 1.109791
4 E ... 0.400000

[5 rows x 5 columns]
SET INDEX
index ... States
0 A ... MH
1 B ... GJ
2 C ... RJ
3 D ... DL
4 E ... HR

[5 rows x 6 columns]
index ... Z
States ...
MH A ... 0.503826
GJ B ... 0.605965
RJ C ... -0.589001
DL D ... 1.109791
HR E ... 0.400000

[5 rows x 5 columns]
index ... States
0 A ... MH
1 B ... GJ
2 C ... RJ
3 D ... DL
4 E ... HR

[5 rows x 6 columns]
SET INDEX INPLACE
index ... Z
States ...
MH A ... 0.503826
GJ B ... 0.605965
RJ C ... -0.589001
DL D ... 1.109791
HR E ... 0.400000

[5 rows x 5 columns]
<zip object at 0x7f6732cdec88>
[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2',3)]
MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
Multi-level Data Frame
A B
G1 1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
G2 1 0.166905 0.184502
2 0.807706 0.072960
3 0.638787 0.329646
GET DATA
A B
1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
A 0.302665
B 1.693723
Name: 1, dtype: float64
0.3026654485851825
[None, None]
['Groups', 'Numbers']
A B
Groups Numbers
G1 1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
G2 1 0.166905 0.184502
2 0.807706 0.072960
3 0.638787 0.329646
CROSS SECTION
<bound method NDFrame.xs of A B
Groups Numbers
G1 1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
G2 1 0.166905 0.184502
2 0.807706 0.072960
3 0.638787 0.329646>
A B
Numbers
1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
A B
Groups
G1 0.302665 1.693723
G2 0.166905 0.184502

Python - List - Append, Count, Extend, Index, Insert, Pop, Remove, Reverse, Sort

🐍 Advance List List is widely used and it's functionalities are heavily useful. Append Adds one element at the end of the list. Syntax list1.append(value) Input l1 = [1, 2, 3] l1.append(4) l1 Output [1, 2, 3, 4] append can be used to add any datatype in a list. It can even add list inside list. Caution: Append does not return anything. It just appends the list. Count .count(value) counts the number of occurrences of an element in the list. Syntax list1.count(value) Input l1 = [1, 2, 3, 4, 3] l1.count(3) Output 2 It returns 0 if the value is not found in the list. Extend .count(value) counts the number of occurrences of an element in the list. Syntax list1.extend(list) Input l1 = [1, 2, 3] l1.extend([4, 5]) Output [1, 2, 3, 4, 5] If we use append, entire list will be added to the first list like one element. Extend, i nstead of considering a list as one element, it joins the two lists one after other. Append works in the following way. Input l1 = [1, 2, 3] l1.append([4, 5]) Output...

Vinit Khandelwal

Search This Blog