Skip to main content

Functionalities of Dataframes in Pandas - Python

Functionalities of Dataframes in Pandas
Run the code here: https://repl.it/@VinitKhandelwal/pandas-dataframes
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Build the demo frame
# ---------------------------------------------------------------------------
# np.random.seed() seeds NumPy's global generator and returns None, so the
# print below shows "None"; seeding makes the random data reproducible.
seed_result = np.random.seed(101)
print(seed_result)

row_labels = ['A', 'B', 'C', 'D', 'E']
column_labels = ['W', 'X', 'Y', 'Z']
# Positional arguments: 1st = matrix data, 2nd = row index, 3rd = columns.
df = pd.DataFrame(np.random.randn(5, 4), row_labels, column_labels)
print(df)        # the whole data frame
print(type(df))  # a DataFrame

# ---------------------------------------------------------------------------
# Selecting columns
# ---------------------------------------------------------------------------
w_column = df['W']         # bracket access: works for any column name
print(w_column)
print(type(w_column))      # a single column is a Series
print(df.W)                # attribute access: can clash with DataFrame
                           # method/attribute names, so brackets are safer
print(df[['W', 'Z']])      # a list of names selects several columns

print("ADD NEW COLUMN")
df['V'] = df['W'] + df['Z']  # element-wise sum stored as a new column
print(df['V'])
print(df)

# ---------------------------------------------------------------------------
# Dropping columns and rows
# ---------------------------------------------------------------------------
print("DROP COLUMN")
# Without inplace=True, drop() returns a new frame; the result is discarded
# here on purpose, so the next print still shows column V.
df.drop('V', axis='columns')
print(df)
df.drop('V', axis='columns', inplace=True)  # this one really removes V
print(df)

print("DROP ROW WITH INDEX")
df.drop('E', axis=0)  # again not in place: row E is still there afterwards
print(df)

print("DROP LIST OF ROWS")
rows_to_remove = ['E', 'D']
df.drop(rows_to_remove, inplace=True)
print(df)

print("FIND SHAPE OF DATA FRAME")
print(df.shape)  # (rows, columns)

# ---------------------------------------------------------------------------
# Selecting rows and cells
# ---------------------------------------------------------------------------
print("GET ROW BY INDEX NAME")
print(df.loc['C'])       # label-based lookup
print("GET ROW BY INDEX ID")
print(df.iloc[2])        # position-based lookup (third row)
print("GET CELL")
print(df.loc['C', 'W'])  # row label, column label

print("ADD NEW ROW")
# Assigning to a label that does not exist yet appends a new row.
df.loc['D'] = df.loc['A'] + df.loc['B']
print(df.loc['D'])
print(df)
df.loc['E'] = [0.1, 0.2, 0.3, 0.4]
print(df.loc['E'])
print(df)

print("GET MULTIPLE ROWS")
print(df.loc[['A', 'E'], ['W', 'Y']])  # sub-frame: rows A,E x columns W,Y

# ---------------------------------------------------------------------------
# Boolean selection
# ---------------------------------------------------------------------------
positive_mask = df > 0  # frame of booleans, same shape as df
print(positive_mask)
print("COMPARE AND RETURN BOOLEAN")
print(df[positive_mask])  # cells where the mask is False become NaN
print(df[positive_mask])  # same selection as writing df[df > 0] inline
w_positive = df['W'] > 0  # a boolean Series filters whole rows
print(df[w_positive])
print(df[df['Z'] < 0])

print("COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS")
rows_with_positive_w = df[w_positive]
print(rows_with_positive_w)
print(rows_with_positive_w['X'])
z_positive = df['Z'] > 0
print(df[z_positive]['X'])  # filter rows, then pick one column

print("COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS AND GET MULTIPLE COLUMNS")
print(df[z_positive][['X', 'Z']])

print("OR - LONG METHOD")
wanted_columns = ['X', 'Z']
print(df[w_positive][wanted_columns])

# Use & and | (not the keywords `and` / `or`) to combine boolean frames,
# and wrap each comparison in parentheses.
above_zero = df > 0
below_one = df < 1
print("APPLY TWO CONDITIONS USING &")
print(above_zero & below_one)
print("APPLY TWO CONDITIONS USING |")
print(above_zero | below_one)

# ---------------------------------------------------------------------------
# Resetting and setting the index
# ---------------------------------------------------------------------------
print("RESET INDEX")
print(df.reset_index())  # returns a copy with the old index as a column
print(df)                # original is unchanged
print("RESET INDEX INPLACE")
df.reset_index(inplace=True)
print(df)

print("SET INDEX")
df['States'] = 'MH GJ RJ DL HR'.split()  # one label per row
print(df)
print(df.set_index('States'))  # copy indexed by States
print(df)                      # original still has the integer index
print("SET INDEX INPLACE")
df.set_index('States', inplace=True)
print(df)

# ---------------------------------------------------------------------------
# Multi-level (hierarchical) index
# ---------------------------------------------------------------------------
group_labels = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
member_numbers = [1, 2, 3, 1, 2, 3]
print(zip(group_labels, member_numbers))  # a lazy zip object, not the pairs
hier_index = list(zip(group_labels, member_numbers))
print(hier_index)                         # the (group, number) tuples
hier_index = pd.MultiIndex.from_tuples(hier_index)
print(hier_index)

print("Multi-level Data Frame")
df = pd.DataFrame(np.random.randn(6, 2), hier_index, ['A', 'B'])
print(df)

print("GET DATA")
g1_rows = df.loc['G1']     # outer level -> sub-frame indexed by inner level
print(g1_rows)
print(df.loc['G1'].loc[1])       # then inner level -> one row (a Series)
print(df.loc['G1'].loc[1]['A'])  # then column -> a single value

print(df.index.names)            # index levels are unnamed at first
df.index.names = ['Groups', 'Numbers']
print(df.index.names)
print(df)

print("CROSS SECTION")
print(df.xs)                       # the bound method itself, not a result
print(df.xs('G1'))                 # all rows of outer group G1
print(df.xs(1, level='Numbers'))   # rows numbered 1 across every group

OUTPUT

None
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001
D  0.188695    ...     0.955057
E  0.190794    ...     0.683509

[5 rows x 4 columns]
<class 'pandas.core.frame.DataFrame'>
A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64
<class 'pandas.core.series.Series'>
A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64
          W         Z
A  2.706850  0.503826
B  0.651118  0.605965
C -2.018168 -0.589001
D  0.188695  0.955057
E  0.190794  0.683509
ADD NEW COLUMN
A    3.210676
B    1.257083
C   -2.607169
D    1.143752
E    0.874303
Name: V, dtype: float64
          W    ...            V
A  2.706850    ...     3.210676
B  0.651118    ...     1.257083
C -2.018168    ...    -2.607169
D  0.188695    ...     1.143752
E  0.190794    ...     0.874303

[5 rows x 5 columns]
DROP COLUMN
          W    ...            V
A  2.706850    ...     3.210676
B  0.651118    ...     1.257083
C -2.018168    ...    -2.607169
D  0.188695    ...     1.143752
E  0.190794    ...     0.874303

[5 rows x 5 columns]
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001
D  0.188695    ...     0.955057
E  0.190794    ...     0.683509

[5 rows x 4 columns]
DROP ROW WITH INDEX
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001
D  0.188695    ...     0.955057
E  0.190794    ...     0.683509

[5 rows x 4 columns]
DROP LIST OF ROWS
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001

[3 rows x 4 columns]
FIND SHAPE OF DATA FRAME
(3, 4)
GET ROW BY INDEX NAME
W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64
GET ROW BY INDEX ID
W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64
GET CELL
-2.018168244037392
ADD NEW ROW
W    3.357968
X    0.308815
Y    0.059892
Z    1.109791
Name: D, dtype: float64
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001
D  3.357968    ...     1.109791

[4 rows x 4 columns]
W    0.1
X    0.2
Y    0.3
Z    0.4
Name: E, dtype: float64
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001
D  3.357968    ...     1.109791
E  0.100000    ...     0.400000

[5 rows x 4 columns]
GET MULTIPLE ROWS
         W         Y
A  2.70685  0.907969
E  0.10000  0.300000
       W  ...        Z
A   True  ...     True
B   True  ...     True
C  False  ...    False
D   True  ...     True
E   True  ...     True

[5 rows x 4 columns]
COMPARE AND RETURN BOOLEAN
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C       NaN    ...          NaN
D  3.357968    ...     1.109791
E  0.100000    ...     0.400000

[5 rows x 4 columns]
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C       NaN    ...          NaN
D  3.357968    ...     1.109791
E  0.100000    ...     0.400000

[5 rows x 4 columns]
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
D  3.357968    ...     1.109791
E  0.100000    ...     0.400000

[4 rows x 4 columns]
          W    ...            Z
C -2.018168    ...    -0.589001

[1 rows x 4 columns]
COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
D  3.357968    ...     1.109791
E  0.100000    ...     0.400000

[4 rows x 4 columns]
A    0.628133
B   -0.319318
D    0.308815
E    0.200000
Name: X, dtype: float64
A    0.628133
B   -0.319318
D    0.308815
E    0.200000
Name: X, dtype: float64
COMPARE AND RETURN BOOLEAN OF SPECFIC COLUMNS AND GET MULTIPLE COLUMNS
          X         Z
A  0.628133  0.503826
B -0.319318  0.605965
D  0.308815  1.109791
E  0.200000  0.400000
OR - LONG METHOD
          X         Z
A  0.628133  0.503826
B -0.319318  0.605965
D  0.308815  1.109791
E  0.200000  0.400000
APPLY TWO CONDITIONS USING &
       W  ...        Z
A  False  ...     True
B   True  ...     True
C  False  ...    False
D  False  ...    False
E   True  ...     True

[5 rows x 4 columns]
APPLY TWO CONDITIONS USING |
      W  ...      Z
A  True  ...   True
B  True  ...   True
C  True  ...   True
D  True  ...   True
E  True  ...   True

[5 rows x 4 columns]
RESET INDEX
  index    ...            Z
0     A    ...     0.503826
1     B    ...     0.605965
2     C    ...    -0.589001
3     D    ...     1.109791
4     E    ...     0.400000

[5 rows x 5 columns]
          W    ...            Z
A  2.706850    ...     0.503826
B  0.651118    ...     0.605965
C -2.018168    ...    -0.589001
D  3.357968    ...     1.109791
E  0.100000    ...     0.400000

[5 rows x 4 columns]
RESET INDEX INPLACE
  index    ...            Z
0     A    ...     0.503826
1     B    ...     0.605965
2     C    ...    -0.589001
3     D    ...     1.109791
4     E    ...     0.400000

[5 rows x 5 columns]
SET INDEX
  index   ...    States
0     A   ...        MH
1     B   ...        GJ
2     C   ...        RJ
3     D   ...        DL
4     E   ...        HR

[5 rows x 6 columns]
       index    ...            Z
States          ...
MH         A    ...     0.503826
GJ         B    ...     0.605965
RJ         C    ...    -0.589001
DL         D    ...     1.109791
HR         E    ...     0.400000

[5 rows x 5 columns]
  index   ...    States
0     A   ...        MH
1     B   ...        GJ
2     C   ...        RJ
3     D   ...        DL
4     E   ...        HR

[5 rows x 6 columns]
SET INDEX INPLACE
       index    ...            Z
States          ...
MH         A    ...     0.503826
GJ         B    ...     0.605965
RJ         C    ...    -0.589001
DL         D    ...     1.109791
HR         E    ...     0.400000

[5 rows x 5 columns]
<zip object at 0x7f6732cdec88>
[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]
MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
Multi-level Data Frame
             A         B
G1 1  0.302665  1.693723
   2 -1.706086 -1.159119
   3 -0.134841  0.390528
G2 1  0.166905  0.184502
   2  0.807706  0.072960
   3  0.638787  0.329646
GET DATA
          A         B
1  0.302665  1.693723
2 -1.706086 -1.159119
3 -0.134841  0.390528
A    0.302665
B    1.693723
Name: 1, dtype: float64
0.3026654485851825
[None, None]
['Groups', 'Numbers']
                       A         B
Groups Numbers
G1     1        0.302665  1.693723
       2       -1.706086 -1.159119
       3       -0.134841  0.390528
G2     1        0.166905  0.184502
       2        0.807706  0.072960
       3        0.638787  0.329646
CROSS SECTION
<bound method NDFrame.xs of                        A         B
Groups Numbers
G1     1        0.302665  1.693723
       2       -1.706086 -1.159119
       3       -0.134841  0.390528
G2     1        0.166905  0.184502
       2        0.807706  0.072960
       3        0.638787  0.329646>
                A         B
Numbers
1        0.302665  1.693723
2       -1.706086 -1.159119
3       -0.134841  0.390528
               A         B
Groups
G1      0.302665  1.693723
G2      0.166905  0.184502

Comments

Popular posts from this blog

Python - List - Append, Count, Extend, Index, Insert, Pop, Remove, Reverse, Sort

🐍 Advanced List. The list is widely used and its functionalities are very useful. Append Adds one element at the end of the list. Syntax list1.append(value) Input l1 = [1, 2, 3] l1.append(4) l1 Output [1, 2, 3, 4] append can be used to add any datatype to a list. It can even add a list inside a list. Caution: Append does not return anything. It just appends to the list. Count .count(value) counts the number of occurrences of an element in the list. Syntax list1.count(value) Input l1 = [1, 2, 3, 4, 3] l1.count(3) Output 2 It returns 0 if the value is not found in the list. Extend .extend(list) adds each element of the given list to the end of the list. Syntax list1.extend(list) Input l1 = [1, 2, 3] l1.extend([4, 5]) Output [1, 2, 3, 4, 5] If we use append, the entire list will be added to the first list as a single element. Extend, instead of considering a list as one element, joins the two lists one after the other. Append works in the following way. Input l1 = [1, 2, 3] l1.append([4, 5]) Output...

Difference between .exec() and .execPopulate() in Mongoose?

Here I answer: what is the difference between .exec() and .execPopulate() in Mongoose? .exec() is used with a query while .execPopulate() is used with a document. Syntax for .exec() is as follows: Model.query() . populate ( 'field' ) . exec () // returns promise . then ( function ( document ) { console . log ( document ); }); Syntax for .execPopulate() is as follows: fetchedDocument . populate ( 'field' ) . execPopulate () // returns promise . then ( function ( document ) { console . log ( document ); }); When working with an individual document use .execPopulate(); for a model query use .exec(). Both return a promise. One can do without .exec() or .execPopulate(), but then one has to pass a callback to populate.

Python Class to Calculate Distance and Slope of a Line with Coordinates as Input

🐍  Can be run on Jupyter Notebook #CLASS DESIGNED TO CREATE OBJECTS THAT TAKES COORDINATES AND CALCULATES DISTANCE AND SLOPE class Line:     def __init__(self,coor1,coor2):         self.coor1=coor1         self.coor2=coor2 #FUNCTION CALCULATES DISTANCE     def distance(self):         return ((self.coor2[0]-self.coor1[0])**2+(self.coor2[1]-self.coor1[1])**2)**0.5 #FUNCTION CALCULATES SLOPE         def slope(self):         return (self.coor2[1]-self.coor1[1])/(self.coor2[0]-self.coor1[0]) #DEFINING COORDINATES coordinate1 = (3,2) coordinate2 = (8,10) #CREATING OBJECT OF LINE CLASS li = Line(coordinate1,coordinate2) #CALLING DISTANCE FUNCTION li.distance() #CALLING SLOPE FUNCTION li.slope()