Skip to main content

Concatenation, Merging, and Joining in Pandas - Python

Here is how to concatenate, merge, or join in Pandas
Run the python code here: https://repl.it/@VinitKhandelwal/pandas-concat-merge-join
import numpy as np
import pandas as pd

print("CONCAT")
df1 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['A','B','C','D'])
print(df1)
df2 = pd.DataFrame(np.random.randn(4,4), [4,5,6,7], ['A','B','C','D'])
print(df2)
df3 = pd.DataFrame(np.random.randn(4,4), [8,9,10,11], ['A','B','C','D'])
print(df3)
print(pd.concat([df1,df2,df3]))
df1 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['A','B','C','D'])
print(df1)
df2 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['E','F','G','H'])
print(df2)
df3 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['I','J','K','L'])
print(pd.concat([df1,df2,df3], axis=1))
print("MERGE IS ON COLUMN AND NOT ON INDEX")
df1 = pd.DataFrame(np.random.randn(4,2), [0,1,2,3], ['A','B'])
print(df1)
df2 = pd.DataFrame(np.random.randn(4,2), [0,1,2,3], ['C','D'])
print(df2)
df1['KEY'] = ['K0','K1','K2','K3']
print(df1)
df2['KEY'] = ['K0','K1','K2','K3']
print(df2)
print(pd.merge(df1,df2,how='inner',on='KEY'))
print("TWO KEY INNER MERGE")
df1['KEY'] = ['K0','K0','K1','K2']
df1['KEY1'] = ['K0','K1','K0','K1']
print(df1)
df2['KEY'] = ['K0','K1','K1','K2']
df2['KEY1'] = ['K0','K0','K0','K0']
print(df2)
print(pd.merge(df1,df2,how='inner',on=['KEY', 'KEY1']))
print("TWO KEY OUTER MERGE")
print(pd.merge(df1,df2,how='outer',on=['KEY', 'KEY1']))
print("TWO KEY RIGHT MERGE")
print(pd.merge(df1,df2,how='right',on=['KEY', 'KEY1']))
print("TWO KEY LEFT MERGE")
print(pd.merge(df1,df2,how='left',on=['KEY', 'KEY1']))
print("JOIN IS ON INDEX AND NOR ON COLUMN")
df1 = pd.DataFrame(np.random.randn(3,2), ['K0','K1','K2'], ['A','B'])
print(df1)
df2 = pd.DataFrame(np.random.randn(3,2), ['K0','K2','K3'], ['C','D'])
print(df2)
print(df1.join(df2))
print(df2.join(df1))

OUTPUT

CONCAT
          A         B         C         D
0  0.916421  0.035595  0.293305 -0.402285
1 -0.835531 -1.602794  0.850362 -0.020083
2  1.170033  0.764249 -1.781987  0.577272
3 -0.649998 -0.059491  0.506015  1.386711
          A         B         C         D
4  0.027545  0.418065  0.023933  0.584219
5  1.241963 -1.198550 -1.076841 -1.907718
6  0.863230  0.820684  0.011180 -1.386358
7  0.356003 -0.248571 -0.197533  0.793309
           A         B         C         D
8  -0.660149  1.953251 -0.073291  0.836487
9  -0.203366  0.698548 -0.586643 -0.752695
10  0.516609 -0.326910  0.611201 -0.459187
11 -0.192555  1.355531 -0.268725  1.181282
           A         B         C         D
0   0.916421  0.035595  0.293305 -0.402285
1  -0.835531 -1.602794  0.850362 -0.020083
2   1.170033  0.764249 -1.781987  0.577272
3  -0.649998 -0.059491  0.506015  1.386711
4   0.027545  0.418065  0.023933  0.584219
5   1.241963 -1.198550 -1.076841 -1.907718
6   0.863230  0.820684  0.011180 -1.386358
7   0.356003 -0.248571 -0.197533  0.793309
8  -0.660149  1.953251 -0.073291  0.836487
9  -0.203366  0.698548 -0.586643 -0.752695
10  0.516609 -0.326910  0.611201 -0.459187
11 -0.192555  1.355531 -0.268725  1.181282
          A         B         C         D
0  0.983828  1.203951  0.125771  1.475740
1 -0.729233  1.444557 -1.518726 -1.060839
2 -1.537408  0.876840 -0.422955  1.548782
3  0.691144 -0.335625 -1.222966 -0.066613
          E         F         G         H
0 -0.018694  0.239440  0.418294 -0.315046
1 -0.081236  0.506776 -1.272656 -0.425861
2  0.375903  0.005405 -0.134107 -0.090822
3  0.454768  1.042870  0.682735 -0.241464
          A         B         C    ...            J         K         L
0  0.983828  1.203951  0.125771    ...    -1.402984 -0.247747  0.079447
1 -0.729233  1.444557 -1.518726    ...     1.171449 -1.739336  1.420380
2 -1.537408  0.876840 -0.422955    ...    -1.773472  1.165093 -1.308042
3  0.691144 -0.335625 -1.222966    ...     2.433909 -0.755073 -1.218695

[4 rows x 12 columns]
MERGE
          A         B
0  1.842076  1.069991
1 -1.195149 -0.248629
2 -0.283477 -0.770632
3 -0.289297  0.396434
          C         D
0  1.738321 -0.397815
1  1.990671  0.324250
2 -0.226425  0.451265
3 -1.031861 -0.509392
          A         B KEY
0  1.842076  1.069991  K0
1 -1.195149 -0.248629  K1
2 -0.283477 -0.770632  K2
3 -0.289297  0.396434  K3
          C         D KEY
0  1.738321 -0.397815  K0
1  1.990671  0.324250  K1
2 -0.226425  0.451265  K2
3 -1.031861 -0.509392  K3
          A         B KEY         C         D
0  1.842076  1.069991  K0  1.738321 -0.397815
1 -1.195149 -0.248629  K1  1.990671  0.324250
2 -0.283477 -0.770632  K2 -0.226425  0.451265
3 -0.289297  0.396434  K3 -1.031861 -0.509392
TWO KEY INNER MERGE
          A         B KEY KEY1
0  1.842076  1.069991  K0   K0
1 -1.195149 -0.248629  K0   K1
2 -0.283477 -0.770632  K1   K0
3 -0.289297  0.396434  K2   K1
          C         D KEY KEY1
0  1.738321 -0.397815  K0   K0
1  1.990671  0.324250  K1   K0
2 -0.226425  0.451265  K1   K0
3 -1.031861 -0.509392  K2   K0
          A         B KEY KEY1         C         D
0  1.842076  1.069991  K0   K0  1.738321 -0.397815
1 -0.283477 -0.770632  K1   K0  1.990671  0.324250
2 -0.283477 -0.770632  K1   K0 -0.226425  0.451265
TWO KEY OUTER MERGE
          A         B KEY KEY1         C         D
0  1.842076  1.069991  K0   K0  1.738321 -0.397815
1 -1.195149 -0.248629  K0   K1       NaN       NaN
2 -0.283477 -0.770632  K1   K0  1.990671  0.324250
3 -0.283477 -0.770632  K1   K0 -0.226425  0.451265
4 -0.289297  0.396434  K2   K1       NaN       NaN
5       NaN       NaN  K2   K0 -1.031861 -0.509392
TWO KEY RIGHT MERGE
          A         B KEY KEY1         C         D
0  1.842076  1.069991  K0   K0  1.738321 -0.397815
1 -0.283477 -0.770632  K1   K0  1.990671  0.324250
2 -0.283477 -0.770632  K1   K0 -0.226425  0.451265
3       NaN       NaN  K2   K0 -1.031861 -0.509392
TWO KEY LEFT MERGE
          A         B KEY KEY1         C         D
0  1.842076  1.069991  K0   K0  1.738321 -0.397815
1 -1.195149 -0.248629  K0   K1       NaN       NaN
2 -0.283477 -0.770632  K1   K0  1.990671  0.324250
3 -0.283477 -0.770632  K1   K0 -0.226425  0.451265
4 -0.289297  0.396434  K2   K1       NaN       NaN
JOINING
           A         B
K0  0.204361 -3.018034
K1  0.145593 -0.052479
K2 -1.994043 -1.221300
           C         D
K0  0.186839 -0.937334
K2  1.017004 -0.094436
K3  1.506828 -0.676077
           A         B         C         D
K0  0.204361 -3.018034  0.186839 -0.937334
K1  0.145593 -0.052479       NaN       NaN
K2 -1.994043 -1.221300  1.017004 -0.094436
           C         D         A         B
K0  0.186839 -0.937334  0.204361 -3.018034
K2  1.017004 -0.094436 -1.994043 -1.221300
K3  1.506828 -0.676077       NaN       NaN

Comments

Popular posts from this blog

Python - List - Append, Count, Extend, Index, Insert, Pop, Remove, Reverse, Sort

🐍 Advance List List is widely used and it's functionalities are heavily useful. Append Adds one element at the end of the list. Syntax list1.append(value) Input l1 = [1, 2, 3] l1.append(4) l1 Output [1, 2, 3, 4] append can be used to add any datatype in a list. It can even add list inside list. Caution: Append does not return anything. It just appends the list. Count .count(value) counts the number of occurrences of an element in the list. Syntax list1.count(value) Input l1 = [1, 2, 3, 4, 3] l1.count(3) Output 2 It returns 0 if the value is not found in the list. Extend .count(value) counts the number of occurrences of an element in the list. Syntax list1.extend(list) Input l1 = [1, 2, 3] l1.extend([4, 5]) Output [1, 2, 3, 4, 5] If we use append, entire list will be added to the first list like one element. Extend, i nstead of considering a list as one element, it joins the two lists one after other. Append works in the following way. Input l1 = [1, 2, 3] l1.append([4, 5]) Output...

Difference between .exec() and .execPopulate() in Mongoose?

Here I answer what is the difference between .exec() and .execPopulate() in Mongoose? .exec() is used with a query while .execPopulate() is used with a document Syntax for .exec() is as follows: Model.query() . populate ( 'field' ) . exec () // returns promise . then ( function ( document ) { console . log ( document ); }); Syntax for .execPopulate() is as follows: fetchedDocument . populate ( 'field' ) . execPopulate () // returns promise . then ( function ( document ) { console . log ( document ); }); When working with individual document use .execPopulate(), for model query use .exec(). Both returns a promise. One can do without .exec() or .execPopulate() but then has to pass a callback in populate.

Python Class to Calculate Distance and Slope of a Line with Coordinates as Input

🐍  Can be run on Jupyter Notebook #CLASS DESIGNED TO CREATE OBJECTS THAT TAKES COORDINATES AND CALCULATES DISTANCE AND SLOPE class Line:     def __init__(self,coor1,coor2):         self.coor1=coor1         self.coor2=coor2 #FUNCTION CALCULATES DISTANCE     def distance(self):         return ((self.coor2[0]-self.coor1[0])**2+(self.coor2[1]-self.coor1[1])**2)**0.5 #FUNCTION CALCULATES SLOPE         def slope(self):         return (self.coor2[1]-self.coor1[1])/(self.coor2[0]-self.coor1[0]) #DEFINING COORDINATES coordinate1 = (3,2) coordinate2 = (8,10) #CREATING OBJECT OF LINE CLASS li = Line(coordinate1,coordinate2) #CALLING DISTANCE FUNCTION li.distance() #CALLING SLOPE FUNCTION li.slope()