Here is how to concatenate, merge, or join in Pandas
Run the python code here: https://repl.it/@VinitKhandelwal/pandas-concat-merge-join
A B C D
0 0.916421 0.035595 0.293305 -0.402285
1 -0.835531 -1.602794 0.850362 -0.020083
2 1.170033 0.764249 -1.781987 0.577272
3 -0.649998 -0.059491 0.506015 1.386711
A B C D
4 0.027545 0.418065 0.023933 0.584219
5 1.241963 -1.198550 -1.076841 -1.907718
6 0.863230 0.820684 0.011180 -1.386358
7 0.356003 -0.248571 -0.197533 0.793309
A B C D
8 -0.660149 1.953251 -0.073291 0.836487
9 -0.203366 0.698548 -0.586643 -0.752695
10 0.516609 -0.326910 0.611201 -0.459187
11 -0.192555 1.355531 -0.268725 1.181282
A B C D
0 0.916421 0.035595 0.293305 -0.402285
1 -0.835531 -1.602794 0.850362 -0.020083
2 1.170033 0.764249 -1.781987 0.577272
3 -0.649998 -0.059491 0.506015 1.386711
4 0.027545 0.418065 0.023933 0.584219
5 1.241963 -1.198550 -1.076841 -1.907718
6 0.863230 0.820684 0.011180 -1.386358
7 0.356003 -0.248571 -0.197533 0.793309
8 -0.660149 1.953251 -0.073291 0.836487
9 -0.203366 0.698548 -0.586643 -0.752695
10 0.516609 -0.326910 0.611201 -0.459187
11 -0.192555 1.355531 -0.268725 1.181282
A B C D
0 0.983828 1.203951 0.125771 1.475740
1 -0.729233 1.444557 -1.518726 -1.060839
2 -1.537408 0.876840 -0.422955 1.548782
3 0.691144 -0.335625 -1.222966 -0.066613
E F G H
0 -0.018694 0.239440 0.418294 -0.315046
1 -0.081236 0.506776 -1.272656 -0.425861
2 0.375903 0.005405 -0.134107 -0.090822
3 0.454768 1.042870 0.682735 -0.241464
A B C ... J K L
0 0.983828 1.203951 0.125771 ... -1.402984 -0.247747 0.079447
1 -0.729233 1.444557 -1.518726 ... 1.171449 -1.739336 1.420380
2 -1.537408 0.876840 -0.422955 ... -1.773472 1.165093 -1.308042
3 0.691144 -0.335625 -1.222966 ... 2.433909 -0.755073 -1.218695
[4 rows x 12 columns]
MERGE
A B
0 1.842076 1.069991
1 -1.195149 -0.248629
2 -0.283477 -0.770632
3 -0.289297 0.396434
C D
0 1.738321 -0.397815
1 1.990671 0.324250
2 -0.226425 0.451265
3 -1.031861 -0.509392
A B KEY
0 1.842076 1.069991 K0
1 -1.195149 -0.248629 K1
2 -0.283477 -0.770632 K2
3 -0.289297 0.396434 K3
C D KEY
0 1.738321 -0.397815 K0
1 1.990671 0.324250 K1
2 -0.226425 0.451265 K2
3 -1.031861 -0.509392 K3
A B KEY C D
0 1.842076 1.069991 K0 1.738321 -0.397815
1 -1.195149 -0.248629 K1 1.990671 0.324250
2 -0.283477 -0.770632 K2 -0.226425 0.451265
3 -0.289297 0.396434 K3 -1.031861 -0.509392
TWO KEY INNER MERGE
A B KEY KEY1
0 1.842076 1.069991 K0 K0
1 -1.195149 -0.248629 K0 K1
2 -0.283477 -0.770632 K1 K0
3 -0.289297 0.396434 K2 K1
C D KEY KEY1
0 1.738321 -0.397815 K0 K0
1 1.990671 0.324250 K1 K0
2 -0.226425 0.451265 K1 K0
3 -1.031861 -0.509392 K2 K0
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -0.283477 -0.770632 K1 K0 1.990671 0.324250
2 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
TWO KEY OUTER MERGE
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -1.195149 -0.248629 K0 K1 NaN NaN
2 -0.283477 -0.770632 K1 K0 1.990671 0.324250
3 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
4 -0.289297 0.396434 K2 K1 NaN NaN
5 NaN NaN K2 K0 -1.031861 -0.509392
TWO KEY RIGHT MERGE
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -0.283477 -0.770632 K1 K0 1.990671 0.324250
2 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
3 NaN NaN K2 K0 -1.031861 -0.509392
TWO KEY LEFT MERGE
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -1.195149 -0.248629 K0 K1 NaN NaN
2 -0.283477 -0.770632 K1 K0 1.990671 0.324250
3 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
4 -0.289297 0.396434 K2 K1 NaN NaN
JOINING
A B
K0 0.204361 -3.018034
K1 0.145593 -0.052479
K2 -1.994043 -1.221300
C D
K0 0.186839 -0.937334
K2 1.017004 -0.094436
K3 1.506828 -0.676077
A B C D
K0 0.204361 -3.018034 0.186839 -0.937334
K1 0.145593 -0.052479 NaN NaN
K2 -1.994043 -1.221300 1.017004 -0.094436
C D A B
K0 0.186839 -0.937334 0.204361 -3.018034
K2 1.017004 -0.094436 -1.994043 -1.221300
K3 1.506828 -0.676077 NaN NaN
Run the python code here: https://repl.it/@VinitKhandelwal/pandas-concat-merge-join
import numpy as np
import pandas as pd
print("CONCAT")
df1 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['A','B','C','D'])
print(df1)
df2 = pd.DataFrame(np.random.randn(4,4), [4,5,6,7], ['A','B','C','D'])
print(df2)
df3 = pd.DataFrame(np.random.randn(4,4), [8,9,10,11], ['A','B','C','D'])
print(df3)
print(pd.concat([df1,df2,df3]))
df1 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['A','B','C','D'])
print(df1)
df2 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['E','F','G','H'])
print(df2)
df3 = pd.DataFrame(np.random.randn(4,4), [0,1,2,3], ['I','J','K','L'])
print(pd.concat([df1,df2,df3], axis=1))
print("MERGE IS ON COLUMN AND NOT ON INDEX")
df1 = pd.DataFrame(np.random.randn(4,2), [0,1,2,3], ['A','B'])
print(df1)
df2 = pd.DataFrame(np.random.randn(4,2), [0,1,2,3], ['C','D'])
print(df2)
df1['KEY'] = ['K0','K1','K2','K3']
print(df1)
df2['KEY'] = ['K0','K1','K2','K3']
print(df2)
print(pd.merge(df1,df2,how='inner',on='KEY'))
print("TWO KEY INNER MERGE")
df1['KEY'] = ['K0','K0','K1','K2']
df1['KEY1'] = ['K0','K1','K0','K1']
print(df1)
df2['KEY'] = ['K0','K1','K1','K2']
df2['KEY1'] = ['K0','K0','K0','K0']
print(df2)
print(pd.merge(df1,df2,how='inner',on=['KEY', 'KEY1']))
print("TWO KEY OUTER MERGE")
print(pd.merge(df1,df2,how='outer',on=['KEY', 'KEY1']))
print("TWO KEY RIGHT MERGE")
print(pd.merge(df1,df2,how='right',on=['KEY', 'KEY1']))
print("TWO KEY LEFT MERGE")
print(pd.merge(df1,df2,how='left',on=['KEY', 'KEY1']))
print("JOIN IS ON INDEX AND NOR ON COLUMN")
df1 = pd.DataFrame(np.random.randn(3,2), ['K0','K1','K2'], ['A','B'])
print(df1)
df2 = pd.DataFrame(np.random.randn(3,2), ['K0','K2','K3'], ['C','D'])
print(df2)
print(df1.join(df2))
print(df2.join(df1))
OUTPUT
CONCATA B C D
0 0.916421 0.035595 0.293305 -0.402285
1 -0.835531 -1.602794 0.850362 -0.020083
2 1.170033 0.764249 -1.781987 0.577272
3 -0.649998 -0.059491 0.506015 1.386711
A B C D
4 0.027545 0.418065 0.023933 0.584219
5 1.241963 -1.198550 -1.076841 -1.907718
6 0.863230 0.820684 0.011180 -1.386358
7 0.356003 -0.248571 -0.197533 0.793309
A B C D
8 -0.660149 1.953251 -0.073291 0.836487
9 -0.203366 0.698548 -0.586643 -0.752695
10 0.516609 -0.326910 0.611201 -0.459187
11 -0.192555 1.355531 -0.268725 1.181282
A B C D
0 0.916421 0.035595 0.293305 -0.402285
1 -0.835531 -1.602794 0.850362 -0.020083
2 1.170033 0.764249 -1.781987 0.577272
3 -0.649998 -0.059491 0.506015 1.386711
4 0.027545 0.418065 0.023933 0.584219
5 1.241963 -1.198550 -1.076841 -1.907718
6 0.863230 0.820684 0.011180 -1.386358
7 0.356003 -0.248571 -0.197533 0.793309
8 -0.660149 1.953251 -0.073291 0.836487
9 -0.203366 0.698548 -0.586643 -0.752695
10 0.516609 -0.326910 0.611201 -0.459187
11 -0.192555 1.355531 -0.268725 1.181282
A B C D
0 0.983828 1.203951 0.125771 1.475740
1 -0.729233 1.444557 -1.518726 -1.060839
2 -1.537408 0.876840 -0.422955 1.548782
3 0.691144 -0.335625 -1.222966 -0.066613
E F G H
0 -0.018694 0.239440 0.418294 -0.315046
1 -0.081236 0.506776 -1.272656 -0.425861
2 0.375903 0.005405 -0.134107 -0.090822
3 0.454768 1.042870 0.682735 -0.241464
A B C ... J K L
0 0.983828 1.203951 0.125771 ... -1.402984 -0.247747 0.079447
1 -0.729233 1.444557 -1.518726 ... 1.171449 -1.739336 1.420380
2 -1.537408 0.876840 -0.422955 ... -1.773472 1.165093 -1.308042
3 0.691144 -0.335625 -1.222966 ... 2.433909 -0.755073 -1.218695
[4 rows x 12 columns]
MERGE
A B
0 1.842076 1.069991
1 -1.195149 -0.248629
2 -0.283477 -0.770632
3 -0.289297 0.396434
C D
0 1.738321 -0.397815
1 1.990671 0.324250
2 -0.226425 0.451265
3 -1.031861 -0.509392
A B KEY
0 1.842076 1.069991 K0
1 -1.195149 -0.248629 K1
2 -0.283477 -0.770632 K2
3 -0.289297 0.396434 K3
C D KEY
0 1.738321 -0.397815 K0
1 1.990671 0.324250 K1
2 -0.226425 0.451265 K2
3 -1.031861 -0.509392 K3
A B KEY C D
0 1.842076 1.069991 K0 1.738321 -0.397815
1 -1.195149 -0.248629 K1 1.990671 0.324250
2 -0.283477 -0.770632 K2 -0.226425 0.451265
3 -0.289297 0.396434 K3 -1.031861 -0.509392
TWO KEY INNER MERGE
A B KEY KEY1
0 1.842076 1.069991 K0 K0
1 -1.195149 -0.248629 K0 K1
2 -0.283477 -0.770632 K1 K0
3 -0.289297 0.396434 K2 K1
C D KEY KEY1
0 1.738321 -0.397815 K0 K0
1 1.990671 0.324250 K1 K0
2 -0.226425 0.451265 K1 K0
3 -1.031861 -0.509392 K2 K0
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -0.283477 -0.770632 K1 K0 1.990671 0.324250
2 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
TWO KEY OUTER MERGE
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -1.195149 -0.248629 K0 K1 NaN NaN
2 -0.283477 -0.770632 K1 K0 1.990671 0.324250
3 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
4 -0.289297 0.396434 K2 K1 NaN NaN
5 NaN NaN K2 K0 -1.031861 -0.509392
TWO KEY RIGHT MERGE
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -0.283477 -0.770632 K1 K0 1.990671 0.324250
2 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
3 NaN NaN K2 K0 -1.031861 -0.509392
TWO KEY LEFT MERGE
A B KEY KEY1 C D
0 1.842076 1.069991 K0 K0 1.738321 -0.397815
1 -1.195149 -0.248629 K0 K1 NaN NaN
2 -0.283477 -0.770632 K1 K0 1.990671 0.324250
3 -0.283477 -0.770632 K1 K0 -0.226425 0.451265
4 -0.289297 0.396434 K2 K1 NaN NaN
JOINING
A B
K0 0.204361 -3.018034
K1 0.145593 -0.052479
K2 -1.994043 -1.221300
C D
K0 0.186839 -0.937334
K2 1.017004 -0.094436
K3 1.506828 -0.676077
A B C D
K0 0.204361 -3.018034 0.186839 -0.937334
K1 0.145593 -0.052479 NaN NaN
K2 -1.994043 -1.221300 1.017004 -0.094436
C D A B
K0 0.186839 -0.937334 0.204361 -3.018034
K2 1.017004 -0.094436 -1.994043 -1.221300
K3 1.506828 -0.676077 NaN NaN
Comments
Post a Comment