Merging iTunes Playlists Using Pandas
I recently wanted to see how many songs were common between 2 of my iTunes playlists.
Here I'll take a look at some of the basic questions you can answer with this data. Later I hope to find the time to dig deeper and ask some more interesting and creative questions – stay tuned!
For those who aren't familiar, this post is composed as a Jupyter Notebook, an open document format that combines text, code, data, and graphics and is viewable through the web browser. If you have not used it before, I encourage you to try it out!
You can download the notebook containing this post here, open it with Jupyter, and start asking your own questions of the data.
Set up notebook output options
Set notebook output formatting options:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
InteractiveShell.log_level = 'INFO'
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
Import standard libraries
Some standard Python package imports:
%matplotlib inline
import pandas as pd
import numpy as np
import logging
import sys
import codecs
from collections import OrderedDict
log = logging.getLogger()
log.handlers = []
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
log.setLevel(logging.INFO)
# log.setLevel(logging.DEBUG)
log.addHandler(ch)
pd.set_option('display.width',1000)
# pd.set_option('max_colwidth',200)
pd.reset_option('max_colwidth')
all_files = ['data/Top80_sPop&Rock.txt','data/TopAlt&Electronic.txt']
list_of_playlists = []
for file_ in all_files:
    doc = codecs.open(file_, 'rU', 'UTF-16')
    df = pd.read_csv(doc, index_col=None, header=0, sep='\t')
    log.info("## %s: df.head()=" % file_)
    df.head()
    log.info("## %s: df.shape=%s" % (file_, str(df.shape)))
    list_of_playlists.append(df)
2018-04-24 15:30:30,966 - INFO - ## data/Top80_sPop&Rock.txt: df.head()=
Name | Artist | Composer | Album | Grouping | Work | Movement Number | Movement Count | Movement Name | Genre | ... | Volume Adjustment | Kind | Equalizer | Comments | Plays | Last Played | Skips | Last Skipped | My Rating | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Just The Way You Are | Billy Joel | Billy Joel | Greatest Hits Vol.I | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | NaN | 154.0 | 4/7/2018 9:49 AM | 2.0 | 12/27/2017 10:16 AM | 100 | C:\Users\Lee\itunes\Music\Billy Joel\Greatest ... |
1 | She's Got A Way | Billy Joel | Billy Joel | Greatest Hits Vol.II | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | NaN | 144.0 | 3/25/2018 2:02 PM | 3.0 | 1/20/2018 8:18 AM | 100 | C:\Users\Lee\itunes\Music\Billy Joel\Greatest ... |
2 | Thunderstruck | Brian Johnson | NaN | The Very Best of AC/DC | NaN | NaN | NaN | NaN | NaN | Hard Rock, 80s | ... | NaN | MPEG audio file | NaN | NaN | 96.0 | 2/7/2018 11:01 AM | 7.0 | 1/8/2018 6:35 PM | 100 | C:\Users\Lee\itunes\Music\Compilations\The Ver... |
3 | Don't Stop Believin' | Journey | Jonathan Cain/Neal Schon/Steve Perry | Greatest Hits (Limited Gold Edition) | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | Encoded from Original CD album. Lame v3.98.2 ... | 86.0 | 2/9/2018 7:07 AM | 6.0 | 1/6/2018 12:45 PM | 100 | C:\Users\Lee\itunes\Music\Journey\Greatest Hit... |
4 | Fade To Grey | Visage | NaN | New Wave Hits Of The '80s, Vol. 3 | NaN | NaN | NaN | NaN | NaN | Rock, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 62.0 | 4/9/2018 7:23 PM | NaN | NaN | 100 | C:\Users\Lee\itunes\Music\Visage\New Wave Hits... |
5 rows × 31 columns
2018-04-24 15:30:31,000 - INFO - ## data/Top80_sPop&Rock.txt: df.shape=(678, 31)
2018-04-24 15:30:31,012 - INFO - ## data/TopAlt&Electronic.txt: df.head()=
Name | Artist | Composer | Album | Grouping | Work | Movement Number | Movement Count | Movement Name | Genre | ... | Volume Adjustment | Kind | Equalizer | Comments | Plays | Last Played | Skips | Last Skipped | My Rating | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | The Walker | Fitz & The Tantrums | NaN | More Than Just a Dream (Deluxe Version) | NaN | NaN | NaN | NaN | NaN | Alternative | ... | NaN | MPEG audio file | NaN | NaN | 176.0 | 12/5/2017 10:46 AM | 10.0 | 1/8/2018 7:45 PM | 100 | C:\Users\Lee\itunes\Music\Fitz & The Tantrums\... |
1 | Jack The Ripper | Morrissey | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Alternative, Punk | ... | NaN | MPEG audio file | NaN | NaN | 436.0 | 3/12/2018 7:59 PM | 13.0 | 9/28/2017 6:59 PM | 100 | C:\Users\Lee\itunes\Music\Morrissey\Unknown Al... |
2 | Heathens | Twenty One Pilots | NaN | Suicide Squad: The Album | NaN | NaN | NaN | NaN | NaN | Indie Rock, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 176.0 | 11/21/2017 2:29 PM | NaN | NaN | 100 | C:\Users\Lee\itunes\Music\Twenty One Pilots\Su... |
3 | Fade To Grey | Visage | NaN | New Wave Hits Of The '80s, Vol. 3 | NaN | NaN | NaN | NaN | NaN | Rock, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 62.0 | 4/9/2018 7:23 PM | NaN | NaN | 100 | C:\Users\Lee\itunes\Music\Visage\New Wave Hits... |
4 | Take A Chance On Me | ABBA | NaN | The Essential Collection (CD 1/2) | NaN | NaN | NaN | NaN | NaN | Disco/Dance/Electronica/Pop | ... | NaN | MPEG audio file | NaN | NaN | 96.0 | 11/21/2017 2:37 PM | 23.0 | 10/30/2017 6:33 PM | 80 | C:\Users\Lee\itunes\Music\ABBA\The Essential C... |
5 rows × 31 columns
2018-04-24 15:30:31,048 - INFO - ## data/TopAlt&Electronic.txt: df.shape=(186, 31)
Vertically Stack the Playlist Data
For our first example, I will take the playlist dataframes already loaded into list_of_playlists and stack them vertically.
When we concatenate DataFrames, we need to specify the axis. axis=0 tells pandas to stack the second DataFrame under the first one. pandas aligns on column names, so to stack the data vertically we need the same columns, with matching formats, in both datasets.
# Stack the DataFrames on top of each other
vertical_stack = pd.concat(list_of_playlists, axis=0)
log.info("## vertical_stack.head()=")
vertical_stack.head()
log.info("vertical_stack.shape=%s" % str(vertical_stack.shape))
2018-04-24 15:30:36,223 - INFO - ## vertical_stack.head()=
Name | Artist | Composer | Album | Grouping | Work | Movement Number | Movement Count | Movement Name | Genre | ... | Volume Adjustment | Kind | Equalizer | Comments | Plays | Last Played | Skips | Last Skipped | My Rating | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Just The Way You Are | Billy Joel | Billy Joel | Greatest Hits Vol.I | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | NaN | 154.0 | 4/7/2018 9:49 AM | 2.0 | 12/27/2017 10:16 AM | 100 | C:\Users\Lee\itunes\Music\Billy Joel\Greatest ... |
1 | She's Got A Way | Billy Joel | Billy Joel | Greatest Hits Vol.II | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | NaN | 144.0 | 3/25/2018 2:02 PM | 3.0 | 1/20/2018 8:18 AM | 100 | C:\Users\Lee\itunes\Music\Billy Joel\Greatest ... |
2 | Thunderstruck | Brian Johnson | NaN | The Very Best of AC/DC | NaN | NaN | NaN | NaN | NaN | Hard Rock, 80s | ... | NaN | MPEG audio file | NaN | NaN | 96.0 | 2/7/2018 11:01 AM | 7.0 | 1/8/2018 6:35 PM | 100 | C:\Users\Lee\itunes\Music\Compilations\The Ver... |
3 | Don't Stop Believin' | Journey | Jonathan Cain/Neal Schon/Steve Perry | Greatest Hits (Limited Gold Edition) | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | Encoded from Original CD album. Lame v3.98.2 ... | 86.0 | 2/9/2018 7:07 AM | 6.0 | 1/6/2018 12:45 PM | 100 | C:\Users\Lee\itunes\Music\Journey\Greatest Hit... |
4 | Fade To Grey | Visage | NaN | New Wave Hits Of The '80s, Vol. 3 | NaN | NaN | NaN | NaN | NaN | Rock, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 62.0 | 4/9/2018 7:23 PM | NaN | NaN | 100 | C:\Users\Lee\itunes\Music\Visage\New Wave Hits... |
5 rows × 31 columns
2018-04-24 15:30:36,266 - INFO - vertical_stack.shape=(864, 31)
Merge into one dataframe based on composite keys
We can also merge the list of dataframes into one dataframe based on a common unique key, or multiple keys.
Note that I use the term "composite key" when there are multiple keys used to identify a record.
df_playlist1=list_of_playlists[0]
df_playlist2=list_of_playlists[1]
merged_df = pd.merge(df_playlist1, df_playlist2, on=['Artist','Name'])
log.info("## merged_df.head()=")
merged_df.head()
log.info("merged_df.shape=%s" % str(merged_df.shape))
2018-04-24 15:30:39,713 - INFO - ## merged_df.head()=
Name | Artist | Composer_x | Album_x | Grouping_x | Work_x | Movement Number_x | Movement Count_x | Movement Name_x | Genre_x | ... | Volume Adjustment_y | Kind_y | Equalizer_y | Comments_y | Plays_y | Last Played_y | Skips_y | Last Skipped_y | My Rating_y | Location_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Fade To Grey | Visage | NaN | New Wave Hits Of The '80s, Vol. 3 | NaN | NaN | NaN | NaN | NaN | Rock, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 62.0 | 4/9/2018 7:23 PM | NaN | NaN | 100 | C:\Users\Lee\itunes\Music\Visage\New Wave Hits... |
1 | I Could Be Happy | Altered Images | NaN | New Wave Hits Of The '80s, Vol. 04 | NaN | NaN | NaN | NaN | NaN | Pop, New Wave, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 66.0 | 4/11/2018 5:44 PM | 1.0 | 1/6/2018 8:21 PM | 80 | C:\Users\Lee\itunes\Music\Altered Images\New W... |
2 | Dance This Mess Around | The B-52's | Cindy Wilson/Fred Schneider/Kate Pierson/Keith... | B-52's | NaN | NaN | NaN | NaN | NaN | Alternative, Punk, 80s | ... | NaN | MPEG audio file | NaN | Black1000 | 151.0 | 4/10/2018 3:09 PM | 6.0 | 3/7/2018 11:58 AM | 80 | C:\Users\Lee\itunes\Music\The B-52's\B-52's\03... |
3 | Fight For Your Right | Beastie Boys | NaN | The Best 80s Songs | NaN | . | NaN | NaN | NaN | 80s, Pop, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 53.0 | 4/9/2018 8:18 PM | 1.0 | 1/2/2018 1:37 AM | 80 | C:\Users\Lee\itunes\Music\Compilations\The Bes... |
4 | Just What I Needed | The Cars | Ric Ocasek | Complete Greatest Hits | NaN | NaN | NaN | NaN | NaN | Pop, Electronic, 80s | ... | NaN | MPEG audio file | NaN | NaN | 112.0 | 1/8/2018 12:44 PM | 4.0 | 1/6/2018 10:59 PM | 80 | C:\Users\Lee\itunes\Music\The Cars\Complete Gr... |
5 rows × 60 columns
2018-04-24 15:30:39,750 - INFO - merged_df.shape=(72, 60)
By default, the Pandas merge operation performs an "inner" merge. An inner merge (or inner join) keeps only the key values that appear in both the left and right dataframes.
Note that the number of columns has roughly doubled: each dataframe contributes its own copy of every column except the two shared keys, so 2 × (31 − 2) + 2 = 60.
The merged dataframe preserves the original data columns from each of the datasets.
The merged result has a suffix appended to each overlapping set of columns, "_x" and "_y" in the example above.
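Those suffixes are configurable through merge's suffixes parameter. A minimal sketch on two made-up rows (the play counts are hypothetical):

```python
import pandas as pd

# Two toy playlist rows for the same track (values are made up).
left = pd.DataFrame({'Artist': ['Visage'], 'Name': ['Fade To Grey'], 'Plays': [62]})
right = pd.DataFrame({'Artist': ['Visage'], 'Name': ['Fade To Grey'], 'Plays': [70]})

# Overlapping non-key columns get suffixes; the defaults are '_x' and '_y'.
merged = pd.merge(left, right, on=['Artist', 'Name'], suffixes=('_p1', '_p2'))
print(list(merged.columns))  # ['Artist', 'Name', 'Plays_p1', 'Plays_p2']
```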
Inner, left, right, and outer merge types
There are four different types of merges available in Pandas. These merge types are common across most databases and data-oriented languages (SQL, R, SAS) and are typically referred to as "joins". If you don't know them, learn them now.
- Inner merge / inner join – the default Pandas behaviour: keep only rows where the "on" value exists in both the left and right dataframes.
- Left merge / left outer join – keep every row in the left dataframe; where the "on" value is missing from the right dataframe, fill the result with NaN.
- Right merge / right outer join – keep every row in the right dataframe; where the "on" value is missing from the left dataframe, fill the result with NaN.
- Outer merge / full outer join – return every row from both the left and right dataframes, matched up where possible, with NaNs elsewhere.
The merge type to use is specified using the “how” parameter in the merge command, taking values “left”, “right”, “inner” (default), or “outer”.
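To make the four join types concrete, here is a minimal sketch on two toy dataframes (the keys and values are made up):

```python
import pandas as pd

left = pd.DataFrame({'key': ['a', 'b', 'c'], 'left_val': [1, 2, 3]})
right = pd.DataFrame({'key': ['b', 'c', 'd'], 'right_val': [4, 5, 6]})

# Row counts per merge type: inner keeps the overlap {b, c},
# left/right keep one side's keys, outer keeps the union {a, b, c, d}.
for how in ('inner', 'left', 'right', 'outer'):
    print(how, pd.merge(left, right, on='key', how=how).shape[0])
# inner 2, left 3, right 3, outer 4
```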
Venn diagrams are commonly used to illustrate the different merge and join types. See this example from Stack Overflow.
Reduce into one dataframe based on the composite key
We can also use reduce to merge the list of dataframes into one.
This produces the same result as the merge in the previous example.
import functools
merged_df = functools.reduce(lambda left,right: pd.merge(left,right,on=['Artist','Name']), list_of_playlists)
log.info("## merged_df.head()=")
merged_df.head()
log.info("merged_df.shape=%s" % str(merged_df.shape))
2018-04-24 15:30:44,378 - INFO - ## merged_df.head()=
Name | Artist | Composer_x | Album_x | Grouping_x | Work_x | Movement Number_x | Movement Count_x | Movement Name_x | Genre_x | ... | Volume Adjustment_y | Kind_y | Equalizer_y | Comments_y | Plays_y | Last Played_y | Skips_y | Last Skipped_y | My Rating_y | Location_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Fade To Grey | Visage | NaN | New Wave Hits Of The '80s, Vol. 3 | NaN | NaN | NaN | NaN | NaN | Rock, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 62.0 | 4/9/2018 7:23 PM | NaN | NaN | 100 | C:\Users\Lee\itunes\Music\Visage\New Wave Hits... |
1 | I Could Be Happy | Altered Images | NaN | New Wave Hits Of The '80s, Vol. 04 | NaN | NaN | NaN | NaN | NaN | Pop, New Wave, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 66.0 | 4/11/2018 5:44 PM | 1.0 | 1/6/2018 8:21 PM | 80 | C:\Users\Lee\itunes\Music\Altered Images\New W... |
2 | Dance This Mess Around | The B-52's | Cindy Wilson/Fred Schneider/Kate Pierson/Keith... | B-52's | NaN | NaN | NaN | NaN | NaN | Alternative, Punk, 80s | ... | NaN | MPEG audio file | NaN | Black1000 | 151.0 | 4/10/2018 3:09 PM | 6.0 | 3/7/2018 11:58 AM | 80 | C:\Users\Lee\itunes\Music\The B-52's\B-52's\03... |
3 | Fight For Your Right | Beastie Boys | NaN | The Best 80s Songs | NaN | . | NaN | NaN | NaN | 80s, Pop, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 53.0 | 4/9/2018 8:18 PM | 1.0 | 1/2/2018 1:37 AM | 80 | C:\Users\Lee\itunes\Music\Compilations\The Bes... |
4 | Just What I Needed | The Cars | Ric Ocasek | Complete Greatest Hits | NaN | NaN | NaN | NaN | NaN | Pop, Electronic, 80s | ... | NaN | MPEG audio file | NaN | NaN | 112.0 | 1/8/2018 12:44 PM | 4.0 | 1/6/2018 10:59 PM | 80 | C:\Users\Lee\itunes\Music\The Cars\Complete Gr... |
5 rows × 60 columns
2018-04-24 15:30:44,417 - INFO - merged_df.shape=(72, 60)
Example of outer merge / full outer join
Finally, we will perform an outer merge using Pandas, also referred to as a “full outer join” or just “outer join”. An outer join can be seen as a combination of left and right joins, or the opposite of an inner join. In outer joins, every row from the left and right dataframes is retained in the result, with NaNs where there are no matched join variables.
As such, we would expect the result to have one row for each distinct ('Artist', 'Name') pair across the two playlists, i.e. every key from the left dataframe will be in the result along with every key from the right dataframe, and they'll be linked where possible.
df_playlist1=list_of_playlists[0]
df_playlist2=list_of_playlists[1]
merged_df = pd.merge(df_playlist1,
df_playlist2,
on=['Artist','Name'],
how='outer')
log.info("## merged_df.head()=")
merged_df.head()
log.info("merged_df.shape=%s" % str(merged_df.shape))
2018-04-24 15:30:48,407 - INFO - ## merged_df.head()=
Name | Artist | Composer_x | Album_x | Grouping_x | Work_x | Movement Number_x | Movement Count_x | Movement Name_x | Genre_x | ... | Volume Adjustment_y | Kind_y | Equalizer_y | Comments_y | Plays_y | Last Played_y | Skips_y | Last Skipped_y | My Rating_y | Location_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Just The Way You Are | Billy Joel | Billy Joel | Greatest Hits Vol.I | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | She's Got A Way | Billy Joel | Billy Joel | Greatest Hits Vol.II | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | Thunderstruck | Brian Johnson | NaN | The Very Best of AC/DC | NaN | NaN | NaN | NaN | NaN | Hard Rock, 80s | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | Don't Stop Believin' | Journey | Jonathan Cain/Neal Schon/Steve Perry | Greatest Hits (Limited Gold Edition) | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | Fade To Grey | Visage | NaN | New Wave Hits Of The '80s, Vol. 3 | NaN | NaN | NaN | NaN | NaN | Rock, 80s, Alternative | ... | NaN | MPEG audio file | NaN | NaN | 62.0 | 4/9/2018 7:23 PM | NaN | NaN | 100.0 | C:\Users\Lee\itunes\Music\Visage\New Wave Hits... |
5 rows × 60 columns
2018-04-24 15:30:48,447 - INFO - merged_df.shape=(797, 60)
We noted in our prior example that the vertically stacked dataframe had a row count of 864, and that the inner merge found an intersection of 72 rows.
In theory the row count of the outer join should be the total count minus the intersection: 864 - 72 = 792.
However, the actual count is 797, off by 5.
My instinct is that the datasets are not truly unique on the composite key we used. That would lead to a mismatch between the expected union count and the merged dataset count.
We can perform a simple check to validate whether our instincts are correct: find out how many duplicate records exist, based on the composite key, in the first playlist dataset.
df_dupes1 = df_playlist1[df_playlist1.duplicated(['Artist','Name'], keep=False)]
log.info("df_dupes1.shape=%s" % str(df_dupes1.shape))
df_dupes1.sort_values(['Artist','Name']).head(10)
2018-04-24 15:30:52,357 - INFO - df_dupes1.shape=(76, 31)
Name | Artist | Composer | Album | Grouping | Work | Movement Number | Movement Count | Movement Name | Genre | ... | Volume Adjustment | Kind | Equalizer | Comments | Plays | Last Played | Skips | Last Skipped | My Rating | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
122 | Halloween | Dead Kennedys | NaN | Just Can't Get Enough: New Wa | NaN | NaN | NaN | NaN | NaN | Alternative, 80s | ... | NaN | MPEG audio file | NaN | NaN | NaN | NaN | 4.0 | 3/19/2018 8:46 AM | 60 | C:\Users\Lee\itunes\Music\Dead Kennedys\Just C... |
310 | Halloween | Dead Kennedys | D.H. Peligro/Dead Kennedys/East Bay Ray/Jello ... | Milking The Sacred Cow | NaN | NaN | NaN | NaN | NaN | Punk Rock, 80s | ... | NaN | MPEG audio file | NaN | NaN | NaN | NaN | 1.0 | 1/11/2018 11:59 PM | 40 | C:\Users\Lee\itunes\Music\Dead Kennedys\Milkin... |
20 | Enjoy The Silence | Depeche Mode | NaN | Best Of Depeche Mode, Vol. 1 | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | Track 4 | 40.0 | 4/10/2018 7:19 PM | 1.0 | 1/14/2018 10:37 AM | 80 | C:\Users\Lee\itunes\Music\Depeche Mode\Best Of... |
127 | Enjoy The Silence | Depeche Mode | Martin L. Gore | Violator | NaN | NaN | NaN | NaN | NaN | Pop, 80s, Alternative | ... | NaN | MPEG audio file | NaN | Sire / Reprise 9 26081-2 | 27.0 | 4/9/2018 7:49 PM | 2.0 | 2/23/2018 1:33 PM | 60 | C:\Users\Lee\itunes\Music\Depeche Mode\Violato... |
29 | The Final Countdown | Europe | NaN | 101 Sporting Anthems | NaN | NaN | NaN | NaN | NaN | Hard Rock, 80s | ... | NaN | MPEG audio file | NaN | NaN | 86.0 | 4/29/2016 3:29 PM | 5.0 | 9/28/2015 7:58 PM | 80 | C:\Users\Lee\itunes\Music\Various Artists\101 ... |
358 | The Final Countdown | Europe | NaN | The Best 80s Songs | NaN | . | NaN | NaN | NaN | 80s, Pop | ... | NaN | MPEG audio file | NaN | NaN | 3.0 | 1/2/2018 5:47 PM | 2.0 | 1/6/2018 9:49 PM | 40 | C:\Users\Lee\itunes\Music\Compilations\The Bes... |
36 | Down Under | Men At Work | NaN | The Best 80s Songs | NaN | . | NaN | NaN | NaN | 80s, Pop | ... | NaN | MPEG audio file | NaN | NaN | 2.0 | 1/2/2018 7:24 PM | 1.0 | 1/2/2018 1:44 AM | 80 | C:\Users\Lee\itunes\Music\Compilations\The Bes... |
37 | Down Under | Men At Work | Colin Hay, Roy Strykert | Like, Omigod! The 80s Pop Culture Box (Totally... | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | NaN | 49.0 | 4/11/2018 6:47 PM | 3.0 | 1/17/2018 6:53 PM | 80 | C:\Users\Lee\itunes\Music\Men At Work\Like, Om... |
45 | Cum On Feel The Noize | Quiet Riot | NaN | 101 Sporting Anthems | NaN | NaN | NaN | NaN | NaN | Hard Rock, 80s | ... | NaN | MPEG audio file | NaN | NaN | 79.0 | 4/29/2016 4:44 PM | 5.0 | 1/8/2018 3:01 PM | 80 | C:\Users\Lee\itunes\Music\Various Artists\101 ... |
485 | Cum On Feel The Noize | Quiet Riot | Noddy Holder, Jim Lea | Like, Omigod! The 80s Pop Culture Box (Totally... | NaN | NaN | NaN | NaN | NaN | Pop, 80s | ... | NaN | MPEG audio file | NaN | NaN | 3.0 | 3/30/2018 10:27 AM | 2.0 | 1/9/2018 6:04 PM | 40 | C:\Users\Lee\itunes\Music\Compilations\Like, O... |
10 rows × 31 columns
There are 76 rows in the first playlist that share their composite key with at least one other row (keep=False flags every member of each duplicate group).
Looking at the data, it seems each row describes an actual digital file stored on disk.
For our purpose of comparing the logical music tracks between the two playlists, we can safely drop that physical-file information and form a new logical representation of the playlist from a subset of the columns.
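As a sketch of that logical representation, one could keep only the track-identifying columns before de-duplicating (the column choice and rows here are hypothetical):

```python
import pandas as pd

# Toy playlist where one logical track exists as two physical files.
playlist = pd.DataFrame({
    'Name': ['Halloween', 'Halloween'],
    'Artist': ['Dead Kennedys', 'Dead Kennedys'],
    'Album': ["Just Can't Get Enough", 'Milking The Sacred Cow'],
})

# Project down to the identifying columns, then collapse duplicates.
logical = playlist[['Name', 'Artist']].drop_duplicates()
print(logical.shape)  # (1, 2)
```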
For now, we will simply strip the duplicate records from the playlist based on the original composite key, keeping the first record in each case.
df_playlist1_uniq=df_playlist1.drop_duplicates(subset=['Artist', 'Name'])
log.info("df_playlist1_uniq.shape=%s" % str(df_playlist1_uniq.shape))
2018-04-24 15:30:56,282 - INFO - df_playlist1_uniq.shape=(634, 31)
Enhance the dataset load to exclude duplicates
We now update the original load logic to exclude duplicates and rerun the merge to see whether the union count agrees with the merged dataset count.
all_files = ['data/Top80_sPop&Rock.txt','data/TopAlt&Electronic.txt']
list_of_playlists = []
for file_ in all_files:
    doc = codecs.open(file_, 'rU', 'UTF-16')
    df = pd.read_csv(doc, index_col=None, header=0, sep='\t')
    df.drop_duplicates(subset=['Artist', 'Name'], inplace=True)
    log.info("## %s: df.shape=%s" % (file_, str(df.shape)))
    list_of_playlists.append(df)
inner_df = functools.reduce(lambda left, right: pd.merge(left, right, on=['Artist', 'Name'], how='inner'), list_of_playlists)
merged_df = functools.reduce(lambda left, right: pd.merge(left, right, on=['Artist', 'Name'], how='outer'), list_of_playlists)
log.info("inner_df.shape=%s" % str(inner_df.shape))
log.info("merged_df.shape=%s" % str(merged_df.shape))
df1_cnt=list_of_playlists[0].shape[0]
df2_cnt=list_of_playlists[1].shape[0]
df_inner_cnt=inner_df.shape[0]
df_union_cnt=df1_cnt+df2_cnt-df_inner_cnt
df_merged_cnt=merged_df.shape[0]
log.info("Rows in dataset1=%s" % df1_cnt)
log.info("Rows in dataset2=%s" % df2_cnt)
log.info("Rows in intersection=%s" % df_inner_cnt)
log.info("Rows in union=%s" % df_union_cnt)
log.info("(Rows in union==Rows in merged) = %s" % (df_union_cnt==df_merged_cnt))
2018-04-24 15:31:58,614 - INFO - ## data/Top80_sPop&Rock.txt: df.shape=(634, 31)
2018-04-24 15:31:58,625 - INFO - ## data/TopAlt&Electronic.txt: df.shape=(185, 31)
2018-04-24 15:31:58,650 - INFO - inner_df.shape=(67, 60)
2018-04-24 15:31:58,654 - INFO - merged_df.shape=(752, 60)
2018-04-24 15:31:58,655 - INFO - Rows in dataset1=634
2018-04-24 15:31:58,657 - INFO - Rows in dataset2=185
2018-04-24 15:31:58,658 - INFO - Rows in intersection=67
2018-04-24 15:31:58,661 - INFO - Rows in union=752
2018-04-24 15:31:58,664 - INFO - (Rows in union==Rows in merged) = True
So the data set union count agrees with the merged dataframe count.
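Another way to double-check this kind of reconciliation is merge's indicator=True option, which labels every row of an outer merge as left_only, right_only, or both. A quick sketch on a couple of made-up rows:

```python
import pandas as pd

# Two tiny playlists sharing one track (rows are made up for illustration).
p1 = pd.DataFrame({'Artist': ['Visage', 'Journey'],
                   'Name': ['Fade To Grey', "Don't Stop Believin'"]})
p2 = pd.DataFrame({'Artist': ['Visage', 'ABBA'],
                   'Name': ['Fade To Grey', 'Take A Chance On Me']})

# indicator=True adds a '_merge' column classifying each row's origin,
# so the intersection and the two set differences can be counted directly.
merged = pd.merge(p1, p2, on=['Artist', 'Name'], how='outer', indicator=True)
print(merged['_merge'].value_counts().to_dict())
```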
For an example using pandas to load multiple excel datasets/sheets into a single dataframe, see here.
Related References:
https://www.shanelynn.ie/merge-join-dataframes-python-pandas-index-1/
https://stackoverflow.com/questions/46751699/python-pandas-merge-csv-files-in-directory-into-one
http://www.datacarpentry.org/python-ecology-lesson/04-merging-data/
https://stackoverflow.com/questions/18792918/pandas-combining-2-data-frames-join-on-a-common-column