In [8]: key_counts = collections.Counter()
   ...: for d in os.listdir("."):
   ...:     print(f" # {d}")
   ...:     for f in os.listdir(d):
   ...:         print(f"  - {f}")
   ...:         listens = open(os.path.join(d, f)).read().splitlines()
   ...:         for l in listens:
   ...:             lj = ujson.loads(l)
   ...:             for k in lj.get("track_metadata", {}).get("additional_info", {}).keys():
   ...:                 key_counts[k] += 1
   ...:
In [31]:
import collections
import concurrent.futures
import dataclasses
import html
import os
import pickle

import ujson
from IPython.display import HTML, display
from tabulate import tabulate
In [2]:
@dataclasses.dataclass
class Results:
    """Aggregated listen statistics for one batch of listens."""
    # The year that this data is from
    year: int
    # Number of listens per submitting client (the "listening_from" value)
    listening_count: collections.Counter
    # Per submitting client, the number of listens carrying each additional_info key.
    # The annotation is the type itself, not an instance of it.
    tags_per_tool: collections.defaultdict
    # Number of listens carrying each additional_info key, across all clients
    counts_all: collections.Counter
In [15]:
def process_month(path) -> Results:
    """Count the additional_info keys found in one listens file.

    The file at ``path`` is read line by line, each line being parsed as one
    JSON document.

    Returns:
        A Results with ``year=0`` (this function is not told the year) and:
          - listening_count: listens per "listening_from" value
            (``None`` when a listen has no such key),
          - tags_per_tool: per "listening_from" value, the number of listens
            carrying each additional_info key,
          - counts_all: the number of listens carrying each additional_info key.
    """
    counts_all = collections.Counter()
    listening_count = collections.Counter()
    tags_per_tool = collections.defaultdict(collections.Counter)
    # Context manager so the file handle is closed once the file is consumed,
    # including when a line fails to parse.
    with open(path) as listens_file:
        for l in listens_file:
            lj = ujson.loads(l)
            additional = lj.get("track_metadata", {}).get("additional_info", {})
            listening_from = additional.get("listening_from")
            listening_count[listening_from] += 1
            for k in additional.keys():
                tags_per_tool[listening_from][k] += 1
                counts_all[k] += 1
    return Results(year=0, listening_count=listening_count, tags_per_tool=tags_per_tool,
            counts_all=counts_all)

def process_year(dirname: str, year: int, max_workers: int = 12) -> Results:
    """Aggregate every file in ``dirname`` into one Results for ``year``.

    Each directory entry is passed to process_month() in a worker process and
    the per-file counters are summed as the workers finish.

    Args:
        dirname: directory whose entries are the files to process.
        year: value stored in the returned Results.
        max_workers: size of the process pool (default 12).

    Returns:
        A Results holding the summed counters of every file that was processed
        successfully. A file whose processing raises is reported on stdout and
        left out of the totals; it does not abort the remaining files.
    """
    listening_count = collections.Counter()
    tags_per_tool = collections.defaultdict(collections.Counter)
    counts_all = collections.Counter()
    files = os.listdir(dirname)
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Remember which file each future belongs to so failures can be reported
        future_to_month = {executor.submit(process_month, os.path.join(dirname, f)): f for f in files}
        for future in concurrent.futures.as_completed(future_to_month):
            file = future_to_month[future]
            try:
                data = future.result()
            except Exception as exc:
                print('%s generated an exception: %s' % (file, exc))
            else:
                listening_count.update(data.listening_count)
                counts_all.update(data.counts_all)
                for k, v in data.tags_per_tool.items():
                    tags_per_tool[k].update(v)
    return Results(year=year, listening_count=listening_count, tags_per_tool=tags_per_tool,
            counts_all=counts_all)
In [4]:
# Directory that holds one sub-directory of listens files per year.
# Set the LISTENBRAINZ_LISTENS_ROOT environment variable to use a dump stored
# elsewhere; without it the original location is used.
root = os.environ.get(
    "LISTENBRAINZ_LISTENS_ROOT",
    "/media/data/listenbrainz-listens-dump-561-20210915-040004-full/listens/",
)
# Single-file check, kept for reference:
#path = "2021/4.listens"
#r = process_month(os.path.join(root, path))
In [16]:
# Process every year, keeping each result both in memory (results[year]) and
# on disk ("<year>.pickle" in the current directory).
results = {}
for year in range(2005, 2022):
    print("Year: {}".format(year))
    year_result = process_year(os.path.join(root, str(year)), year)
    results[year] = year_result
    # Close the pickle file explicitly so it is flushed before the next year starts
    with open("{}.pickle".format(year), "wb") as pickle_file:
        pickle.dump(year_result, pickle_file)
Year: 2005
Year: 2006
Year: 2007
Year: 2008
Year: 2009
Year: 2010
Year: 2011
Year: 2012
Year: 2013
Year: 2014
Year: 2015
Year: 2016
Year: 2017
Year: 2018
Year: 2019
Year: 2020
Year: 2021
In [22]:
# Sum the per-year counters into totals covering every year.
listening_count = collections.Counter()
tags_per_tool = collections.defaultdict(collections.Counter)
counts_all = collections.Counter()

for yearly in results.values():
    listening_count.update(yearly.listening_count)
    counts_all.update(yearly.counts_all)
    for tool_name, tool_tags in yearly.tags_per_tool.items():
        tags_per_tool[tool_name].update(tool_tags)

# Some tags are added by LB rather than by the submitter: drop every "*_msid"
# key, and "listening_from" too, from the overall counter and from each
# per-tool counter.
for tag_counter in (counts_all, *tags_per_tool.values()):
    for msid_key in [name for name in tag_counter if name.endswith("_msid")]:
        del tag_counter[msid_key]
    tag_counter.pop("listening_from", None)
In [38]:
# Column order: every key counted in any year, in order of first appearance
# when walking the years and, within a year, from most to least common.
ordered_submitters = list(dict.fromkeys(
    key
    for yearly in results.values()
    for key, _ in yearly.counts_all.most_common()
))

# One row per year: the year followed by the formatted count for each column,
# or an empty cell when that key was not seen in that year.
table_counts_per_year = [
    [str(year)] + [
        f"{yearly.counts_all[key]:,}" if key in yearly.counts_all else ""
        for key in ordered_submitters
    ]
    for year, yearly in results.items()
]

display(HTML(tabulate(
    table_counts_per_year,
    headers=["Year"] + ordered_submitters,
    tablefmt='html',
)))
Yearartist_msid release_msid recording_msid listening_from lastfm_artist_mbid lastfm_track_mbid recording_mbid lastfm_release_mbid artist_mbids release_mbid dedup_tag tags source track_length track_number date totaltracks discnumber totaldiscs albumartist length tracknumber isrc spotify_id duration_ms artist_names spotify_album_id spotify_artist_ids release_artist_names spotify_album_artist_ids release_artist_name release_group_mbid track_mbid work_mbids choosen_by_user duration disc_number artists_mbids album title artist genre comment discid release_artist_name.Songs release_artist_name.Psych albumartist.Moonfog Records Presents release_artist_name.Metalocalypse release_artist_name.Original Broadway Cast of If/Then albumartist.Jessye Norman; Pierre Boulez albumartist.Format albumartist.Akatsuki Kirika (CV albumartist.Kashii Airi (CV albumartist.Maria Cadenzavna Eve (CV rmq release_artist_name.Tom Morello albumartist.Stieber Twins Mit release_artist_name.Ashley Henry, The RE release_artist_name.Original Broadway Cast of Chicago albumartist.Tachibana Makoto (CV release_artist_name.Jedi Mind Tricks Presents artist_mbid rating k albumartist.Annie Hall release_artist_name.Caissie Levy, John Riddle, Male Ensemble - Frozen release_artist_name.Caissie Levy, Ensemble - Frozen release_artist_name.Transformers artists_mbid origin_url release_artist artists albumartists musicbrainz_albumartistid bpm asin disc year discc label media track script trackc encoder composer grouping language encodedby publisher artistsort acoustid_id compilation description album artist discsubtitle originaldate artist_credit catalognumber releasecountry albumartistsort albumartist_credit musicbrainz_albumid musicbrainz_trackid acoustid_fingerprint musicbrainz_artistid musicbrainz_albumtype musicbrainz_albumstatus musicbrainz_albumcomment musicbrainz_releasegroupid arranger lyricist composersort musicbrainz_releasetrackid brainzplayer_metadata.track_name brainzplayer_metadata.artist_name 
brainzplayer_metadata.release_name
20051,357,328 1,357,328 1,357,328 1,357,327 1,074,810 945,080 213,391 42,649 1 1
200610,559,436 6,575,569 10,559,436 2,686,669 2,086,049 1,497,970 6,785,486 1,279,409 1 1 2,264
200716,502,256 11,140,437 16,502,256 4,469,597 3,456,367 2,323,716 10,721,723 2,397,715 8,367 638
200820,335,088 16,062,698 20,335,088 5,732,254 4,444,923 3,171,815 13,086,704 3,358,831 52,227
200924,357,961 23,534,818 24,357,961 6,609,720 5,070,887 3,641,689 15,735,178 4,374,935 79,049
201026,645,128 25,745,882 26,645,128 7,069,469 5,447,888 4,012,349 17,015,424 4,717,654 70,881
201128,094,572 26,772,283 28,094,572 7,252,396 5,574,538 4,038,511 17,837,779 4,810,559 67,098 3,271
201229,024,620 27,350,971 29,024,620 7,708,597 5,968,536 4,139,290 18,222,531 5,100,336 101,559 3,945
201330,496,502 28,720,387 30,496,502 8,006,769 6,133,576 4,318,277 18,949,648 5,264,513 142,822 4,030
201433,025,610 31,101,125 33,025,610 8,667,319 6,592,160 4,505,730 20,134,833 5,575,389 133,831 7,187 1 1 1
201533,855,327 31,900,043 33,855,327 8,745,696 6,655,277 4,591,890 19,025,035 5,640,903 6 3 133,824 7,025 174 174 25 25
201635,394,679 33,787,674 35,394,679 8,838,030 6,744,901 4,577,190 18,435,686 5,669,271 388 14 179,423 5,114 778 726 519 464
201740,676,222 38,965,537 40,676,222 10,915,102 8,434,435 5,627,154 19,676,126 6,964,929 19,509 19,443 153,656 23,8438,731 7,839 8,700 33,740 26,483 18,910 18,431 19,898 751
201842,166,544 40,563,235 42,166,544 13,062,411 9,890,380 6,577,629 18,931,299 8,245,502 127,686 104,845 568,262 83,04044,816 44,816 44,350 172,116144,448 185,474 98,702 110,919 158,225 110,395 93,434 84,582 84,582 84,582 84,582 84,582 84,582 84,558 74,948 70,145 10,552 311 144 104 52 22 22 22 20 19 13 10 4 4 3 3 3 2 1 1 1 1 1 1 1 1 1 1
201938,081,658 37,083,543 38,081,658 19,352,708 12,650,603 8,286,958 13,684,882 10,564,518 181,159 198,996 366,171 83,243159,246 148,819 138,996 168,346135,866 2,633,408 111,549 120,066 2,748,392 2,559,5502,529,656 2,519,603 2,519,603 2,519,603 2,519,521 2,519,603 2,519,603 2,528,391 123,298 111,125 8,940 6,512 5,641 1 1 1 11,156 1,294 848 1 1 1 1 1
202037,689,809 37,273,570 37,689,809 28,432,647 15,164,526 11,737,379 4,416,725 12,738,983 449,428 637,695 102,483 41,258618,849 519,558 512,503 307,677236,259 7,177,239 172,888 220,015 7,932,345 7,059,6187,043,245 6,989,826 6,989,302 6,989,302 6,423,039 6,989,302 6,989,302 7,014,216 187,143 172,732 6,317 16,283 15,972 120,173 294,112 12,378 1,466,697 9
202123,058,300 23,058,300 23,058,300 16,556,956 3,938,867 3,829,334 1,089,761 3,317,804 631,463 806,481 24,368432,235 462,684 431,473 362,604269,974 9,658,613 206,924 268,812 10,459,646 9,486,2219,464,902 9,396,195 9,396,052 9,396,052 9,396,052 9,396,052 9,396,052 9,441,763 190,996 323,147 36,201 6,463 10,744 255 255 286 4,184 1,488 106,283 415,683 19,576 1,826,893 4,281 1,848 309 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 176 176 176 176 4 1 1
In [40]:
# Tags per client: for each client, a heading, its total number of listens and
# a table of its tags, most common first.
for client, tags in tags_per_tool.items():
    total = listening_count[client]
    # The third column is count / total expressed as a percentage, rounded to 2 places
    table = [
        [tag, f"{count:,}", round(count*100.0/total, 2)]
        for tag, count in tags.most_common()
    ]
    # The client name is taken from the submitted listens, so escape it before
    # embedding it in markup; str() keeps a missing client rendering as "None".
    display(HTML(f"<h2>{html.escape(str(client))}</h2>"))
    display(HTML(f"<p>total submissions: {total:,}</p>"))
    display(HTML(tabulate(table, headers=["Tag", "count", "proportion"], tablefmt='html')))

lastfm

total submissions: 144,387,476

Tag count proportion
lastfm_artist_mbid 109,328,723 75.72
lastfm_release_mbid 90,063,900 62.38
lastfm_track_mbid 77,821,961 53.9
recording_mbid 32,839,109 22.74
dedup_tag 280,296 0.19
discnumber 56 0
duration_ms 56 0
tracknumber 56 0
artist_names 56 0
spotify_album_id 56 0
spotify_artist_ids 56 0
release_artist_name 56 0
release_artist_names 56 0
spotify_album_artist_ids56 0
isrc 49 0
spotify_id 49 0

None

total submissions: 305,857,373

Tag count proportion
recording_mbid 200,690,284 65.62
origin_url 3,293,586 1.08
dedup_tag 1,854,024 0.61
tracknumber 1,671,612 0.55
release_mbid 1,297,832 0.42
source 1,263,550 0.41
track_length 1,183,717 0.39
track_number 1,136,023 0.37
artist_mbids 940,563 0.31
date 774,851 0.25
rating 711,089 0.23
totaltracks 619,967 0.2
albumartist 538,687 0.18
track_mbid 518,568 0.17
discnumber 482,414 0.16
totaldiscs 459,500 0.15
release_group_mbid 458,850 0.15
tags 286,957 0.09
artist_mbid 237,612 0.08
isrc 155,272 0.05
spotify_id 141,705 0.05
release_artist_name 76,686 0.03
work_mbids 34,796 0.01
k 32,802 0.01
duration 32,501 0.01
choosen_by_user 29,569 0.01
artists 4,281 0
genre 4,204 0
albumartists 1,848 0
comment 1,507 0
length 751 0
duration_ms 667 0
musicbrainz_albumartistid 309 0
artist 308 0
album 277 0
title 277 0
bpm 255 0
asin 255 0
disc 255 0
year 255 0
discc 255 0
label 255 0
media 255 0
track 255 0
script 255 0
trackc 255 0
encoder 255 0
composer 255 0
grouping 255 0
language 255 0
encodedby 255 0
publisher 255 0
artistsort 255 0
acoustid_id 255 0
compilation 255 0
description 255 0
album artist 255 0
discsubtitle 255 0
originaldate 255 0
artist_credit 255 0
catalognumber 255 0
releasecountry 255 0
albumartistsort 255 0
albumartist_credit 255 0
musicbrainz_albumid 255 0
musicbrainz_trackid 255 0
acoustid_fingerprint 255 0
musicbrainz_artistid 255 0
musicbrainz_albumtype 255 0
musicbrainz_albumstatus 255 0
musicbrainz_albumcomment 255 0
musicbrainz_releasegroupid 255 0
arranger 176 0
lyricist 176 0
composersort 176 0
musicbrainz_releasetrackid 176 0
artists_mbids 52 0
discid 13 0
release_artist 9 0
albumartist.Moonfog Records Presents 4 0
albumartist.Jessye Norman; Pierre Boulez3 0
albumartist.Format 2 0
rmq 2 0
albumartist.Akatsuki Kirika (CV 1 0
albumartist.Kashii Airi (CV 1 0
albumartist.Maria Cadenzavna Eve (CV 1 0
albumartist.Stieber Twins Mit 1 0
albumartist.Tachibana Makoto (CV 1 0
albumartist.Annie Hall 1 0
artists_mbid 1 0

MusicBee

total submissions: 721,356

Tag count proportion
dedup_tag 35 0
release_mbid 9 0
recording_mbid 9 0
release_group_mbid 2 0
track_mbid 1 0

spotify

total submissions: 19,040,896

Tag count proportion
spotify_id 18,989,483 99.73
duration_ms 18,989,483 99.73
tracknumber 18,989,483 99.73
artist_names 18,989,483 99.73
spotify_album_id 18,989,483 99.73
release_artist_names 18,989,483 99.73
spotify_album_artist_ids 18,989,483 99.73
release_artist_name 18,989,454 99.73
discnumber 18,989,379 99.73
isrc 18,984,003 99.7
spotify_artist_ids 18,423,138 96.76
dedup_tag 27,528 0.14
disc_number 104 0
recording_mbid 28 0
release_artist_name.Songs 11 0
release_artist_name.Psych 4 0
release_artist_name.Metalocalypse 3 0
release_artist_name.Original Broadway Cast of If/Then 3 0
release_artist_name.Original Broadway Cast of Chicago 2 0
tags 1 0
artist_mbids 1 0
release_mbid 1 0
release_artist_name.Tom Morello 1 0
release_artist_name.Ashley Henry, The RE 1 0
release_artist_name.Jedi Mind Tricks Presents 1 0
release_artist_name.Caissie Levy, John Riddle, Male Ensemble - Frozen1 0
release_artist_name.Caissie Levy, Ensemble - Frozen 1 0
release_artist_name.Transformers 1 0

Plex

total submissions: 627,351

Tag count proportion
dedup_tag 4 0

Lollypop

total submissions: 295,073

Tag count proportion
tracknumber 294,734 99.89
artist_mbids 269,793 91.43
release_mbid 267,969 90.81
recording_mbid267,853 90.78
dedup_tag 30 0.01

foo_listenbrainz2 1.2.0dev1

total submissions: 3

Tag count proportion
tracknumber 3 100
isrc 2 66.67

cmus

total submissions: 2,785

Tag count proportion
tracknumber 2,748 98.67
release_artist_name2,732 98.1
recording_mbid 2,719 97.63
artist_mbids 2,144 76.98
release_mbid 2,141 76.88
release_group_mbid 2,140 76.84
isrc 963 34.58
track_mbid 2 0.07

foobar2000

total submissions: 295,347

Tag count proportion
tracknumber 285,745 96.75
date 270,584 91.62
albumartist 201,023 68.06
totaltracks 193,963 65.67
discnumber 170,136 57.61
totaldiscs 149,483 50.61
release_mbid 121,270 41.06
artist_mbids 118,719 40.2
release_group_mbid114,732 38.85
track_mbid 112,543 38.11
recording_mbid 99,859 33.81
isrc 75,493 25.56
work_mbids 26,553 8.99

Rhythmbox

total submissions: 4,810

Tag count proportion
tracknumber 4,810 100
artist_mbids 3,236 67.28
release_mbid 3,235 67.26
recording_mbid3,235 67.26
source 324 6.74

Musium

total submissions: 931

Tag count proportion
tracknumber 931 100

foo_listenbrainz2 1.0.1

total submissions: 52

Tag count proportion
tracknumber 52 100
isrc 2 3.85

Funkwhale

total submissions: 32,203

Tag count proportion
discnumber 32,203 100
tracknumber 32,203 100
artist_mbids 28,367 88.09
release_mbid 28,171 87.48
recording_mbid27,976 86.87

manual

total submissions: 4

Tag count proportion
tags 4 100
artist_mbids 4 100
recording_mbid 4 100
release_mbid 2 50

jellyfin

total submissions: 23,156

Tag count proportion
release_mbid 21,750 93.93
artist_mbids 21,725 93.82
track_mbid 21,660 93.54
recording_mbid21,318 92.06

LMS

total submissions: 16,383

Tag count proportion
tracknumber 16,231 99.07
release_mbid 9,262 56.53
artist_mbids 9,252 56.47
track_mbid 9,199 56.15
recording_mbid9,156 55.89

Lûd

total submissions: 15,176

Tag count proportion
track_mbid 15,176 100
artist_mbids15,176 100
release_mbid15,176 100

Plex Media Server

total submissions: 661

Tag count proportion
work_mbids 661 100
artist_mbids 661 100
release_mbid 661 100
recording_mbid 661 100
release_group_mbid 661 100

listenbrainz

total submissions: 4

Tag count proportion
source 4 100
origin_url 4 100
brainzplayer_metadata.track_name 4 100
brainzplayer_metadata.artist_name 1 25
brainzplayer_metadata.release_name 1 25
In [ ]: