254 lines
10 KiB
Python
254 lines
10 KiB
Python
"""Jobs a ejecutar con django-rq
|
|
|
|
NO SE DONDE DOCUMENTAR ESTO
|
|
La estructura que se utilizara para las keys es la siguiente
|
|
|
|
artist:{mbid} => full json
|
|
artist:{mbid}:release_groups => set of release_group_mbid
|
|
artist:{mbid}:release_groups:count => cantidad de las release groups
|
|
|
|
release_group:{mbid} => full json
|
|
release_group:{mbid}:artist => artist_mbid
|
|
release_group:{mbid}:releases => set of release_mbid
|
|
release_group:{mbid}:cover_art => json with cover arts
|
|
|
|
release:{mbid} => full json
|
|
release:{mbid}:release_group => release_group_mbid
|
|
release:{mbid}:media => set of media_json
|
|
release:{mbid}:media:{position}:recordings => set of recording_mbid
|
|
release:{mbid}:cover_art => json with cover arts
|
|
|
|
recording:{mbid} => full json
|
|
recording:{mbid}:release => release_mbid
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
|
|
import django_rq
|
|
|
|
from fetcher import musicbrainz as mb
|
|
from utils import get_redis_connection, parse_date
|
|
|
|
_log = logging.getLogger('fetcher_jobs')
|
|
_log.addHandler(logging.NullHandler())
|
|
|
|
|
|
@django_rq.job('high')
def load_release_cover_art(release):
    """Cache the cover art of a release.

    ``release`` is either the release MBID or a dict holding the MBID under
    ``'id'``. The cover art is fetched and stored as JSON under
    ``release:{mbid}:cover_art`` only when that key is not cached yet.
    """
    mbid = release.get('id') if isinstance(release, dict) else release
    cache_key = f'release:{mbid}:cover_art'

    with get_redis_connection() as redis:
        # Already cached: nothing to fetch.
        if cache_key in redis:
            return

        payload = json.dumps(mb.get_release_cover_art(mbid))
        redis.set(cache_key, payload)
        _log.info('Cover art de release %s almacenado en cache', mbid)
|
|
|
|
|
|
@django_rq.job('high')
def load_release_group_cover_art(release_group):
    """Cache the cover art of a release group.

    ``release_group`` is either the release group MBID or a dict holding the
    MBID under ``'id'``. The cover art is fetched and stored as JSON under
    ``release_group:{mbid}:cover_art`` only when that key is not cached yet.
    """
    mbid = release_group.get('id') if isinstance(release_group, dict) else release_group
    cache_key = f'release_group:{mbid}:cover_art'

    with get_redis_connection() as redis:
        # Already cached: nothing to fetch.
        if cache_key in redis:
            return

        payload = json.dumps(mb.get_release_group_cover_art(mbid))
        redis.set(cache_key, payload)
        _log.info('Cover art de release group %s almacenado en cache', mbid)
|
|
|
|
|
|
@django_rq.job
def load_entities_of_recording(recording):
    """Cache the releases that contain a recording.

    Unlike the other loaders, this job walks up to the parent entity (the
    release): caching a lone recording adds little value, whereas loading its
    releases makes better use of the requests spent.

    Args:
        recording: Recording MBID, or a dict holding the MBID under ``'id'``.
    """
    mbid = recording.get('id') if isinstance(recording, dict) else recording
    page_size = 100

    with get_redis_connection() as redis:
        if f'recording:{mbid}' in redis:
            # A recording is only ever cached through its release, so if it is
            # already stored its release is stored as well.
            return

        offset = 0
        while True:
            releases = mb.browse_releases({'recording': mbid},
                                          includes=['recordings', 'artist-credits'],
                                          limit=page_size, offset=offset)
            # A missing count is treated as 0 so it can be stored and compared.
            total = releases.get('release_count') or 0

            # The only thing worth storing here is how many releases the
            # recording appears on; the releases themselves are loaded by
            # their own job.
            redis.set(f'recording:{mbid}:release:count', total)
            for release in releases.get('releases') or []:
                load_entities_of_release.delay(release.get('id'))

            offset += page_size
            # ``>=`` (not ``>``): when the total is an exact multiple of the
            # page size the next page would be empty, i.e. a wasted request.
            if offset >= total:
                break
|
|
|
|
|
|
@django_rq.job
def load_entities_of_release(release):
    """Cache a release together with its artist, media and recordings.

    Args:
        release: Release MBID, or the release dict (MBID under ``'id'``). When
            only the MBID is given and the release is not cached, it is
            fetched with its recordings and artist credits.
    """
    mbid = release.get('id') if isinstance(release, dict) else release

    with get_redis_connection() as redis:
        # Store the release only if it is not cached yet; otherwise work from
        # the cached copy.
        release_key = f'release:{mbid}'
        if release_key not in redis:
            if isinstance(release, str):
                release = mb.get_release_by_mbid(mbid, ['recordings', 'artist-credits'])
            redis.set(release_key, json.dumps(release))
            _log.info('Release %s fue almacenada en cache', mbid)
        else:
            release = json.loads(redis.get(release_key))

        # Queue the cover art load.
        load_release_cover_art.delay(release.get('id'))

        # Store the release artist (first credited artist, if any).
        artist_key = f'release:{mbid}:artist'
        artist_credits = release.get('artist_credit') or []
        if artist_credits:
            redis.set(artist_key, artist_credits[0].get('artist').get('id'))

        # Read once instead of once per track. May be None when no artist was
        # ever stored for this release.
        recording_artist = redis.get(artist_key)

        # The release date is the same for every track, so compute it once.
        # A missing or empty event list falls back to an empty event.
        # NOTE(review): assumes parse_date accepts a missing date (None), as
        # the previous ``[{}]`` default already relied on - confirm.
        events = release.get('release_events') or [{}]
        release_timestamp = parse_date(events[0].get('date')).timestamp()

        # A release can contain several media (discs, DVDs, ...). Each medium
        # holds tracks, and each track wraps a recording; the only thing kept
        # from the track itself is its position inside the medium.
        medias = release.get('media') or []
        redis.set(f'release:{mbid}:media:count', len(medias))
        for raw_media in medias:
            media = {
                'format': raw_media.get('format'),
                'position': raw_media.get('position'),
                'track_count': raw_media.get('track_count'),
            }
            redis.zadd(f'release:{mbid}:media', {json.dumps(media): media['position']})

            tracks = raw_media.get('tracks') or []
            if not tracks:
                continue

            recording_key = f'release:{mbid}:media:{media["position"]}:recordings'
            # Per-medium value: written once per medium rather than per track.
            redis.set(f'{recording_key}:count', media['track_count'])

            for track in tracks:
                recording = track.get('recording')
                recording_id = recording.get('id')

                redis.zadd(recording_key, {recording_id: track.get('position')})
                redis.set(f'recording:{recording_id}', json.dumps(recording))
                # redis-py refuses to store None, so skip when no artist is known.
                if recording_artist is not None:
                    redis.set(f'recording:{recording_id}:artist', recording_artist)
                redis.zadd(f'recording:{recording_id}:release', {mbid: release_timestamp})
|
|
|
|
|
|
@django_rq.job
def load_entities_of_release_group(release_group):
    """Cache a release group and queue the loading of its releases.

    Args:
        release_group: Release group MBID, or the release group dict (MBID
            under ``'id'``). When only the MBID is given and the group is not
            cached, it is fetched with its artists.
    """
    mbid = release_group.get('id') if isinstance(release_group, dict) else release_group
    page_size = 100

    with get_redis_connection() as redis:
        # Store the release group only if it is not cached yet.
        if f'release_group:{mbid}' not in redis:
            if isinstance(release_group, str):
                release_group = mb.get_release_group_by_mbid(mbid, includes=['artists'])
            redis.set(f'release_group:{mbid}', json.dumps(release_group))
            _log.info('Release Group %s almacenado en cache', mbid)

        # Queue the cover art load. Only the MBID is needed by that job, so
        # the full dict is not pushed through the queue.
        load_release_group_cover_art.delay(mbid)

        # Load its releases.
        releases_key = f'release_group:{mbid}:releases'
        offset = 0
        while True:
            # Ask for as many releases as possible on the first request;
            # otherwise a follow-up request for more data would be wasted.
            releases = mb.browse_releases({'release-group': mbid},
                                          includes=['recordings', 'artist-credits'],
                                          limit=page_size, offset=offset)
            # A missing count is treated as 0 so it can be stored and compared.
            count = releases.get('release_count') or 0

            # If as many releases are stored as MusicBrainz reports, there is
            # nothing left to load.
            if releases_key in redis and redis.zcard(releases_key) == count:
                break

            # Keep the count reported by MusicBrainz so completeness of the
            # cached set can be verified later.
            redis.set(f'release_group:{mbid}:releases:count', count)

            for rel in releases.get('releases') or []:
                rel_mbid = rel.get('id')
                # Releases without release events fall back to an empty event
                # instead of raising on ``None[0]`` / ``[][0]``.
                # NOTE(review): assumes parse_date accepts a missing date
                # (None) - confirm.
                events = rel.get('release_events') or [{}]
                rel_release = parse_date(events[0].get('date')).timestamp()

                # Release ids are kept in a sorted set ordered by release date.
                redis.zadd(releases_key, {rel_mbid: rel_release})
                redis.set(f'release:{rel_mbid}:release_group', mbid)

                # The full dict is passed so the release job does not have to
                # fetch it again.
                load_entities_of_release.delay(rel)

            offset += page_size
            # ``>=`` (not ``>``): when the total is an exact multiple of the
            # page size the next page would be empty, i.e. a wasted request.
            if offset >= count:
                break
|
|
|
|
|
|
@django_rq.job
def load_artist_on_cache(artist):
    """Cache an artist and queue the loading of all its release groups.

    Args:
        artist: Artist MBID, or the artist dict (MBID under ``'id'``). When
            only the MBID is given and the artist is not cached, it is fetched
            with its tags.
    """
    mbid = artist.get('id') if isinstance(artist, dict) else artist
    page_size = 100

    with get_redis_connection() as redis:
        # Store the artist only if it is not cached yet.
        if f'artist:{mbid}' not in redis:
            if isinstance(artist, str):
                artist = mb.get_artist_by_mbid(mbid, includes=['tags'])
            redis.set(f'artist:{mbid}', json.dumps(artist))
            _log.info('Artista %s almacenado en cache', mbid)

        # Load its release groups.
        groups_key = f'artist:{mbid}:release_groups'
        offset = 0
        while True:
            # Ask for as many release groups as possible on the first request;
            # otherwise a follow-up request for more data would be wasted.
            release_groups = mb.browse_release_groups({'artist': mbid},
                                                      includes=['artist-credits'],
                                                      limit=page_size, offset=offset)
            # A missing count is treated as 0 so it can be stored and compared.
            count = release_groups.get('release_group_count') or 0

            # If as many release groups are stored as MusicBrainz reports,
            # there is nothing left to load.
            if groups_key in redis and redis.zcard(groups_key) == count:
                break

            # Keep the count reported by MusicBrainz so completeness of the
            # cached set can be verified later.
            redis.set(f'artist:{mbid}:release_groups:count', count)

            for rel in release_groups.get('release_groups') or []:
                rel_mbid = rel.get('id')
                rel_date = parse_date(rel.get('first_release_date')).timestamp()

                # Release group ids are kept in a sorted set ordered by first
                # release date.
                redis.zadd(groups_key, {rel_mbid: rel_date})
                redis.set(f'release_group:{rel_mbid}:artist', mbid)

                # The full dict is passed so the release group job does not
                # have to fetch it again.
                load_entities_of_release_group.delay(rel)

            offset += page_size
            # ``>=`` (not ``>``): when the total is an exact multiple of the
            # page size the next page would be empty, i.e. a wasted request.
            if offset >= count:
                break
|