Continuando con el almacenamiento de datos en cache
Va mejorando la cosa. Tengo que hacer tests sí o sí, porque no estoy nada seguro de que todo funcione como espero: toda llamada a la API debería corresponder a una llamada a caché, y hay que descubrir si efectivamente está todo en caché como se espera. Voy a terminar del modo en que lo estoy haciendo, y tal vez cambie esto a un módulo de caché, el cual se encargará de obtener los datos desde caché o de llamar a MusicBrainz para suplir los datos que la caché no puede responder.
This commit is contained in:
255
fetcher/jobs.py
255
fetcher/jobs.py
@@ -1,11 +1,34 @@
|
||||
"""Jobs a ejecutar en el fondo"""
|
||||
"""Jobs a ejecutar con django-rq
|
||||
|
||||
NO SÉ DÓNDE DOCUMENTAR ESTO
|
||||
La estructura que se utilizara para las keys es la siguiente
|
||||
|
||||
artist:{mbid} => full json
|
||||
artist:{mbid}:release_groups => set of release_group_mbid
|
||||
artist:{mbid}:release_groups:count => cantidad de las release groups
|
||||
|
||||
release_group:{mbid} => full json
|
||||
release_group:{mbid}:artist => artist_mbid
|
||||
release_group:{mbid}:releases => set of release_mbid
|
||||
release_group:{mbid}:cover_art => json with cover arts
|
||||
|
||||
release:{mbid} => full json
|
||||
release:{mbid}:release_group => release_group_mbid
|
||||
release:{mbid}:media => set of media_json
|
||||
release:{mbid}:{media}:recordings => set of recording_mbid
|
||||
release:{mbid}:cover_art => json with cover arts
|
||||
|
||||
recording:{mbid} => full json
|
||||
recording:{mbid}:release => release_mbid
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
|
||||
import django_rq
|
||||
|
||||
from fetcher import musicbrainz as mb
|
||||
from utils import get_redis_connection
|
||||
from utils import get_redis_connection, parse_date
|
||||
|
||||
_log = logging.getLogger('fetcher_jobs')
|
||||
_log.addHandler(logging.NullHandler())
|
||||
@@ -14,112 +37,124 @@ _log.addHandler(logging.NullHandler())
|
||||
@django_rq.job('high')
|
||||
def load_release_cover_art(release):
|
||||
"""Carga en cache el cover art de una release"""
|
||||
mbid = release
|
||||
if isinstance(release, dict):
|
||||
mbid = release.get('id')
|
||||
|
||||
with get_redis_connection() as redis:
|
||||
mbid = release
|
||||
if isinstance(release, dict):
|
||||
mbid = release.get('id')
|
||||
|
||||
if f'release_cover_art_{mbid}' in redis:
|
||||
_log.info('El cover art ya estaba guardado')
|
||||
return
|
||||
|
||||
cover_art = mb.get_release_cover_art(mbid)
|
||||
redis.set(f'release_cover_art_{mbid}', json.dumps(cover_art))
|
||||
_log.info('Cover art de release %s almacenado en cache', mbid)
|
||||
if f'release:{mbid}:cover_art' not in redis:
|
||||
cover_art = mb.get_release_cover_art(mbid)
|
||||
redis.set(f'release:{mbid}:cover_art', json.dumps(cover_art))
|
||||
_log.info('Cover art de release %s almacenado en cache', mbid)
|
||||
|
||||
|
||||
@django_rq.job('high')
|
||||
def load_release_group_cover_art(release_group):
|
||||
"""Carga en cache el cover art de un release group"""
|
||||
mbid = release_group
|
||||
if isinstance(release_group, dict):
|
||||
mbid = release_group.get('id')
|
||||
|
||||
with get_redis_connection() as redis:
|
||||
mbid = release_group
|
||||
if isinstance(release_group, dict):
|
||||
mbid = release_group.get('id')
|
||||
|
||||
if f'release_group_cover_art_{mbid}' in redis:
|
||||
_log.info('El cover art ya estaba guardado')
|
||||
return
|
||||
|
||||
cover_art = mb.get_release_group_cover_art(mbid)
|
||||
redis.set(f'release_group_cover_art_{mbid}', json.dumps(cover_art))
|
||||
_log.info('Cover art de release group %s almacenado en cache', mbid)
|
||||
|
||||
|
||||
@django_rq.job
|
||||
def load_entities_of_recording(recording):
|
||||
"""Carga en cache una grabacion y sus entidades relacionadas"""
|
||||
with get_redis_connection() as redis:
|
||||
mbid = recording
|
||||
if isinstance(recording, dict):
|
||||
mbid = recording.get('id')
|
||||
|
||||
if f'recording_{mbid}' in redis:
|
||||
_log.info('La recording ya se había procesado anteriormente')
|
||||
return
|
||||
|
||||
if isinstance(recording, str):
|
||||
recording = mb.get_recording_by_mbid(mbid)
|
||||
|
||||
redis.set(f'recording_{mbid}', json.dumps(recording))
|
||||
_log.info('Recording %s fue almacenada correctamente', mbid)
|
||||
if f'release_group:{mbid}:cover_art' not in redis:
|
||||
cover_art = mb.get_release_group_cover_art(mbid)
|
||||
redis.set(f'release_group:{mbid}:cover_art', json.dumps(cover_art))
|
||||
_log.info('Cover art de release group %s almacenado en cache', mbid)
|
||||
|
||||
|
||||
@django_rq.job
|
||||
def load_entities_of_release(release):
|
||||
"""Carga en cache una release y sus entidades relacionadas"""
|
||||
mbid = release
|
||||
if isinstance(release, dict):
|
||||
mbid = release.get('id')
|
||||
|
||||
with get_redis_connection() as redis:
|
||||
mbid = release
|
||||
if isinstance(release, dict):
|
||||
mbid = release.get('id')
|
||||
# Cargar release solo si no esta almacenado
|
||||
if f'release:{mbid}' not in redis:
|
||||
if isinstance(release, str):
|
||||
release = mb.get_release_by_mbid(mbid, ['recordings', 'artists'])
|
||||
redis.set(f'release:{mbid}', json.dumps(release))
|
||||
_log.info('Release %s fue almacenada en cache', mbid)
|
||||
else:
|
||||
release = json.loads(redis.get(f'release:{mbid}'))
|
||||
|
||||
if f'release_{mbid}' in redis:
|
||||
_log.info('La release ya se había procesado anteriormente')
|
||||
return
|
||||
# Envía a cargar sus cover art
|
||||
load_release_cover_art.delay(release.get('id'))
|
||||
|
||||
if isinstance(release, str):
|
||||
release = mb.get_release_by_mbid(mbid)
|
||||
# Almacenar el artista de la release
|
||||
if len(release.get('artist_credit')) > 0:
|
||||
artist = release.get('artist_credit')[0].get('artist').get('id')
|
||||
redis.set(f'release:{mbid}:artist', artist)
|
||||
|
||||
redis.set(f'release_{mbid}', json.dumps(release))
|
||||
_log.info('Release %s fue almacenada en cache', mbid)
|
||||
|
||||
load_release_cover_art.delay(release)
|
||||
|
||||
offset = 0
|
||||
while True:
|
||||
recordings = mb.browse_recordings({'release': mbid}, limit=100, offset=offset)
|
||||
for recording in recordings.get('recordings'):
|
||||
load_entities_of_recording(recording)
|
||||
|
||||
offset += 100
|
||||
if offset > recordings.get('recording_count', 0):
|
||||
break
|
||||
# Obtener sus medias
|
||||
# Una release puede tener mas de un disco o dvds incluidos, entre otras cosas
|
||||
# Dentro de estas existen tracks, las cuales no son accesibles de ninguna otra forma, y
|
||||
# dentro de una track está definido el recording; es un enredo quizás innecesario, así que más
|
||||
# o menos me lo voy a saltar, lo único que interesa de la track es su orden dentro de su
|
||||
# media
|
||||
medias = release.get('media', [])
|
||||
for raw_media in medias:
|
||||
media = {
|
||||
'format': raw_media.get('format'),
|
||||
'position': raw_media.get('position'),
|
||||
'track_count': raw_media.get('track_count')
|
||||
}
|
||||
redis.zadd(f'release:{mbid}:media', {json.dumps(media): media['position']})
|
||||
for track in raw_media.get('tracks', []):
|
||||
recording_key = f'release:{mbid}:{media.get("position")}:recordings'
|
||||
recording = track.get('recording')
|
||||
redis.zadd(recording_key, {recording.get('id'): track.get("position")})
|
||||
redis.set(f'recording:{mbid}', json.dumps(recording))
|
||||
redis.set(f'recording:{mbid}:release', mbid)
|
||||
|
||||
|
||||
@django_rq.job
|
||||
def load_entities_of_release_group(release_group):
|
||||
"""Carga en cache un release group y sus entidades relacionadas"""
|
||||
mbid = release_group
|
||||
if isinstance(release_group, dict):
|
||||
mbid = release_group.get('id')
|
||||
|
||||
with get_redis_connection() as redis:
|
||||
mbid = release_group
|
||||
if isinstance(release_group, dict):
|
||||
mbid = release_group.get('id')
|
||||
|
||||
if f'release_group_{mbid}' in redis:
|
||||
_log.info('La release group ya se habia procesado anteriormente')
|
||||
return
|
||||
|
||||
if isinstance(release_group, str):
|
||||
release_group = mb.get_release_group_by_mbid(mbid)
|
||||
|
||||
redis.set(f'release_group_{mbid}', json.dumps(release_group))
|
||||
_log.info('Release Group %s almacenado en cache', mbid)
|
||||
# Cargar release_group solo si no esta almacenado
|
||||
if f'release_group:{mbid}' not in redis:
|
||||
if isinstance(release_group, str):
|
||||
release_group = mb.get_release_group_by_mbid(mbid, includes=['artists'])
|
||||
redis.set(f'release_group:{mbid}', json.dumps(release_group))
|
||||
_log.info('Release Group %s almacenado en cache', mbid)
|
||||
|
||||
# Envía a cargar sus cover art
|
||||
load_release_group_cover_art.delay(release_group)
|
||||
|
||||
# Carga sus releases
|
||||
offset = 0
|
||||
while True:
|
||||
releases = mb.browse_releases({'release-group': mbid}, limit=100, offset=offset)
|
||||
for release in releases.get('releases'):
|
||||
load_entities_of_release(release)
|
||||
# Cargo todas las releases posibles la primera vez, si no lo hiciera y necesitara pedir
|
||||
# más datos, usaría una request en vano
|
||||
releases = mb.browse_releases({'release-group': mbid},
|
||||
includes=['artist-credits'],
|
||||
limit=100, offset=offset)
|
||||
count = releases.get('release_count')
|
||||
if f'release_group:{mbid}:releases' in redis:
|
||||
# Si es que la cantidad de releases almacenadas es la misma que la que dice
|
||||
# musicbrainz que existen, entonces no tengo por qué continuar cargando
|
||||
if redis.zcard(f'release_group:{mbid}:releases') == count:
|
||||
break
|
||||
|
||||
# Almaceno el count que dice musicbrainz para asegurar que estén todos los
|
||||
# elementos cargados
|
||||
redis.set(f'release_group:{mbid}:releases:count', count)
|
||||
|
||||
for rel in releases.get('releases'):
|
||||
rel_mbid = rel.get('id')
|
||||
rel_release = parse_date(rel.get('release_events')[0].get('date')).timestamp()
|
||||
|
||||
# Se almacena el id del release en un set ordenado por fecha de lanzamiento
|
||||
redis.zadd(f'release_group:{mbid}:releases', {rel_mbid: rel_release})
|
||||
redis.set(f'release:{rel_mbid}:release_group', mbid)
|
||||
|
||||
load_entities_of_release.delay(rel)
|
||||
|
||||
offset += 100
|
||||
if offset > releases.get('release_count', 0):
|
||||
@@ -129,32 +164,46 @@ def load_entities_of_release_group(release_group):
|
||||
@django_rq.job
|
||||
def load_artist_on_cache(artist):
|
||||
"""Carga en cache a un artista y todas sus entidades"""
|
||||
mbid = artist
|
||||
if isinstance(artist, dict):
|
||||
mbid = artist.get('id')
|
||||
|
||||
with get_redis_connection() as redis:
|
||||
mbid = artist
|
||||
if isinstance(artist, dict):
|
||||
mbid = artist.get('id')
|
||||
|
||||
if f'artist_{mbid}' in redis:
|
||||
_log.info('El artista ya se había procesado anteriormente')
|
||||
return
|
||||
|
||||
if isinstance(artist, str):
|
||||
artist = mb.get_artist_by_mbid(mbid, includes=['tags'])
|
||||
|
||||
redis.set(f'artist_{mbid}', json.dumps(artist))
|
||||
_log.info('Artista %s almacenado en cache', mbid)
|
||||
# Cargar artista solo si no estaba almacenado
|
||||
if f'artist:{mbid}' not in redis:
|
||||
if isinstance(artist, str):
|
||||
artist = mb.get_artist_by_mbid(mbid, includes=['tags'])
|
||||
redis.set(f'artist:{mbid}', json.dumps(artist))
|
||||
_log.info('Artista %s almacenado en cache', mbid)
|
||||
|
||||
# Cargar sus release_groups
|
||||
offset = 0
|
||||
while True:
|
||||
release_groups = mb.browse_release_groups({'artist': mbid}, limit=100, offset=offset)
|
||||
if f'artist_{mbid}:release_group_count' not in redis:
|
||||
redis.set(f'artist_{mbid}:release_group_count',
|
||||
release_groups.get('release_group_count'))
|
||||
for release_group in release_groups.get('release_groups'):
|
||||
release_group_id = release_group.get('id')
|
||||
redis.rpush(f'artist_{mbid}:release_groups', release_group_id)
|
||||
redis.set(f'release_group_{release_group_id}:artist', mbid)
|
||||
load_entities_of_release_group.delay(release_group)
|
||||
# Cargo todos los release groups posibles la primera vez; si no lo hiciera y necesitara pedir
|
||||
# más datos, usaría una request en vano
|
||||
release_groups = mb.browse_release_groups({'artist': mbid},
|
||||
includes=['artist-credits'],
|
||||
limit=100, offset=offset)
|
||||
count = release_groups.get('release_group_count')
|
||||
if f'artist:{mbid}:release_groups' in redis:
|
||||
# Si es que la cantidad de release_groups almacenadas es la misma que la que dice
|
||||
# musicbrainz que existen, entonces no tengo por qué continuar cargando
|
||||
if redis.zcard(f'artist:{mbid}:release_groups') == count:
|
||||
break
|
||||
|
||||
# Almaceno el count que dice musicbrainz para asegurar que estén todos los
|
||||
# elementos cargados
|
||||
redis.set(f'artist:{mbid}:release_groups:count', count)
|
||||
|
||||
for rel in release_groups.get('release_groups'):
|
||||
rel_mbid = rel.get('id')
|
||||
rel_date = parse_date(rel.get('first_release_date')).timestamp()
|
||||
|
||||
# Se almacena el id del release group en un set ordenado por su fecha de lanzamiento
|
||||
redis.zadd(f'artist:{mbid}:release_groups', {rel_mbid: rel_date})
|
||||
redis.set(f'release_group:{rel_mbid}:artist', mbid)
|
||||
|
||||
load_entities_of_release_group.delay(rel)
|
||||
|
||||
offset += 100
|
||||
if offset > release_groups.get('release_group_count', 0):
|
||||
|
||||
Reference in New Issue
Block a user