Last bit of major changes

Closes #1
Closes #5
Closes #6
Closes #8
Closes #9
Closes #10
This commit is contained in:
2026-01-26 04:11:38 -06:00
parent 1cd87156bd
commit 739d136209
24 changed files with 1157 additions and 410 deletions

View File

@@ -1,12 +1,11 @@
import requests
import time
import os
from django.core.management.base import BaseCommand
from django.utils.dateparse import parse_date
from store.models import Game, Set, Card
class Command(BaseCommand):
help = 'Populates the database with Pokémon TCG sets and cards using the Pokémon TCG API.'
help = 'Populates the database with Pokémon TCG sets and cards using the TCGDex REST API (English).'
def add_arguments(self, parser):
parser.add_argument(
@@ -15,21 +14,17 @@ class Command(BaseCommand):
help='Clear existing Pokémon TCG cards and sets before populating.'
)
parser.add_argument(
'--duration',
default='7',
help='Duration in days to look back for new sets/cards. Use "all" to fetch everything. Default is 7 days.'
)
parser.add_argument(
'--api-key',
default=os.getenv('POKEMONTCG_API_KEY', None),
help='Optional API Key for higher rate limits.'
'--duration',
default='7',
help='(Not full supported by TCGDex) Duration in days to look back. For now, this will just fetch all sets as TCGDex sets endpoint is not sorted by date.'
)
def handle(self, *args, **options):
self.stdout.write(self.style.SUCCESS('Starting Pokémon TCG population...'))
# Setup Headers for API (Rate limits are better with a key)
self.headers = {'X-Api-Key': options['api_key']} if options['api_key'] else {}
self.stdout.write(self.style.SUCCESS('Starting Pokémon TCG population (via TCGDex)...'))
# User Agent is good practice
self.headers = {'User-Agent': 'ExampleTCGSite/1.0'}
base_url = "https://api.tcgdex.net/v2/en"
# 1. Ensure Game exists
game, created = Game.objects.get_or_create(
@@ -48,177 +43,99 @@ class Command(BaseCommand):
Set.objects.filter(game=game).delete()
self.stdout.write(self.style.SUCCESS('Cleared Pokémon data.'))
# Handle --duration
duration = options['duration']
start_date_str = None
if duration != 'all':
try:
days = int(duration)
from django.utils import timezone
from datetime import timedelta
start_date = timezone.now().date() - timedelta(days=days)
start_date_str = start_date.strftime('%Y/%m/%d') # API uses YYYY/MM/DD
self.stdout.write(f'Fetching data released since {start_date_str}...')
except ValueError:
self.stdout.write(self.style.ERROR('Invalid duration. Must be an integer or "all".'))
return
# 2. Fetch Sets
self.stdout.write('Fetching sets from Pokémon TCG API...')
self.stdout.write('Fetching sets from TCGDex...')
# Build query for sets
# If duration is set, we use the Lucene search syntax provided by the API
params = {'orderBy': '-releaseDate', 'pageSize': 250}
if start_date_str:
params['q'] = f'releaseDate:>=("{start_date_str}")'
try:
# Note: /v2/sets does not usually require pagination for < 250 sets if filtering by recent date
# But "all" will require pagination.
sets_data = self.fetch_all_pages('https://api.pokemontcg.io/v2/sets', params)
# TCGDex /sets returns a list of minimal set objects
response = requests.get(f"{base_url}/sets", headers=self.headers)
response.raise_for_status()
sets_data = response.json()
except Exception as e:
self.stdout.write(self.style.ERROR(f'Failed to fetch sets: {e}'))
return
self.stdout.write(self.style.ERROR(f'Failed to fetch sets: {e}'))
return
self.stdout.write(f'Found {len(sets_data)} sets. Processing...')
processed_sets = []
for set_data in sets_data:
release_date = parse_date(set_data.get('releaseDate', '').replace('/', '-'))
# Pokémon sets have an 'id' (e.g., 'swsh1') and 'ptcgoCode' (e.g., 'SSH').
# We use 'id' as the unique code.
set_code = set_data.get('id')
for s_data in sets_data:
# s_data example: {"id": "base1", "name": "Base Set", ...}
# The TCGDex list view does not consistently include releaseDate, so the
# release date is left unset here; it could be filled in later from the per-set detail response.
# To keep the import fast, set details are not fetched solely to obtain the date.
set_obj, created = Set.objects.update_or_create(
code=set_code,
code=s_data.get('id'),
game=game,
defaults={
'name': set_data.get('name'),
'release_date': release_date,
'name': s_data.get('name'),
# 'release_date': None # Not available in simple list
}
)
processed_sets.append(set_obj)
self.stdout.write(self.style.SUCCESS(f'Processed {len(processed_sets)} sets.'))
# 3. Fetch Cards
# Strategy: To be efficient, if we have a specific duration, we query cards by date.
# If we are doing "all", we iterate through the sets we just found (or all sets) to ensure we get everything structured.
self.stdout.write('Fetching cards...')
card_params = {'pageSize': 250}
# Cards are fetched set by set: there is no reliable "all new cards" feed that avoids
# pagination headaches, and the TCGDex data layout favors per-set traversal.
if start_date_str:
# Fetch all cards released after date (cross-set)
card_params['q'] = f'set.releaseDate:>=("{start_date_str}")'
self.fetch_and_process_cards(card_params, game)
else:
# Fetch by set to allow for better progress tracking/chunking if doing a full import
total_sets = len(processed_sets)
for idx, set_obj in enumerate(processed_sets):
self.stdout.write(f' [{idx+1}/{total_sets}] Fetching cards for set: {set_obj.name} ({set_obj.code})...')
total_sets = len(processed_sets)
for idx, set_obj in enumerate(processed_sets):
self.stdout.write(f' [{idx+1}/{total_sets}] Fetching cards for set: {set_obj.name} ({set_obj.code})...')
try:
# Fetch Set Detail to get cards
# Endpoint: /sets/{id}
set_resp = requests.get(f"{base_url}/sets/{set_obj.code}", headers=self.headers)
if set_resp.status_code == 404:
self.stdout.write(self.style.WARNING(f' Set {set_obj.code} detail not found. Skipping.'))
continue
set_resp.raise_for_status()
# Filter by specific set ID
set_card_params = {'pageSize': 250, 'q': f'set.id:{set_obj.code}'}
self.fetch_and_process_cards(set_card_params, game, specific_set=set_obj)
set_detail = set_resp.json()
cards = set_detail.get('cards', [])
# Sleep briefly to respect rate limits (60/min without key, 1000/min with key)
time.sleep(0.5 if options['api_key'] else 1.5)
except Exception as e:
self.stdout.write(self.style.ERROR(f' Failed to fetch cards for {set_obj.name}: {e}'))
continue
self.stdout.write(f' Found {len(cards)} cards.')
for c_data in cards:
# c_data example: {"id": "base1-1", "localId": "1", "name": "Alakazam", "image": "..."}
# Rarity is NOT in this list usually, requires fetching card detail. Skipping for speed.
# Image URL: TCGDex gives a base URL usually, e.g. ".../base1/1"
# Sometimes it has /high.png or /low.png supported. The provided 'image' field often works as is.
# It might have extension like .png or just be the base.
# The user-provided example curl showed "image": "https://assets.tcgdex.net/en/base/base1/1"
# Those usually redirect to an image or handle extension. Let's append /high.png if we want best quality or try as is.
# Actually, TCGDex assets usually need an extension. Let's assume the API provides a valid URL or we append.
# Inspecting typical TCGDex response: "image": ".../1" (no extension).
# Browsers handle it, but for our backend saving it might be tricky if it's not a direct file.
# Let's save the URL as provided + "/high.png" as a guess for better quality if it doesn't have extension,
# Or just use the provided one.
# Update: TCGDex documentation often says: {image}/high.webp or {image}/low.webp
base_image = c_data.get('image')
image_url = f"{base_image}/high.webp" if base_image else ''
Card.objects.update_or_create(
scryfall_id=c_data.get('id'),
defaults={
'set': set_obj,
'name': c_data.get('name'),
'rarity': '', # specific call needed, simplifying
'image_url': image_url,
'collector_number': c_data.get('localId', ''),
'external_url': f"https://tcgdex.dev/cards/{c_data.get('id')}", # simplified assumption
}
)
# Rate limiting: TCGDex is generous, but a short pause here would avoid hammering the API.
# time.sleep(0.1)
self.stdout.write(self.style.SUCCESS('Finished Pokémon TCG population!'))
def fetch_all_pages(self, url, params, max_retries=5, timeout=30):
    """Fetch every page of a paginated endpoint and return the combined items.

    Pages are requested starting at ``page=1``. Each JSON response is read
    for three keys: ``data`` (the items of the page), ``totalCount`` and
    ``count``. Paging stops once the number of collected items reaches
    ``totalCount`` or a page reports ``count == 0``.

    Args:
        url: Endpoint to query.
        params: Query parameters for the request. The mapping is copied,
            so the caller's dict is never modified.
        max_retries: How many consecutive HTTP 429 responses are tolerated
            for a single page before giving up.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the command indefinitely.

    Returns:
        A list with the ``data`` items of all pages, in page order.

    Raises:
        Exception: On any non-200 response other than a retried 429, or
            when the rate limit persists for more than ``max_retries``
            consecutive attempts. Errors raised by ``requests`` itself
            (e.g. timeouts) propagate unchanged.
    """
    # Work on a copy: adding 'page' must not leak into the caller's dict.
    query = dict(params or {})
    results = []
    page = 1
    rate_limit_hits = 0

    while True:
        query['page'] = page
        response = requests.get(url, params=query, headers=self.headers, timeout=timeout)

        if response.status_code == 429:
            # Bounded retry: a permanently rate-limited key must not loop forever.
            rate_limit_hits += 1
            if rate_limit_hits > max_retries:
                raise Exception(
                    f"API Error 429: rate limit still exceeded after {max_retries} retries"
                )
            self.stdout.write(self.style.WARNING('Rate limit hit. Sleeping for 10 seconds...'))
            time.sleep(10)
            continue

        if response.status_code != 200:
            raise Exception(f"API Error {response.status_code}: {response.text}")

        # A successful page resets the consecutive rate-limit counter.
        rate_limit_hits = 0

        data = response.json()
        results.extend(data.get('data', []))

        # Check if we need more pages
        total_count = data.get('totalCount', 0)
        count = data.get('count', 0)
        if len(results) >= total_count or count == 0:
            break
        page += 1

    return results
def fetch_and_process_cards(self, params, game, specific_set=None):
    """Download cards matching ``params`` and upsert each one as a ``Card``.

    Args:
        params: Query parameters forwarded to ``fetch_all_pages``.
        game: Game whose sets are used to resolve each card's set when
            ``specific_set`` is not given.
        specific_set: When truthy, every fetched card is attached to this
            set and no set lookup is performed.

    A fetch failure is reported on stdout and the method returns without
    touching the database. Cards whose set id is not among the game's
    stored sets are skipped.
    """
    try:
        fetched = self.fetch_all_pages('https://api.pokemontcg.io/v2/cards', params)
    except Exception as e:
        self.stdout.write(self.style.ERROR(f' Failed to fetch cards: {e}'))
        return

    self.stdout.write(f' Processing {len(fetched)} cards...')

    # The code -> Set lookup is only needed for mixed (cross-set) queries.
    sets_by_code = (
        {} if specific_set else {s.code: s for s in Set.objects.filter(game=game)}
    )

    for card in fetched:
        # Resolve the owning set, skipping cards from sets we did not sync.
        if specific_set:
            target_set = specific_set
        else:
            code = card.get('set', {}).get('id')
            if code not in sets_by_code:
                continue
            target_set = sets_by_code[code]

        # Prefer the large image and fall back to the small one.
        picture = ''
        if 'images' in card:
            images = card['images']
            picture = images.get('large', images.get('small', ''))

        # The API exposes a TCGPlayer URL rather than a clean numeric id,
        # so the id is stored as None.
        shop_url = card.get('tcgplayer', {}).get('url', '')
        link = shop_url or f"https://pkmncards.com/card/{card.get('id')}"

        fields = {
            'set': target_set,
            'name': card.get('name'),
            'rarity': card.get('rarity', 'Common'),
            'image_url': picture,
            'tcgplayer_id': None,
            'collector_number': card.get('number', ''),
            'external_url': link,
        }
        # The API card id doubles as the unique identifier.
        Card.objects.update_or_create(scryfall_id=card.get('id'), defaults=fields)