feat: first implementation

This commit is contained in:
Felipe 2025-04-28 19:02:32 -04:00
commit 39ea86b821
Signed by: pitbuster
SSH key fingerprint: SHA256:HDYu2Pm4/TmSX8GBwV49UvFWr1Ljg8XlHxKeCpjJpOk
11 changed files with 2494 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

2048
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

12
Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "ptcg-scrap"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = "1.0.97"
clap = { version = "4.5.35", features = ["derive"] }
reqwest = { version = "0.12.15", default-features = false, features = ["http2", "rustls-tls"] }
scraper = "0.23.1"
strum = { version = "0.27.1", features = ["derive"] }
tokio = { version = "1.44.1", default-features = false, features = ["fs", "rt-multi-thread", "macros"] }

1
README.md Normal file
View file

@ -0,0 +1 @@
## PTCG Scraper

45
src/card.rs Normal file
View file

@ -0,0 +1,45 @@
//! Card info
/// A scraped card together with the slug that identifies it.
#[derive(Debug)]
pub struct CardInfo {
/// Identifier for the card: derived from the card name by the downloader, or the
/// caller-supplied override when one is given.
pub slug: String,
/// Details parsed from the card page.
pub inner: InnerCardInfo,
}
/// Card details parsed from a card page.
#[derive(Debug)]
pub struct InnerCardInfo {
/// Card name, taken from the first `h1` inside the card container.
pub name: String,
/// Top-level category (Pokémon, Trainer or Energy).
pub kind: CardKind,
/// Sub-type within the category.
pub card_type: CardType,
// NOTE(review): the parser in this commit always stores `false` in the four flags below;
// their meanings are inferred from the field names only — confirm.
/// Presumably marks ACE SPEC cards.
pub acespec: bool,
/// Presumably marks TAG TEAM cards.
pub tagteam: bool,
/// Presumably marks Future cards.
pub future: bool,
/// Presumably marks Ancient cards.
pub ancient: bool,
/// Data that only applies to the card's kind.
pub specific_info: SpecificInfo,
}
/// Top-level category of a card.
///
/// The type is a plain fieldless enum, so it derives the common value traits: it can be
/// copied freely, compared in tests and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CardKind {
    /// A Pokémon card.
    Pokemon,
    /// A Trainer card.
    Trainer,
    /// An Energy card.
    Energy,
}
/// Sub-type of a card within its [`CardKind`].
///
/// The type is a plain fieldless enum, so it derives the common value traits: it can be
/// copied freely, compared in tests and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CardType {
    /// Basic Pokémon or basic Energy (the parser uses this variant for both kinds).
    Basic,
    /// Stage 1 Pokémon.
    Stage1,
    /// Stage 2 Pokémon.
    Stage2,
    /// Item Trainer card.
    Item,
    /// Tool Trainer card.
    Tool,
    /// Supporter Trainer card.
    Supporter,
    /// Stadium Trainer card.
    Stadium,
    /// Special Energy.
    Special,
}
/// Data that only exists for one kind of card.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so parsed values can be
/// duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SpecificInfo {
    /// Pokémon-specific data; no fields are captured yet.
    PokemonInfo {},
    /// Trainer-specific data.
    TrainerInfo {
        /// Effect text, one entry per paragraph found on the page.
        effect: Vec<String>,
    },
    /// Energy-specific data; no fields are captured yet.
    EnergyInfo {},
}

15
src/cli.rs Normal file
View file

@ -0,0 +1,15 @@
//! CLI parameters
use clap::Parser;
// Command-line arguments, parsed with clap's derive API.
// Review notes in this struct use plain `//` comments on purpose: clap turns `///` doc
// comments into `--help` text, so those are left exactly as written.
#[derive(Debug, Parser)]
#[command(version, about, long_about = None)]
pub struct Args {
/// Edition code
// Kept as a raw string here; `main` parses it into an `EditionCode`.
pub code: String,
/// Card number within the edition
// NOTE(review): `u8` caps the accepted value at 255 — confirm no edition has higher
// card numbers, otherwise widen this together with `download_card_info`.
pub number: u8,
///Override the slug for the card
#[arg(short, long)]
pub slug: Option<String>,
}

231
src/downloader/card_info.rs Normal file
View file

@ -0,0 +1,231 @@
//! Download card information.
use anyhow::{Result, anyhow};
use reqwest::Client;
use scraper::{Html, Selector};
use crate::card::{CardInfo, CardKind, CardType, InnerCardInfo, SpecificInfo};
use crate::editions::EditionCode;
use crate::lang::Language;
/// Downloads the page of one card and parses it into a [`CardInfo`].
///
/// The page URL is built from the language's base URL, the lower-cased edition number of
/// `code` and the card `number`. When `override_slug` is given it replaces the slug derived
/// from the page.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the server answers with an error status,
/// when the body cannot be read, or when the page cannot be parsed.
///
/// # Panics
///
/// Panics for languages whose base URL is still `todo!()`.
pub async fn download_card_info(
    client: Client,
    lang: Language,
    code: EditionCode,
    number: u8,
    override_slug: Option<&str>,
) -> Result<CardInfo> {
    let root = base_url(lang);
    let edition = code.edition_num().to_lowercase();
    let url = format!("{root}/{edition}/{number}/");

    // Reject 4xx/5xx answers before reading the body.
    let body = client
        .get(url)
        .send()
        .await?
        .error_for_status()?
        .text()
        .await?;

    let (derived_slug, inner) = parse_html(lang, code, body)?;
    let slug = override_slug.map_or(derived_slug, str::to_owned);
    Ok(CardInfo { slug, inner })
}
/// Returns the root URL under which the card pages for `lang` are published.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn base_url(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "https://www.pokemon.com/el/jcc-pokemon/cartas-pokemon/series",
    }
}
/// Extracts the slug and the card details from the HTML of a card page.
///
/// The card is looked up under `div.full-card-information`; its name comes from the first
/// `h1` and its kind/type from the first `div.card-type > h2`. Pokémon slugs include the
/// edition code, while Trainer and Energy slugs are built from the name and language only.
/// For Trainer cards every `div.ability > pre > p` element becomes one effect entry.
///
/// The `acespec`, `tagteam`, `future` and `ancient` flags are not detected yet and are
/// always `false`.
///
/// # Errors
///
/// Fails when a selector cannot be compiled, when the card container, the name or the type
/// heading is missing, or when the type heading cannot be classified.
fn parse_html(lang: Language, code: EditionCode, html: String) -> Result<(String, InnerCardInfo)> {
    let html = Html::parse_document(&html);

    // `ok_or_else` keeps the error construction off the success path.
    let card = html
        .select(&selector("div.full-card-information")?)
        .next()
        .ok_or_else(|| anyhow!("Couldn't find card info"))?;

    // NOTE(review): `inner_html()` returns serialized markup, so nested tags or escaped
    // entities would end up in the name — confirm whether plain text is wanted here.
    let name = card
        .select(&selector("h1")?)
        .next()
        .ok_or_else(|| anyhow!("Failed to get card name"))?
        .inner_html();

    let type_heading = card
        .select(&selector("div.card-type > h2")?)
        .next()
        .ok_or_else(|| anyhow!("Failed to get card type"))?
        .inner_html();
    let (kind, card_type) = parse_card_type(lang, type_heading)?;

    let slug = match kind {
        CardKind::Pokemon => slugify_pokemon(lang, code, &name),
        CardKind::Trainer | CardKind::Energy => slugify_unique(lang, &name),
    };

    let specific_info = match kind {
        CardKind::Pokemon => SpecificInfo::PokemonInfo {},
        CardKind::Trainer => {
            let effect = card
                .select(&selector("div.ability > pre > p")?)
                .map(|paragraph| paragraph.inner_html())
                .collect();
            SpecificInfo::TrainerInfo { effect }
        }
        CardKind::Energy => SpecificInfo::EnergyInfo {},
    };

    Ok((
        slug,
        InnerCardInfo {
            name,
            kind,
            card_type,
            acespec: false,
            tagteam: false,
            future: false,
            ancient: false,
            specific_info,
        },
    ))
}
/// Compiles the CSS selector `sel`.
///
/// # Errors
///
/// Returns an error that names the offending selector and carries the parser's own
/// description of the failure.
fn selector(sel: &str) -> Result<Selector> {
    // Render the parse error as text instead of discarding it, so a bad selector can be
    // identified from the message alone.
    Selector::parse(sel).map_err(|err| anyhow!("failed to parse selector `{sel}`: {err}"))
}
/// Classifies a card from the text of its card-type heading.
///
/// The kind is decided first by looking for the language-specific Trainer/Tool pattern, then
/// the literal `Pokémon`, then the Energy pattern. The sub-type is then searched among the
/// patterns that are valid for that kind.
///
/// # Errors
///
/// Fails when the heading matches no known kind, or no sub-type of the detected kind.
///
/// # Panics
///
/// Panics for languages whose patterns are still `todo!()`.
fn parse_card_type(lang: Language, text: String) -> Result<(CardKind, CardType)> {
    let has = |pattern: &str| text.contains(pattern);

    // Trainer is tested before Pokémon so that a heading matching the Trainer or Tool
    // pattern is classified as a Trainer even if it also mentions "Pokémon".
    let kind = if has(trainer_pattern(lang)) || has(tool_pattern(lang)) {
        CardKind::Trainer
    } else if has("Pokémon") {
        CardKind::Pokemon
    } else if has(energy_pattern(lang)) {
        CardKind::Energy
    } else {
        return Err(anyhow!(
            "Failed to get card kind (Pokemon, Trainer or Energy)"
        ));
    };

    let card_type = match kind {
        CardKind::Pokemon => {
            if has(basic_pattern(lang)) {
                CardType::Basic
            } else if has(stage1_pattern(lang)) {
                CardType::Stage1
            } else if has(stage2_pattern(lang)) {
                CardType::Stage2
            } else {
                return Err(anyhow!("Failed to get Pokemon type: {text}"));
            }
        }
        CardKind::Trainer => {
            if has(item_pattern(lang)) {
                CardType::Item
            } else if has(tool_pattern(lang)) {
                CardType::Tool
            } else if has(stadium_pattern(lang)) {
                CardType::Stadium
            } else if has(supporter_pattern(lang)) {
                CardType::Supporter
            } else {
                return Err(anyhow!("Failed to get Trainer type"));
            }
        }
        CardKind::Energy => {
            if has(basic_pattern(lang)) {
                CardType::Basic
            } else if has(special_pattern(lang)) {
                CardType::Special
            } else {
                // This branch used to report a "Pokemon type" failure by mistake.
                return Err(anyhow!("Failed to get Energy type"));
            }
        }
    };

    Ok((kind, card_type))
}
/// Text that marks a Trainer card in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn trainer_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Entrenador",
    }
}
/// Text that marks an Energy card in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn energy_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Energía",
    }
}
/// Text that marks a basic card (Pokémon or Energy) in the card-type heading for `lang`.
///
/// The Spanish value is a truncated stem, presumably so that it matches more than one
/// ending of the word.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn basic_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Básic",
    }
}
/// Text that marks a Stage 1 Pokémon in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn stage1_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Fase 1",
    }
}
/// Text that marks a Stage 2 Pokémon in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn stage2_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Fase 2",
    }
}
/// Text that marks an Item Trainer card in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn item_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Objeto",
    }
}
/// Text that marks a Tool Trainer card in the card-type heading for `lang`.
///
/// The classifier also accepts this pattern on its own as evidence that the card is a
/// Trainer.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn tool_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Herramienta",
    }
}
/// Text that marks a Supporter Trainer card in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn supporter_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Partidario",
    }
}
/// Text that marks a Stadium Trainer card in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn stadium_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Estadio",
    }
}
/// Text that marks a Special Energy card in the card-type heading for `lang`.
///
/// # Panics
///
/// Panics via `todo!()` for `Language::En`, which is not implemented yet.
fn special_pattern(lang: Language) -> &'static str {
    match lang {
        Language::En => todo!(),
        Language::Es => "Especial",
    }
}
/// Builds the slug of a Pokémon card: `<slugified name>-<edition code>-<language>`.
///
/// The edition code is part of the slug, unlike the slugs of Trainer and Energy cards.
fn slugify_pokemon(lang: Language, code: EditionCode, name: &str) -> String {
    let base = slugify(name);
    format!("{base}-{code}-{lang}")
}
/// Builds the slug of a Trainer or Energy card: `<slugified name>-<language>`.
///
/// No edition code is included, so the same name always yields the same slug per language.
fn slugify_unique(lang: Language, name: &str) -> String {
    let base = slugify(name);
    format!("{base}-{lang}")
}
/// Turns a card name into a slug fragment.
///
/// The name is lower-cased, every `'s` is removed, spaces become hyphens and the vowels
/// `á é í ó ú` lose their acute accent. All other characters (for example `ñ`) pass through
/// unchanged.
fn slugify(name: &str) -> String {
    // The possessive suffix spans two characters, so it is stripped before the
    // character-by-character pass below.
    let lowered = name.to_lowercase().replace("'s", "");
    lowered
        .chars()
        .map(|c| match c {
            ' ' => '-',
            'á' => 'a',
            'é' => 'e',
            'í' => 'i',
            'ó' => 'o',
            'ú' => 'u',
            other => other,
        })
        .collect()
}

3
src/downloader/mod.rs Normal file
View file

@ -0,0 +1,3 @@
//! Data downloaders
pub mod card_info;

98
src/editions.rs Normal file
View file

@ -0,0 +1,98 @@
//! Editions information
use strum::{Display, EnumString};
/// Block (era) that an edition belongs to.
///
/// Derives the common value traits so that blocks can be printed, copied and compared,
/// matching what `EditionCode` already offers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EditionBlock {
    /// Presumably the Black & White block — no edition maps to it yet.
    Bw,
    /// Presumably the XY block — no edition maps to it yet.
    Xy,
    /// Presumably the Sun & Moon block — no edition maps to it yet.
    Sm,
    /// Sword & Shield block.
    Ssh,
    /// Scarlet & Violet block.
    Sv,
}
/// Edition (set) codes accepted by the tool.
///
/// The strum attributes make parsing ASCII case-insensitive and render the variant name in
/// lowercase.
#[derive(Clone, Copy, Display, Debug, Hash, PartialEq, Eq, EnumString)]
#[strum(ascii_case_insensitive, serialize_all = "lowercase")]
pub enum EditionCode {
/// Sword and Shield
Ssh,
/// Scarlet and Violet
Svi,
/// Paldea Evolved
Pal,
/// Obsidian Flames
Obf,
/// 151
Mew,
/// Paradox Rift
Par,
/// Paldean Fates
Paf,
/// Temporal Forces
Tef,
/// Twilight Masquerade
Twm,
/// Shrouded Fable
Sfa,
/// Stellar Crown
Scr,
/// Surging Sparks
Ssp,
/// Prismatic Evolutions
Pre,
}
impl EditionCode {
pub fn edition_num(self) -> &'static str {
match self {
EditionCode::Ssh => "SWSH1",
EditionCode::Svi => "SV01",
EditionCode::Pal => "SV02",
EditionCode::Obf => "SV03",
EditionCode::Mew => "SV3_pt5",
EditionCode::Par => "SV04",
EditionCode::Paf => "SV4pt5",
EditionCode::Tef => "SV05",
EditionCode::Twm => "SV06",
EditionCode::Sfa => "SV6pt5",
EditionCode::Scr => "SV07",
EditionCode::Ssp => "SV08",
EditionCode::Pre => "SV8pt5",
}
}
pub fn edition_slug(self) -> &'static str {
match self {
EditionCode::Ssh => "sword-shield",
EditionCode::Svi => "scarlet-violet",
EditionCode::Pal => "paldea-evolved",
EditionCode::Obf => "obsidian-flames",
EditionCode::Mew => "151",
EditionCode::Par => "paradox-rift",
EditionCode::Paf => "paldean-fates",
EditionCode::Tef => "temporal-forces",
EditionCode::Twm => "twilight-masquerade",
EditionCode::Sfa => "shrouded-fable",
EditionCode::Scr => "stellar-crown",
EditionCode::Ssp => "surging-sparks",
EditionCode::Pre => "prismatic-evolutions",
}
}
pub fn block(self) -> EditionBlock {
match self {
EditionCode::Ssh => EditionBlock::Ssh,
EditionCode::Svi
| EditionCode::Pal
| EditionCode::Obf
| EditionCode::Mew
| EditionCode::Par
| EditionCode::Paf
| EditionCode::Tef
| EditionCode::Twm
| EditionCode::Sfa
| EditionCode::Scr
| EditionCode::Ssp
| EditionCode::Pre => EditionBlock::Sv,
}
}
}

10
src/lang.rs Normal file
View file

@ -0,0 +1,10 @@
//! Language settings
use strum::{Display, EnumString};
/// Language of the card pages to scrape.
///
/// The strum attribute renders and parses the variant name in lowercase (`es`, `en`).
/// `Debug`, `PartialEq`, `Eq` and `Hash` are derived so the value can be logged, compared
/// and embedded in other types that derive those traits.
#[derive(Clone, Copy, Debug, Display, EnumString, PartialEq, Eq, Hash)]
#[strum(serialize_all = "lowercase")]
pub enum Language {
    /// Spanish.
    Es,
    /// English.
    En,
}

30
src/main.rs Normal file
View file

@ -0,0 +1,30 @@
use std::str::FromStr;
use anyhow::{Context, Result};
use clap::Parser;
pub mod card;
pub mod cli;
pub mod downloader;
pub mod editions;
pub mod lang;
/// Entry point: downloads one card described by the command-line arguments and prints its
/// parsed information in `Debug` form.
///
/// # Errors
///
/// Fails when the edition code is not recognised or when downloading/parsing the card fails.
#[tokio::main]
async fn main() -> Result<()> {
    let args = cli::Args::parse();
    let client = reqwest::Client::new();
    let edition =
        editions::EditionCode::from_str(&args.code).context("Couldn't parse edition code")?;
    let number = args.number;
    let slug = args.slug.as_deref();

    // The client serves this single request only, so it is moved instead of cloned.
    // The language is fixed to Spanish in this commit.
    let card_info = downloader::card_info::download_card_info(
        client,
        lang::Language::Es,
        edition,
        number,
        slug,
    )
    .await?;

    println!("{card_info:?}");
    Ok(())
}