feat: first implementation
This commit is contained in:
commit
39ea86b821
11 changed files with 2494 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
/target
|
||||
2048
Cargo.lock
generated
Normal file
2048
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
12
Cargo.toml
Normal file
12
Cargo.toml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "ptcg-scrap"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.97"
|
||||
clap = { version = "4.5.35", features = ["derive"] }
|
||||
reqwest = { version = "0.12.15", default-features = false, features = ["http2", "rustls-tls"] }
|
||||
scraper = "0.23.1"
|
||||
strum = { version = "0.27.1", features = ["derive"] }
|
||||
tokio = { version = "1.44.1", default-features = false, features = ["fs", "rt-multi-thread", "macros"] }
|
||||
1
README.md
Normal file
1
README.md
Normal file
|
|
@ -0,0 +1 @@
|
|||
## PTCG Scraper
|
||||
45
src/card.rs
Normal file
45
src/card.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
//! Card info
|
||||
|
||||
/// A scraped card: its slug plus the data parsed from the card page.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CardInfo {
    /// Identifier string for the card.
    pub slug: String,
    /// Data extracted from the card page.
    pub inner: InnerCardInfo,
}

/// Card data that does not depend on the slug.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InnerCardInfo {
    /// Card name.
    pub name: String,
    /// Broad category of the card.
    pub kind: CardKind,
    /// Sub-type within the category.
    pub card_type: CardType,
    /// ACE SPEC marker (presumably; inferred from the field name).
    pub acespec: bool,
    /// TAG TEAM marker (presumably; inferred from the field name).
    pub tagteam: bool,
    /// "Future" marker (presumably; inferred from the field name).
    pub future: bool,
    /// "Ancient" marker (presumably; inferred from the field name).
    pub ancient: bool,
    /// Data that only exists for one [`CardKind`].
    pub specific_info: SpecificInfo,
}

/// Broad category of a card.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CardKind {
    Pokemon,
    Trainer,
    Energy,
}

/// Sub-type of a card; which variants make sense depends on the [`CardKind`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CardType {
    Basic,
    Stage1,
    Stage2,
    Item,
    Tool,
    Supporter,
    Stadium,
    Special,
}

/// Kind-specific payload of a card.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SpecificInfo {
    /// No Pokémon-specific data is stored yet.
    PokemonInfo {},
    /// Effect text of a Trainer card, one entry per paragraph.
    TrainerInfo { effect: Vec<String> },
    /// No Energy-specific data is stored yet.
    EnergyInfo {},
}
|
||||
15
src/cli.rs
Normal file
15
src/cli.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//! CLI parameters
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
// Command-line arguments: two positionals (edition code, card number) and an
// optional slug override. Plain `//` comments are used for notes here so the
// help text clap derives from the `///` comments is unaffected.
#[derive(Debug, Parser)]
#[command(version, about, long_about = None)]
pub struct Args {
    /// Edition code
    pub code: String,
    /// Card number within the edition
    // NOTE(review): `u8` caps the accepted card number at 255 — confirm no
    // supported edition has higher-numbered cards.
    pub number: u8,
    /// Override the slug for the card
    #[arg(short, long)]
    pub slug: Option<String>,
}
|
||||
231
src/downloader/card_info.rs
Normal file
231
src/downloader/card_info.rs
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
//! Download card information.
|
||||
|
||||
use anyhow::{Result, anyhow};
|
||||
use reqwest::Client;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::card::{CardInfo, CardKind, CardType, InnerCardInfo, SpecificInfo};
|
||||
use crate::editions::EditionCode;
|
||||
use crate::lang::Language;
|
||||
|
||||
pub async fn download_card_info(
|
||||
client: Client,
|
||||
lang: Language,
|
||||
code: EditionCode,
|
||||
number: u8,
|
||||
override_slug: Option<&str>,
|
||||
) -> Result<CardInfo> {
|
||||
let url = format!(
|
||||
"{}/{}/{number}/",
|
||||
base_url(lang),
|
||||
code.edition_num().to_lowercase()
|
||||
);
|
||||
let response = client.get(url).send().await?;
|
||||
response.error_for_status_ref()?;
|
||||
let (mut slug, inner) = parse_html(lang, code, response.text().await?)?;
|
||||
if let Some(override_slug) = override_slug {
|
||||
slug = override_slug.into()
|
||||
}
|
||||
Ok(CardInfo { slug, inner })
|
||||
}
|
||||
|
||||
fn base_url(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "https://www.pokemon.com/el/jcc-pokemon/cartas-pokemon/series",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_html(lang: Language, code: EditionCode, html: String) -> Result<(String, InnerCardInfo)> {
|
||||
let html = Html::parse_document(&html);
|
||||
let card = html
|
||||
.select(&selector("div.full-card-information")?)
|
||||
.next()
|
||||
.ok_or(anyhow!("Couldn't find card info"))?;
|
||||
let name = card
|
||||
.select(&selector("h1")?)
|
||||
.next()
|
||||
.ok_or(anyhow!("Failed to get card name"))?
|
||||
.inner_html();
|
||||
let (kind, card_type) = parse_card_type(
|
||||
lang,
|
||||
card.select(&selector("div.card-type > h2")?)
|
||||
.next()
|
||||
.ok_or(anyhow!("Failed to get card type"))?
|
||||
.inner_html(),
|
||||
)?;
|
||||
let slug = match kind {
|
||||
CardKind::Pokemon => slugify_pokemon(lang, code, &name),
|
||||
CardKind::Trainer | CardKind::Energy => slugify_unique(lang, &name),
|
||||
};
|
||||
let specific_info = match kind {
|
||||
CardKind::Pokemon => SpecificInfo::PokemonInfo {},
|
||||
CardKind::Trainer => {
|
||||
let effect = card
|
||||
.select(&selector("div.ability > pre > p")?)
|
||||
.map(|e| e.inner_html())
|
||||
.collect();
|
||||
SpecificInfo::TrainerInfo { effect }
|
||||
}
|
||||
CardKind::Energy => SpecificInfo::EnergyInfo {},
|
||||
};
|
||||
|
||||
Ok((
|
||||
slug,
|
||||
InnerCardInfo {
|
||||
name,
|
||||
kind,
|
||||
card_type,
|
||||
acespec: false,
|
||||
tagteam: false,
|
||||
future: false,
|
||||
ancient: false,
|
||||
specific_info,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
fn selector(sel: &str) -> Result<Selector> {
|
||||
Selector::parse(sel).map_err(|_| anyhow!("failed to parse selector"))
|
||||
}
|
||||
|
||||
fn parse_card_type(lang: Language, text: String) -> Result<(CardKind, CardType)> {
|
||||
let kind = if text.contains(trainer_pattern(lang)) || text.contains(tool_pattern(lang)) {
|
||||
Ok(CardKind::Trainer)
|
||||
} else if text.contains("Pokémon") {
|
||||
Ok(CardKind::Pokemon)
|
||||
} else if text.contains(energy_pattern(lang)) {
|
||||
Ok(CardKind::Energy)
|
||||
} else {
|
||||
Err(anyhow!(
|
||||
"Failed to get card kind (Pokemon, Trainer or Energy)"
|
||||
))
|
||||
}?;
|
||||
|
||||
let card_type = match kind {
|
||||
CardKind::Pokemon => {
|
||||
if text.contains(basic_pattern(lang)) {
|
||||
Ok(CardType::Basic)
|
||||
} else if text.contains(stage1_pattern(lang)) {
|
||||
Ok(CardType::Stage1)
|
||||
} else if text.contains(stage2_pattern(lang)) {
|
||||
Ok(CardType::Stage2)
|
||||
} else {
|
||||
Err(anyhow!("Failed to get Pokemon type: {text}"))
|
||||
}
|
||||
}
|
||||
CardKind::Trainer => {
|
||||
if text.contains(item_pattern(lang)) {
|
||||
Ok(CardType::Item)
|
||||
} else if text.contains(tool_pattern(lang)) {
|
||||
Ok(CardType::Tool)
|
||||
} else if text.contains(stadium_pattern(lang)) {
|
||||
Ok(CardType::Stadium)
|
||||
} else if text.contains(supporter_pattern(lang)) {
|
||||
Ok(CardType::Supporter)
|
||||
} else {
|
||||
Err(anyhow!("Failed to get Trainer type"))
|
||||
}
|
||||
}
|
||||
CardKind::Energy => {
|
||||
if text.contains(basic_pattern(lang)) {
|
||||
Ok(CardType::Basic)
|
||||
} else if text.contains(special_pattern(lang)) {
|
||||
Ok(CardType::Special)
|
||||
} else {
|
||||
Err(anyhow!("Failed to get Pokemon type"))
|
||||
}
|
||||
}
|
||||
}?;
|
||||
|
||||
Ok((kind, card_type))
|
||||
}
|
||||
|
||||
fn trainer_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Entrenador",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn energy_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Energía",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn basic_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Básic",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stage1_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Fase 1",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stage2_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Fase 2",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn item_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Objeto",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Herramienta",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn supporter_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Partidario",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stadium_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Estadio",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn special_pattern(lang: Language) -> &'static str {
|
||||
match lang {
|
||||
Language::Es => "Especial",
|
||||
Language::En => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn slugify_pokemon(lang: Language, code: EditionCode, name: &str) -> String {
|
||||
format!("{}-{code}-{lang}", slugify(name))
|
||||
}
|
||||
|
||||
fn slugify_unique(lang: Language, name: &str) -> String {
|
||||
format!("{}-{lang}", slugify(name))
|
||||
}
|
||||
|
||||
/// Turns a card name into a lowercase slug fragment.
///
/// The name is lowercased, every `'s` is removed, spaces become `-`, and the
/// Spanish accented letters `á é í ó ú ü ñ` are replaced by their unaccented
/// ASCII counterparts. All other characters are kept as they are.
fn slugify(name: &str) -> String {
    name.to_lowercase()
        .replace("'s", "")
        .chars()
        // Single pass over the characters instead of one `replace` (and one
        // allocation) per substitution.
        .map(|c| match c {
            ' ' => '-',
            'á' => 'a',
            'é' => 'e',
            'í' => 'i',
            'ó' => 'o',
            'ú' | 'ü' => 'u',
            'ñ' => 'n',
            other => other,
        })
        .collect()
}
|
||||
3
src/downloader/mod.rs
Normal file
3
src/downloader/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
//! Data downloaders
|
||||
|
||||
pub mod card_info;
|
||||
98
src/editions.rs
Normal file
98
src/editions.rs
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
//! Editions information
|
||||
|
||||
use strum::{Display, EnumString};
|
||||
|
||||
/// Block (era) that an edition belongs to.
///
/// Variant meanings below are inferred from the abbreviations — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EditionBlock {
    /// Presumably Black & White.
    Bw,
    /// Presumably XY.
    Xy,
    /// Presumably Sun & Moon.
    Sm,
    /// Presumably Sword & Shield.
    Ssh,
    /// Presumably Scarlet & Violet.
    Sv,
}
|
||||
|
||||
/// Short code identifying a supported edition.
///
/// The strum attributes make parsing ASCII-case-insensitive and serialize the
/// variant names in lowercase.
#[derive(Clone, Copy, Display, Debug, Hash, PartialEq, Eq, EnumString)]
#[strum(ascii_case_insensitive, serialize_all = "lowercase")]
pub enum EditionCode {
    /// Sword and Shield
    Ssh,
    /// Scarlet and Violet
    Svi,
    /// Paldea Evolved
    Pal,
    /// Obsidian Flames
    Obf,
    /// 151
    Mew,
    /// Paradox Rift
    Par,
    /// Paldean Fates
    Paf,
    /// Temporal Forces
    Tef,
    /// Twilight Masquerade
    Twm,
    /// Shrouded Fable
    Sfa,
    /// Stellar Crown
    Scr,
    /// Surging Sparks
    Ssp,
    /// Prismatic Evolutions
    Pre,
}
|
||||
|
||||
impl EditionCode {
|
||||
pub fn edition_num(self) -> &'static str {
|
||||
match self {
|
||||
EditionCode::Ssh => "SWSH1",
|
||||
EditionCode::Svi => "SV01",
|
||||
EditionCode::Pal => "SV02",
|
||||
EditionCode::Obf => "SV03",
|
||||
EditionCode::Mew => "SV3_pt5",
|
||||
EditionCode::Par => "SV04",
|
||||
EditionCode::Paf => "SV4pt5",
|
||||
EditionCode::Tef => "SV05",
|
||||
EditionCode::Twm => "SV06",
|
||||
EditionCode::Sfa => "SV6pt5",
|
||||
EditionCode::Scr => "SV07",
|
||||
EditionCode::Ssp => "SV08",
|
||||
EditionCode::Pre => "SV8pt5",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn edition_slug(self) -> &'static str {
|
||||
match self {
|
||||
EditionCode::Ssh => "sword-shield",
|
||||
EditionCode::Svi => "scarlet-violet",
|
||||
EditionCode::Pal => "paldea-evolved",
|
||||
EditionCode::Obf => "obsidian-flames",
|
||||
EditionCode::Mew => "151",
|
||||
EditionCode::Par => "paradox-rift",
|
||||
EditionCode::Paf => "paldean-fates",
|
||||
EditionCode::Tef => "temporal-forces",
|
||||
EditionCode::Twm => "twilight-masquerade",
|
||||
EditionCode::Sfa => "shrouded-fable",
|
||||
EditionCode::Scr => "stellar-crown",
|
||||
EditionCode::Ssp => "surging-sparks",
|
||||
EditionCode::Pre => "prismatic-evolutions",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn block(self) -> EditionBlock {
|
||||
match self {
|
||||
EditionCode::Ssh => EditionBlock::Ssh,
|
||||
EditionCode::Svi
|
||||
| EditionCode::Pal
|
||||
| EditionCode::Obf
|
||||
| EditionCode::Mew
|
||||
| EditionCode::Par
|
||||
| EditionCode::Paf
|
||||
| EditionCode::Tef
|
||||
| EditionCode::Twm
|
||||
| EditionCode::Sfa
|
||||
| EditionCode::Scr
|
||||
| EditionCode::Ssp
|
||||
| EditionCode::Pre => EditionBlock::Sv,
|
||||
}
|
||||
}
|
||||
}
|
||||
10
src/lang.rs
Normal file
10
src/lang.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
//! Language settings
|
||||
|
||||
use strum::{Display, EnumString};
|
||||
|
||||
#[derive(Clone, Copy, Display, EnumString)]
|
||||
#[strum(serialize_all = "lowercase")]
|
||||
pub enum Language {
|
||||
Es,
|
||||
En,
|
||||
}
|
||||
30
src/main.rs
Normal file
30
src/main.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
use std::str::FromStr;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
|
||||
pub mod card;
|
||||
pub mod cli;
|
||||
pub mod downloader;
|
||||
pub mod editions;
|
||||
pub mod lang;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let args = cli::Args::parse();
|
||||
let client = reqwest::Client::new();
|
||||
let edition =
|
||||
editions::EditionCode::from_str(&args.code).context("Couldn't parse edition code")?;
|
||||
let number = args.number;
|
||||
let slug = args.slug.as_deref();
|
||||
let card_info = downloader::card_info::download_card_info(
|
||||
client.clone(),
|
||||
lang::Language::Es,
|
||||
edition,
|
||||
number,
|
||||
slug,
|
||||
)
|
||||
.await?;
|
||||
println!("{card_info:?}");
|
||||
Ok(())
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue