Compare commits

..

8 commits
main ... tools

18 changed files with 1990 additions and 997 deletions

2115
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,12 +1,42 @@
[package] [package]
name = "ptcg-scrap" name = "ptcg-tools"
version = "0.1.0" version = "0.1.0"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
anyhow = "1.0.97" anyhow = "1.0.100"
clap = { version = "4.5.35", features = ["derive"] } camino = "1.2.2"
reqwest = { version = "0.12.15", default-features = false, features = ["http2", "rustls-tls"] } clap = { version = "4.5.53", features = ["derive"] }
scraper = "0.23.1" directories = "6.0.0"
strum = { version = "0.27.1", features = ["derive"] } fluent-templates = "0.13.2"
tokio = { version = "1.44.1", default-features = false, features = ["fs", "rt-multi-thread", "macros"] } futures = { version = "0.3.31", default-features = false }
parquet = { version = "57.1.0", default-features = false, features = ["arrow", "async", "simdutf8", "snap"] }
reqwest = { version = "0.12.28", default-features = false, features = [
"brotli",
"http2",
"gzip",
"json",
"rustls-tls-native-roots",
"stream",
] }
serde = { version = "1.0.228", default-features = false, features = [
"derive",
"std",
] }
serde_json = "1.0.148"
strum = { version = "0.27.2", features = ["derive"] }
tokio = { version = "1.48.0", default-features = false, features = [
"fs",
"rt-multi-thread",
"macros",
] }
tokio-util = { version = "0.7.17", default-features = false, features = ["io"] }
tokio-stream = { version = "0.1.17", default-features = false }
tracing = "0.1.44"
tracing-subscriber = { version = "0.3.22", default-features = false, features = [
"ansi",
"env-filter",
"fmt",
"tracing",
"tracing-log",
] }

View file

@ -1 +1 @@
## PTCG Scrapper ## PTCG Tools

80
cliff.toml Normal file
View file

@ -0,0 +1,80 @@
[changelog]
# changelog header
header = """
# Changelog\n
"""
# template for the changelog body
# https://tera.netlify.app/docs
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
# remove the leading and trailing whitespace from the template
trim = true
# changelog footer
footer = """
"""
# postprocessors
postprocessors = [
{ pattern = '<REPO>', replace = "https://oolong.ludwig.dog/pitbuster/ptcg-tools" },
]
[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))" }, # replace issue numbers
]
# regex for parsing and grouping commits
commit_parsers = [
    { message = "^feat", group = "Features" },
    { message = "^fix", group = "Bug Fixes" },
    { message = "^doc", group = "Documentation" },
    # NOTE: parentheses must be escaped — "^chore(docs)" is a regex *capture
    # group* matching the literal text "choredocs" and would never fire.
    { message = "^chore\\(docs\\)", group = "Documentation" },
    { message = "^perf", group = "Performance" },
    { message = "^refactor", group = "Refactor" },
    { message = "^style", group = "Styling" },
    { message = "^test", group = "Testing" },
    { message = "^release:", skip = true },
    { message = "^chore\\(release\\):", skip = true },
    { message = "^chore\\(changelog\\):", skip = true },
    { message = "^chore\\(deps\\)", skip = true },
    { message = "^chore\\(pr\\)", skip = true },
    { message = "^chore\\(pull\\)", skip = true },
    # Anchor both alternatives: "^chore|ci" would match "ci" *anywhere* in the
    # message (e.g. a commit mentioning "circuit").
    { message = "^chore|^ci", group = "Miscellaneous Tasks" },
    { body = ".*security", group = "Security" },
    { message = "^revert", group = "Revert" },
]
# extract external references
link_parsers = [
{ pattern = "#(\\d+)", href = "https://oolong.ludwig.dog/pitbuster/ptcg-tools/issues/$1" },
]
# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false
# filter out the commits that are not matched by commit parsers
filter_commits = false
# glob pattern for matching git tags
tag_pattern = "[0-9]*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags
ignore_tags = ""
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"
# limit the number of commits included in the changelog.
# limit_commits = 42

View file

@ -1,45 +0,0 @@
//! Card info
/// A scraped card together with its URL-friendly identifier.
#[derive(Debug)]
pub struct CardInfo {
    /// Slug derived from the card name (see the slugify helpers), or a
    /// caller-provided override.
    pub slug: String,
    /// The card's parsed attributes.
    pub inner: InnerCardInfo,
}

/// Attributes parsed from a card's detail page.
#[derive(Debug)]
pub struct InnerCardInfo {
    /// Card name as displayed on the page.
    pub name: String,
    /// Broad category: Pokemon, Trainer or Energy.
    pub kind: CardKind,
    /// Subtype within the kind (e.g. Basic/Stage1 for Pokemon, Item/Tool for
    /// Trainer).
    pub card_type: CardType,
    // NOTE(review): the four flags below are never set by the parser yet —
    // they are always false; confirm before relying on them.
    pub acespec: bool,
    pub tagteam: bool,
    pub future: bool,
    pub ancient: bool,
    /// Kind-specific payload (e.g. effect text for Trainers).
    pub specific_info: SpecificInfo,
}

/// Broad card category.
#[derive(Debug)]
pub enum CardKind {
    Pokemon,
    Trainer,
    Energy,
}

/// Subtype within a card kind; not every variant applies to every kind.
#[derive(Debug)]
pub enum CardType {
    Basic,
    Stage1,
    Stage2,
    Item,
    Tool,
    Supporter,
    Stadium,
    Special,
}

/// Per-kind payload; Pokemon and Energy payloads are still empty placeholders.
#[derive(Debug)]
pub enum SpecificInfo {
    PokemonInfo {},
    TrainerInfo { effect: Vec<String> },
    EnergyInfo {},
}

View file

@ -1,15 +1,26 @@
//! CLI parameters //! CLI parameters
use clap::Parser; use std::str::FromStr;
use clap::{Parser, Subcommand};
use crate::lang::Language;
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
#[command(version, about, long_about = None)] #[command(version, about, long_about = None)]
pub struct Args { pub struct Args {
/// Edition code #[command(subcommand)]
pub code: String, pub command: Command,
/// Card number within the edition }
pub number: u8,
///Override the slug for the card #[derive(Debug, Subcommand, PartialEq)]
#[arg(short, long)] pub enum Command {
pub slug: Option<String>, /// Downloads the card data
DownloadData {
/// Language to download the data in
#[arg(short, value_parser=<Language as FromStr>::from_str)]
lang: Language,
},
/// Terminal User Interface
Tui,
} }

4
src/constants.rs Normal file
View file

@ -0,0 +1,4 @@
//! Application wide constants.

/// Application name as used in user-visible paths (e.g. the log file name).
pub const APP_NAME: &str = "ptcg-tools";

/// `snake_case` form of [`APP_NAME`]; used as the crate target prefix in
/// tracing env-filter directives.
pub const SNAKE_CASE_APP_NAME: &str = "ptcg_tools";

30
src/data_store/mod.rs Normal file
View file

@ -0,0 +1,30 @@
//! Local data store
use anyhow::Result;
use camino::Utf8PathBuf;
use parquet::arrow::AsyncArrowWriter;
use tracing::debug;
use crate::{directories::data_cache_directory, malie::models::Index};
/// Handle to the on-disk store where downloaded data sets are materialized.
pub struct Store {
    // Root directory for stored data files; created by `Store::new`.
    data_cache_directory: Utf8PathBuf,
}

impl Store {
    /// Creates a `Store`, resolving (and creating if needed) the backing
    /// cache directory.
    ///
    /// # Errors
    /// Fails if the cache directory cannot be resolved or created.
    pub async fn new() -> Result<Self> {
        Ok(Self {
            data_cache_directory: data_cache_directory().await?,
        })
    }

    /// Writes the edition index to `ptcgl_index.parquet` in the cache.
    ///
    /// NOTE(review): currently a stub — if the file already exists it is
    /// skipped, otherwise nothing is written yet (the Parquet writer below is
    /// still commented out), so `index` is accepted but unused.
    pub async fn write_index(&self, index: Index) -> Result<()> {
        let path = self.data_cache_directory.join("ptcgl_index.parquet");
        if let Ok(true) = tokio::fs::try_exists(&path).await {
            debug!("File {path} already exists, skipping.");
            return Ok(());
        }
        // let mut writer = AsyncArrowWriter::try_new(writer, arrow_schema, props)
        Ok(())
    }
}

47
src/directories.rs Normal file
View file

@ -0,0 +1,47 @@
//! User directories handling
use anyhow::{Result, anyhow};
use camino::Utf8PathBuf;
use directories::ProjectDirs;
use crate::constants::APP_NAME;
/// Returns the path to the user data directory.
///
/// Post condition: this function ensures the directory is already created when returning.
pub async fn data_directory() -> Result<Utf8PathBuf> {
    let project_dirs = ProjectDirs::from("cl", "bstr", APP_NAME)
        .ok_or_else(|| anyhow!("failed to get ProjectDirs"))?;
    let path = Utf8PathBuf::try_from(project_dirs.data_dir().to_path_buf())?;
    tokio::fs::create_dir_all(&path).await?;
    Ok(path)
}
/// Returns the path to the user data cache directory.
///
/// Post condition: this function ensures the directory is already created when returning.
pub async fn data_cache_directory() -> Result<Utf8PathBuf> {
    let project_dirs = ProjectDirs::from("cl", "bstr", APP_NAME)
        .ok_or_else(|| anyhow!("failed to get ProjectDirs"))?;
    // Data files live in a "data" subdirectory of the platform cache dir.
    let path = Utf8PathBuf::try_from(project_dirs.cache_dir().to_path_buf())?.join("data");
    tokio::fs::create_dir_all(&path).await?;
    Ok(path)
}
/// Returns the path to the user image cache directory.
///
/// Post condition: this function ensures the directory is already created when returning.
pub async fn image_cache_directory() -> Result<Utf8PathBuf> {
    let user_directory = ProjectDirs::from("cl", "bstr", APP_NAME)
        .ok_or_else(|| anyhow!("failed to get ProjectDirs"))?
        .cache_dir()
        .to_path_buf();
    let user_directory = Utf8PathBuf::try_from(user_directory)?;
    // Bug fix: this previously joined "data" (copy-paste from
    // `data_cache_directory`), making the image cache collide with the data
    // cache. Images get their own subdirectory.
    let user_directory = user_directory.join("images");
    tokio::fs::create_dir_all(&user_directory).await?;
    Ok(user_directory)
}

View file

@ -1,231 +0,0 @@
//! Download card information.
use anyhow::{Result, anyhow};
use reqwest::Client;
use scraper::{Html, Selector};
use crate::card::{CardInfo, CardKind, CardType, InnerCardInfo, SpecificInfo};
use crate::editions::EditionCode;
use crate::lang::Language;
/// Fetches a card's detail page and parses it into a [`CardInfo`].
///
/// The page URL is built from the language-specific base URL, the edition's
/// lowercased edition number and the card `number`. When `override_slug` is
/// given it replaces the slug derived from the card name.
///
/// # Errors
/// Fails on network errors, non-success HTTP status, or if the page cannot be
/// parsed.
pub async fn download_card_info(
    client: Client,
    lang: Language,
    code: EditionCode,
    number: u8,
    override_slug: Option<&str>,
) -> Result<CardInfo> {
    let url = format!(
        "{}/{}/{number}/",
        base_url(lang),
        code.edition_num().to_lowercase()
    );
    let response = client.get(url).send().await?;
    response.error_for_status_ref()?;
    let (mut slug, inner) = parse_html(lang, code, response.text().await?)?;
    if let Some(override_slug) = override_slug {
        slug = override_slug.into()
    }
    Ok(CardInfo { slug, inner })
}
fn base_url(lang: Language) -> &'static str {
match lang {
Language::Es => "https://www.pokemon.com/el/jcc-pokemon/cartas-pokemon/series",
Language::En => todo!(),
}
}
/// Parses a card detail page into its slug and [`InnerCardInfo`].
///
/// Expects the markup served by the localized pokemon.com card pages:
/// `div.full-card-information` containing an `h1` (name) and a
/// `div.card-type > h2` (kind/type line).
fn parse_html(lang: Language, code: EditionCode, html: String) -> Result<(String, InnerCardInfo)> {
    let html = Html::parse_document(&html);
    let card = html
        .select(&selector("div.full-card-information")?)
        .next()
        .ok_or(anyhow!("Couldn't find card info"))?;
    let name = card
        .select(&selector("h1")?)
        .next()
        .ok_or(anyhow!("Failed to get card name"))?
        .inner_html();
    let (kind, card_type) = parse_card_type(
        lang,
        card.select(&selector("div.card-type > h2")?)
            .next()
            .ok_or(anyhow!("Failed to get card type"))?
            .inner_html(),
    )?;
    // Pokemon slugs embed the edition code (names repeat across editions);
    // Trainer/Energy names are treated as unique per language.
    let slug = match kind {
        CardKind::Pokemon => slugify_pokemon(lang, code, &name),
        CardKind::Trainer | CardKind::Energy => slugify_unique(lang, &name),
    };
    let specific_info = match kind {
        CardKind::Pokemon => SpecificInfo::PokemonInfo {},
        CardKind::Trainer => {
            // Each <p> inside the ability block is one paragraph of effect text.
            let effect = card
                .select(&selector("div.ability > pre > p")?)
                .map(|e| e.inner_html())
                .collect();
            SpecificInfo::TrainerInfo { effect }
        }
        CardKind::Energy => SpecificInfo::EnergyInfo {},
    };
    Ok((
        slug,
        InnerCardInfo {
            name,
            kind,
            card_type,
            // NOTE(review): these flags are not parsed from the page yet —
            // always false for now.
            acespec: false,
            tagteam: false,
            future: false,
            ancient: false,
            specific_info,
        },
    ))
}
/// Compiles a CSS selector, converting scraper's non-Send error into anyhow.
fn selector(sel: &str) -> Result<Selector> {
    Selector::parse(sel).map_err(|_| anyhow!("failed to parse selector"))
}
/// Classifies the card's kind and type from the localized "card type" line.
///
/// Matching is substring-based against localized markers, so ORDER MATTERS:
/// the Trainer/Tool check runs before the "Pokémon" check because tool card
/// lines can also mention a Pokémon.
fn parse_card_type(lang: Language, text: String) -> Result<(CardKind, CardType)> {
    let kind = if text.contains(trainer_pattern(lang)) || text.contains(tool_pattern(lang)) {
        Ok(CardKind::Trainer)
    } else if text.contains("Pokémon") {
        Ok(CardKind::Pokemon)
    } else if text.contains(energy_pattern(lang)) {
        Ok(CardKind::Energy)
    } else {
        Err(anyhow!(
            "Failed to get card kind (Pokemon, Trainer or Energy)"
        ))
    }?;
    let card_type = match kind {
        CardKind::Pokemon => {
            if text.contains(basic_pattern(lang)) {
                Ok(CardType::Basic)
            } else if text.contains(stage1_pattern(lang)) {
                Ok(CardType::Stage1)
            } else if text.contains(stage2_pattern(lang)) {
                Ok(CardType::Stage2)
            } else {
                Err(anyhow!("Failed to get Pokemon type: {text}"))
            }
        }
        CardKind::Trainer => {
            if text.contains(item_pattern(lang)) {
                Ok(CardType::Item)
            } else if text.contains(tool_pattern(lang)) {
                Ok(CardType::Tool)
            } else if text.contains(stadium_pattern(lang)) {
                Ok(CardType::Stadium)
            } else if text.contains(supporter_pattern(lang)) {
                Ok(CardType::Supporter)
            } else {
                Err(anyhow!("Failed to get Trainer type"))
            }
        }
        CardKind::Energy => {
            if text.contains(basic_pattern(lang)) {
                Ok(CardType::Basic)
            } else if text.contains(special_pattern(lang)) {
                Ok(CardType::Special)
            } else {
                // NOTE(review): error message says "Pokemon type" but this is
                // the Energy branch — likely a copy-paste slip.
                Err(anyhow!("Failed to get Pokemon type"))
            }
        }
    }?;
    Ok((kind, card_type))
}
/// Localized marker identifying a Trainer card.
fn trainer_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Entrenador",
        Language::En => todo!(),
    }
}

/// Localized marker identifying an Energy card.
fn energy_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Energía",
        Language::En => todo!(),
    }
}

/// Localized marker for Basic (Pokemon or Energy).
/// Deliberately truncated ("Básic") to match both "Básico" and "Básica".
fn basic_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Básic",
        Language::En => todo!(),
    }
}

/// Localized marker for Stage 1 Pokemon.
fn stage1_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Fase 1",
        Language::En => todo!(),
    }
}

/// Localized marker for Stage 2 Pokemon.
fn stage2_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Fase 2",
        Language::En => todo!(),
    }
}

/// Localized marker for Item Trainer cards.
fn item_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Objeto",
        Language::En => todo!(),
    }
}

/// Localized marker for Tool Trainer cards.
fn tool_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Herramienta",
        Language::En => todo!(),
    }
}

/// Localized marker for Supporter Trainer cards.
fn supporter_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Partidario",
        Language::En => todo!(),
    }
}

/// Localized marker for Stadium Trainer cards.
fn stadium_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Estadio",
        Language::En => todo!(),
    }
}

/// Localized marker for Special Energy cards.
fn special_pattern(lang: Language) -> &'static str {
    match lang {
        Language::Es => "Especial",
        Language::En => todo!(),
    }
}
/// Slug for a Pokemon card: `<name>-<edition>-<lang>` (edition included
/// because the same Pokemon name recurs across editions).
fn slugify_pokemon(lang: Language, code: EditionCode, name: &str) -> String {
    format!("{}-{code}-{lang}", slugify(name))
}

/// Slug for a card whose name is unique per language (Trainer/Energy):
/// `<name>-<lang>`.
fn slugify_unique(lang: Language, name: &str) -> String {
    format!("{}-{lang}", slugify(name))
}
/// Normalizes a card name into a slug: lowercase, possessive "'s" removed,
/// spaces turned into dashes, Spanish accented vowels folded to ASCII.
fn slugify(name: &str) -> String {
    // Applied in order on the lowercased name; "'s" must be stripped before
    // spaces become dashes.
    const REPLACEMENTS: [(&str, &str); 7] = [
        ("'s", ""),
        (" ", "-"),
        ("á", "a"),
        ("é", "e"),
        ("í", "i"),
        ("ó", "o"),
        ("ú", "u"),
    ];
    REPLACEMENTS
        .iter()
        .fold(name.to_lowercase(), |slug, (from, to)| {
            slug.replace(from, to)
        })
}

View file

@ -1,3 +0,0 @@
//! Data downloaders
pub mod card_info;

View file

@ -1,5 +1,6 @@
//! Editions information //! Editions information
use serde::Deserialize;
use strum::{Display, EnumString}; use strum::{Display, EnumString};
pub enum EditionBlock { pub enum EditionBlock {
@ -8,14 +9,19 @@ pub enum EditionBlock {
Sm, Sm,
Ssh, Ssh,
Sv, Sv,
Meg,
} }
#[derive(Clone, Copy, Display, Debug, Hash, PartialEq, Eq, EnumString)] #[derive(Clone, Copy, Display, Debug, Hash, PartialEq, Eq, EnumString, Deserialize)]
#[strum(ascii_case_insensitive, serialize_all = "lowercase")] #[strum(ascii_case_insensitive, serialize_all = "lowercase")]
#[serde(rename_all = "UPPERCASE")]
pub enum EditionCode { pub enum EditionCode {
/// Sword and Shield /// Sword and Shield
Ssh, Ssh,
/// Scarlet and Violer /// SV Promos
#[serde(alias = "PR-SV")]
Svp,
/// Scarlet and Violet
Svi, Svi,
/// Paldea Evolved /// Paldea Evolved
Pal, Pal,
@ -39,12 +45,27 @@ pub enum EditionCode {
Ssp, Ssp,
/// Prismatic Evolutions /// Prismatic Evolutions
Pre, Pre,
/// Journey Together
Jtg,
/// Destined Rivals
Dri,
/// Black Bolt
Blk,
/// White Flare
Wht,
/// Mega Evolution Promos
Mep,
/// Mega Evolution
Meg,
/// Phantasmal Flames
Pfl,
} }
impl EditionCode { impl EditionCode {
pub fn edition_num(self) -> &'static str { pub fn edition_num(self) -> &'static str {
match self { match self {
EditionCode::Ssh => "SWSH1", EditionCode::Ssh => "SWSH1",
EditionCode::Svp => "SVP",
EditionCode::Svi => "SV01", EditionCode::Svi => "SV01",
EditionCode::Pal => "SV02", EditionCode::Pal => "SV02",
EditionCode::Obf => "SV03", EditionCode::Obf => "SV03",
@ -57,6 +78,13 @@ impl EditionCode {
EditionCode::Scr => "SV07", EditionCode::Scr => "SV07",
EditionCode::Ssp => "SV08", EditionCode::Ssp => "SV08",
EditionCode::Pre => "SV8pt5", EditionCode::Pre => "SV8pt5",
EditionCode::Jtg => "SV9",
EditionCode::Dri => "SV10",
EditionCode::Blk => "SV10pt5ZSV",
EditionCode::Wht => "SV10pt5RSV",
EditionCode::Mep => "MEP",
EditionCode::Meg => "MEG1",
EditionCode::Pfl => "MEG2",
} }
} }
@ -64,6 +92,7 @@ impl EditionCode {
match self { match self {
EditionCode::Ssh => "sword-shield", EditionCode::Ssh => "sword-shield",
EditionCode::Svi => "scarlet-violet", EditionCode::Svi => "scarlet-violet",
EditionCode::Svp => "scarlet-violet-promos",
EditionCode::Pal => "paldea-evolved", EditionCode::Pal => "paldea-evolved",
EditionCode::Obf => "obsidian-flames", EditionCode::Obf => "obsidian-flames",
EditionCode::Mew => "151", EditionCode::Mew => "151",
@ -75,6 +104,12 @@ impl EditionCode {
EditionCode::Scr => "stellar-crown", EditionCode::Scr => "stellar-crown",
EditionCode::Ssp => "surging-sparks", EditionCode::Ssp => "surging-sparks",
EditionCode::Pre => "prismatic-evolutions", EditionCode::Pre => "prismatic-evolutions",
EditionCode::Jtg => "journey-together",
EditionCode::Dri => "destined-rivals",
EditionCode::Blk | EditionCode::Wht => "black-white",
EditionCode::Meg => "mega-evolution",
EditionCode::Mep => "mega-evolution-promos",
EditionCode::Pfl => "phantasmal-flames",
} }
} }
@ -82,6 +117,7 @@ impl EditionCode {
match self { match self {
EditionCode::Ssh => EditionBlock::Ssh, EditionCode::Ssh => EditionBlock::Ssh,
EditionCode::Svi EditionCode::Svi
| EditionCode::Svp
| EditionCode::Pal | EditionCode::Pal
| EditionCode::Obf | EditionCode::Obf
| EditionCode::Mew | EditionCode::Mew
@ -92,7 +128,12 @@ impl EditionCode {
| EditionCode::Sfa | EditionCode::Sfa
| EditionCode::Scr | EditionCode::Scr
| EditionCode::Ssp | EditionCode::Ssp
| EditionCode::Pre => EditionBlock::Sv, | EditionCode::Pre
| EditionCode::Jtg
| EditionCode::Dri
| EditionCode::Blk
| EditionCode::Wht => EditionBlock::Sv,
EditionCode::Meg | EditionCode::Mep | EditionCode::Pfl => EditionBlock::Meg,
} }
} }
} }

View file

@ -2,9 +2,30 @@
use strum::{Display, EnumString}; use strum::{Display, EnumString};
#[derive(Clone, Copy, Display, EnumString)] use crate::malie::models::Lang;
#[derive(Clone, Copy, Debug, Display, EnumString, PartialEq)]
#[strum(serialize_all = "lowercase")] #[strum(serialize_all = "lowercase")]
pub enum Language { pub enum Language {
Es, De,
En, En,
Es,
EsLa,
It,
Fr,
Pt,
}
impl From<Language> for Lang {
fn from(value: Language) -> Self {
match value {
Language::De => Lang::De,
Language::En => Lang::En,
Language::Es => Lang::Es,
Language::EsLa => Lang::EsLa,
Language::It => Lang::It,
Language::Fr => Lang::Fr,
Language::Pt => Lang::Pt,
}
}
} }

49
src/logging.rs Normal file
View file

@ -0,0 +1,49 @@
use anyhow::{Context, Result};
use tracing_subscriber::Layer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use crate::constants::{APP_NAME, SNAKE_CASE_APP_NAME};
use crate::directories::data_directory;
/// Destination for tracing output.
pub enum LogMode {
    /// Log to a file in the user data directory (used when the TUI owns the
    /// terminal).
    File,
    /// Log to the terminal.
    Print,
}
/// Sets up logging for the application.
///
/// Installs the global tracing subscriber according to `mode`; call once at
/// startup.
pub async fn initialize_logging(mode: LogMode) -> Result<()> {
    match mode {
        LogMode::File => initialize_file_logging().await,
        LogMode::Print => initialize_print_logging(),
    }
}
async fn initialize_file_logging() -> Result<()> {
let user_directory = data_directory()
.await
.context("While initializing logging")?;
let log_path = user_directory.join(format!("{APP_NAME}.log"));
let log_file = std::fs::File::create(log_path)?;
let file_subscriber = tracing_subscriber::fmt::layer()
.with_file(true)
.with_line_number(true)
.with_writer(log_file)
.with_target(false)
.with_ansi(false)
.with_filter(tracing_subscriber::filter::EnvFilter::from(format!(
"{SNAKE_CASE_APP_NAME}=debug"
)));
tracing_subscriber::registry().with(file_subscriber).init();
Ok(())
}
/// Routes tracing output to the terminal, filtered to this crate's targets at
/// `debug` level.
fn initialize_print_logging() -> Result<()> {
    let filter =
        tracing_subscriber::filter::EnvFilter::from(format!("{SNAKE_CASE_APP_NAME}=debug"));
    let fmt_layer = tracing_subscriber::fmt::layer();
    tracing_subscriber::registry()
        .with(filter)
        .with(fmt_layer)
        .init();
    Ok(())
}

View file

@ -1,30 +1,35 @@
use std::str::FromStr; use anyhow::Result;
use anyhow::{Context, Result};
use clap::Parser; use clap::Parser;
pub mod card; use crate::lang::Language;
pub mod cli; pub mod cli;
pub mod downloader; pub mod constants;
pub mod data_store;
pub mod directories;
pub mod editions; pub mod editions;
pub mod lang; pub mod lang;
pub mod logging;
pub mod malie;
#[tokio::main] #[tokio::main]
async fn main() -> Result<()> { async fn main() -> Result<()> {
let args = cli::Args::parse(); let args = cli::Args::parse();
let client = reqwest::Client::new(); let log_mode = if args.command == cli::Command::Tui {
let edition = logging::LogMode::File
editions::EditionCode::from_str(&args.code).context("Couldn't parse edition code")?; } else {
let number = args.number; logging::LogMode::Print
let slug = args.slug.as_deref(); };
let card_info = downloader::card_info::download_card_info( logging::initialize_logging(log_mode).await?;
client.clone(), match args.command {
lang::Language::Es, cli::Command::DownloadData { lang } => download_data(lang).await?,
edition, cli::Command::Tui => todo!(),
number, }
slug, Ok(())
) }
.await?;
println!("{card_info:?}"); async fn download_data(lang: Language) -> Result<()> {
let client = malie::client::Client::new().await?;
client.download_all_data(lang).await?;
Ok(()) Ok(())
} }

114
src/malie/client.rs Normal file
View file

@ -0,0 +1,114 @@
//! Client to download data from malie.io
use anyhow::{Context, Result, anyhow};
use camino::Utf8PathBuf;
use futures::future::try_join_all;
use tokio::fs::File;
use tokio_stream::StreamExt;
use tokio_util::io::StreamReader;
use tracing::{debug, info};
use super::models::{Index, RawIndex};
use crate::data_store;
use crate::directories::data_cache_directory;
use crate::lang::Language;
use crate::malie::models::{Lang, filter_invalid_editions};
/// Client to download data from malie.io
pub struct Client {
    // Underlying HTTP client.
    client: reqwest::Client,
    // Root directory where downloaded JSON files are cached on disk.
    data_cache_directory: Utf8PathBuf,
}

/// Base URL of the malie.io TCG Live export CDN.
const TCGL_BASE_URL: &str = "https://cdn.malie.io/file/malie-io/tcgl/export";
impl Client {
    /// Create a new `Client`
    ///
    /// Resolves (and creates, if needed) the data cache directory.
    pub async fn new() -> Result<Self> {
        Ok(Self {
            client: reqwest::Client::new(),
            data_cache_directory: data_cache_directory().await?,
        })
    }

    /// Downloads the index plus every edition file for `lang`, caching
    /// everything on disk and recording the index in the local data store.
    ///
    /// Index entries for other languages are skipped. Edition downloads run
    /// concurrently; the first failure aborts the whole batch.
    pub async fn download_all_data(&self, lang: Language) -> Result<()> {
        let lang: Lang = lang.into();
        let data_store = data_store::Store::new().await?;
        self.download_tcgl_index_json().await?;
        let index = self.load_tcgl_index().await?;
        data_store.write_index(index.clone()).await?;
        let edition_downloads = index.into_iter().filter_map(|edition| {
            if edition.lang == lang {
                Some(self.download_tcgl_edition_json(edition.path))
            } else {
                None
            }
        });
        try_join_all(edition_downloads).await?;
        Ok(())
    }

    /// Fetches the remote `index.json` into the cache as `tcgl_index.json`
    /// (no-op if already present).
    pub async fn download_tcgl_index_json(&self) -> Result<()> {
        let file_path = self.data_cache_directory.join("tcgl_index.json");
        let url = format!("{TCGL_BASE_URL}/index.json");
        self.download_if_not_exists(file_path, &url).await?;
        Ok(())
    }

    /// Fetches one edition JSON (identified by its index `path`) into the
    /// cache, mirroring the remote path layout (no-op if already present).
    pub async fn download_tcgl_edition_json(&self, url_path: String) -> Result<()> {
        let file_path = self.data_cache_directory.join(&url_path);
        let url = format!("{TCGL_BASE_URL}/{url_path}");
        self.download_if_not_exists(file_path, &url).await?;
        Ok(())
    }

    /// Reads and parses the cached `tcgl_index.json`, dropping entries whose
    /// edition code is missing or unrecognized.
    async fn load_tcgl_index(&self) -> Result<Index> {
        let file_path = self.data_cache_directory.join("tcgl_index.json");
        let index = tokio::fs::read_to_string(&file_path)
            .await
            .with_context(|| format!("Failed to read {file_path}"))?;
        let index: RawIndex =
            serde_json::from_str(&index).with_context(|| format!("Couldn't parse {file_path}"))?;
        let index = filter_invalid_editions(index);
        Ok(index)
    }

    /// Streams `url` to `file_path` unless the file already exists.
    ///
    /// Parent directories are created as needed; the response body is
    /// streamed to disk rather than buffered in memory, and synced before
    /// returning. NOTE(review): an interrupted download leaves a partial file
    /// behind which later runs will treat as complete — consider a temp-file
    /// + rename scheme.
    async fn download_if_not_exists(&self, file_path: Utf8PathBuf, url: &str) -> Result<()> {
        if let Ok(true) = tokio::fs::try_exists(&file_path).await {
            debug!("Found {}, skipping download", &file_path);
            return Ok(());
        }
        if let Some(p) = file_path.parent() {
            tokio::fs::create_dir_all(p).await?;
        }
        let response = self.client.get(url).send().await?;
        if !response.status().is_success() {
            return Err(anyhow!(
                "Error {} when downloading: {}",
                response.status(),
                url
            ));
        }
        // create_new fails if the file appeared since the existence check,
        // so concurrent downloads can't silently clobber each other.
        let mut file = File::create_new(&file_path)
            .await
            .with_context(|| format!("Couldn't create file {file_path}"))?;
        tokio::io::copy_buf(
            &mut StreamReader::new(
                response
                    .bytes_stream()
                    .map(|result| result.map_err(std::io::Error::other)),
            ),
            &mut file,
        )
        .await
        .with_context(|| format!("While writing to file {file_path}"))?;
        file.sync_all().await?;
        info!("Downloaded {file_path} from {url}");
        Ok(())
    }
}

4
src/malie/mod.rs Normal file
View file

@ -0,0 +1,4 @@
//! Module to interact with the PTCG data from malie.io
pub mod client;
pub mod models;

77
src/malie/models.rs Normal file
View file

@ -0,0 +1,77 @@
//! Models for malie.io exports
use std::borrow::Cow;
use std::collections::HashMap;
use anyhow::Context;
use serde::{Deserialize, de};
use tracing::warn;
use crate::editions::EditionCode;
/// Index as serialized on the remote: language -> edition name -> raw entry.
pub type RawIndex = HashMap<Lang, HashMap<String, RawEdition>>;

/// Validated, flattened index: one entry per (language, edition) pair.
pub type Index = Vec<Edition>;

/// Locales used by the malie.io export, identified by their locale tags.
#[derive(Copy, Clone, Debug, Deserialize, Eq, PartialEq, Hash)]
pub enum Lang {
    #[serde(rename = "de-DE")]
    De,
    #[serde(rename = "en-US")]
    En,
    #[serde(rename = "es-ES")]
    Es,
    /// Latin-American Spanish.
    #[serde(rename = "es-419")]
    EsLa,
    #[serde(rename = "it-IT")]
    It,
    #[serde(rename = "fr-FR")]
    Fr,
    #[serde(rename = "pt-BR")]
    Pt,
}
/// Edition entry exactly as serialized in the remote index.
#[derive(Deserialize)]
pub struct RawEdition {
    // Relative path of the edition's JSON export on the CDN.
    path: String,
    // `None` when the abbreviation is empty or not a known `EditionCode`.
    #[serde(deserialize_with = "deserialize_edition_code")]
    abbr: Option<EditionCode>,
}

/// Validated edition entry, flattened together with its language.
#[derive(Debug, Clone)]
pub struct Edition {
    pub lang: Lang,
    pub path: String,
    pub abbr: EditionCode,
}
/// Deserializes an edition abbreviation leniently: empty strings and unknown
/// codes become `None` (with a warning) instead of failing the whole index.
fn deserialize_edition_code<'de, D>(deserializer: D) -> Result<Option<EditionCode>, D::Error>
where
    D: de::Deserializer<'de>,
{
    let buf = Cow::<'de, str>::deserialize(deserializer)?;
    if buf.is_empty() {
        return Ok(None);
    }
    // Deserialize from a JSON string *value* rather than hand-quoting:
    // the previous `format!("\"{buf}\"")` produced invalid JSON whenever the
    // input contained a `"` or `\`.
    let result = serde_json::from_value::<EditionCode>(serde_json::Value::String(buf.to_string()))
        .with_context(|| format!("couldn't deserialize edition code {buf}"))
        .inspect_err(|e| warn!("{e}"));
    Ok(result.ok())
}
/// Flattens a [`RawIndex`] into an [`Index`], discarding editions whose
/// abbreviation could not be resolved to a known [`EditionCode`].
pub fn filter_invalid_editions(index: RawIndex) -> Index {
    index
        .into_iter()
        .flat_map(|(lang, editions)| {
            // `Option::map` replaces the previous match-Some/None pyramid;
            // entries with `abbr == None` are dropped by `filter_map`.
            editions.into_values().filter_map(move |edition| {
                edition.abbr.map(|abbr| Edition {
                    path: edition.path,
                    abbr,
                    lang,
                })
            })
        })
        .collect()
}