From e3e84be133e8f22440558798a77b3d83c38492c0 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Salinas Date: Sun, 28 Dec 2025 21:20:16 -0300 Subject: [PATCH] download editions for a given language --- Cargo.lock | 528 ++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/cli.rs | 10 +- src/data_store/mod.rs | 2 + src/lang.rs | 25 +- src/main.rs | 8 +- src/malie/client.rs | 32 ++- src/malie/models.rs | 30 ++- 8 files changed, 610 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fc9277..ac83950 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,20 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -32,6 +46,15 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.21" @@ -88,6 +111,117 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "arrow-array" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-buffer" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + +[[package]] +name = "arrow-cast" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-ord", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "half", + "lexical-core", + "num-traits", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-ipc" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "flatbuffers", +] + +[[package]] +name = "arrow-ord" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-schema" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" + +[[package]] +name = "arrow-select" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num-traits", +] + [[package]] name = "async-compression" version = "0.4.36" @@ -101,12 +235,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "base64" version = "0.22.1" @@ -156,6 +305,12 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.11.0" @@ -190,6 +345,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link", +] + [[package]] name = "clap" version = "4.5.53" @@ -254,6 +420,26 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -304,6 +490,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "directories" version = "6.0.0" @@ -348,6 +540,16 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +[[package]] +name = "flatbuffers" +version = "25.12.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" +dependencies = [ + "bitflags", + "rustc_version", +] + [[package]] name = "flate2" version = "1.1.5" @@ -449,6 +651,20 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.31" @@ -456,6 +672,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -499,6 +716,7 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ + "futures-channel", "futures-core", "futures-io", "futures-macro", @@ -569,6 +787,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.16.1" @@ -683,6 +913,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.1.1" @@ -811,6 +1065,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "intl-memoizer" version = "0.5.3" @@ -874,12 +1134,75 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + [[package]] name = "libc" version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + [[package]] name = "libredox" version = "0.1.12" @@ -962,6 +1285,44 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -986,6 +1347,53 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parquet" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "bytes", + "chrono", + "futures", + "half", + "hashbrown", + "num-bigint", + "num-integer", + "num-traits", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1046,6 +1454,8 @@ dependencies = [ "clap", "directories", "fluent-templates", + "futures", + "parquet", "reqwest", "serde", "serde_json", @@ -1246,6 +1656,15 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustls" version = "0.23.35" @@ -1358,6 +1777,18 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16c2f82143577edb4921b71ede051dac62ca3c16084e918bf7b40c96ae10eb33" +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.228" @@ -1434,6 +1865,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "slab" version = "0.4.11" @@ -1446,6 +1883,12 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.6.1" @@ -1564,6 +2007,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -1766,6 +2229,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + [[package]] name = "type-map" version = "0.5.1" @@ -1860,6 +2329,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -1994,12 +2469,65 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 0f22737..b467642 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,8 @@ camino = "1.2.2" clap = { version = "4.5.53", features = ["derive"] } directories = "6.0.0" fluent-templates = "0.13.2" +futures = { version = "0.3.31", default-features = false } +parquet = { version = "57.1.0", default-features = false, features = ["arrow", "async", "simdutf8", "snap"] } reqwest = { version = "0.12.28", default-features = false, features = [ "brotli", "http2", diff --git a/src/cli.rs b/src/cli.rs index d759d4b..98681ef 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,7 +1,11 @@ //! CLI parameters +use std::str::FromStr; + use clap::{Parser, Subcommand}; +use crate::lang::Language; + #[derive(Debug, Parser)] #[command(version, about, long_about = None)] pub struct Args { @@ -12,7 +16,11 @@ pub struct Args { #[derive(Debug, Subcommand, PartialEq)] pub enum Command { /// Downloads the card data - DownloadData, + DownloadData { + /// Language to download the data in + #[arg(short, value_parser=::from_str)] + lang: Language, + }, /// Terminal User Interface Tui, } diff --git a/src/data_store/mod.rs b/src/data_store/mod.rs index cdd9e6c..940187b 100644 --- a/src/data_store/mod.rs +++ b/src/data_store/mod.rs @@ -2,6 +2,7 @@ use anyhow::Result; use camino::Utf8PathBuf; +use parquet::arrow::AsyncArrowWriter; use crate::{directories::data_cache_directory, malie::models::Index}; @@ -17,6 +18,7 @@ impl Store { } pub async fn write_index(&self, index: Index) -> Result<()> { + // let mut writer = AsyncArrowWriter::try_new(writer, arrow_schema, props) Ok(()) } } diff --git a/src/lang.rs b/src/lang.rs index 06603a8..865823c 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -2,9 +2,30 @@ use strum::{Display, EnumString}; -#[derive(Clone, Copy, Display, EnumString)] +use crate::malie::models::Lang; + +#[derive(Clone, Copy, Debug, Display, EnumString, PartialEq)] #[strum(serialize_all = "lowercase")] pub enum Language { - Es, + De, En, + Es, + EsLa, + It, + Fr, + Pt, +} + +impl From for Lang { + fn from(value: Language) -> Self { + match value { + Language::De => Lang::De, + Language::En => Lang::En, + Language::Es => Lang::Es, + Language::EsLa => Lang::EsLa, + Language::It => Lang::It, + Language::Fr => Lang::Fr, + Language::Pt => Lang::Pt, + } + } } diff --git a/src/main.rs b/src/main.rs index 09aa84c..56529e8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,8 @@ use anyhow::Result; use clap::Parser; +use crate::lang::Language; + pub mod cli; pub mod constants; pub mod data_store; @@ -20,14 +22,14 @@ async fn main() -> Result<()> { }; logging::initialize_logging(log_mode).await?; match args.command { - cli::Command::DownloadData => download_data().await?, + cli::Command::DownloadData { lang } => download_data(lang).await?, cli::Command::Tui => todo!(), } Ok(()) } -async fn download_data() -> Result<()> { +async fn download_data(lang: Language) -> Result<()> { let client = malie::client::Client::new().await?; - client.download_all_data().await?; + client.download_all_data(lang).await?; Ok(()) } diff --git a/src/malie/client.rs b/src/malie/client.rs index 68a4a8f..e8f2de2 100644 --- a/src/malie/client.rs +++ b/src/malie/client.rs @@ -2,15 +2,17 @@ use anyhow::{Context, Result, anyhow}; use camino::Utf8PathBuf; +use futures::future::try_join_all; use tokio::fs::File; use tokio_stream::StreamExt; use tokio_util::io::StreamReader; -use tracing::debug; +use tracing::{debug, info}; use super::models::{Index, RawIndex}; use crate::data_store; use crate::directories::data_cache_directory; -use crate::malie::models::filter_invalid_editions; +use crate::lang::Language; +use crate::malie::models::{Lang, filter_invalid_editions}; /// Client to download data from mallie.io pub struct Client { @@ -29,11 +31,21 @@ impl Client { }) } - pub async fn download_all_data(&self) -> Result<()> { + pub async fn download_all_data(&self, lang: Language) -> Result<()> { + let lang: Lang = lang.into(); let data_store = data_store::Store::new().await?; self.download_tcgl_index_json().await?; let index = self.load_tcgl_index().await?; - data_store.write_index(index).await?; + data_store.write_index(index.clone()).await?; + let edition_downloads = index.into_iter().filter_map(|edition| { + if edition.lang == lang { + Some(self.download_tcgl_edition_json(edition.path)) + } else { + None + } + }); + + try_join_all(edition_downloads).await?; Ok(()) } @@ -44,6 +56,13 @@ impl Client { Ok(()) } + pub async fn download_tcgl_edition_json(&self, url_path: String) -> Result<()> { + let file_path = self.data_cache_directory.join(&url_path); + let url = format!("{TCGL_BASE_URL}/{url_path}"); + self.download_if_not_exists(file_path, &url).await?; + Ok(()) + } + async fn load_tcgl_index(&self) -> Result { let file_path = self.data_cache_directory.join("tcgl_index.json"); let index = tokio::fs::read_to_string(&file_path) @@ -61,6 +80,10 @@ impl Client { return Ok(()); } + if let Some(p) = file_path.parent() { + tokio::fs::create_dir_all(p).await?; + } + let response = self.client.get(url).send().await?; if !response.status().is_success() { return Err(anyhow!( @@ -85,6 +108,7 @@ impl Client { .with_context(|| format!("While writing to file {file_path}"))?; file.sync_all().await?; + info!("Downloaded {file_path} from {url}"); Ok(()) } } diff --git a/src/malie/models.rs b/src/malie/models.rs index 0831402..b84676b 100644 --- a/src/malie/models.rs +++ b/src/malie/models.rs @@ -37,11 +37,11 @@ pub struct RawEdition { abbr: Option, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Edition { - lang: Lang, - path: String, - abbr: EditionCode, + pub lang: Lang, + pub path: String, + pub abbr: EditionCode, } fn deserialize_edition_code<'de, D>(deserializer: D) -> Result, D::Error> @@ -63,19 +63,15 @@ where pub fn filter_invalid_editions(index: RawIndex) -> Index { index .into_iter() - .map(|(lang, v)| { - let lang = lang; - v.into_values() - .map(move |e| match e.abbr { - Some(abbr) => Some(Edition { - path: e.path, - abbr, - lang, - }), - None => None, - }) - .flatten() + .flat_map(|(lang, v)| { + v.into_values().filter_map(move |e| match e.abbr { + Some(abbr) => Some(Edition { + path: e.path, + abbr, + lang, + }), + None => None, + }) }) - .flatten() .collect() }