diff --git a/Cargo.lock b/Cargo.lock index f062610..624f3a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -42,9 +42,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -78,26 +78,26 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" dependencies = [ "addr2line", "cc", @@ -110,15 +110,15 @@ dependencies = [ [[package]] name = "base64" -version = "0.13.1" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64" -version = "0.21.7" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" [[package]] name = "bigdecimal" @@ -139,7 +139,7 @@ version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cexpr", "clang-sys", "itertools", @@ -150,7 +150,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -161,9 +161,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" dependencies = [ "serde", ] @@ -191,9 +191,9 @@ dependencies = [ [[package]] name = "borsh" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58b559fd6448c6e2fd0adb5720cd98a2506594cafa4737ff98c396f3e82f667" +checksum = "0901fc8eb0aca4c83be0106d6f2db17d86a08dfc2c25f0e84464bf381158add6" dependencies = [ "borsh-derive", "cfg_aliases", @@ -201,15 +201,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aadb5b6ccbd078890f6d7003694e33816e6b784358f18e15e7e6d9f065a57cd" +checksum = "51670c3aa053938b0ee3bd67c3817e471e626151131b934038e83c5bf8de48f5" dependencies = [ "once_cell", - "proc-macro-crate 3.1.0", + "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", "syn_derive", ] @@ -258,9 +258,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" @@ -295,9 +295,9 @@ checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" [[package]] name = "chrono" -version = "0.4.35" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" +checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", @@ -327,25 +327,6 @@ dependencies = [ "cc", ] -[[package]] -name = "config" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23738e11972c7643e4ec947840fc463b6a571afcd3e735bdfce7d03c7a784aca" -dependencies = [ - "async-trait", - "json5", - "lazy_static", - "nom", - "pathdiff", - "ron 0.7.1", - "rust-ini 0.18.0", - "serde", - "serde_json", - "toml 0.5.11", - "yaml-rust", -] - [[package]] name = "config" version = "0.14.0" @@ -358,11 +339,11 @@ dependencies = [ "lazy_static", "nom", "pathdiff", - "ron 0.8.1", - "rust-ini 0.19.0", + "ron", + "rust-ini", "serde", "serde_json", - "toml 0.8.11", + "toml", "yaml-rust", ] @@ -580,7 +561,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -591,7 +572,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -614,12 +595,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dlv-list" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" - [[package]] name = "dlv-list" version = "0.5.2" @@ -662,9 +637,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "flate2" @@ -732,7 +707,7 @@ checksum = "b0fa992f1656e1707946bbba340ad244f0814009ef8c0118eb7b658395f19a2e" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -744,7 +719,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -756,7 +731,7 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -821,7 +796,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -889,35 +864,16 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "h2" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31d030e59af851932b72ceebadf4a2b5986dba4c3b99dd2493f8273a0f151943" +checksum = "816ec7294445779408f36fe57bc5b7fc1cf59664059096c65f905c1c61f58069" dependencies = [ "bytes", "fnv", "futures-core", "futures-sink", "futures-util", - "http 1.1.0", + "http", "indexmap", "slab", "tokio", @@ -977,17 +933,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.1.0" @@ -999,17 +944,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.0" @@ -1017,7 +951,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", - "http 1.1.0", + "http", ] [[package]] @@ -1028,8 +962,8 @@ checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" dependencies = [ "bytes", "futures-core", - "http 1.1.0", - "http-body 1.0.0", + "http", + "http-body", "pin-project-lite", ] @@ -1045,30 +979,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "hyper" -version = "0.14.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.24", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "hyper" version = "1.2.0" @@ -1078,9 +988,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.2", - "http 1.1.0", - "http-body 1.0.0", + "h2", + "http", + "http-body", "httparse", "httpdate", "itoa", @@ -1092,15 +1002,18 @@ dependencies = [ [[package]] name = "hyper-tls" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", - "hyper 0.14.28", + "http-body-util", + "hyper", + "hyper-util", "native-tls", "tokio", "tokio-native-tls", + "tower-service", ] [[package]] @@ -1112,9 +1025,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", - "http-body 1.0.0", - "hyper 1.2.0", + "http", + "http-body", + "hyper", "pin-project-lite", "socket2", "tokio", @@ -1174,9 +1087,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.5" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -1199,9 +1112,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" @@ -1277,9 +1190,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libz-sys" -version = "1.1.15" +version = "1.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" +checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" dependencies = [ "cc", "libc", @@ -1326,12 +1239,12 @@ dependencies = [ [[package]] name = "mediawiki" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fba1740cf9170faae4e7e6b74964d9ee198b667a6dc3cc810c941fedd281b7" +checksum = "b41a2d023414add3e2b8fc49729d217bebee27205b1543f9e1562460d5c34e4c" dependencies = [ - "base64 0.21.7", - "config 0.13.4", + "base64 0.22.0", + "config", "futures", "hmac", "nanoid", @@ -1345,9 +1258,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "mime" @@ -1384,27 +1297,27 @@ dependencies = [ [[package]] name = "mysql-common-derive" -version = "0.30.2" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f" +checksum = "c60492b5eb751e55b42d716b6b26dceb66767996cd7a5560a842fbf613ca2e92" dependencies = [ "darling", "heck", "num-bigint", - "proc-macro-crate 1.3.1", + "proc-macro-crate", "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", "termcolor", "thiserror", ] [[package]] name = "mysql_async" -version = "0.33.0" +version = "0.34.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6750b17ce50f8f112ef1a8394121090d47c596b56a6a17569ca680a9626e2ef2" +checksum = "fbfe87d7e35cb72363326216cc1712b865d8d4f70abf3b2d2e6b251fb6b2f427" dependencies = [ "bytes", "crossbeam", @@ -1437,14 +1350,14 @@ dependencies = [ [[package]] name = "mysql_common" -version = "0.31.0" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef" +checksum = "0ccdc1fe2bb3ef97e07ba4397327ed45509a1e2e499e2f8265243879cbc7313c" dependencies = [ "base64 0.21.7", "bigdecimal", "bindgen", - "bitflags 2.4.2", + "bitflags 2.5.0", "bitvec", "btoi", "byteorder", @@ -1587,7 +1500,7 @@ version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cfg-if", "foreign-types", "libc", @@ -1604,7 +1517,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1615,9 +1528,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.101" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -1625,23 +1538,13 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "ordered-multimap" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" -dependencies = [ - "dlv-list 0.3.0", - "hashbrown 0.12.3", -] - [[package]] name = "ordered-multimap" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ed8acf08e98e744e5384c8bc63ceb0364e68a6854187221c18df61c4797690e" dependencies = [ - "dlv-list 0.5.2", + "dlv-list", "hashbrown 0.13.2", ] @@ -1698,9 +1601,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f8023d0fb78c8e03784ea1c7f3fa36e68a723138990b8d5a47d916b651e7a8" +checksum = "311fb059dee1a7b802f036316d790138c613a4e8b180c822e3925a662e9f0c95" dependencies = [ "memchr", "thiserror", @@ -1709,9 +1612,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d24f72393fd16ab6ac5738bc33cdb6a9aa73f8b902e8fe29cf4e67d7dd1026" +checksum = "f73541b156d32197eecda1a4014d7f868fd2bcb3c550d5386087cfba442bf69c" dependencies = [ "pest", "pest_generator", @@ -1719,22 +1622,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc17e2a6c7d0a492f0158d7a4bd66cc17280308bbaff78d5bef566dca35ab80" +checksum = "c35eeed0a3fab112f75165fdc026b3913f4183133f19b49be773ac9ea966e8bd" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] name = "pest_meta" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934cd7631c050f4674352a6e835d5f6711ffbfb9345c2fc0107155ac495ae293" +checksum = "2adbf29bb9776f28caece835398781ab24435585fe0d4dc1374a61db5accedca" dependencies = [ "once_cell", "pest", @@ -1747,11 +1650,11 @@ version = "0.1.0" dependencies = [ "async-trait", "chrono", - "config 0.14.0", + "config", "futures", "htmlescape", "http-body-util", - "hyper 1.2.0", + "hyper", "hyper-util", "lazy_static", "mysql_async", @@ -1787,14 +1690,14 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -1820,16 +1723,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "proc-macro-crate" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" -dependencies = [ - "once_cell", - "toml_edit 0.19.15", -] - [[package]] name = "proc-macro-crate" version = "3.1.0" @@ -1955,9 +1848,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -1984,9 +1877,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -2007,9 +1900,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rend" @@ -2022,22 +1915,25 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.26" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bf93c4af7a8bb7d879d51cebe797356ff10ae8516ace542b5182d9dcac10b2" +checksum = "2d66674f2b6fb864665eea7a3c1ac4e3dfacd2fda83cf6f935a612e01b0e3338" dependencies = [ "base64 0.21.7", "bytes", "cookie", "cookie_store", "encoding_rs", + "futures-channel", "futures-core", "futures-util", - "h2 0.3.24", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.28", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", "hyper-tls", + "hyper-util", "ipnet", "js-sys", "log", @@ -2091,17 +1987,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "ron" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88073939a61e5b7680558e6be56b419e208420c2adb92be54921fa6b72283f1a" -dependencies = [ - "base64 0.13.1", - "bitflags 1.3.2", - "serde", -] - [[package]] name = "ron" version = "0.8.1" @@ -2109,21 +1994,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64 0.21.7", - "bitflags 2.4.2", + "bitflags 2.5.0", "serde", "serde_derive", ] -[[package]] -name = "rust-ini" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" -dependencies = [ - "cfg-if", - "ordered-multimap 0.4.3", -] - [[package]] name = "rust-ini" version = "0.19.0" @@ -2131,14 +2006,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e2a3bcec1f113553ef1c88aae6c020a369d03d55b58de9869a0908930385091" dependencies = [ "cfg-if", - "ordered-multimap 0.6.0", + "ordered-multimap", ] [[package]] name = "rust_decimal" -version = "1.34.3" +version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39449a79f45e8da28c57c341891b69a183044b29518bb8f86dbac9df60bb7df" +checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" dependencies = [ "arrayvec", "borsh", @@ -2164,11 +2039,11 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", @@ -2219,9 +2094,9 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "2.9.2" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ "bitflags 1.3.2", "core-foundation", @@ -2232,9 +2107,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" dependencies = [ "core-foundation-sys", "libc", @@ -2257,14 +2132,14 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", @@ -2355,9 +2230,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" @@ -2410,9 +2285,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.52" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -2428,7 +2303,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2502,7 +2377,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2572,9 +2447,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -2597,7 +2472,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2626,23 +2501,14 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - -[[package]] -name = "toml" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af06656561d28735e9c1cd63dfd57132c8155426aa6af24f36a00a351f88c48e" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.7", + "toml_edit 0.22.9", ] [[package]] @@ -2654,17 +2520,6 @@ dependencies = [ "serde", ] -[[package]] -name = "toml_edit" -version = "0.19.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" -dependencies = [ - "indexmap", - "toml_datetime", - "winnow 0.5.40", -] - [[package]] name = "toml_edit" version = "0.21.1" @@ -2678,9 +2533,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.7" +version = "0.22.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18769cd1cec395d70860ceb4d932812a0b4d06b1a4bb336745a4d21b9496e992" +checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" dependencies = [ "indexmap", "serde", @@ -2737,7 +2592,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2850,9 +2705,9 @@ checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "uuid" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" [[package]] name = "valuable" @@ -2908,7 +2763,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -2942,7 +2797,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3214,33 +3069,32 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] name = "zstd" -version = "0.12.4" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "6.0.6" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" dependencies = [ - "libc", "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.9+zstd.1.5.5" +version = "2.0.10+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" dependencies = [ "cc", "pkg-config", diff --git a/src/datasource.rs b/src/datasource.rs index 3dbe5a8..2fbc194 100644 --- a/src/datasource.rs +++ b/src/datasource.rs @@ -1,16 +1,7 @@ use crate::pagelist::*; -use crate::pagelist_entry::PageListEntry; use crate::platform::Platform; use async_trait::async_trait; -use mysql_async::from_row; -use mysql_async::prelude::Queryable; use mysql_async::Value as MyValue; -use rayon::prelude::*; -use serde_json::value::Value; -use std::collections::HashMap; -use std::time; -use wikibase::mediawiki::api::Api; -use wikibase::mediawiki::title::Title; pub type SQLtuple = (String, Vec); @@ -20,574 +11,3 @@ pub trait DataSource { async fn run(&mut self, platform: &Platform) -> Result; fn name(&self) -> String; } - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourceLabels {} - -#[async_trait] -impl DataSource for SourceLabels { - fn name(&self) -> String { - "labels".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("labels_yes") || platform.has_param("labels_any") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let sql = platform.get_label_sql(); - let mut conn = platform - .state() - .get_wiki_db_connection("wikidatawiki") - .await?; - let rows = conn - .exec_iter(sql.0.as_str(), mysql_async::Params::Positional(sql.1)) - .await - .map_err(|e| format!("{:?}", e))? - .map_and_drop(from_row::<(Vec,)>) - .await - .map_err(|e| format!("{:?}", e))?; - conn.disconnect().await.map_err(|e| format!("{:?}", e))?; - let ret = PageList::new_from_wiki_with_capacity("wikidatawiki", rows.len()); - rows.iter() - .map(|row| String::from_utf8_lossy(&row.0)) - .filter_map(|item| Platform::entry_from_entity(&item)) - .for_each(|entry| ret.add_entry(entry).unwrap_or(())); - Ok(ret) - } -} - -impl SourceLabels { - pub fn new() -> Self { - Self {} - } -} - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourceSitelinks { - main_wiki: String, -} - -#[async_trait] -impl DataSource for SourceSitelinks { - fn name(&self) -> String { - "sitelinks".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("sitelinks_yes") || platform.has_param("sitelinks_any") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let sitelinks_yes = platform.get_param_as_vec("sitelinks_yes", "\n"); - let sitelinks_any = platform.get_param_as_vec("sitelinks_any", "\n"); - let sitelinks_no = platform.get_param_as_vec("sitelinks_no", "\n"); - let sitelinks_min = platform.get_param_blank("min_sitelink_count"); - let sitelinks_max = platform.get_param_blank("max_sitelink_count"); - - let use_min_max = !sitelinks_min.is_empty() || !sitelinks_max.is_empty(); - - let mut yes_any = vec![]; - yes_any.extend(&sitelinks_yes); - yes_any.extend(&sitelinks_any); - self.main_wiki = match yes_any.first() { - Some(wiki) => wiki.to_string(), - None => return Err("No yes/any sitelink found in SourceSitelinks::run".to_string()), - }; - - let sitelinks_any: Vec = sitelinks_any - .iter() - .filter_map(|site| self.site2lang(site)) - .collect(); - let sitelinks_no: Vec = sitelinks_no - .iter() - .filter_map(|site| self.site2lang(site)) - .collect(); - - let mut sql: SQLtuple = (String::new(), vec![]); - sql.0 += "SELECT "; - if use_min_max { - sql.0 += "page_title,(SELECT count(*) FROM langlinks WHERE ll_from=page_id) AS sitelink_count" ; - } else { - sql.0 += "DISTINCT page_title,0"; - } - sql.0 += " FROM page WHERE page_namespace=0"; - - sitelinks_yes - .iter() - .filter_map(|site| self.site2lang(site)) - .for_each(|lang| { - sql.0 += " AND page_id IN (SELECT ll_from FROM langlinks WHERE ll_lang=?)"; - sql.1.push(lang.into()); - }); - if !sitelinks_any.is_empty() { - sql.0 += " AND page_id IN (SELECT ll_from FROM langlinks WHERE ll_lang IN ("; - let tmp = Platform::prep_quote(&sitelinks_any); - Platform::append_sql(&mut sql, tmp); - sql.0 += "))"; - } - if !sitelinks_no.is_empty() { - sql.0 += " AND page_id NOT IN (SELECT ll_from FROM langlinks WHERE ll_lang IN ("; - let tmp = Platform::prep_quote(&sitelinks_no); - Platform::append_sql(&mut sql, tmp); - sql.0 += "))"; - } - - let mut having: Vec = vec![]; - if let Ok(s) = sitelinks_min.parse::() { - having.push(format!("sitelink_count>={}", s)) - } - if let Ok(s) = sitelinks_max.parse::() { - having.push(format!("sitelink_count<={}", s)) - } - - if use_min_max { - sql.0 += " GROUP BY page_title"; - } - if !having.is_empty() { - sql.0 += " HAVING "; - sql.0 += &having.join(" AND "); - } - - let mut conn = platform - .state() - .get_wiki_db_connection(&self.main_wiki) - .await?; - let rows = conn - .exec_iter(sql.0.as_str(), mysql_async::Params::Positional(sql.1)) - .await - .map_err(|e| format!("{:?}", e))? - .map_and_drop(from_row::<(Vec, u32)>) - .await - .map_err(|e| format!("{:?}", e))?; - conn.disconnect().await.map_err(|e| format!("{:?}", e))?; - - let ret = PageList::new_from_wiki_with_capacity(&self.main_wiki, rows.len()); - if use_min_max { - ret.set_has_sitelink_counts(true)?; - } - rows.iter() - .map(|row| (String::from_utf8_lossy(&row.0), row.1)) - .map(|(page, sitelinks)| { - let mut ret = PageListEntry::new(Title::new(&page, 0)); - if use_min_max { - ret.sitelink_count = Some(sitelinks); - } - ret - }) - .for_each(|entry| ret.add_entry(entry).unwrap_or(())); - Ok(ret) - } -} - -impl SourceSitelinks { - pub fn new() -> Self { - Self { - ..Default::default() - } - } - - fn site2lang(&self, site: &str) -> Option { - if *site == self.main_wiki { - return None; - } - let ret = if site.ends_with("wiki") { - site.split_at(site.len() - 4).0.to_owned() - } else { - site.to_owned() - }; - Some(ret) - } -} - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourceWikidata {} - -#[async_trait] -impl DataSource for SourceWikidata { - fn name(&self) -> String { - "wikidata".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("wpiu_no_statements") && platform.has_param("wikidata_source_sites") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let no_statements = platform.has_param("wpiu_no_statements"); - let sites = platform - .get_param("wikidata_source_sites") - .ok_or_else(|| "Missing parameter \'wikidata_source_sites\'".to_string())?; - let sites: Vec = sites.split(',').map(|s| s.to_string()).collect(); - if sites.is_empty() { - return Err("SourceWikidata: No wikidata source sites given".to_string()); - } - - let sites = Platform::prep_quote(&sites); - - let mut sql = "SELECT ips_item_id FROM wb_items_per_site".to_string(); - if no_statements { - sql += ",page_props,page"; - } - sql += " WHERE ips_site_id IN ("; - sql += &sites.0; - sql += ")"; - if no_statements { - sql += " AND page_namespace=0 AND ips_item_id=substr(page_title,2)*1 AND page_id=pp_page AND pp_propname='wb-claims' AND pp_sortkey=0" ; - } - - // Perform DB query - let mut conn = platform - .state() - .get_wiki_db_connection("wikidatawiki") - .await?; - let rows = conn - .exec_iter(sql.as_str(), ()) - .await - .map_err(|e| format!("{:?}", e))? - .map_and_drop(from_row::) - .await - .map_err(|e| format!("{:?}", e))?; - conn.disconnect().await.map_err(|e| format!("{:?}", e))?; - let ret = PageList::new_from_wiki("wikidatawiki"); - for ips_item_id in rows { - let term_full_entity_id = format!("Q{}", ips_item_id); - if let Some(entry) = Platform::entry_from_entity(&term_full_entity_id) { - ret.add_entry(entry).unwrap_or(()); - } - } - Ok(ret) - } -} - -impl SourceWikidata { - pub fn new() -> Self { - Self {} - } -} - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourcePagePile {} - -#[async_trait] -impl DataSource for SourcePagePile { - fn name(&self) -> String { - "pagepile".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("pagepile") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let pagepile = platform - .get_param("pagepile") - .ok_or_else(|| "Missing parameter \'pagepile\'".to_string())?; - let timeout = time::Duration::from_secs(240); - let builder = reqwest::ClientBuilder::new().timeout(timeout); - let api = Api::new_from_builder("https://www.wikidata.org/w/api.php", builder) - .await - .map_err(|e| e.to_string())?; - let params = api.params_into(&[ - ("id", &pagepile.to_string()), - ("action", "get_data"), - ("format", "json"), - ("doit", "1"), - ]); - let text = api - .query_raw("https://tools.wmflabs.org/pagepile/api.php", ¶ms, "GET") - .await - .map_err(|e| format!("PagePile: {:?}", e))?; - let v: Value = - serde_json::from_str(&text).map_err(|e| format!("PagePile JSON: {:?}", e))?; - let wiki = v["wiki"] - .as_str() - .ok_or(format!("PagePile {} does not specify a wiki", &pagepile))?; - let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; // Just because we need query_raw - let ret = PageList::new_from_wiki(wiki); - v["pages"] - .as_array() - .ok_or(format!( - "PagePile {} does not have a 'pages' array", - &pagepile - ))? - .iter() - .filter_map(|title| title.as_str()) - .map(|title| PageListEntry::new(Title::new_from_full(title, &api))) - .for_each(|entry| ret.add_entry(entry).unwrap_or(())); - if ret.is_empty()? { - platform.warn("".to_string())?; - } - Ok(ret) - } -} - -impl SourcePagePile { - pub fn new() -> Self { - Self {} - } -} - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourceSearch {} - -#[async_trait] -impl DataSource for SourceSearch { - fn name(&self) -> String { - "search".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("search_query") - && platform.has_param("search_wiki") - && platform.has_param("search_max_results") - && !platform.is_param_blank("search_query") - && !platform.is_param_blank("search_wiki") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let wiki = platform - .get_param("search_wiki") - .ok_or_else(|| "Missing parameter \'search_wiki\'".to_string())?; - let query = platform - .get_param("search_query") - .ok_or_else(|| "Missing parameter \'search_query\'".to_string())?; - let max = match platform - .get_param("search_max_results") - .ok_or_else(|| "Missing parameter \'search_max_results\'".to_string())? - .parse::() - { - Ok(max) => max, - Err(e) => return Err(format!("{:?}", e)), - }; - let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; - let srlimit = if max > 500 { 500 } else { max }; - let srlimit = format!("{}", srlimit); - let namespace_ids = platform - .form_parameters() - .ns - .par_iter() - .cloned() - .collect::>(); - let namespace_ids = if namespace_ids.is_empty() { - "*".to_string() - } else { - namespace_ids - .iter() - .map(|i| i.to_string()) - .collect::>() - .join(",") - }; - let params = api.params_into(&[ - ("action", "query"), - ("list", "search"), - ("srlimit", srlimit.as_str()), - ("srsearch", query.as_str()), - ("srnamespace", namespace_ids.as_str()), - ]); - let result = match api.get_query_api_json_limit(¶ms, Some(max)).await { - Ok(result) => result, - Err(e) => return Err(format!("{:?}", e)), - }; - let titles = Api::result_array_to_titles(&result); - let ret = PageList::new_from_wiki(&wiki); - titles - .iter() - .map(|title| PageListEntry::new(title.to_owned())) - .for_each(|entry| ret.add_entry(entry).unwrap_or(())); - if ret.is_empty()? { - platform.warn("".to_string())?; - } - Ok(ret) - } -} - -impl SourceSearch { - pub fn new() -> Self { - Self {} - } -} - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourceManual {} - -#[async_trait] -impl DataSource for SourceManual { - fn name(&self) -> String { - "manual".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("manual_list") && platform.has_param("manual_list_wiki") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let wiki = platform - .get_param("manual_list_wiki") - .ok_or_else(|| "Missing parameter \'manual_list_wiki\'".to_string())?; - let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; - let ret = PageList::new_from_wiki(&wiki); - platform - .get_param("manual_list") - .ok_or_else(|| "Missing parameter \'manual_list\'".to_string())? - .split('\n') - .filter_map(|line| { - let line = line.trim().to_string(); - if !line.is_empty() { - let title = Title::new_from_full(&line, &api); - let entry = PageListEntry::new(title); - Some(entry) - } else { - None - } - }) - .for_each(|entry| ret.add_entry(entry).unwrap_or(())); - Ok(ret) - } -} - -impl SourceManual { - pub fn new() -> Self { - Self {} - } -} - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct SourceSparql {} - -#[async_trait] -impl DataSource for SourceSparql { - fn name(&self) -> String { - "sparql".to_string() - } - - fn can_run(&self, platform: &Platform) -> bool { - platform.has_param("sparql") - } - - async fn run(&mut self, platform: &Platform) -> Result { - let sparql = platform - .get_param("sparql") - .ok_or_else(|| "Missing parameter \'sparql\'".to_string())?; - - let timeout = time::Duration::from_secs(120); - let builder = reqwest::ClientBuilder::new().timeout(timeout); - let api = Api::new_from_builder("https://www.wikidata.org/w/api.php", builder) - .await - .map_err(|e| format!("SourceSparql::run:1 {:?}", e))?; - - let sparql_url = api - .get_site_info_string("general", "wikibase-sparql") - .map_err(|e| e.to_string())?; - let mut params: HashMap = HashMap::new(); - params.insert("query".to_string(), sparql.to_string()); - params.insert("format".to_string(), "json".to_string()); - - let response = match api - .client() - .post(sparql_url) - .header(reqwest::header::USER_AGENT, "PetScan") - .form(¶ms) - .send() - .await - { - Ok(resp) => resp, - Err(e) => return Err(format!("SPARL: {:?}", e)), - }; - - let ret = PageList::new_from_wiki("wikidatawiki"); - let response = response.text().await.map_err(|e| format!("{:?}", e))?; - let mut mode: u8 = 0; - let mut header = String::new(); - let mut binding = String::new(); - let mut first_var = String::new(); - for line in response.split('\n') { - match line { - "{" => continue, - "}" => continue, - " \"results\" : {" => {} - " \"bindings\" : [ {" => { - mode += 1; - header = "{".to_string() + &header + "\"dummy\": {}}"; - let j: Value = serde_json::from_str(&header).unwrap_or_else(|_| json!({})); - first_var = j["head"]["vars"][0] - .as_str() - .ok_or_else(|| "No variables found in SPARQL result".to_string())? - .to_string(); - } - " }, {" | " } ]" => match mode { - 0 => header += &line, - 1 => { - binding = "{".to_string() + &binding + "}"; - let j: Value = serde_json::from_str(&binding).unwrap_or_else(|_| json!({})); - binding.clear(); - if let Some(entity_url) = j[&first_var]["value"].as_str() { - if let Ok(entity) = api.extract_entity_from_uri(entity_url) { - if let Some(entry) = Platform::entry_from_entity(&entity) { - ret.add_entry(entry).unwrap_or(()) - } - } - } - } - _ => {} - }, - other => match mode { - 0 => header += other, - 1 => binding += other, - _ => {} - }, - } - } - - Ok(ret) - } - - /* - // using serde, obsolete because of high memory usage - fn run(&mut self, platform: &Platform) -> Result { - let sparql = platform - .get_param("sparql") - .ok_or(format!("Missing parameter 'sparql'"))?; - - let timeout = Some(time::Duration::from_secs(120)); - let builder = reqwest::blocking::ClientBuilder::new().timeout(timeout); - let api = Api::new_from_builder("https://www.wikidata.org/w/api.php", builder) - .map_err(|e| format!("SourceSparql::run:1 {:?}", e))?; - let result = api - .sparql_query(sparql.as_str()) - .map_err(|e| format!("SourceSparql::run:2 {:?}", e))?; - let first_var = result["head"]["vars"][0] - .as_str() - .ok_or(format!("No variables found in SPARQL result"))?; - let ret = PageList::new_from_wiki("wikidatawiki"); - api.entities_from_sparql_result(&result, first_var) - .par_iter() - .filter_map(|e| Platform::entry_from_entity(e)) - .for_each(|entry| ret.add_entry(entry)); - if ret.is_empty() { - platform.warn(format!("")); - } - Ok(ret) - } - */ -} - -impl SourceSparql { - pub fn new() -> Self { - Self {} - } -} diff --git a/src/datasource_database.rs b/src/datasource_database.rs index cce6579..9cd83c7 100644 --- a/src/datasource_database.rs +++ b/src/datasource_database.rs @@ -760,7 +760,7 @@ impl SourceDatabase { let mut after: String = self.params.after.clone(); let mut is_before_after_done: bool = false; if let Some(max_age) = self.params.max_age { - let utc = Utc::now().sub(Duration::hours(max_age)); + let utc = Utc::now().sub(Duration::try_hours(max_age).unwrap_or_default()); before = String::new(); after = utc.format("%Y%m%d%H%M%S").to_string(); } diff --git a/src/datasource_labels.rs b/src/datasource_labels.rs new file mode 100644 index 0000000..1d99800 --- /dev/null +++ b/src/datasource_labels.rs @@ -0,0 +1,48 @@ +use crate::datasource::DataSource; +use crate::pagelist::*; +use crate::platform::Platform; +use async_trait::async_trait; +use mysql_async::from_row; +use mysql_async::prelude::Queryable; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourceLabels {} + +#[async_trait] +impl DataSource for SourceLabels { + fn name(&self) -> String { + "labels".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("labels_yes") || platform.has_param("labels_any") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let sql = platform.get_label_sql(); + let mut conn = platform + .state() + .get_wiki_db_connection("wikidatawiki") + .await?; + let rows = conn + .exec_iter(sql.0.as_str(), mysql_async::Params::Positional(sql.1)) + .await + .map_err(|e| format!("{:?}", e))? + .map_and_drop(from_row::<(Vec,)>) + .await + .map_err(|e| format!("{:?}", e))?; + conn.disconnect().await.map_err(|e| format!("{:?}", e))?; + let ret = PageList::new_from_wiki_with_capacity("wikidatawiki", rows.len()); + rows.iter() + .map(|row| String::from_utf8_lossy(&row.0)) + .filter_map(|item| Platform::entry_from_entity(&item)) + .for_each(|entry| ret.add_entry(entry).unwrap_or(())); + Ok(ret) + } +} + +impl SourceLabels { + pub fn new() -> Self { + Self {} + } +} diff --git a/src/datasource_manual.rs b/src/datasource_manual.rs new file mode 100644 index 0000000..463fb7e --- /dev/null +++ b/src/datasource_manual.rs @@ -0,0 +1,50 @@ +use crate::datasource::DataSource; +use crate::pagelist::*; +use crate::pagelist_entry::PageListEntry; +use crate::platform::Platform; +use async_trait::async_trait; +use wikibase::mediawiki::title::Title; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourceManual {} + +#[async_trait] +impl DataSource for SourceManual { + fn name(&self) -> String { + "manual".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("manual_list") && platform.has_param("manual_list_wiki") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let wiki = platform + .get_param("manual_list_wiki") + .ok_or_else(|| "Missing parameter \'manual_list_wiki\'".to_string())?; + let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; + let ret = PageList::new_from_wiki(&wiki); + platform + .get_param("manual_list") + .ok_or_else(|| "Missing parameter \'manual_list\'".to_string())? + .split('\n') + .filter_map(|line| { + let line = line.trim().to_string(); + if !line.is_empty() { + let title = Title::new_from_full(&line, &api); + let entry = PageListEntry::new(title); + Some(entry) + } else { + None + } + }) + .for_each(|entry| ret.add_entry(entry).unwrap_or(())); + Ok(ret) + } +} + +impl SourceManual { + pub fn new() -> Self { + Self {} + } +} diff --git a/src/datasource_pagepile.rs b/src/datasource_pagepile.rs new file mode 100644 index 0000000..58dbfd4 --- /dev/null +++ b/src/datasource_pagepile.rs @@ -0,0 +1,71 @@ +use crate::datasource::DataSource; +use crate::pagelist::*; +use crate::pagelist_entry::PageListEntry; +use crate::platform::Platform; +use async_trait::async_trait; +use serde_json::value::Value; +use std::time; +use wikibase::mediawiki::api::Api; +use wikibase::mediawiki::title::Title; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourcePagePile {} + +#[async_trait] +impl DataSource for SourcePagePile { + fn name(&self) -> String { + "pagepile".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("pagepile") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let pagepile = platform + .get_param("pagepile") + .ok_or_else(|| "Missing parameter \'pagepile\'".to_string())?; + let timeout = time::Duration::from_secs(240); + let builder = reqwest::ClientBuilder::new().timeout(timeout); + let api = Api::new_from_builder("https://www.wikidata.org/w/api.php", builder) + .await + .map_err(|e| e.to_string())?; + let params = api.params_into(&[ + ("id", &pagepile.to_string()), + ("action", "get_data"), + ("format", "json"), + ("doit", "1"), + ]); + let text = api + .query_raw("https://tools.wmflabs.org/pagepile/api.php", ¶ms, "GET") + .await + .map_err(|e| format!("PagePile: {:?}", e))?; + let v: Value = + serde_json::from_str(&text).map_err(|e| format!("PagePile JSON: {:?}", e))?; + let wiki = v["wiki"] + .as_str() + .ok_or(format!("PagePile {} does not specify a wiki", &pagepile))?; + let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; // Just because we need query_raw + let ret = PageList::new_from_wiki(wiki); + v["pages"] + .as_array() + .ok_or(format!( + "PagePile {} does not have a 'pages' array", + &pagepile + ))? + .iter() + .filter_map(|title| title.as_str()) + .map(|title| PageListEntry::new(Title::new_from_full(title, &api))) + .for_each(|entry| ret.add_entry(entry).unwrap_or(())); + if ret.is_empty()? { + platform.warn("".to_string())?; + } + Ok(ret) + } +} + +impl SourcePagePile { + pub fn new() -> Self { + Self {} + } +} diff --git a/src/datasource_search.rs b/src/datasource_search.rs new file mode 100644 index 0000000..13e40ef --- /dev/null +++ b/src/datasource_search.rs @@ -0,0 +1,87 @@ +use crate::datasource::DataSource; +use crate::pagelist::*; +use crate::pagelist_entry::PageListEntry; +use crate::platform::Platform; +use async_trait::async_trait; +use rayon::prelude::*; +use wikibase::mediawiki::api::Api; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourceSearch {} + +#[async_trait] +impl DataSource for SourceSearch { + fn name(&self) -> String { + "search".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("search_query") + && platform.has_param("search_wiki") + && platform.has_param("search_max_results") + && !platform.is_param_blank("search_query") + && !platform.is_param_blank("search_wiki") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let wiki = platform + .get_param("search_wiki") + .ok_or_else(|| "Missing parameter \'search_wiki\'".to_string())?; + let query = platform + .get_param("search_query") + .ok_or_else(|| "Missing parameter \'search_query\'".to_string())?; + let max = match platform + .get_param("search_max_results") + .ok_or_else(|| "Missing parameter \'search_max_results\'".to_string())? + .parse::() + { + Ok(max) => max, + Err(e) => return Err(format!("{:?}", e)), + }; + let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; + let srlimit = if max > 500 { 500 } else { max }; + let srlimit = format!("{}", srlimit); + let namespace_ids = platform + .form_parameters() + .ns + .par_iter() + .cloned() + .collect::>(); + let namespace_ids = if namespace_ids.is_empty() { + "*".to_string() + } else { + namespace_ids + .iter() + .map(|i| i.to_string()) + .collect::>() + .join(",") + }; + let params = api.params_into(&[ + ("action", "query"), + ("list", "search"), + ("srlimit", srlimit.as_str()), + ("srsearch", query.as_str()), + ("srnamespace", namespace_ids.as_str()), + ]); + let result = match api.get_query_api_json_limit(¶ms, Some(max)).await { + Ok(result) => result, + Err(e) => return Err(format!("{:?}", e)), + }; + let titles = Api::result_array_to_titles(&result); + let ret = PageList::new_from_wiki(&wiki); + titles + .iter() + .map(|title| PageListEntry::new(title.to_owned())) + .for_each(|entry| ret.add_entry(entry).unwrap_or(())); + if ret.is_empty()? { + platform.warn("".to_string())?; + } + Ok(ret) + } +} + +impl SourceSearch { + pub fn new() -> Self { + Self {} + } +} diff --git a/src/datasource_sitelinks.rs b/src/datasource_sitelinks.rs new file mode 100644 index 0000000..ffa5db6 --- /dev/null +++ b/src/datasource_sitelinks.rs @@ -0,0 +1,145 @@ +use crate::datasource::{DataSource, SQLtuple}; +use crate::pagelist::*; +use crate::pagelist_entry::PageListEntry; +use crate::platform::Platform; +use async_trait::async_trait; +use mysql_async::from_row; +use mysql_async::prelude::Queryable; +use wikibase::mediawiki::title::Title; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourceSitelinks { + main_wiki: String, +} + +#[async_trait] +impl DataSource for SourceSitelinks { + fn name(&self) -> String { + "sitelinks".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("sitelinks_yes") || platform.has_param("sitelinks_any") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let sitelinks_yes = platform.get_param_as_vec("sitelinks_yes", "\n"); + let sitelinks_any = platform.get_param_as_vec("sitelinks_any", "\n"); + let sitelinks_no = platform.get_param_as_vec("sitelinks_no", "\n"); + let sitelinks_min = platform.get_param_blank("min_sitelink_count"); + let sitelinks_max = platform.get_param_blank("max_sitelink_count"); + + let use_min_max = !sitelinks_min.is_empty() || !sitelinks_max.is_empty(); + + let mut yes_any = vec![]; + yes_any.extend(&sitelinks_yes); + yes_any.extend(&sitelinks_any); + self.main_wiki = match yes_any.first() { + Some(wiki) => wiki.to_string(), + None => return Err("No yes/any sitelink found in SourceSitelinks::run".to_string()), + }; + + let sitelinks_any: Vec = sitelinks_any + .iter() + .filter_map(|site| self.site2lang(site)) + .collect(); + let sitelinks_no: Vec = sitelinks_no + .iter() + .filter_map(|site| self.site2lang(site)) + .collect(); + + let mut sql: SQLtuple = (String::new(), vec![]); + sql.0 += "SELECT "; + if use_min_max { + sql.0 += "page_title,(SELECT count(*) FROM langlinks WHERE ll_from=page_id) AS sitelink_count" ; + } else { + sql.0 += "DISTINCT page_title,0"; + } + sql.0 += " FROM page WHERE page_namespace=0"; + + sitelinks_yes + .iter() + .filter_map(|site| self.site2lang(site)) + .for_each(|lang| { + sql.0 += " AND page_id IN (SELECT ll_from FROM langlinks WHERE ll_lang=?)"; + sql.1.push(lang.into()); + }); + if !sitelinks_any.is_empty() { + sql.0 += " AND page_id IN (SELECT ll_from FROM langlinks WHERE ll_lang IN ("; + let tmp = Platform::prep_quote(&sitelinks_any); + Platform::append_sql(&mut sql, tmp); + sql.0 += "))"; + } + if !sitelinks_no.is_empty() { + sql.0 += " AND page_id NOT IN (SELECT ll_from FROM langlinks WHERE ll_lang IN ("; + let tmp = Platform::prep_quote(&sitelinks_no); + Platform::append_sql(&mut sql, tmp); + sql.0 += "))"; + } + + let mut having: Vec = vec![]; + if let Ok(s) = sitelinks_min.parse::() { + having.push(format!("sitelink_count>={}", s)) + } + if let Ok(s) = sitelinks_max.parse::() { + having.push(format!("sitelink_count<={}", s)) + } + + if use_min_max { + sql.0 += " GROUP BY page_title"; + } + if !having.is_empty() { + sql.0 += " HAVING "; + sql.0 += &having.join(" AND "); + } + + let mut conn = platform + .state() + .get_wiki_db_connection(&self.main_wiki) + .await?; + let rows = conn + .exec_iter(sql.0.as_str(), mysql_async::Params::Positional(sql.1)) + .await + .map_err(|e| format!("{:?}", e))? + .map_and_drop(from_row::<(Vec, u32)>) + .await + .map_err(|e| format!("{:?}", e))?; + conn.disconnect().await.map_err(|e| format!("{:?}", e))?; + + let ret = PageList::new_from_wiki_with_capacity(&self.main_wiki, rows.len()); + if use_min_max { + ret.set_has_sitelink_counts(true)?; + } + rows.iter() + .map(|row| (String::from_utf8_lossy(&row.0), row.1)) + .map(|(page, sitelinks)| { + let mut ret = PageListEntry::new(Title::new(&page, 0)); + if use_min_max { + ret.sitelink_count = Some(sitelinks); + } + ret + }) + .for_each(|entry| ret.add_entry(entry).unwrap_or(())); + Ok(ret) + } +} + +impl SourceSitelinks { + pub fn new() -> Self { + Self { + ..Default::default() + } + } + + fn site2lang(&self, site: &str) -> Option { + if *site == self.main_wiki { + return None; + } + let ret = if site.ends_with("wiki") { + site.split_at(site.len() - 4).0.to_owned() + } else { + site.to_owned() + }; + Some(ret) + } +} diff --git a/src/datasource_sparql.rs b/src/datasource_sparql.rs new file mode 100644 index 0000000..9a22ee2 --- /dev/null +++ b/src/datasource_sparql.rs @@ -0,0 +1,134 @@ +use crate::datasource::DataSource; +use crate::pagelist::*; +use crate::platform::Platform; +use async_trait::async_trait; +use serde_json::value::Value; +use std::collections::HashMap; +use std::time; +use wikibase::mediawiki::api::Api; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourceSparql {} + +#[async_trait] +impl DataSource for SourceSparql { + fn name(&self) -> String { + "sparql".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("sparql") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let sparql = platform + .get_param("sparql") + .ok_or_else(|| "Missing parameter \'sparql\'".to_string())?; + + let timeout = time::Duration::from_secs(120); + let builder = reqwest::ClientBuilder::new().timeout(timeout); + let api = Api::new_from_builder("https://www.wikidata.org/w/api.php", builder) + .await + .map_err(|e| format!("SourceSparql::run:1 {:?}", e))?; + + let sparql_url = api + .get_site_info_string("general", "wikibase-sparql") + .map_err(|e| e.to_string())?; + let mut params: HashMap = HashMap::new(); + params.insert("query".to_string(), sparql.to_string()); + params.insert("format".to_string(), "json".to_string()); + + let response = match api + .client() + .post(sparql_url) + .header(reqwest::header::USER_AGENT, "PetScan") + .form(¶ms) + .send() + .await + { + Ok(resp) => resp, + Err(e) => return Err(format!("SPARL: {:?}", e)), + }; + + let ret = PageList::new_from_wiki("wikidatawiki"); + let response = response.text().await.map_err(|e| format!("{:?}", e))?; + let mut mode: u8 = 0; + let mut header = String::new(); + let mut binding = String::new(); + let mut first_var = String::new(); + for line in response.split('\n') { + match line { + "{" => continue, + "}" => continue, + " \"results\" : {" => {} + " \"bindings\" : [ {" => { + mode += 1; + header = "{".to_string() + &header + "\"dummy\": {}}"; + let j: Value = serde_json::from_str(&header).unwrap_or_else(|_| json!({})); + first_var = j["head"]["vars"][0] + .as_str() + .ok_or_else(|| "No variables found in SPARQL result".to_string())? + .to_string(); + } + " }, {" | " } ]" => match mode { + 0 => header += &line, + 1 => { + binding = "{".to_string() + &binding + "}"; + let j: Value = serde_json::from_str(&binding).unwrap_or_else(|_| json!({})); + binding.clear(); + if let Some(entity_url) = j[&first_var]["value"].as_str() { + if let Ok(entity) = api.extract_entity_from_uri(entity_url) { + if let Some(entry) = Platform::entry_from_entity(&entity) { + ret.add_entry(entry).unwrap_or(()) + } + } + } + } + _ => {} + }, + other => match mode { + 0 => header += other, + 1 => binding += other, + _ => {} + }, + } + } + + Ok(ret) + } + + /* + // using serde, obsolete because of high memory usage + fn run(&mut self, platform: &Platform) -> Result { + let sparql = platform + .get_param("sparql") + .ok_or(format!("Missing parameter 'sparql'"))?; + + let timeout = Some(time::Duration::from_secs(120)); + let builder = reqwest::blocking::ClientBuilder::new().timeout(timeout); + let api = Api::new_from_builder("https://www.wikidata.org/w/api.php", builder) + .map_err(|e| format!("SourceSparql::run:1 {:?}", e))?; + let result = api + .sparql_query(sparql.as_str()) + .map_err(|e| format!("SourceSparql::run:2 {:?}", e))?; + let first_var = result["head"]["vars"][0] + .as_str() + .ok_or(format!("No variables found in SPARQL result"))?; + let ret = PageList::new_from_wiki("wikidatawiki"); + api.entities_from_sparql_result(&result, first_var) + .par_iter() + .filter_map(|e| Platform::entry_from_entity(e)) + .for_each(|entry| ret.add_entry(entry)); + if ret.is_empty() { + platform.warn(format!("")); + } + Ok(ret) + } + */ +} + +impl SourceSparql { + pub fn new() -> Self { + Self {} + } +} diff --git a/src/datasource_wikidata.rs b/src/datasource_wikidata.rs new file mode 100644 index 0000000..26e6b27 --- /dev/null +++ b/src/datasource_wikidata.rs @@ -0,0 +1,72 @@ +use crate::datasource::DataSource; +use crate::pagelist::*; +use crate::platform::Platform; +use async_trait::async_trait; +use mysql_async::from_row; +use mysql_async::prelude::Queryable; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct SourceWikidata {} + +#[async_trait] +impl DataSource for SourceWikidata { + fn name(&self) -> String { + "wikidata".to_string() + } + + fn can_run(&self, platform: &Platform) -> bool { + platform.has_param("wpiu_no_statements") && platform.has_param("wikidata_source_sites") + } + + async fn run(&mut self, platform: &Platform) -> Result { + let no_statements = platform.has_param("wpiu_no_statements"); + let sites = platform + .get_param("wikidata_source_sites") + .ok_or_else(|| "Missing parameter \'wikidata_source_sites\'".to_string())?; + let sites: Vec = sites.split(',').map(|s| s.to_string()).collect(); + if sites.is_empty() { + return Err("SourceWikidata: No wikidata source sites given".to_string()); + } + + let sites = Platform::prep_quote(&sites); + + let mut sql = "SELECT ips_item_id FROM wb_items_per_site".to_string(); + if no_statements { + sql += ",page_props,page"; + } + sql += " WHERE ips_site_id IN ("; + sql += &sites.0; + sql += ")"; + if no_statements { + sql += " AND page_namespace=0 AND ips_item_id=substr(page_title,2)*1 AND page_id=pp_page AND pp_propname='wb-claims' AND pp_sortkey=0" ; + } + + // Perform DB query + let mut conn = platform + .state() + .get_wiki_db_connection("wikidatawiki") + .await?; + let rows = conn + .exec_iter(sql.as_str(), ()) + .await + .map_err(|e| format!("{:?}", e))? + .map_and_drop(from_row::) + .await + .map_err(|e| format!("{:?}", e))?; + conn.disconnect().await.map_err(|e| format!("{:?}", e))?; + let ret = PageList::new_from_wiki("wikidatawiki"); + for ips_item_id in rows { + let term_full_entity_id = format!("Q{}", ips_item_id); + if let Some(entry) = Platform::entry_from_entity(&term_full_entity_id) { + ret.add_entry(entry).unwrap_or(()); + } + } + Ok(ret) + } +} + +impl SourceWikidata { + pub fn new() -> Self { + Self {} + } +} diff --git a/src/form_parameters.rs b/src/form_parameters.rs index f6fab0a..d2d900b 100644 --- a/src/form_parameters.rs +++ b/src/form_parameters.rs @@ -95,16 +95,6 @@ impl FormParameters { self.ns = Self::ns_from_params(&self.params); } - /* - pub fn to_string(&self) -> String { - self.params - .iter() - .map(|(k, v)| Self::percent_encode(k) + "=" + &Self::percent_encode(v)) - .collect::>() - .join("&") - } - */ - pub fn to_string_no_doit(&self) -> String { self.params .iter() @@ -148,6 +138,7 @@ impl FormParameters { } } + /// Legacy parameter support fn legacy_parameters(&mut self) { self.fallback("language", "lang"); self.fallback("categories", "cats"); @@ -200,3 +191,156 @@ impl FormParameters { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_legacy_parameters() { + let mut form_params = FormParameters::new(); + form_params.set_param("manual_list_wiki", "enwiki"); + form_params.legacy_parameters(); + assert_eq!( + form_params.params.get("common_wiki_other"), + Some(&"enwiki".to_string()) + ); + assert_eq!( + form_params.params.get("manual_list_wiki"), + Some(&"".to_string()) + ); + } + + #[test] + fn test_has_param_with_value() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + assert_eq!(form_params.has_param_with_value("test"), true); + assert_eq!(form_params.has_param_with_value("test2"), false); + } + + #[test] + fn test_to_string_no_doit() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + form_params.set_param("doit", "1"); + assert_eq!(form_params.to_string_no_doit(), "test=value".to_string()); + } + + #[test] + fn test_rebase() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + let mut form_params2 = FormParameters::new(); + form_params2.set_param("test2", "value2"); + form_params2.rebase(&form_params); + assert_eq!(form_params2.params.get("test"), Some(&"value".to_string())); + assert_eq!( + form_params2.params.get("test2"), + Some(&"value2".to_string()) + ); + } + + #[test] + fn test_outcome_from_query() { + let form_params = FormParameters::outcome_from_query("test=value&test2=value2"); + assert_eq!(form_params.is_ok(), true); + let form_params = form_params.unwrap(); + assert_eq!(form_params.params.get("test"), Some(&"value".to_string())); + assert_eq!(form_params.params.get("test2"), Some(&"value2".to_string())); + } + + #[test] + fn test_has_param() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + assert_eq!(form_params.has_param("test"), true); + assert_eq!(form_params.has_param("test2"), false); + } + + #[test] + fn test_set_param() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + assert_eq!(form_params.params.get("test"), Some(&"value".to_string())); + } + + #[test] + fn test_ns_from_params() { + let mut params = HashMap::new(); + params.insert("ns[0]".to_string(), "1".to_string()); + params.insert("ns[1]".to_string(), "1".to_string()); + params.insert("ns[2]".to_string(), "1".to_string()); + let form_params = FormParameters::new_from_pairs(params); + assert_eq!(form_params.ns.contains(&0), true); + assert_eq!(form_params.ns.contains(&1), true); + assert_eq!(form_params.ns.contains(&2), true); + } + + #[test] + fn test_percent_encode() { + assert_eq!( + FormParameters::percent_encode("test value"), + "test%20value".to_string() + ); + } + + #[test] + fn test_fallback() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + form_params.fallback("test2", "test"); + assert_eq!(form_params.params.get("test2"), Some(&"value".to_string())); + } + + #[test] + fn test_new_from_pairs() { + let mut params = HashMap::new(); + params.insert("test".to_string(), "value".to_string()); + let form_params = FormParameters::new_from_pairs(params); + assert_eq!(form_params.params.get("test"), Some(&"value".to_string())); + } + + #[test] + fn test_new() { + let form_params = FormParameters::new(); + assert_eq!(form_params.params.len(), 0); + } + + #[test] + fn test_to_string() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + assert_eq!(form_params.to_string(), "test=value".to_string()); + } + + #[test] + fn test_rebase_empty() { + let mut form_params = FormParameters::new(); + let form_params2 = FormParameters::new(); + form_params.rebase(&form_params2); + assert_eq!(form_params.params.len(), 0); + } + + #[test] + fn test_rebase_no_overwrite() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + let mut form_params2 = FormParameters::new(); + form_params2.set_param("test", "value2"); + form_params.rebase(&form_params2); + assert_eq!(form_params.params.get("test"), Some(&"value".to_string())); + } + + #[test] + fn test_rebase_overwrite() { + let mut form_params = FormParameters::new(); + form_params.set_param("test", "value"); + let mut form_params2 = FormParameters::new(); + form_params2.set_param("test", "value2"); + form_params2.set_param("test2", "value3"); + form_params.rebase(&form_params2); + assert_eq!(form_params.params.get("test"), Some(&"value".to_string())); + assert_eq!(form_params.params.get("test2"), Some(&"value3".to_string())); + } +} diff --git a/src/main.rs b/src/main.rs index 79e8378..12a5f7d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,11 +11,26 @@ extern crate serde_json; pub mod app_state; pub mod datasource; pub mod datasource_database; +pub mod datasource_labels; +pub mod datasource_manual; +pub mod datasource_pagepile; +pub mod datasource_search; +pub mod datasource_sitelinks; +pub mod datasource_sparql; +pub mod datasource_wikidata; pub mod form_parameters; pub mod pagelist; pub mod pagelist_entry; pub mod platform; pub mod render; +pub mod render_html; +pub mod render_json; +pub mod render_kml; +pub mod render_pagepile; +pub mod render_params; +pub mod render_plaintext; +pub mod render_tsv; +pub mod render_wikitext; pub mod wdfist; use std::convert::Infallible; diff --git a/src/platform.rs b/src/platform.rs index 774482f..9df7f97 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -1,12 +1,26 @@ use crate::app_state::AppState; use crate::datasource::*; use crate::datasource_database::{SourceDatabase, SourceDatabaseParameters}; +use crate::datasource_labels::SourceLabels; +use crate::datasource_manual::SourceManual; +use crate::datasource_pagepile::SourcePagePile; +use crate::datasource_search::SourceSearch; +use crate::datasource_sitelinks::SourceSitelinks; +use crate::datasource_sparql::SourceSparql; +use crate::datasource_wikidata::SourceWikidata; use crate::form_parameters::FormParameters; use crate::pagelist::*; use crate::pagelist_entry::{ FileInfo, LinkCount, PageCoordinates, PageListEntry, PageListSort, TriState, }; use crate::render::*; +use crate::render_html::RenderHTML; +use crate::render_json::RenderJSON; +use crate::render_kml::RenderKML; +use crate::render_pagepile::RenderPagePile; +use crate::render_plaintext::RenderPlainText; +use crate::render_tsv::RenderTSV; +use crate::render_wikitext::RenderWiki; use crate::wdfist::*; use futures::future::join_all; use mysql_async as my; @@ -485,6 +499,7 @@ impl Platform { for element in sql_batch.1.iter_mut() { *element = match element { MyValue::Bytes(x) => { + let x = &x; let u2s = Title::underscores_to_spaces(&String::from_utf8_lossy(x)) ; MyValue::Bytes(u2s.into()) } diff --git a/src/render.rs b/src/render.rs index e292759..7a1671d 100644 --- a/src/render.rs +++ b/src/render.rs @@ -1,90 +1,10 @@ -use crate::app_state::AppState; -use crate::form_parameters::FormParameters; use crate::pagelist_entry::{LinkCount, PageListEntry}; use crate::platform::*; +use crate::render_params::RenderParams; use async_trait::async_trait; -use chrono::prelude::*; -use htmlescape::encode_minimal; -use serde_json::Value; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; -use wikibase::mediawiki::api::Api; -use wikibase::mediawiki::title::Title; -static MAX_HTML_RESULTS: usize = 10000; -static AUTOLIST_WIKIDATA: &str = "www.wikidata.org"; -static AUTOLIST_COMMONS: &str = "commons.wikimedia.org"; - -//________________________________________________________________________________________________________________________ - -#[derive(Debug, Clone)] -pub struct RenderParams { - wiki: String, - file_data: bool, - file_usage: bool, - thumbnails_in_wiki_output: bool, - wdi: String, - show_wikidata_item: bool, - is_wikidata: bool, - add_coordinates: bool, - add_image: bool, - add_defaultsort: bool, - add_disambiguation: bool, - add_incoming_links: bool, - add_sitelinks: bool, - do_output_redlinks: bool, - use_autolist: bool, - autolist_creator_mode: bool, - autolist_wiki_server: String, - api: Api, - state: Arc, - row_number: usize, - json_output_compatability: String, - json_callback: String, - json_sparse: bool, - json_pretty: bool, - giu: bool, -} - -impl RenderParams { - pub async fn new(platform: &Platform, wiki: &str) -> Result { - let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; - let mut ret = Self { - wiki: wiki.to_string(), - file_data: platform.has_param("ext_image_data"), - file_usage: platform.has_param("file_usage_data"), - thumbnails_in_wiki_output: platform.has_param("thumbnails_in_wiki_output"), - wdi: platform.get_param_default("wikidata_item", "no"), - add_coordinates: platform.has_param("add_coordinates"), - add_image: platform.has_param("add_image") - || platform.get_param_blank("format") == "kml", - add_defaultsort: platform.has_param("add_defaultsort"), - add_disambiguation: platform.has_param("add_disambiguation"), - add_incoming_links: platform.get_param_blank("sortby") == "incoming_links", - add_sitelinks: platform.get_param_blank("sortby") == "sitelinks", - show_wikidata_item: false, - is_wikidata: wiki == "wikidatawiki", - do_output_redlinks: platform.do_output_redlinks(), - use_autolist: false, // Possibly set downstream - autolist_creator_mode: false, // Possibly set downstream - autolist_wiki_server: AUTOLIST_WIKIDATA.to_string(), // Possibly set downstream - api, - state: platform.state(), - row_number: 0, - json_output_compatability: platform - .get_param_default("output_compatability", "catscan"), // Default; "quick-intersection" ? - json_callback: platform.get_param_blank("callback"), - json_sparse: platform.has_param("sparse"), - json_pretty: platform.has_param("json-pretty"), - giu: platform.has_param("giu"), - }; - ret.show_wikidata_item = ret.wdi == "any" || ret.wdi == "with"; - Ok(ret) - } -} - -//________________________________________________________________________________________________________________________ +pub static AUTOLIST_WIKIDATA: &str = "www.wikidata.org"; +pub static AUTOLIST_COMMONS: &str = "commons.wikimedia.org"; #[async_trait] pub trait Render { @@ -111,15 +31,15 @@ pub trait Render { fn get_initial_columns(&self, params: &RenderParams) -> Vec<&str> { let mut columns = vec![]; - if params.use_autolist { + if params.use_autolist() { columns.push("checkbox"); } columns.push("number"); - if params.add_image { + if params.add_image() { columns.push("image"); } columns.push("title"); - if params.do_output_redlinks { + if params.do_output_redlinks() { //columns.push("namespace"); columns.push("redlink_count"); } else { @@ -128,28 +48,28 @@ pub trait Render { columns.push("size"); columns.push("timestamp"); } - if params.show_wikidata_item { + if params.show_wikidata_item() { columns.push("wikidata_item"); } - if params.add_coordinates { + if params.add_coordinates() { columns.push("coordinates"); } - if params.add_defaultsort { + if params.add_defaultsort() { columns.push("defaultsort"); } - if params.add_disambiguation { + if params.add_disambiguation() { columns.push("disambiguation"); } - if params.add_incoming_links { + if params.add_incoming_links() { columns.push("incoming_links"); } - if params.add_sitelinks { + if params.add_sitelinks() { columns.push("sitelinks"); } - if params.file_data { + if params.file_data() { self.file_data_keys().iter().for_each(|k| columns.push(*k)); } - if params.file_usage { + if params.file_usage() { columns.push("fileusage"); } columns @@ -241,7 +161,7 @@ pub trait Render { "timestamp" => self.opt_string(&entry.get_page_timestamp()), "wikidata_item" => self.render_cell_wikidata_item(entry, params), "image" => self.render_cell_image(&entry.get_page_image(), params), - "number" => params.row_number.to_string(), + "number" => params.row_number().to_string(), "defaultsort" => self.opt_string(&entry.get_defaultsort()), "disambiguation" => self.opt_bool(&entry.disambiguation.as_option_bool()), "incoming_links" => self.opt_linkcount(&entry.incoming_links), @@ -303,1293 +223,3 @@ pub trait Render { ret } } - -//________________________________________________________________________________________________________________________ - -/// Renders wiki text -pub struct RenderWiki {} - -#[async_trait] -impl Render for RenderWiki { - async fn response( - &self, - platform: &Platform, - wiki: &str, - entries: Vec, - ) -> Result { - let mut params = RenderParams::new(platform, wiki).await?; - let mut rows: Vec = vec![]; - rows.push("== ".to_string() + &platform.combination().to_string() + " =="); - - let petscan_query_url = - "https://petscan.wmflabs.org/?".to_string() + &platform.form_parameters().to_string(); - let petscan_query_url_no_doit = "https://petscan.wmflabs.org/?".to_string() - + &platform.form_parameters().to_string_no_doit(); - - let utc: DateTime = Utc::now(); - rows.push(format!("Last updated on {}.", utc.to_rfc2822())); - - rows.push(format!( - "[{} Regenerate this table] or [{} edit the query].\n", - &petscan_query_url, &petscan_query_url_no_doit - )); - rows.push("{| border=1 class='wikitable'".to_string()); - let mut header: Vec<(&str, &str)> = vec![ - ("title", "Title"), - ("page_id", "Page ID"), - ("namespace", "Namespace"), - ("size", "Size (bytes)"), - ("timestamp", "Last change"), - ]; - if params.show_wikidata_item { - header.push(("wikidata_item", "Wikidata")); - } - if params.file_data { - self.file_data_keys() - .iter() - .for_each(|k| header.push((k, k))); - } - if params.do_output_redlinks { - header = vec![("redlink_count", "Wanted"), ("title", "Title")]; - } - let mut header: Vec<(String, String)> = header - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - for col in self.get_initial_columns(¶ms) { - if !header.iter().any(|(k, _)| col == k) && col != "number" { - header.push((col.to_string(), col.to_string())); - } - } - rows.push( - "!".to_string() - + &header - .iter() - .map(|(_, v)| v.clone()) - .collect::>() - .join(" !! "), - ); - - for entry in entries { - params.row_number += 1; - rows.push("|-".to_string()); - let row = self.row_from_entry(&entry, &header, ¶ms, platform); - let row = "| ".to_string() + &row.join(" || "); - rows.push(row); - } - - rows.push("|}".to_string()); - - Ok(MyResponse { - s: rows.join("\n"), - content_type: ContentType::Plain, - }) - } - - fn render_cell_title(&self, entry: &PageListEntry, params: &RenderParams) -> String { - if entry.title().namespace_id() == 6 { - if params.thumbnails_in_wiki_output { - match entry.title().full_pretty(¶ms.api) { - Some(file) => format!("[[{}|120px|]]", &file), - None => format!("[[File:{}|120px|]]", entry.title().pretty()), - } - } else { - match entry.title().full_pretty(¶ms.api) { - Some(file) => format!("[[:{}|]]", &file), - None => format!("[[:File:{}|]]", entry.title().pretty()), - } - } - } else { - self.render_wikilink(entry, params) - } - } - - fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - match entry.get_wikidata_item() { - Some(q) => format!("[[:d:{}|]]", q), - None => String::new(), - } - } - - fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { - format!("[[User:{user}|]]") - } - - fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { - match image { - Some(img) => format!("[[File:{}|120px|]]", img), - None => String::new(), - } - } - - fn render_cell_namespace(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - entry.title().namespace_id().to_string() - } -} - -impl RenderWiki { - pub fn new() -> Box { - Box::new(Self {}) - } - - fn render_wikilink(&self, entry: &PageListEntry, params: &RenderParams) -> String { - if params.is_wikidata { - match &entry.get_wikidata_label() { - Some(label) => format!("[[{}|{}]]", &entry.title().pretty(), label), - None => format!("[[{}]]", entry.title().pretty()), - } - } else { - let mut ret = "[[".to_string(); - if entry.title().namespace_id() == 14 { - ret += ":"; - } - ret += &entry - .title() - .full_pretty(¶ms.api) - .unwrap_or_else(|| entry.title().pretty().to_string()); - if !params.do_output_redlinks { - ret += "|"; - } - ret += "]]"; - ret - } - } -} - -//________________________________________________________________________________________________________________________ - -/// Renders CSV and TSV -pub struct RenderTSV { - separator: String, -} - -#[async_trait] -impl Render for RenderTSV { - async fn response( - &self, - platform: &Platform, - wiki: &str, - entries: Vec, - ) -> Result { - let mut params = RenderParams::new(platform, wiki).await?; - let mut rows: Vec = vec![]; - let mut header: Vec<(&str, &str)> = vec![ - ("number", "number"), - ("title", "title"), - ("page_id", "pageid"), - ("namespace", "namespace"), - ("size", "length"), - ("timestamp", "touched"), - ]; - if params.show_wikidata_item { - header.push(("wikidata_item", "Wikidata")); - } - if params.file_data { - self.file_data_keys() - .iter() - .for_each(|k| header.push((k, k))); - } - let mut header: Vec<(String, String)> = header - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - for col in self.get_initial_columns(¶ms) { - if !header.iter().any(|(k, _)| col == k) && col != "number" { - header.push((col.to_string(), col.to_string())); - } - } - rows.push( - header - .iter() - .map(|(_, v)| self.escape_cell(v)) - .collect::>() - .join(&self.separator), - ); - - for entry in entries { - params.row_number += 1; - let row = self.row_from_entry(&entry, &header, ¶ms, platform); - let row: Vec = row.iter().map(|s| self.escape_cell(s)).collect(); - let row = row.join(&self.separator); - rows.push(row); - } - - Ok(MyResponse { - s: rows.join("\n"), - content_type: match self.separator.as_str() { - "," => ContentType::CSV, - "\t" => ContentType::TSV, - _ => ContentType::Plain, // Fallback - }, - }) - } - - fn render_cell_title(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - entry.title().with_underscores() - } - - fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - match entry.get_wikidata_item() { - Some(q) => q, - None => String::new(), - } - } - - fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { - user.to_string() - } - - fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { - match image { - Some(img) => img.to_string(), - None => String::new(), - } - } - - fn render_cell_namespace(&self, entry: &PageListEntry, params: &RenderParams) -> String { - entry - .title() - .namespace_name(¶ms.api) - .unwrap_or("UNKNOWN_NAMESPACE") - .to_string() - } -} - -impl RenderTSV { - pub fn new(separator: &str) -> Box { - Box::new(Self { - separator: separator.to_string(), - }) - } - - fn escape_cell(&self, s: &str) -> String { - if self.separator == "," { - format!("\"{}\"", s.replace('\"', "\\\"")) - } else { - s.replace('\t', " ") - } - } -} - -//________________________________________________________________________________________________________________________ - -/// Renders HTML -pub struct RenderHTML {} - -#[async_trait] -impl Render for RenderHTML { - async fn response( - &self, - platform: &Platform, - wiki: &str, - mut entries: Vec, - ) -> Result { - let mut params = RenderParams::new(platform, wiki).await?; - let mut rows = vec![]; - - rows.push("
".to_string()); - rows.push(""); - - /* - // TODO - for ( auto a:platform->errors ) { - ret += "" ; - } - */ - - // Wikidata edit box? - if params.do_output_redlinks { - // Yeah no - } else if wiki != "wikidatawiki" && platform.get_param_blank("wikidata_item") == "without" { - rows.push("
".to_string()); - params.use_autolist = true; - params.autolist_creator_mode = true; - } else if wiki == "wikidatawiki" { - rows.push("
".to_string()); - params.use_autolist = true; - } else if wiki != "wikidatawiki" && params.do_output_redlinks { - rows.push("
".to_string()); - params.use_autolist = true; - params.autolist_creator_mode = true; - } else if wiki == "commonswiki" && entries.iter().all(|e| e.title().namespace_id() == 6) { - // If it's Commons, and all results are files - rows.push("
".to_string()); - params.use_autolist = true; - params.autolist_wiki_server = AUTOLIST_COMMONS.to_string(); - } - - if params.use_autolist { - rows.push(format!( - "", - params.autolist_wiki_server - )); - } - - // Gallery? - let only_files = entries - .iter() - .any(|entry| entry.title().namespace_id() == 6); - if only_files && (!params.use_autolist || params.autolist_wiki_server == AUTOLIST_COMMONS) { - rows.push( "
".to_string()); - rows.push( "".to_string()); - rows.push( "".to_string()); - rows.push("
".to_string()); - } - - rows.push(format!( - "

", - entries.len() - )); - - for warning in platform.warnings()? { - rows.push(format!( - "
{}
", - warning - )); - } - - let header = self.get_initial_columns(¶ms); - rows.push("
".to_string()); - rows.push(self.get_table_header(&header, ¶ms)); - rows.push("".to_string()); - - let header: Vec<(String, String)> = header - .iter() - .map(|x| (x.to_string(), x.to_string())) - .collect(); - - let entries_len = entries.len(); - let mut output = rows.join("\n"); - entries.drain(..).for_each(|entry| { - if params.row_number < MAX_HTML_RESULTS { - params.row_number += 1; - let row = self.row_from_entry(&entry, &header, ¶ms, platform); - let row = self.render_html_row(&row, &header); - output += &row; - } - }); - - let mut rows = vec![]; - rows.push("
".to_string()); - - if entries_len > MAX_HTML_RESULTS { - rows.push( format!("
Only the first {} results are shown in HTML, so as to not crash your browser; other formats will have complete results.
",MAX_HTML_RESULTS) ); - } - - if let Some(duration) = platform.query_time() { - let seconds = (duration.as_millis() as f32) / 1000_f32; - rows.push(format!( - "
", - seconds - )); - } - rows.push("".to_string()); - output += &rows.join("\n"); - let interface_language = platform.get_param_default("interface_language", "en"); - let state = platform.state(); - let html = state.get_main_page(interface_language); - let html = html.replace( - "", - encode_minimal(&platform.form_parameters().to_string()).as_str(), - ); - let mut html = html.replace("", &output); - if let Some(psid) = platform.psid { - let psid_string = format!("{}", psid); - html = html.replace("", &psid_string); - }; - - Ok(MyResponse { - s: html, - content_type: ContentType::HTML, - }) - } - - fn render_cell_title(&self, entry: &PageListEntry, params: &RenderParams) -> String { - self.render_wikilink( - entry.title(), - ¶ms.wiki, - &entry.get_wikidata_label(), - params, - true, - &entry.get_wikidata_description(), - entry.redlink_count.is_some(), - ) - } - fn render_cell_wikidata_item(&self, entry: &PageListEntry, params: &RenderParams) -> String { - match entry.get_wikidata_item() { - Some(q) => self.render_wikilink( - &Title::new(&q, 0), - "wikidatawiki", - &None, - params, - false, - &entry.get_wikidata_description(), - entry.redlink_count.is_some(), - ), - None => String::new(), - } - } - fn render_user_name(&self, user: &str, params: &RenderParams) -> String { - let title = Title::new(user, 2); - self.render_wikilink(&title, ¶ms.wiki, &None, params, false, &None, false) - } - fn render_cell_image(&self, image: &Option, params: &RenderParams) -> String { - match image { - Some(img) => { - let thumnail_size = "120px"; // TODO - let server_url = match params.state.get_server_url_for_wiki(¶ms.wiki) { - Ok(url) => url, - _ => return String::new(), - }; - let file = self.escape_attribute(img); - let url = format!("{}/wiki/File:{}", &server_url, &file); - let src = format!( - "{}/wiki/Special:Redirect/file/{}?width={}", - &server_url, &file, &thumnail_size - ); - format!("
",url,src) - } - None => String::new(), - } - } - fn render_cell_namespace(&self, entry: &PageListEntry, params: &RenderParams) -> String { - let namespace_name = entry - .title() - .namespace_name(¶ms.api) - .unwrap_or("UNKNOWN NAMESPACE") - .to_string(); - if namespace_name.is_empty() { - "Article".to_string() - } else { - namespace_name - } - } - - fn render_cell_fileusage(&self, entry: &PageListEntry, params: &RenderParams) -> String { - match &entry.get_file_info() { - Some(fi) => { - let mut rows: Vec = vec![]; - for fu in &fi.file_usage { - let html = "
".to_string() - + &fu.wiki().to_owned() - + ":" - + &self.render_wikilink( - fu.title(), - fu.wiki(), - &None, - params, - false, - &entry.get_wikidata_description(), - entry.redlink_count.is_some(), - ) - + "
"; - rows.push(html); - } - rows.join("\n") - } - None => String::new(), - } - } - - fn render_coordinates(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - match &entry.get_coordinates() { - Some(coords) => { - let lang = "en"; // TODO - let mut url = format!( - "https://tools.wmflabs.org/geohack/geohack.php?language={}¶ms=", - &lang - ); - if coords.lat < 0.0 { - url += &format!("{}_S_", -coords.lat); - } else { - url += &format!("{}_N_", coords.lat); - }; - if coords.lon < 0.0 { - url += &format!("{}_W_", -coords.lon) - } else { - url += &format!("{}_E_", coords.lon) - }; - url += "globe:earth"; - format!( - "{}/{}", - url, &coords.lat, &coords.lon - ) - } - None => String::new(), - } - } - - fn render_cell_checkbox( - &self, - entry: &PageListEntry, - params: &RenderParams, - platform: &Platform, - ) -> String { - let mut q = String::new(); - let checked: &str; - if params.autolist_creator_mode { - if platform.label_exists(entry.title().pretty()) || entry.title().pretty().contains('(') - { - checked = ""; - } else { - checked = "checked"; - } - q = match SystemTime::now().duration_since(UNIX_EPOCH) { - Ok(since) => format!("create_item_{}_{}", ¶ms.row_number, since.as_micros()), - _ => String::new(), - } - } else { - if params.autolist_wiki_server == AUTOLIST_COMMONS { - q = match entry.page_id { - Some(id) => id.to_string(), - None => String::new(), - } - } else if params.autolist_wiki_server == AUTOLIST_WIKIDATA { - q = entry.title().pretty().to_string(); - if q.is_empty() { - panic!("RenderHTML::render_cell_checkbox q is blank") - } - q.remove(0); - } else { - // TODO paranoia - } - checked = "checked"; - }; - format!( - "", - &q, &q, &checked - ) - } -} - -impl RenderHTML { - pub fn new() -> Box { - Box::new(Self {}) - } - - fn escape_attribute(&self, s: &str) -> String { - FormParameters::percent_encode(s) - .replace('<', "<") - .replace('>', ">") - .replace('"', """) - .replace('\'', "'") - } - - /* trunk-ignore(clippy/too_many_arguments) */ - fn render_wikilink( - &self, - title: &Title, - wiki: &str, - alt_label: &Option, - params: &RenderParams, - is_page_link: bool, - wikidata_description: &Option, - is_redlink: bool, - ) -> String { - let server = match params.state.get_server_url_for_wiki(wiki) { - Ok(url) => url, - Err(_e) => return String::new(), - }; - let full_title = match title.full_with_underscores(¶ms.api) { - Some(ft) => ft, - None => format!("{:?}", title), - }; - let full_title_pretty = match title.full_pretty(¶ms.api) { - Some(ft) => ft, - None => format!("{:?}", title), - }; - let url = server + "/wiki/" + &self.escape_attribute(&full_title); - let label = match alt_label { - Some(label) => label.to_string(), - None => match is_page_link { - true => title.pretty().to_string(), - false => full_title_pretty, - }, - }; - let mut ret = "" + &label + ""); - - // TODO properties? - if is_page_link && wiki == "wikidatawiki" && title.namespace_id() == 0 { - ret += &format!(" [{}]", title.pretty()); - match &wikidata_description { - Some(desc) => ret += &format!("
{}
", &desc), - None => {} - } - } - ret - } - - fn render_html_row(&self, row: &[String], header: &[(String, String)]) -> String { - let mut ret = "".to_string(); - for (col_num, item) in row.iter().enumerate() { - let header_key = match header.get(col_num) { - Some(x) => x.0.to_string(), - None => "UNKNOWN".to_string(), - }; - let class_name = match header_key.as_str() { - "number" | "page_id" | "timestamp" | "size" => "text-right text-monospace", - "title" => "link_container", - _ => "", - }; - if class_name.is_empty() { - ret += ""; - } else { - ret += ""; - } - ret += &item; - ret += ""; - } - ret += ""; - ret - } - - fn get_table_header(&self, columns: &[&str], _params: &RenderParams) -> String { - let mut ret = "".to_string(); - ret += ""; - let fdk = self.file_data_keys(); - for col in columns { - let col = col.to_string(); - let x = match col.as_str() { - "checkbox" => "".to_string(), - "number" => "".to_string(), - "image" => "".to_string(), - "title" => "".to_string(), - "page_id" => "".to_string(), - "namespace" => "".to_string(), - "linknumber" => "".to_string(), - "redlink_count" => "".to_string(), - "size" => "".to_string(), - "timestamp" => "".to_string(), - "wikidata_item" => "".to_string(), - "coordinates" => "".to_string(), - "defaultsort" => { - "".to_string() - } - "disambiguation" => "".to_string(), - "incoming_links" => "".to_string(), - "sitelinks" => "".to_string(), - "fileusage" => "".to_string(), - other => { - // File data etc. - if fdk.contains(&other) { - format!("", &other) - } else { - format!("", &other) - } - } - }; - ret += &x.to_string(); - } - ret += ""; - ret - } -} - -//________________________________________________________________________________________________________________________ - -/// Renders JSON -pub struct RenderJSON {} - -#[async_trait] -impl Render for RenderJSON { - async fn response( - &self, - platform: &Platform, - wiki: &str, - entries: Vec, - ) -> Result { - let mut params = RenderParams::new(platform, wiki).await?; - let mut content_type = ContentType::JSON; - if params.json_pretty { - content_type = ContentType::Plain; - } - params.file_usage = params.giu || params.file_usage; - if params.giu { - params.json_sparse = false; - } - - // Header - let mut header: Vec<(&str, &str)> = vec![ - ("title", "Title"), - ("page_id", "Page ID"), - ("namespace", "Namespace"), - ("size", "Size (bytes)"), - ("timestamp", "Last change"), - ]; - if params.show_wikidata_item { - header.push(("wikidata_item", "Wikidata")); - } - let mut header: Vec<(String, String)> = header - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - for col in self.get_initial_columns(¶ms) { - if !header.iter().any(|(k, _)| col == k) && col != "number" { - header.push((col.to_string(), col.to_string())); - } - } - let mut header: Vec<(String, String)> = header - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - for col in self.get_initial_columns(¶ms) { - if !header.iter().any(|(k, _)| col == k) && col != "number" { - header.push((col.to_string(), col.to_string())); - } - } - if params.file_data { - self.file_data_keys() - .iter() - .for_each(|k| header.push((k.to_string(), k.to_string()))); - } - - let value: Value = match params.json_output_compatability.as_str() { - "quick-intersection" => self.quick_intersection(platform, entries, ¶ms, &header), - _ => self.cat_scan(platform, entries, ¶ms, &header), // Default - }; - - let mut out: String = String::new(); - if !params.json_callback.is_empty() { - out += ¶ms.json_callback; - out += "("; - } - - let output = if params.json_pretty { - ::serde_json::to_string_pretty(&value) - } else { - ::serde_json::to_string(&value) - }; - match output { - Ok(o) => out += &o, - Err(e) => return Err(format!("JSON encoding failed: {:?}", e)), - }; - - if !params.json_callback.is_empty() { - out += ")"; - } - - Ok(MyResponse { - s: out, - content_type, - }) - } - - fn render_cell_wikidata_item(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { - "N/A".to_string() - } - fn render_user_name(&self, _user: &str, _params: &RenderParams) -> String { - "N/A".to_string() - } - fn render_cell_image(&self, _image: &Option, _params: &RenderParams) -> String { - "N/A".to_string() - } - fn render_cell_namespace(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { - "N/A".to_string() - } - fn render_cell_title(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { - "N/A".to_string() - } -} - -impl RenderJSON { - pub fn new() -> Box { - Box::new(Self {}) - } - - fn get_query_string(&self, platform: &Platform) -> String { - "https://petscan.wmflabs.org/?".to_string() + &platform.form_parameters().to_string() - } - - fn cat_scan( - &self, - platform: &Platform, - entries: Vec, - params: &RenderParams, - header: &[(String, String)], - ) -> Value { - let entry_data: Vec = if params.json_sparse { - entries - .iter() - .filter_map(|entry| { - Some(json!(entry.title().full_with_underscores(¶ms.api)?)) - }) - .collect() - } else { - entries.iter().map(|entry| { - let mut o = json!({ - "n":"page", - "title":entry.title().with_underscores(), - "id":entry.page_id.unwrap_or(0), - "namespace":entry.title().namespace_id(), - "len":entry.page_bytes.unwrap_or(0), - "touched":entry.get_page_timestamp().unwrap_or_default(), - "nstext":params.api.get_canonical_namespace_name(entry.title().namespace_id()).unwrap_or("") - }); - if let Some(q) = entry.get_wikidata_item() { - o["q"] = json!(q); - o["metadata"]["wikidata"] = json!(q); - } - self.add_metadata(&mut o, entry, header); - if params.file_data { - match &o["metadata"].get("fileusage") { - Some(_) => o["gil"] = o["metadata"]["fileusage"].to_owned(), - None => {} - } - self.file_data_keys().iter().for_each(|k|{ - match &o["metadata"].get(k) { - Some(_) => o[k] = o["metadata"][k].to_owned(), - None => {} - } - }); - } - o - }).collect() - }; - let seconds: f32 = match platform.query_time() { - Some(duration) => (duration.as_millis() as f32) / (1000_f32), - None => 0.0, - }; - json!({"n":"result","a":{"query":self.get_query_string(platform),"querytime_sec":seconds},"*":[{"n":"combination","a":{"type":platform.get_param_default("combination","subset"),"*":entry_data}}]}) - } - - fn quick_intersection( - &self, - platform: &Platform, - entries: Vec, - params: &RenderParams, - header: &[(String, String)], - ) -> Value { - let mut ret = json!({ - "namespaces":{}, - "status":"OK", - "start":0, - "max":entries.len()+1, - "query":self.get_query_string(platform), - "pagecount":entries.len(), - "pages":[] - }); - if let Some(duration) = platform.query_time() { - ret["querytime"] = json!((duration.as_millis() as f32) / 1000_f32) - } - - // Namespaces - if let Some(namespaces) = params.api.get_site_info()["query"]["namespaces"].as_object() { - for (k, v) in namespaces { - if let Some(ns_local_name) = v["*"].as_str() { - ret["namespaces"][k] = json!(ns_local_name) - } - } - } - - // Entries - if params.json_sparse { - ret["pages"] = entries - .iter() - .filter_map(|entry| entry.title().full_with_underscores(¶ms.api)) - .collect(); - } else { - ret["pages"] = entries - .iter() - .map(|entry| { - let mut o = json!({ - "page_id" : entry.page_id.unwrap_or(0), - "page_namespace" : entry.title().namespace_id(), - "page_title" : entry.title().with_underscores(), - "page_latest" : entry.get_page_timestamp().unwrap_or_default(), - "page_len" : entry.page_bytes.unwrap_or(0), - //"meta" : {} - }); - if params.giu || params.file_usage { - if let Some(fu) = self.get_file_usage(entry) { - o["giu"] = fu - } - } - self.add_metadata(&mut o, entry, header); - o - }) - .collect(); - } - - ret - } - - fn get_file_info_value(&self, entry: &PageListEntry, key: &str) -> Option { - match &entry.get_file_info() { - Some(fi) => match key { - "img_size" => fi.img_size.as_ref().map(|s| json!(s)), - "img_width" => fi.img_width.as_ref().map(|s| json!(s)), - "img_height" => fi.img_height.as_ref().map(|s| json!(s)), - "img_media_type" => fi.img_media_type.as_ref().map(|s| json!(s)), - "img_major_mime" => fi.img_major_mime.as_ref().map(|s| json!(s)), - "img_minor_mime" => fi.img_minor_mime.as_ref().map(|s| json!(s)), - "img_user_text" => fi.img_user_text.as_ref().map(|s| json!(s)), - "img_timestamp" => fi.img_timestamp.as_ref().map(|s| json!(s)), - "img_sha1" => fi.img_sha1.as_ref().map(|s| json!(s)), - other => { - println!("KEY NOT FOUND:{}", &other); - None - } - }, - None => None, - } - } - - fn get_file_usage(&self, entry: &PageListEntry) -> Option { - match &entry.get_file_info() { - Some(fi) => match fi.file_usage.is_empty() { - true => None, - false => Some( - fi.file_usage - .iter() - .map(|fu| { - json!({ - "ns":fu.title().namespace_id(), - "page":fu.title().with_underscores(), - "wiki":fu.wiki() - }) - }) - .collect(), - ), - }, - None => None, - } - } - - fn get_file_usage_as_string(&self, entry: &PageListEntry) -> Option { - match &entry.get_file_info() { - Some(fi) => match fi.file_usage.is_empty() { - true => None, - false => Some(json!(fi - .file_usage - .iter() - .map(|fu| { - format!( - "{}:{}:{}:{}", - fu.wiki(), - fu.title().namespace_id(), - fu.namespace_name(), - fu.title().with_underscores() - ) - }) - .collect::>() - .join("|"))), - }, - None => None, - } - } - - fn add_metadata(&self, o: &mut Value, entry: &PageListEntry, header: &[(String, String)]) { - header.iter().for_each(|(head, _)| { - let value = match head.to_string().as_str() { - "checkbox" | "number" | "page_id" | "title" | "namespace" | "size" - | "timestamp" => None, - "image" => entry.get_page_image().map(|s| json!(s)), - "linknumber" => entry.link_count.as_ref().map(|s| json!(s)), - "wikidata" => entry.get_wikidata_item().map(|s| json!(s)), - "defaultsort" => entry.get_defaultsort().map(|s| json!(s)), - "disambiguation" => Some(entry.disambiguation.as_json()), - "incoming_links" => entry.incoming_links.as_ref().map(|s| json!(s)), - "sitelinks" => entry.sitelink_count.as_ref().map(|s| json!(s)), - "coordinates" => entry - .get_coordinates() - .as_ref() - .map(|coord| json!(format!("{}/{}", coord.lat, coord.lon))), - "fileusage" => self.get_file_usage_as_string(entry), - other => self.get_file_info_value(entry, other), - }; - if let Some(v) = value { - o["metadata"][head] = v - } - }); - } -} - -//________________________________________________________________________________________________________________________ - -/// Renders PagePile -pub struct RenderPagePile {} - -#[async_trait] -impl Render for RenderPagePile { - async fn response( - &self, - platform: &Platform, - wiki: &str, - entries: Vec, - ) -> Result { - let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; - let url = "https://pagepile.toolforge.org/api.php"; - let data: String = entries - .iter() - .map(|e| format!("{}\t{}", e.title().pretty(), e.title().namespace_id())) - .collect::>() - .join("\n"); - let mut params: HashMap = - [("action", "create_pile_with_data"), ("wiki", wiki)] - .iter() - .map(|x| (x.0.to_string(), x.1.to_string())) - .collect(); - params.insert("data".to_string(), data); - - let result = match api.query_raw(url, ¶ms, "POST").await { - Ok(r) => r, - Err(e) => return Err(format!("PagePile generation failed: {:?}", e)), - }; - let json: serde_json::value::Value = match serde_json::from_str(&result) { - Ok(j) => j, - Err(e) => { - return Err(format!( - "PagePile generation did not return valid JSON: {:?}", - e - )) - } - }; - let pagepile_id = match json["pile"]["id"].as_u64() { - Some(id) => id, - None => { - return Err(format!( - "PagePile generation did not return a pagepile ID: {:?}", - json.clone() - )) - } - }; - let url = format!( - "https://tools.wmflabs.org/pagepile/api.php?action=get_data&id={}", - pagepile_id - ); - let html = format!("

Redirect

The document can be found here.",&url,&url) ; - Ok(MyResponse { - s: html, - content_type: ContentType::HTML, - }) - } - - fn render_cell_title(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { - String::new() - } - fn render_cell_wikidata_item(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { - String::new() - } - fn render_user_name(&self, _user: &str, _params: &RenderParams) -> String { - String::new() - } - fn render_cell_image(&self, _image: &Option, _params: &RenderParams) -> String { - String::new() - } - fn render_cell_namespace(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { - String::new() - } -} - -impl RenderPagePile { - pub fn new() -> Box { - Box::new(Self {}) - } -} - -//________________________________________________________________________________________________________________________ - -/// Renders KML -pub struct RenderKML {} - -#[async_trait] -impl Render for RenderKML { - async fn response( - &self, - platform: &Platform, - wiki: &str, - entries: Vec, - ) -> Result { - let params = RenderParams::new(platform, wiki).await?; - let server = match params.state.get_server_url_for_wiki(wiki) { - Ok(url) => url, - Err(_e) => String::new(), - }; - let mut kml = String::new(); - kml += r#" - "#; - - for entry in entries { - if let Some(coords) = &entry.get_coordinates() { - let title = entry.title(); - let label = if let "wikidatawiki" = wiki { - match entry.get_wikidata_label() { - Some(s) => s, - None => title.pretty().to_string(), - } - } else { - title.pretty().to_string() - }; - kml += r#""#; - kml += format!("{}", self.escape_xml(&label)).as_str(); - if let Some(desc) = entry.get_wikidata_description() { - kml += - format!("{}", self.escape_xml(&desc)).as_str(); - } - - kml += ""; - if let Some(q) = entry.get_wikidata_item() { - kml += format!( - "{}", - self.escape_xml(&q) - ) - .as_str(); - } - - let full_title = match title.full_with_underscores(¶ms.api) { - Some(ft) => ft, - None => format!("{:?}", title), - }; - let url = format!("{}/wiki/{}", &server, &self.escape_attribute(&full_title)); - kml += format!( - "{}", - self.escape_xml(&url) - ) - .as_str(); - - if let Some(img) = entry.get_page_image() { - let file = self.escape_attribute(&img); - let src = format!( - "{}/wiki/Special:Redirect/file/{}?width={}", - &server, &file, 120 - ); - kml += format!( - "{}", - self.escape_xml(&src) - ) - .as_str(); - } - - kml += ""; - - kml += format!( - "{}, {}, 0.", - coords.lon, coords.lat - ) - .as_str(); - kml += r#""#; - } - } - - kml += r#""#; - - Ok(MyResponse { - s: kml, - content_type: ContentType::Plain, - }) - } - - fn render_cell_title(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - entry.title().pretty().to_string() - } - - fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - match entry.get_wikidata_item() { - Some(q) => format!("[[:d:{}|]]", q), - None => String::new(), - } - } - - fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { - format!("[[User:{user}|]]") - } - - fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { - match image { - Some(img) => format!("[[File:{}|120px|]]", img), - None => String::new(), - } - } - - fn render_cell_namespace(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - entry.title().namespace_id().to_string() - } -} - -impl RenderKML { - pub fn new() -> Box { - Box::new(Self {}) - } - - fn escape_xml(&self, s: &str) -> String { - s.replace('<', "<") - .replace('>', ">") - .replace('"', """) - .replace('\'', "'") - .replace('&', "&") - } - - fn escape_attribute(&self, s: &str) -> String { - FormParameters::percent_encode(s) - .replace('<', "<") - .replace('>', ">") - .replace('"', """) - .replace('\'', "'") - } -} - -//________________________________________________________________________________________________________________________ - -/// Renders PlainText -pub struct RenderPlainText {} - -#[async_trait] -impl Render for RenderPlainText { - async fn response( - &self, - platform: &Platform, - wiki: &str, - entries: Vec, - ) -> Result { - let params = RenderParams::new(platform, wiki).await?; - let output = entries - .iter() - .filter_map(|entry| entry.title().full_pretty(¶ms.api)) - .collect::>() - .join("\n"); - Ok(MyResponse { - s: output, - content_type: ContentType::Plain, - }) - } - - fn render_cell_title(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - entry.title().pretty().to_string() - } - - fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - match entry.get_wikidata_item() { - Some(q) => format!("[[:d:{}|]]", q), - None => String::new(), - } - } - - fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { - format!("[[User:{user}|]]") - } - - fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { - match image { - Some(img) => format!("[[File:{}|120px|]]", img), - None => String::new(), - } - } - - fn render_cell_namespace(&self, entry: &PageListEntry, _params: &RenderParams) -> String { - entry.title().namespace_id().to_string() - } -} - -impl RenderPlainText { - pub fn new() -> Box { - Box::new(Self {}) - } -} diff --git a/src/render_html.rs b/src/render_html.rs new file mode 100644 index 0000000..e068c5e --- /dev/null +++ b/src/render_html.rs @@ -0,0 +1,433 @@ +use crate::form_parameters::FormParameters; +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::{Render, AUTOLIST_COMMONS, AUTOLIST_WIKIDATA}; +use crate::render_params::RenderParams; +use async_trait::async_trait; +use htmlescape::encode_minimal; +use std::time::{SystemTime, UNIX_EPOCH}; +use wikibase::mediawiki::title::Title; + +static MAX_HTML_RESULTS: usize = 10000; + +/// Renders HTML +pub struct RenderHTML {} + +#[async_trait] +impl Render for RenderHTML { + async fn response( + &self, + platform: &Platform, + wiki: &str, + mut entries: Vec, + ) -> Result { + let mut params = RenderParams::new(platform, wiki).await?; + let mut rows = vec![]; + + rows.push("
".to_string()); + rows.push(""); + + /* + // TODO + for ( auto a:platform->errors ) { + ret += "" ; + } + */ + + // Wikidata edit box? + if params.do_output_redlinks() { + // Yeah no + } else if wiki != "wikidatawiki" && platform.get_param_blank("wikidata_item") == "without" { + rows.push("
".to_string()); + *params.use_autolist_mut() = true; + *params.autolist_creator_mode_mut() = true; + } else if wiki == "wikidatawiki" { + rows.push("
".to_string()); + *params.use_autolist_mut() = true; + } else if wiki != "wikidatawiki" && params.do_output_redlinks() { + rows.push("
".to_string()); + *params.use_autolist_mut() = true; + *params.autolist_creator_mode_mut() = true; + } else if wiki == "commonswiki" && entries.iter().all(|e| e.title().namespace_id() == 6) { + // If it's Commons, and all results are files + rows.push("
".to_string()); + *params.use_autolist_mut() = true; + params.set_autolist_wiki_server(AUTOLIST_COMMONS); + } + + if params.use_autolist() { + rows.push(format!( + "", + params.autolist_wiki_server() + )); + } + + // Gallery? + let only_files = entries + .iter() + .any(|entry| entry.title().namespace_id() == 6); + if only_files + && (!params.use_autolist() || params.autolist_wiki_server() == AUTOLIST_COMMONS) + { + rows.push( "
".to_string()); + rows.push( "".to_string()); + rows.push( "".to_string()); + rows.push("
".to_string()); + } + + rows.push(format!( + "

", + entries.len() + )); + + for warning in platform.warnings()? { + rows.push(format!( + "
{}
", + warning + )); + } + + let header = self.get_initial_columns(¶ms); + rows.push("
".to_string()); + rows.push(self.get_table_header(&header, ¶ms)); + rows.push("
".to_string()); + + let header: Vec<(String, String)> = header + .iter() + .map(|x| (x.to_string(), x.to_string())) + .collect(); + + let entries_len = entries.len(); + let mut output = rows.join("\n"); + entries.drain(..).for_each(|entry| { + if params.row_number() < MAX_HTML_RESULTS { + *params.row_number_mut() += 1; + let row = self.row_from_entry(&entry, &header, ¶ms, platform); + let row = self.render_html_row(&row, &header); + output += &row; + } + }); + + let mut rows = vec![]; + rows.push("
#UNKNOWN:'{}'
".to_string()); + + if entries_len > MAX_HTML_RESULTS { + rows.push( format!("
Only the first {} results are shown in HTML, so as to not crash your browser; other formats will have complete results.
",MAX_HTML_RESULTS) ); + } + + if let Some(duration) = platform.query_time() { + let seconds = (duration.as_millis() as f32) / 1000_f32; + rows.push(format!( + "
", + seconds + )); + } + rows.push("".to_string()); + output += &rows.join("\n"); + let interface_language = platform.get_param_default("interface_language", "en"); + let state = platform.state(); + let html = state.get_main_page(interface_language); + let html = html.replace( + "", + encode_minimal(&platform.form_parameters().to_string()).as_str(), + ); + let mut html = html.replace("", &output); + if let Some(psid) = platform.psid { + let psid_string = format!("{}", psid); + html = html.replace("", &psid_string); + }; + + Ok(MyResponse { + s: html, + content_type: ContentType::HTML, + }) + } + + fn render_cell_title(&self, entry: &PageListEntry, params: &RenderParams) -> String { + self.render_wikilink( + entry.title(), + params.wiki(), + &entry.get_wikidata_label(), + params, + true, + &entry.get_wikidata_description(), + entry.redlink_count.is_some(), + ) + } + fn render_cell_wikidata_item(&self, entry: &PageListEntry, params: &RenderParams) -> String { + match entry.get_wikidata_item() { + Some(q) => self.render_wikilink( + &Title::new(&q, 0), + "wikidatawiki", + &None, + params, + false, + &entry.get_wikidata_description(), + entry.redlink_count.is_some(), + ), + None => String::new(), + } + } + fn render_user_name(&self, user: &str, params: &RenderParams) -> String { + let title = Title::new(user, 2); + self.render_wikilink(&title, params.wiki(), &None, params, false, &None, false) + } + fn render_cell_image(&self, image: &Option, params: &RenderParams) -> String { + match image { + Some(img) => { + let thumnail_size = "120px"; // TODO + let server_url = match params.state().get_server_url_for_wiki(params.wiki()) { + Ok(url) => url, + _ => return String::new(), + }; + let file = self.escape_attribute(img); + let url = format!("{}/wiki/File:{}", &server_url, &file); + let src = format!( + "{}/wiki/Special:Redirect/file/{}?width={}", + &server_url, &file, &thumnail_size + ); + format!("
",url,src) + } + None => String::new(), + } + } + fn render_cell_namespace(&self, entry: &PageListEntry, params: &RenderParams) -> String { + let namespace_name = entry + .title() + .namespace_name(params.api()) + .unwrap_or("UNKNOWN NAMESPACE") + .to_string(); + if namespace_name.is_empty() { + "Article".to_string() + } else { + namespace_name + } + } + + fn render_cell_fileusage(&self, entry: &PageListEntry, params: &RenderParams) -> String { + match &entry.get_file_info() { + Some(fi) => { + let mut rows: Vec = vec![]; + for fu in &fi.file_usage { + let html = "
".to_string() + + &fu.wiki().to_owned() + + ":" + + &self.render_wikilink( + fu.title(), + fu.wiki(), + &None, + params, + false, + &entry.get_wikidata_description(), + entry.redlink_count.is_some(), + ) + + "
"; + rows.push(html); + } + rows.join("\n") + } + None => String::new(), + } + } + + fn render_coordinates(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + match &entry.get_coordinates() { + Some(coords) => { + let lang = "en"; // TODO + let mut url = format!( + "https://tools.wmflabs.org/geohack/geohack.php?language={}¶ms=", + &lang + ); + if coords.lat < 0.0 { + url += &format!("{}_S_", -coords.lat); + } else { + url += &format!("{}_N_", coords.lat); + }; + if coords.lon < 0.0 { + url += &format!("{}_W_", -coords.lon) + } else { + url += &format!("{}_E_", coords.lon) + }; + url += "globe:earth"; + format!( + "{}/{}", + url, &coords.lat, &coords.lon + ) + } + None => String::new(), + } + } + + fn render_cell_checkbox( + &self, + entry: &PageListEntry, + params: &RenderParams, + platform: &Platform, + ) -> String { + let mut q = String::new(); + let checked: &str; + if params.autolist_creator_mode() { + if platform.label_exists(entry.title().pretty()) || entry.title().pretty().contains('(') + { + checked = ""; + } else { + checked = "checked"; + } + q = match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(since) => format!("create_item_{}_{}", params.row_number(), since.as_micros()), + _ => String::new(), + } + } else { + if params.autolist_wiki_server() == AUTOLIST_COMMONS { + q = match entry.page_id { + Some(id) => id.to_string(), + None => String::new(), + } + } else if params.autolist_wiki_server() == AUTOLIST_WIKIDATA { + q = entry.title().pretty().to_string(); + if q.is_empty() { + panic!("RenderHTML::render_cell_checkbox q is blank") + } + q.remove(0); + } else { + // TODO paranoia + } + checked = "checked"; + }; + format!( + "", + &q, &q, &checked + ) + } +} + +impl RenderHTML { + pub fn new() -> Box { + Box::new(Self {}) + } + + fn escape_attribute(&self, s: &str) -> String { + FormParameters::percent_encode(s) + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") + } + + /* trunk-ignore(clippy/too_many_arguments) */ + fn render_wikilink( + &self, + title: &Title, + wiki: &str, + alt_label: &Option, + params: &RenderParams, + is_page_link: bool, + wikidata_description: &Option, + is_redlink: bool, + ) -> String { + let server = match params.state().get_server_url_for_wiki(wiki) { + Ok(url) => url, + Err(_e) => return String::new(), + }; + let full_title = match title.full_with_underscores(params.api()) { + Some(ft) => ft, + None => format!("{:?}", title), + }; + let full_title_pretty = match title.full_pretty(params.api()) { + Some(ft) => ft, + None => format!("{:?}", title), + }; + let url = server + "/wiki/" + &self.escape_attribute(&full_title); + let label = match alt_label { + Some(label) => label.to_string(), + None => match is_page_link { + true => title.pretty().to_string(), + false => full_title_pretty, + }, + }; + let mut ret = "" + &label + ""); + + // TODO properties? + if is_page_link && wiki == "wikidatawiki" && title.namespace_id() == 0 { + ret += &format!(" [{}]", title.pretty()); + match &wikidata_description { + Some(desc) => ret += &format!("
{}
", &desc), + None => {} + } + } + ret + } + + fn render_html_row(&self, row: &[String], header: &[(String, String)]) -> String { + let mut ret = "".to_string(); + for (col_num, item) in row.iter().enumerate() { + let header_key = match header.get(col_num) { + Some(x) => x.0.to_string(), + None => "UNKNOWN".to_string(), + }; + let class_name = match header_key.as_str() { + "number" | "page_id" | "timestamp" | "size" => "text-right text-monospace", + "title" => "link_container", + _ => "", + }; + if class_name.is_empty() { + ret += ""; + } else { + ret += ""; + } + ret += &item; + ret += ""; + } + ret += ""; + ret + } + + fn get_table_header(&self, columns: &[&str], _params: &RenderParams) -> String { + let mut ret = "".to_string(); + ret += ""; + let fdk = self.file_data_keys(); + for col in columns { + let col = col.to_string(); + let x = match col.as_str() { + "checkbox" => "".to_string(), + "number" => "".to_string(), + "image" => "".to_string(), + "title" => "".to_string(), + "page_id" => "".to_string(), + "namespace" => "".to_string(), + "linknumber" => "".to_string(), + "redlink_count" => "".to_string(), + "size" => "".to_string(), + "timestamp" => "".to_string(), + "wikidata_item" => "".to_string(), + "coordinates" => "".to_string(), + "defaultsort" => { + "".to_string() + } + "disambiguation" => "".to_string(), + "incoming_links" => "".to_string(), + "sitelinks" => "".to_string(), + "fileusage" => "".to_string(), + other => { + // File data etc. + if fdk.contains(&other) { + format!("", &other) + } else { + format!("", &other) + } + } + }; + ret += &x.to_string(); + } + ret += ""; + ret + } +} diff --git a/src/render_json.rs b/src/render_json.rs new file mode 100644 index 0000000..5e09feb --- /dev/null +++ b/src/render_json.rs @@ -0,0 +1,323 @@ +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::Render; +use crate::render_params::RenderParams; +use async_trait::async_trait; +use serde_json::Value; + +/// Renders JSON +pub struct RenderJSON {} + +#[async_trait] +impl Render for RenderJSON { + async fn response( + &self, + platform: &Platform, + wiki: &str, + entries: Vec, + ) -> Result { + let mut params = RenderParams::new(platform, wiki).await?; + let mut content_type = ContentType::JSON; + if params.json_pretty() { + content_type = ContentType::Plain; + } + params.set_file_usage(params.giu() || params.file_usage()); + if params.giu() { + params.set_json_sparse(false); + } + + // Header + let mut header: Vec<(&str, &str)> = vec![ + ("title", "Title"), + ("page_id", "Page ID"), + ("namespace", "Namespace"), + ("size", "Size (bytes)"), + ("timestamp", "Last change"), + ]; + if params.show_wikidata_item() { + header.push(("wikidata_item", "Wikidata")); + } + let mut header: Vec<(String, String)> = header + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + for col in self.get_initial_columns(¶ms) { + if !header.iter().any(|(k, _)| col == k) && col != "number" { + header.push((col.to_string(), col.to_string())); + } + } + let mut header: Vec<(String, String)> = header + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + for col in self.get_initial_columns(¶ms) { + if !header.iter().any(|(k, _)| col == k) && col != "number" { + header.push((col.to_string(), col.to_string())); + } + } + if params.file_data() { + self.file_data_keys() + .iter() + .for_each(|k| header.push((k.to_string(), k.to_string()))); + } + + let value: Value = match params.json_output_compatability() { + "quick-intersection" => self.quick_intersection(platform, entries, ¶ms, &header), + _ => self.cat_scan(platform, entries, ¶ms, &header), // Default + }; + + let mut out: String = String::new(); + if !params.json_callback().is_empty() { + out += params.json_callback(); + out += "("; + } + + let output = if params.json_pretty() { + ::serde_json::to_string_pretty(&value) + } else { + ::serde_json::to_string(&value) + }; + match output { + Ok(o) => out += &o, + Err(e) => return Err(format!("JSON encoding failed: {:?}", e)), + }; + + if !params.json_callback().is_empty() { + out += ")"; + } + + Ok(MyResponse { + s: out, + content_type, + }) + } + + fn render_cell_wikidata_item(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { + "N/A".to_string() + } + fn render_user_name(&self, _user: &str, _params: &RenderParams) -> String { + "N/A".to_string() + } + fn render_cell_image(&self, _image: &Option, _params: &RenderParams) -> String { + "N/A".to_string() + } + fn render_cell_namespace(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { + "N/A".to_string() + } + fn render_cell_title(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { + "N/A".to_string() + } +} + +impl RenderJSON { + pub fn new() -> Box { + Box::new(Self {}) + } + + fn get_query_string(&self, platform: &Platform) -> String { + "https://petscan.wmflabs.org/?".to_string() + &platform.form_parameters().to_string() + } + + fn cat_scan( + &self, + platform: &Platform, + entries: Vec, + params: &RenderParams, + header: &[(String, String)], + ) -> Value { + let entry_data: Vec = if params.json_sparse() { + entries + .iter() + .filter_map(|entry| { + Some(json!(entry.title().full_with_underscores(params.api())?)) + }) + .collect() + } else { + entries.iter().map(|entry| { + let mut o = json!({ + "n":"page", + "title":entry.title().with_underscores(), + "id":entry.page_id.unwrap_or(0), + "namespace":entry.title().namespace_id(), + "len":entry.page_bytes.unwrap_or(0), + "touched":entry.get_page_timestamp().unwrap_or_default(), + "nstext":params.api().get_canonical_namespace_name(entry.title().namespace_id()).unwrap_or("") + }); + if let Some(q) = entry.get_wikidata_item() { + o["q"] = json!(q); + o["metadata"]["wikidata"] = json!(q); + } + self.add_metadata(&mut o, entry, header); + if params.file_data() { + match &o["metadata"].get("fileusage") { + Some(_) => o["gil"] = o["metadata"]["fileusage"].to_owned(), + None => {} + } + self.file_data_keys().iter().for_each(|k|{ + match &o["metadata"].get(k) { + Some(_) => o[k] = o["metadata"][k].to_owned(), + None => {} + } + }); + } + o + }).collect() + }; + let seconds: f32 = match platform.query_time() { + Some(duration) => (duration.as_millis() as f32) / (1000_f32), + None => 0.0, + }; + json!({"n":"result","a":{"query":self.get_query_string(platform),"querytime_sec":seconds},"*":[{"n":"combination","a":{"type":platform.get_param_default("combination","subset"),"*":entry_data}}]}) + } + + fn quick_intersection( + &self, + platform: &Platform, + entries: Vec, + params: &RenderParams, + header: &[(String, String)], + ) -> Value { + let mut ret = json!({ + "namespaces":{}, + "status":"OK", + "start":0, + "max":entries.len()+1, + "query":self.get_query_string(platform), + "pagecount":entries.len(), + "pages":[] + }); + if let Some(duration) = platform.query_time() { + ret["querytime"] = json!((duration.as_millis() as f32) / 1000_f32) + } + + // Namespaces + if let Some(namespaces) = params.api().get_site_info()["query"]["namespaces"].as_object() { + for (k, v) in namespaces { + if let Some(ns_local_name) = v["*"].as_str() { + ret["namespaces"][k] = json!(ns_local_name) + } + } + } + + // Entries + if params.json_sparse() { + ret["pages"] = entries + .iter() + .filter_map(|entry| entry.title().full_with_underscores(params.api())) + .collect(); + } else { + ret["pages"] = entries + .iter() + .map(|entry| { + let mut o = json!({ + "page_id" : entry.page_id.unwrap_or(0), + "page_namespace" : entry.title().namespace_id(), + "page_title" : entry.title().with_underscores(), + "page_latest" : entry.get_page_timestamp().unwrap_or_default(), + "page_len" : entry.page_bytes.unwrap_or(0), + //"meta" : {} + }); + if params.giu() || params.file_usage() { + if let Some(fu) = self.get_file_usage(entry) { + o["giu"] = fu + } + } + self.add_metadata(&mut o, entry, header); + o + }) + .collect(); + } + + ret + } + + fn get_file_info_value(&self, entry: &PageListEntry, key: &str) -> Option { + match &entry.get_file_info() { + Some(fi) => match key { + "img_size" => fi.img_size.as_ref().map(|s| json!(s)), + "img_width" => fi.img_width.as_ref().map(|s| json!(s)), + "img_height" => fi.img_height.as_ref().map(|s| json!(s)), + "img_media_type" => fi.img_media_type.as_ref().map(|s| json!(s)), + "img_major_mime" => fi.img_major_mime.as_ref().map(|s| json!(s)), + "img_minor_mime" => fi.img_minor_mime.as_ref().map(|s| json!(s)), + "img_user_text" => fi.img_user_text.as_ref().map(|s| json!(s)), + "img_timestamp" => fi.img_timestamp.as_ref().map(|s| json!(s)), + "img_sha1" => fi.img_sha1.as_ref().map(|s| json!(s)), + other => { + println!("KEY NOT FOUND:{}", &other); + None + } + }, + None => None, + } + } + + fn get_file_usage(&self, entry: &PageListEntry) -> Option { + match &entry.get_file_info() { + Some(fi) => match fi.file_usage.is_empty() { + true => None, + false => Some( + fi.file_usage + .iter() + .map(|fu| { + json!({ + "ns":fu.title().namespace_id(), + "page":fu.title().with_underscores(), + "wiki":fu.wiki() + }) + }) + .collect(), + ), + }, + None => None, + } + } + + fn get_file_usage_as_string(&self, entry: &PageListEntry) -> Option { + match &entry.get_file_info() { + Some(fi) => match fi.file_usage.is_empty() { + true => None, + false => Some(json!(fi + .file_usage + .iter() + .map(|fu| { + format!( + "{}:{}:{}:{}", + fu.wiki(), + fu.title().namespace_id(), + fu.namespace_name(), + fu.title().with_underscores() + ) + }) + .collect::>() + .join("|"))), + }, + None => None, + } + } + + fn add_metadata(&self, o: &mut Value, entry: &PageListEntry, header: &[(String, String)]) { + header.iter().for_each(|(head, _)| { + let value = match head.to_string().as_str() { + "checkbox" | "number" | "page_id" | "title" | "namespace" | "size" + | "timestamp" => None, + "image" => entry.get_page_image().map(|s| json!(s)), + "linknumber" => entry.link_count.as_ref().map(|s| json!(s)), + "wikidata" => entry.get_wikidata_item().map(|s| json!(s)), + "defaultsort" => entry.get_defaultsort().map(|s| json!(s)), + "disambiguation" => Some(entry.disambiguation.as_json()), + "incoming_links" => entry.incoming_links.as_ref().map(|s| json!(s)), + "sitelinks" => entry.sitelink_count.as_ref().map(|s| json!(s)), + "coordinates" => entry + .get_coordinates() + .as_ref() + .map(|coord| json!(format!("{}/{}", coord.lat, coord.lon))), + "fileusage" => self.get_file_usage_as_string(entry), + other => self.get_file_info_value(entry, other), + }; + if let Some(v) = value { + o["metadata"][head] = v + } + }); + } +} diff --git a/src/render_kml.rs b/src/render_kml.rs new file mode 100644 index 0000000..f974add --- /dev/null +++ b/src/render_kml.rs @@ -0,0 +1,165 @@ +use crate::form_parameters::FormParameters; +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::Render; +use crate::render_params::RenderParams; +use async_trait::async_trait; + +/// Renders KML +pub struct RenderKML {} + +#[async_trait] +impl Render for RenderKML { + async fn response( + &self, + platform: &Platform, + wiki: &str, + entries: Vec, + ) -> Result { + let params = RenderParams::new(platform, wiki).await?; + let server = match params.state().get_server_url_for_wiki(wiki) { + Ok(url) => url, + Err(_e) => String::new(), + }; + let mut kml = String::new(); + kml += r#" + "#; + + for entry in entries { + if let Some(coords) = &entry.get_coordinates() { + let title = entry.title(); + let label = if let "wikidatawiki" = wiki { + match entry.get_wikidata_label() { + Some(s) => s, + None => title.pretty().to_string(), + } + } else { + title.pretty().to_string() + }; + kml += r#""#; + kml += format!("{}", self.escape_xml(&label)).as_str(); + if let Some(desc) = entry.get_wikidata_description() { + kml += + format!("{}", self.escape_xml(&desc)).as_str(); + } + + kml += ""; + if let Some(q) = entry.get_wikidata_item() { + kml += format!( + "{}", + self.escape_xml(&q) + ) + .as_str(); + } + + let full_title = match title.full_with_underscores(params.api()) { + Some(ft) => ft, + None => format!("{:?}", title), + }; + let url = format!("{}/wiki/{}", &server, &self.escape_attribute(&full_title)); + kml += format!( + "{}", + self.escape_xml(&url) + ) + .as_str(); + + if let Some(img) = entry.get_page_image() { + let file = self.escape_attribute(&img); + let src = format!( + "{}/wiki/Special:Redirect/file/{}?width={}", + &server, &file, 120 + ); + kml += format!( + "{}", + self.escape_xml(&src) + ) + .as_str(); + } + + kml += ""; + + kml += format!( + "{}, {}, 0.", + coords.lon, coords.lat + ) + .as_str(); + kml += r#""#; + } + } + + kml += r#""#; + + Ok(MyResponse { + s: kml, + content_type: ContentType::Plain, + }) + } + + fn render_cell_title(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + entry.title().pretty().to_string() + } + + fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + match entry.get_wikidata_item() { + Some(q) => format!("[[:d:{}|]]", q), + None => String::new(), + } + } + + fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { + format!("[[User:{user}|]]") + } + + fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { + match image { + Some(img) => format!("[[File:{}|120px|]]", img), + None => String::new(), + } + } + + fn render_cell_namespace(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + entry.title().namespace_id().to_string() + } +} + +impl RenderKML { + pub fn new() -> Box { + Box::new(Self {}) + } + + fn escape_xml(&self, s: &str) -> String { + s.replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") + .replace('&', "&") + } + + fn escape_attribute(&self, s: &str) -> String { + FormParameters::percent_encode(s) + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_escape_xml() { + let r = RenderKML::new(); + assert_eq!( + r.escape_xml("<>&\"'"), + "&lt;&gt;&&quot;&apos;" + ); + } + + #[test] + fn test_escape_attribute() { + let r = RenderKML::new(); + assert_eq!(r.escape_attribute("<>&\"'"), "%3C%3E%26%22%27"); + } +} diff --git a/src/render_pagepile.rs b/src/render_pagepile.rs new file mode 100644 index 0000000..dafac3e --- /dev/null +++ b/src/render_pagepile.rs @@ -0,0 +1,87 @@ +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::Render; +use crate::render_params::RenderParams; +use async_trait::async_trait; +use std::collections::HashMap; + +/// Renders PagePile +pub struct RenderPagePile {} + +#[async_trait] +impl Render for RenderPagePile { + async fn response( + &self, + platform: &Platform, + wiki: &str, + entries: Vec, + ) -> Result { + let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; + let url = "https://pagepile.toolforge.org/api.php"; + let data: String = entries + .iter() + .map(|e| format!("{}\t{}", e.title().pretty(), e.title().namespace_id())) + .collect::>() + .join("\n"); + let mut params: HashMap = + [("action", "create_pile_with_data"), ("wiki", wiki)] + .iter() + .map(|x| (x.0.to_string(), x.1.to_string())) + .collect(); + params.insert("data".to_string(), data); + + let result = match api.query_raw(url, ¶ms, "POST").await { + Ok(r) => r, + Err(e) => return Err(format!("PagePile generation failed: {:?}", e)), + }; + let json: serde_json::value::Value = match serde_json::from_str(&result) { + Ok(j) => j, + Err(e) => { + return Err(format!( + "PagePile generation did not return valid JSON: {:?}", + e + )) + } + }; + let pagepile_id = match json["pile"]["id"].as_u64() { + Some(id) => id, + None => { + return Err(format!( + "PagePile generation did not return a pagepile ID: {:?}", + json.clone() + )) + } + }; + let url = format!( + "https://tools.wmflabs.org/pagepile/api.php?action=get_data&id={}", + pagepile_id + ); + let html = format!("

Redirect

The document can be found here.",&url,&url) ; + Ok(MyResponse { + s: html, + content_type: ContentType::HTML, + }) + } + + fn render_cell_title(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { + String::new() + } + fn render_cell_wikidata_item(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { + String::new() + } + fn render_user_name(&self, _user: &str, _params: &RenderParams) -> String { + String::new() + } + fn render_cell_image(&self, _image: &Option, _params: &RenderParams) -> String { + String::new() + } + fn render_cell_namespace(&self, _entry: &PageListEntry, _params: &RenderParams) -> String { + String::new() + } +} + +impl RenderPagePile { + pub fn new() -> Box { + Box::new(Self {}) + } +} diff --git a/src/render_params.rs b/src/render_params.rs new file mode 100644 index 0000000..c4a64a4 --- /dev/null +++ b/src/render_params.rs @@ -0,0 +1,191 @@ +use crate::app_state::AppState; +use crate::platform::*; +use crate::render::AUTOLIST_WIKIDATA; +use std::sync::Arc; +use wikibase::mediawiki::api::Api; + +#[derive(Debug, Clone)] +pub struct RenderParams { + wiki: String, + file_data: bool, + file_usage: bool, + thumbnails_in_wiki_output: bool, + wdi: String, + show_wikidata_item: bool, + is_wikidata: bool, + add_coordinates: bool, + add_image: bool, + add_defaultsort: bool, + add_disambiguation: bool, + add_incoming_links: bool, + add_sitelinks: bool, + do_output_redlinks: bool, + use_autolist: bool, + autolist_creator_mode: bool, + autolist_wiki_server: String, + api: Api, + state: Arc, + row_number: usize, + json_output_compatability: String, + json_callback: String, + json_sparse: bool, + json_pretty: bool, + giu: bool, +} + +impl RenderParams { + pub async fn new(platform: &Platform, wiki: &str) -> Result { + let api = platform.state().get_api_for_wiki(wiki.to_string()).await?; + let mut ret = Self { + wiki: wiki.to_string(), + file_data: platform.has_param("ext_image_data"), + file_usage: platform.has_param("file_usage_data"), + thumbnails_in_wiki_output: platform.has_param("thumbnails_in_wiki_output"), + wdi: platform.get_param_default("wikidata_item", "no"), + add_coordinates: platform.has_param("add_coordinates"), + add_image: platform.has_param("add_image") + || platform.get_param_blank("format") == "kml", + add_defaultsort: platform.has_param("add_defaultsort"), + add_disambiguation: platform.has_param("add_disambiguation"), + add_incoming_links: platform.get_param_blank("sortby") == "incoming_links", + add_sitelinks: platform.get_param_blank("sortby") == "sitelinks", + show_wikidata_item: false, + is_wikidata: wiki == "wikidatawiki", + do_output_redlinks: platform.do_output_redlinks(), + use_autolist: false, // Possibly set downstream + autolist_creator_mode: false, // Possibly set downstream + autolist_wiki_server: AUTOLIST_WIKIDATA.to_string(), // Possibly set downstream + api, + state: platform.state(), + row_number: 0, + json_output_compatability: platform + .get_param_default("output_compatability", "catscan"), // Default; "quick-intersection" ? + json_callback: platform.get_param_blank("callback"), + json_sparse: platform.has_param("sparse"), + json_pretty: platform.has_param("json-pretty"), + giu: platform.has_param("giu"), + }; + ret.show_wikidata_item = ret.wdi == "any" || ret.wdi == "with"; + Ok(ret) + } + + pub fn show_wikidata_item(&self) -> bool { + self.show_wikidata_item + } + + pub fn file_data(&self) -> bool { + self.file_data + } + + pub fn do_output_redlinks(&self) -> bool { + self.do_output_redlinks + } + + pub fn row_number_mut(&mut self) -> &mut usize { + &mut self.row_number + } + + pub fn thumbnails_in_wiki_output(&self) -> bool { + self.thumbnails_in_wiki_output + } + + pub fn api(&self) -> &Api { + &self.api + } + + pub fn is_wikidata(&self) -> bool { + self.is_wikidata + } + + pub fn use_autolist_mut(&mut self) -> &mut bool { + &mut self.use_autolist + } + + pub fn use_autolist(&self) -> bool { + self.use_autolist + } + + pub fn autolist_creator_mode_mut(&mut self) -> &mut bool { + &mut self.autolist_creator_mode + } + + pub fn autolist_wiki_server(&self) -> &str { + &self.autolist_wiki_server + } + + pub fn set_autolist_wiki_server(&mut self, autolist_wiki_server: &str) { + self.autolist_wiki_server = autolist_wiki_server.to_string(); + } + + pub fn row_number(&self) -> usize { + self.row_number + } + + pub fn state(&self) -> &AppState { + &self.state + } + + pub fn wiki(&self) -> &str { + &self.wiki + } + + pub fn autolist_creator_mode(&self) -> bool { + self.autolist_creator_mode + } + + pub fn json_pretty(&self) -> bool { + self.json_pretty + } + + pub fn file_usage(&self) -> bool { + self.file_usage + } + + pub fn giu(&self) -> bool { + self.giu + } + + pub fn set_json_sparse(&mut self, json_sparse: bool) { + self.json_sparse = json_sparse; + } + + pub fn set_file_usage(&mut self, file_usage: bool) { + self.file_usage = file_usage; + } + + pub fn json_output_compatability(&self) -> &str { + &self.json_output_compatability + } + + pub fn json_callback(&self) -> &str { + &self.json_callback + } + + pub fn json_sparse(&self) -> bool { + self.json_sparse + } + + pub fn add_image(&self) -> bool { + self.add_image + } + + pub fn add_coordinates(&self) -> bool { + self.add_coordinates + } + + pub fn add_defaultsort(&self) -> bool { + self.add_defaultsort + } + + pub fn add_disambiguation(&self) -> bool { + self.add_disambiguation + } + + pub fn add_incoming_links(&self) -> bool { + self.add_incoming_links + } + + pub fn add_sitelinks(&self) -> bool { + self.add_sitelinks + } +} diff --git a/src/render_plaintext.rs b/src/render_plaintext.rs new file mode 100644 index 0000000..3f1f78d --- /dev/null +++ b/src/render_plaintext.rs @@ -0,0 +1,61 @@ +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::Render; +use crate::render_params::RenderParams; +use async_trait::async_trait; + +/// Renders PlainText +pub struct RenderPlainText {} + +#[async_trait] +impl Render for RenderPlainText { + async fn response( + &self, + platform: &Platform, + wiki: &str, + entries: Vec, + ) -> Result { + let params = RenderParams::new(platform, wiki).await?; + let output = entries + .iter() + .filter_map(|entry| entry.title().full_pretty(params.api())) + .collect::>() + .join("\n"); + Ok(MyResponse { + s: output, + content_type: ContentType::Plain, + }) + } + + fn render_cell_title(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + entry.title().pretty().to_string() + } + + fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + match entry.get_wikidata_item() { + Some(q) => format!("[[:d:{}|]]", q), + None => String::new(), + } + } + + fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { + format!("[[User:{user}|]]") + } + + fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { + match image { + Some(img) => format!("[[File:{}|120px|]]", img), + None => String::new(), + } + } + + fn render_cell_namespace(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + entry.title().namespace_id().to_string() + } +} + +impl RenderPlainText { + pub fn new() -> Box { + Box::new(Self {}) + } +} diff --git a/src/render_tsv.rs b/src/render_tsv.rs new file mode 100644 index 0000000..62677d1 --- /dev/null +++ b/src/render_tsv.rs @@ -0,0 +1,118 @@ +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::Render; +use crate::render_params::RenderParams; +use async_trait::async_trait; + +/// Renders CSV and TSV +pub struct RenderTSV { + separator: String, +} + +#[async_trait] +impl Render for RenderTSV { + async fn response( + &self, + platform: &Platform, + wiki: &str, + entries: Vec, + ) -> Result { + let mut params = RenderParams::new(platform, wiki).await?; + let mut rows: Vec = vec![]; + let mut header: Vec<(&str, &str)> = vec![ + ("number", "number"), + ("title", "title"), + ("page_id", "pageid"), + ("namespace", "namespace"), + ("size", "length"), + ("timestamp", "touched"), + ]; + if params.show_wikidata_item() { + header.push(("wikidata_item", "Wikidata")); + } + if params.file_data() { + self.file_data_keys() + .iter() + .for_each(|k| header.push((k, k))); + } + let mut header: Vec<(String, String)> = header + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + for col in self.get_initial_columns(¶ms) { + if !header.iter().any(|(k, _)| col == k) && col != "number" { + header.push((col.to_string(), col.to_string())); + } + } + rows.push( + header + .iter() + .map(|(_, v)| self.escape_cell(v)) + .collect::>() + .join(&self.separator), + ); + + for entry in entries { + *params.row_number_mut() += 1; + let row = self.row_from_entry(&entry, &header, ¶ms, platform); + let row: Vec = row.iter().map(|s| self.escape_cell(s)).collect(); + let row = row.join(&self.separator); + rows.push(row); + } + + Ok(MyResponse { + s: rows.join("\n"), + content_type: match self.separator.as_str() { + "," => ContentType::CSV, + "\t" => ContentType::TSV, + _ => ContentType::Plain, // Fallback + }, + }) + } + + fn render_cell_title(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + entry.title().with_underscores() + } + + fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + match entry.get_wikidata_item() { + Some(q) => q, + None => String::new(), + } + } + + fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { + user.to_string() + } + + fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { + match image { + Some(img) => img.to_string(), + None => String::new(), + } + } + + fn render_cell_namespace(&self, entry: &PageListEntry, params: &RenderParams) -> String { + entry + .title() + .namespace_name(params.api()) + .unwrap_or("UNKNOWN_NAMESPACE") + .to_string() + } +} + +impl RenderTSV { + pub fn new(separator: &str) -> Box { + Box::new(Self { + separator: separator.to_string(), + }) + } + + fn escape_cell(&self, s: &str) -> String { + if self.separator == "," { + format!("\"{}\"", s.replace('\"', "\\\"")) + } else { + s.replace('\t', " ") + } + } +} diff --git a/src/render_wikitext.rs b/src/render_wikitext.rs new file mode 100644 index 0000000..a61a9d8 --- /dev/null +++ b/src/render_wikitext.rs @@ -0,0 +1,156 @@ +use crate::pagelist_entry::PageListEntry; +use crate::platform::*; +use crate::render::Render; +use crate::render_params::RenderParams; +use async_trait::async_trait; +use chrono::prelude::*; + +/// Renders wiki text +pub struct RenderWiki {} + +#[async_trait] +impl Render for RenderWiki { + async fn response( + &self, + platform: &Platform, + wiki: &str, + entries: Vec, + ) -> Result { + let mut params = RenderParams::new(platform, wiki).await?; + let mut rows: Vec = vec![]; + rows.push("== ".to_string() + &platform.combination().to_string() + " =="); + + let petscan_query_url = + "https://petscan.wmflabs.org/?".to_string() + &platform.form_parameters().to_string(); + let petscan_query_url_no_doit = "https://petscan.wmflabs.org/?".to_string() + + &platform.form_parameters().to_string_no_doit(); + + let utc: DateTime = Utc::now(); + rows.push(format!("Last updated on {}.", utc.to_rfc2822())); + + rows.push(format!( + "[{} Regenerate this table] or [{} edit the query].\n", + &petscan_query_url, &petscan_query_url_no_doit + )); + rows.push("{| border=1 class='wikitable'".to_string()); + let mut header: Vec<(&str, &str)> = vec![ + ("title", "Title"), + ("page_id", "Page ID"), + ("namespace", "Namespace"), + ("size", "Size (bytes)"), + ("timestamp", "Last change"), + ]; + if params.show_wikidata_item() { + header.push(("wikidata_item", "Wikidata")); + } + if params.file_data() { + self.file_data_keys() + .iter() + .for_each(|k| header.push((k, k))); + } + if params.do_output_redlinks() { + header = vec![("redlink_count", "Wanted"), ("title", "Title")]; + } + let mut header: Vec<(String, String)> = header + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + for col in self.get_initial_columns(¶ms) { + if !header.iter().any(|(k, _)| col == k) && col != "number" { + header.push((col.to_string(), col.to_string())); + } + } + rows.push( + "!".to_string() + + &header + .iter() + .map(|(_, v)| v.clone()) + .collect::>() + .join(" !! "), + ); + + for entry in entries { + *params.row_number_mut() += 1; + rows.push("|-".to_string()); + let row = self.row_from_entry(&entry, &header, ¶ms, platform); + let row = "| ".to_string() + &row.join(" || "); + rows.push(row); + } + + rows.push("|}".to_string()); + + Ok(MyResponse { + s: rows.join("\n"), + content_type: ContentType::Plain, + }) + } + + fn render_cell_title(&self, entry: &PageListEntry, params: &RenderParams) -> String { + if entry.title().namespace_id() == 6 { + if params.thumbnails_in_wiki_output() { + match entry.title().full_pretty(params.api()) { + Some(file) => format!("[[{}|120px|]]", &file), + None => format!("[[File:{}|120px|]]", entry.title().pretty()), + } + } else { + match entry.title().full_pretty(params.api()) { + Some(file) => format!("[[:{}|]]", &file), + None => format!("[[:File:{}|]]", entry.title().pretty()), + } + } + } else { + self.render_wikilink(entry, params) + } + } + + fn render_cell_wikidata_item(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + match entry.get_wikidata_item() { + Some(q) => format!("[[:d:{}|]]", q), + None => String::new(), + } + } + + fn render_user_name(&self, user: &str, _params: &RenderParams) -> String { + format!("[[User:{user}|]]") + } + + fn render_cell_image(&self, image: &Option, _params: &RenderParams) -> String { + match image { + Some(img) => format!("[[File:{}|120px|]]", img), + None => String::new(), + } + } + + fn render_cell_namespace(&self, entry: &PageListEntry, _params: &RenderParams) -> String { + entry.title().namespace_id().to_string() + } +} + +impl RenderWiki { + pub fn new() -> Box { + Box::new(Self {}) + } + + fn render_wikilink(&self, entry: &PageListEntry, params: &RenderParams) -> String { + if params.is_wikidata() { + match &entry.get_wikidata_label() { + Some(label) => format!("[[{}|{}]]", &entry.title().pretty(), label), + None => format!("[[{}]]", entry.title().pretty()), + } + } else { + let mut ret = "[[".to_string(); + if entry.title().namespace_id() == 14 { + ret += ":"; + } + ret += &entry + .title() + .full_pretty(params.api()) + .unwrap_or_else(|| entry.title().pretty().to_string()); + if !params.do_output_redlinks() { + ret += "|"; + } + ret += "]]"; + ret + } + } +}
#UNKNOWN:'{}'