commit a6d105392eb9456040ef514295639d2649af9570 Author: Clayton Wilson Date: Mon Apr 8 00:31:30 2024 -0500 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..431c95b --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,36 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug executable 'unspread'", + "cargo": { + "args": ["build", "--bin=unspread", "--package=unspread"], + "filter": { + "name": "unspread", + "kind": "bin" + } + }, + "args": ["slams"], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in executable 'unspread'", + "cargo": { + "args": ["test", "--no-run", "--bin=unspread", "--package=unspread"], + "filter": { + "name": "unspread", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + } + ] +} diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..419d512 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,630 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "calamine" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a3a315226fdc5b1c3e33521073e1712a05944bc0664d665ff1f6ff0396334da" +dependencies = [ + "byteorder", + "codepage", + "encoding_rs", + "log", + "quick-xml", + "serde", + "zip", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "codepage" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0e9222c0cdf2c6ac27d73f664f9520266fa911c3106329d359f8861cb8bde9" +dependencies = [ + "encoding_rs", +] + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "directories" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy-regex" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d12be4595afdf58bd19e4a9f4e24187da2a66700786ff660a418e9059937a4c" +dependencies = [ + "lazy-regex-proc_macros", + "once_cell", + "regex", +] + +[[package]] +name = "lazy-regex-proc_macros" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44bcd58e6c97a7fcbaffcdc95728b393b8d98933bfadad49ed4097845b57ef0b" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags", + "libc", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "encoding_rs", + "memchr", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_users" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "ryu" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + +[[package]] +name = "syn" +version = "2.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unspread" +version = "0.0.2" +dependencies = [ + "calamine", + "clap", + "csv", + "directories", + "lazy-regex", + "regex", +] + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" + +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..eb76d18 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "unspread" +version = "0.0.2" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +calamine = "0.24.0" +csv = "1.3.0" +clap = { version = "4.5.4", features = ["derive"] } +directories = "5.0.1" +lazy-regex = "3.1.0" +regex = "1.10.2" diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e005270 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,352 @@ +use calamine::{open_workbook, Reader, Xlsx}; +use clap::{command, Parser}; +use std::{ + error::Error, + fs, io, + path::{Component, Path, PathBuf}, + process::exit, + vec, +}; +use {directories::UserDirs, lazy_regex::*}; + +fn press_to_exit(exit_code: i32) -> ! { + println!("Press enter to exit"); + let mut buffer = String::new(); + let _ = io::stdin().read_line(&mut buffer); + exit(exit_code); +} + +#[derive(Debug)] +enum HeadersMode { + Combine, + Remove, + Ignore, +} + +impl From for HeadersMode { + fn from(value: u8) -> Self { + match value { + 0 => HeadersMode::Combine, + 1 => HeadersMode::Remove, + 2 => HeadersMode::Ignore, + _ => HeadersMode::Combine, + } + } +} + +/// Combine data spread across multiple spreadsheets into one +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + /// The folder that contains the spreadsheet files + /// Default './input' + #[arg()] + folder: Option, + + /// Combined spreadsheet output file + /// Default 'output.csv' + #[arg(short = 'o', default_value_t = String::from("./output.csv"))] + out_file: String, + + /// 0 = Combine headers, 1 = Remove headers, 2 = Ignore headers + #[arg(short = 'm', long, default_value_t = 0)] + headers_mode: u8, +} + +fn data_from_csv(path: PathBuf) -> Result>, Box> { + // let reader = csv::Reader::from_path(path)?; + let reader = csv::ReaderBuilder::new() + .has_headers(false) + .from_path(path)?; + let cells = reader + .into_records() + // for c in cells { + // println!("{:?}", c); + // } + .filter_map(|res| match res { + Ok(row) => Some(row), + Err(e) => { + println!("{}", e.to_string()); + None + } + }) + .map(|row| { + row.iter() + .map(|cell| cell.to_string()) + .collect::>() + }) + .collect::>>(); + + Ok(cells) + // Ok(vec![vec!["".to_string()]]) +} + +fn data_from_excel(path: PathBuf) -> Result>, Box> { + let mut workbook: Xlsx<_> = match open_workbook(path) { + Ok(s) => s, + Err(e) => return Err(Box::new(e)), + }; + + let binding = workbook.sheet_names(); + let sheet_one_name = match binding.get(0) { + Some(s) => s, + None => return Err("Xslx file does not have any sheets".into()), + }; + + if workbook.sheet_names().len() > 1 { + println!("Warning: A spreadsheet contains more than one sheet. This tool will only read the first sheet and ignore the rest."); + } + + let sheet_one = workbook.worksheet_range(sheet_one_name)?; + + // println!("{:?}", sheet_one); + let data = sheet_one + .rows() + .map(|r| { + let cells = r.iter().map(|c| c.to_string()); + cells.collect::>() + }) + .collect::>>(); + + Ok(data) +} + +/// https://stackoverflow.com/questions/68231306/stdfscanonicalize-for-files-that-dont-exist +/// build a usable path from a user input which may be absolute +/// (if it starts with / or ~) or relative to the supplied base_dir. +/// (we might want to try detect windows drives in the future, too) +pub fn path_from>(base_dir: P, input: &str) -> PathBuf { + let tilde = regex!(r"^~(/|$)"); + if input.starts_with('/') { + // if the input starts with a `/`, we use it as is + input.into() + } else if tilde.is_match(input) { + // if the input starts with `~` as first token, we replace + // this `~` with the user home directory + PathBuf::from(&*tilde.replace(input, |c: &Captures| { + if let Some(user_dirs) = UserDirs::new() { + format!("{}{}", user_dirs.home_dir().to_string_lossy(), &c[1],) + } else { + println!("no user dirs found, no expansion of ~"); + c[0].to_string() + } + })) + } else { + // we put the input behind the source (the selected directory + // or its parent) and we normalize so that the user can type + // paths with `../` + normalize_path(base_dir.as_ref().join(input)) + } +} + +/// Improve the path to try remove and solve .. token. +/// +/// This assumes that `a/b/../c` is `a/c` which might be different from +/// what the OS would have chosen when b is a link. This is OK +/// for broot verb arguments but can't be generally used elsewhere +/// +/// This function ensures a given path ending with '/' still +/// ends with '/' after normalization. +pub fn normalize_path>(path: P) -> PathBuf { + let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/')); + let mut normalized = PathBuf::new(); + for component in path.as_ref().components() { + match &component { + Component::ParentDir => { + if !normalized.pop() { + normalized.push(component); + } + } + _ => { + normalized.push(component); + } + } + } + if ends_with_slash { + normalized.push(""); + } + normalized +} + +fn save_to_csv(data: &Vec>, destination: &PathBuf) -> Result<(), Box> { + let mut writer = csv::Writer::from_path(destination)?; + + data.iter().for_each(|row| { + match writer.write_record(row) { + Ok(_) => {} + Err(e) => { + println!("{}", e.to_string()); + } + }; + }); + + Ok(()) +} + +fn main() { + let args = Args::parse(); + let headers_mode = HeadersMode::from(args.headers_mode); + + let folder_name = match args.folder { + Some(folder) => folder, + None => { + println!("No folder provided, using default folder 'input'"); + "./input".to_string() + } + }; + + let spreadsheet_folder = match Path::new(&folder_name).canonicalize() { + Ok(path) => path, + Err(e) => { + println!(" '{}': {}", &folder_name, e.to_string()); + press_to_exit(1); + } + }; + + if !spreadsheet_folder.is_dir() { + println!("Argument for folder must be a valid folder"); + press_to_exit(1); + } + + let output_file = path_from(".", &args.out_file); + if output_file.is_dir() { + println!("Output file cannot be a directory"); + press_to_exit(1); + } + + let dir = match fs::read_dir(spreadsheet_folder) { + Ok(d) => d, + Err(e) => { + println!("Error opening the inputs folder: {}", e.to_string()); + press_to_exit(1); + } + }; + + let mut total_file_count = 0; + + let dir_entries = dir.filter_map(|x| { + total_file_count += 1; + x.ok() + }); + + let mut headers: Vec = vec![]; + let mut combined_spreadsheet_data: Vec> = vec![]; + + for (index, entry) in dir_entries.enumerate() { + // Filter out folders + match entry.file_type() { + Ok(f) => { + if f.is_dir() { + println!( + "Skipping directory {}", + entry + .file_name() + .into_string() + .unwrap_or("`error`".to_string()) + ); + continue; + } + } + Err(_) => { + println!( + "Skipping directory {}", + entry + .file_name() + .into_string() + .unwrap_or("`error`".to_string()) + ); + } + } + + let name_binding = entry.file_name(); + let file_name = name_binding.as_os_str().to_string_lossy(); + + let spreadsheet_data = match { + if file_name.ends_with(".csv") { + data_from_csv(entry.path()) + } else if file_name.ends_with(".ods") + | file_name.ends_with("xls") + | file_name.ends_with("xlsx") + | file_name.ends_with("xlsm") + | file_name.ends_with("xlsb") + | file_name.ends_with("xla") + | file_name.ends_with("xlam") + { + data_from_excel(entry.path()) + } else { + println!("Unsupported file type: {}", file_name); + continue; + } + } { + Ok(data) => data, + Err(e) => { + println!( + "Error occurred while reading file {} : {}", + file_name, + e.to_string() + ); + continue; + } + }; + + // println!("{:?}", spreadsheet_data); + + // Save headers on first spreadsheet + if index == 0 { + headers = match spreadsheet_data.get(0) { + Some(h) => h.to_owned(), + None => { + println!("Skipping {} since it's empty", file_name); + continue; + } + } + } + + let first_row = match spreadsheet_data.get(0) { + Some(data) => data, + None => { + println!("Skipping {} since it's empty", file_name); + continue; + } + }; + + let mut temp = vec![]; + let mut final_spreadsheet_data = match headers_mode { + HeadersMode::Ignore => spreadsheet_data, + HeadersMode::Remove => { + if first_row.len() != headers.len() { + temp = spreadsheet_data; + } else { + for header_pair in first_row.iter().zip(headers.iter()) { + if header_pair.0 != header_pair.1 { + temp = spreadsheet_data.clone(); + break; + } + } + + if temp.len() == 0 { + temp = spreadsheet_data.clone(); + temp.remove(0); + } + } + temp + } + HeadersMode::Combine => { + if index == 0 { + spreadsheet_data + } else { + temp = spreadsheet_data.clone(); + temp.remove(0); + temp + } + } + }; + + combined_spreadsheet_data.append(&mut final_spreadsheet_data) + } + + match save_to_csv(&combined_spreadsheet_data, &output_file) { + Ok(_) => println!("Success: {} lines written", combined_spreadsheet_data.len()), + Err(e) => println!("Failure {}", e.to_string()), + } +}