Skip to content

Commit

Permalink
Add database fields such that the index can be generated from the database
Browse files Browse the repository at this point in the history
  • Loading branch information
arlosi committed Aug 10, 2022
1 parent d480f3a commit c6f9beb
Show file tree
Hide file tree
Showing 20 changed files with 428 additions and 15 deletions.
10 changes: 5 additions & 5 deletions cargo-registry-index/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ extern crate serde;
pub mod testing;

use anyhow::{anyhow, Context};
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Command;
Expand Down Expand Up @@ -101,7 +101,7 @@ pub struct Crate {
pub vers: String,
pub deps: Vec<Dependency>,
pub cksum: String,
pub features: HashMap<String, Vec<String>>,
pub features: BTreeMap<String, Vec<String>>,
/// This field contains features with new, extended syntax. Specifically,
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
///
Expand All @@ -112,7 +112,7 @@ pub struct Crate {
/// will fail to load due to not being able to parse the new syntax, even
/// with a `Cargo.lock` file.
#[serde(skip_serializing_if = "Option::is_none")]
pub features2: Option<HashMap<String, Vec<String>>>,
pub features2: Option<BTreeMap<String, Vec<String>>>,
pub yanked: Option<bool>,
#[serde(default)]
pub links: Option<String>,
Expand All @@ -139,7 +139,7 @@ pub struct Crate {
pub v: Option<u32>,
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct Dependency {
pub name: String,
pub req: String,
Expand All @@ -152,7 +152,7 @@ pub struct Dependency {
pub package: Option<String>,
}

#[derive(Copy, Clone, Serialize, Deserialize, Debug)]
#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[serde(rename_all = "lowercase")]
pub enum DependencyKind {
Normal,
Expand Down
5 changes: 5 additions & 0 deletions migrations/2022-03-22-183805_http-index/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Down migration: drops the columns that the matching up.sql adds
-- (`dependencies.explicit_name`, `versions.checksum`, `versions.links`).
-- Any data stored in these columns is lost when this runs.
ALTER TABLE dependencies
DROP COLUMN explicit_name;
ALTER TABLE versions
DROP COLUMN checksum,
DROP COLUMN links;
5 changes: 5 additions & 0 deletions migrations/2022-03-22-183805_http-index/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Up migration: adds the columns needed to generate index entries from the database.
-- All three columns are nullable, so existing rows stay valid without a backfill.
ALTER TABLE dependencies
ADD COLUMN explicit_name VARCHAR NULL;
ALTER TABLE versions
-- Fixed width of 64 characters; presumably a hex-encoded SHA-256 digest (verify against the publish path).
ADD COLUMN checksum CHAR(64) NULL,
ADD COLUMN links VARCHAR NULL;
110 changes: 110 additions & 0 deletions src/admin/import_cksum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
use std::{
fs::File,
io::{BufRead, BufReader},
thread,
time::Duration,
};

use cargo_registry_index::{Repository, RepositoryConfig};
use diesel::prelude::*;
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};

use crate::{
admin::dialoguer,
db,
schema::{crates, dependencies, versions},
};

// Command-line options for the `import-cksum` admin subcommand.
// NOTE: plain `//` comments are used for the additions here so that the text clap
// may pick up from `///` doc comments is left untouched.
#[derive(clap::Parser, Debug, Copy, Clone)]
#[clap(
name = "import-cksum",
about = "Import missing fields from git into the database"
)]
pub struct Opts {
/// Time in milliseconds to sleep between crate updates to reduce database load.
#[clap(long)]
// No default value is declared here; presumably clap therefore requires
// `--pause-millis` on the command line (confirm against the clap version in use).
pause_millis: u64,
}

pub fn run(opts: Opts) -> anyhow::Result<()> {
let conn = db::oneoff_connection().unwrap();
println!("fetching git repo");
let config = RepositoryConfig::from_environment();
let repo = Repository::open(&config)?;
repo.reset_head()?;
println!("HEAD is at {}", repo.head_oid()?);

let files = repo.get_files_modified_since(None)?;
println!("found {} crates", files.len());
if !dialoguer::confirm("continue?") {
return Ok(());
}

let pb = ProgressBar::new(files.len() as u64);
pb.set_style(ProgressStyle::with_template("{bar:60} ({pos}/{len}, ETA {eta})").unwrap());

for file in files.iter().progress_with(pb) {
thread::sleep(Duration::from_millis(opts.pause_millis));

let crate_name = file.file_name().unwrap().to_str().unwrap();
let path = repo.index_file(crate_name);
if !path.exists() {
continue;
}
let file = File::open(path)?;
let reader = BufReader::new(file);
for line in reader.lines() {
let krate: cargo_registry_index::Crate = serde_json::from_str(&line?)?;
conn.transaction(|| import_data(&conn, krate))?;
}
}

Ok(())
}

/// Copies `cksum`, `links` and explicit dependency names of one index entry into the database.
///
/// The version row is looked up by crate name and version number. Nothing is written when
/// that row already has a checksum. Otherwise the version's `checksum` and `links` are set,
/// and for every dependency in the index entry that carries a `package` value, one matching
/// dependency row with a NULL `explicit_name` gets `explicit_name` set to the dependency's
/// `name`.
///
/// Any failed query, including a lookup that finds no row, is returned as an error.
fn import_data(conn: &PgConnection, krate: cargo_registry_index::Crate) -> QueryResult<()> {
    let (version_id, existing_checksum) = versions::table
        .inner_join(crates::table)
        .filter(crates::name.eq(&krate.name))
        .filter(versions::num.eq(&krate.vers))
        .select((versions::id, versions::checksum))
        .first::<(i32, Option<String>)>(conn)?;

    // Only versions whose checksum is still NULL are touched.
    if existing_checksum.is_some() {
        return Ok(());
    }

    // Fill in the `checksum` and `links` columns of the version.
    diesel::update(versions::table)
        .set((
            versions::checksum.eq(&krate.cksum),
            versions::links.eq(&krate.links),
        ))
        .filter(versions::id.eq(version_id))
        .execute(conn)?;

    // Fill in `explicit_name` for every dependency that has a `package` value.
    let renamed_deps = krate
        .deps
        .iter()
        .filter_map(|dep| dep.package.as_ref().map(|package| (dep, package)));

    for (dep, package) in renamed_deps {
        let kind = dep.kind.map_or(0, |k| k as i32);

        // Two otherwise identical dependency rows can exist for one version; in git they
        // differ only by the field being filled here. Restricting the lookup to rows whose
        // `explicit_name` is still NULL and taking the first hit hands out one
        // `explicit_name` per row.
        let dependency_id = dependencies::table
            .inner_join(crates::table)
            .filter(dependencies::explicit_name.is_null())
            .filter(dependencies::version_id.eq(version_id))
            .filter(dependencies::req.eq(&dep.req))
            .filter(dependencies::features.eq(&dep.features))
            .filter(dependencies::optional.eq(&dep.optional))
            .filter(dependencies::default_features.eq(&dep.default_features))
            .filter(dependencies::target.is_not_distinct_from(&dep.target))
            .filter(dependencies::kind.eq(kind))
            .filter(crates::name.eq(package))
            .select(dependencies::id)
            .first::<i32>(conn)?;

        diesel::update(dependencies::table)
            .set(dependencies::explicit_name.eq(&dep.name))
            .filter(dependencies::id.eq(dependency_id))
            .execute(conn)?;
    }

    Ok(())
}
1 change: 1 addition & 0 deletions src/admin/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod delete_crate;
pub mod delete_version;
pub mod dialoguer;
pub mod import_cksum;
pub mod migrate;
pub mod on_call;
pub mod populate;
Expand Down
4 changes: 3 additions & 1 deletion src/bin/crates-admin.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![warn(clippy::all, rust_2018_idioms)]

use cargo_registry::admin::{
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
delete_crate, delete_version, import_cksum, migrate, populate, render_readmes, test_pagerduty,
transfer_crates, upload_index, verify_token, yank_version,
};

Expand All @@ -24,6 +24,7 @@ enum SubCommand {
Migrate(migrate::Opts),
UploadIndex(upload_index::Opts),
YankVersion(yank_version::Opts),
ImportCksum(import_cksum::Opts),
}

fn main() -> anyhow::Result<()> {
Expand All @@ -42,6 +43,7 @@ fn main() -> anyhow::Result<()> {
SubCommand::Migrate(opts) => migrate::run(opts)?,
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
SubCommand::YankVersion(opts) => yank_version::run(opts),
SubCommand::ImportCksum(opts) => import_cksum::run(opts)?,
}

Ok(())
Expand Down
60 changes: 60 additions & 0 deletions src/controllers/krate/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
use std::cmp::Reverse;
use std::str::FromStr;

use conduit::{Body, Response};

use crate::controllers::frontend_prelude::*;
use crate::controllers::helpers::pagination::PaginationOptions;

Expand All @@ -15,6 +17,7 @@ use crate::models::{
TopVersions, User, Version, VersionOwnerAction,
};
use crate::schema::*;
use crate::util::errors::not_found;
use crate::views::{
EncodableCategory, EncodableCrate, EncodableDependency, EncodableKeyword, EncodableVersion,
};
Expand Down Expand Up @@ -396,3 +399,60 @@ pub fn reverse_dependencies(req: &mut dyn RequestExt) -> EndpointResult {
"meta": { "total": total },
})))
}

/// Generate the sparse registry config.json file
///
/// The scheme comes from the `X-Forwarded-Proto` header; without a usable value it is
/// `http` in the development environment and `https` otherwise. The host comes from
/// `X-Forwarded-Host`, or `Host` when that header is absent, and falls back to the
/// configured domain name when no usable value is found.
pub fn config_json(req: &mut dyn RequestExt) -> EndpointResult {
    #[derive(Serialize)]
    struct R {
        dl: String,
        api: String,
    }

    let headers = req.headers();

    let forwarded_proto = headers
        .get("X-Forwarded-Proto")
        .and_then(|value| value.to_str().ok());
    let proto: &str = match forwarded_proto {
        Some(scheme) => scheme,
        None if req.app().config.env() == crate::Env::Development => "http",
        None => "https",
    };

    let host_header = headers
        .get("X-Forwarded-Host")
        .or_else(|| headers.get(http::header::HOST));
    let domain_name: &str = match host_header.and_then(|value| value.to_str().ok()) {
        Some(host) => host,
        None => &req.app().config.domain_name,
    };

    let response = R {
        dl: format!("{proto}://{domain_name}/api/v1/crates"),
        api: format!("{proto}://{domain_name}/"),
    };
    Ok(req.json(&response))
}

/// Generate a sparse registry index file
///
/// The request path must place the crate in the directory layout derived from its name
/// (see `sparse_index_dirs`); any other `x1`/`x2` combination is answered with "not found".
/// On a match, the crate is loaded by name and its index metadata is returned as
/// `text/plain`.
///
/// # Errors
///
/// Returns "not found" for a mismatching directory prefix, including names that are empty
/// or cannot be split on character boundaries. Database and lookup errors are propagated.
pub fn versions_registry(req: &mut dyn RequestExt) -> EndpointResult {
    let crate_name = &req.params()["crate_id"];

    let x1 = req.params()["x1"].as_str();
    let x2 = req.params().find("x2");
    // `crate_name` is untrusted request input, so the expected prefix is computed with
    // checked slicing; a name that cannot be split simply never matches.
    if sparse_index_dirs(crate_name) != Some((x1, x2)) {
        return Err(not_found());
    }

    let conn = req.db_read()?;
    let krate: Crate = Crate::by_name(crate_name).first(&*conn)?;
    let body = krate.index_metadata(&*conn)?;

    Ok(Response::builder()
        .header(header::CONTENT_TYPE, "text/plain; charset=utf-8")
        .body(Body::from_vec(body))
        .unwrap()) // Header values are well formed, so should not panic
}

/// Returns the directory components under which `crate_name` is expected in the index:
/// `("1", None)` / `("2", None)` for one- and two-byte names, `("3", first byte)` for
/// three-byte names, and `(bytes 0..2, bytes 2..4)` for longer names.
///
/// Returns `None` for an empty name or when a slice would not fall on a character
/// boundary, instead of panicking.
fn sparse_index_dirs(crate_name: &str) -> Option<(&str, Option<&str>)> {
    match crate_name.len() {
        0 => None,
        1 => Some(("1", None)),
        2 => Some(("2", None)),
        3 => crate_name.get(0..1).map(|first| ("3", Some(first))),
        _ => {
            let first = crate_name.get(0..2)?;
            let second = crate_name.get(2..4)?;
            Some((first, Some(second)))
        }
    }
}
16 changes: 10 additions & 6 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use flate2::read::GzDecoder;
use hex::ToHex;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
Expand Down Expand Up @@ -154,6 +154,11 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
// This is only redundant for now. Eventually the duplication will be removed.
let license = new_crate.license.clone();

// Read tarball from request
let mut tarball = Vec::new();
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();

// Persist the new version of this crate
let version = NewVersion::new(
krate.id,
Expand All @@ -165,6 +170,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
// to get here, and max upload sizes are way less than i32 max
file_length as i32,
user.id,
hex_cksum.clone(),
links.clone(),
)?
.save(&conn, &verified_email_address)?;

Expand All @@ -191,10 +198,6 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
let ignored_invalid_badges = Badge::update_crate(&conn, &krate, new_crate.badges.as_ref())?;
let top_versions = krate.top_versions(&conn)?;

// Read tarball from request
let mut tarball = Vec::new();
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();
let pkg_name = format!("{}-{}", krate.name, vers);
let cargo_vcs_info = verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
let pkg_path_in_vcs = cargo_vcs_info.map(|info| info.path_in_vcs);
Expand All @@ -217,7 +220,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
.uploader()
.upload_crate(app.http_client(), tarball, &krate, vers)?;

let (features, features2): (HashMap<_, _>, HashMap<_, _>) =
let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) =
features.into_iter().partition(|(_k, vals)| {
!vals
.iter()
Expand Down Expand Up @@ -367,6 +370,7 @@ pub fn add_dependencies(
default_features.eq(dep.default_features),
features.eq(&dep.features),
target.eq(dep.target.as_deref()),
explicit_name.eq(dep.explicit_name_in_toml.as_deref())
),
))
})
Expand Down
2 changes: 2 additions & 0 deletions src/downloads_counter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ mod tests {
None,
0,
self.user.id,
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
None,
)
.expect("failed to create version")
.save(conn, "ghost@example.com")
Expand Down
11 changes: 11 additions & 0 deletions src/models/dependency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct Dependency {
pub features: Vec<String>,
pub target: Option<String>,
pub kind: DependencyKind,
pub explicit_name: Option<String>,
}

#[derive(Debug, QueryableByName)]
Expand All @@ -43,6 +44,16 @@ pub enum DependencyKind {
// if you add a kind here, be sure to update `from_row` below.
}

impl From<cargo_registry_index::DependencyKind> for DependencyKind {
fn from(dk: cargo_registry_index::DependencyKind) -> Self {
match dk {
cargo_registry_index::DependencyKind::Normal => DependencyKind::Normal,
cargo_registry_index::DependencyKind::Build => DependencyKind::Build,
cargo_registry_index::DependencyKind::Dev => DependencyKind::Dev,
}
}
}

impl From<DependencyKind> for IndexDependencyKind {
fn from(dk: DependencyKind) -> Self {
match dk {
Expand Down
Loading

0 comments on commit c6f9beb

Please sign in to comment.