From 2342a02afba04a46e89f6365fffefcea193d9835 Mon Sep 17 00:00:00 2001 From: Micah Wylde Date: Mon, 9 Sep 2024 16:53:59 -0700 Subject: [PATCH] Add Python to docker containers --- Cargo.lock | 4 +- crates/arroyo-connectors/Cargo.toml | 2 +- crates/arroyo-connectors/src/redis/mod.rs | 4 +- crates/arroyo-formats/src/json/mod.rs | 47 +---------------------- docker/Dockerfile | 22 ++++++++++- docker/install_deps.sh | 11 ++++++ 6 files changed, 38 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcfcc2183..296151e42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4873,7 +4873,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -7058,7 +7058,7 @@ dependencies = [ "indoc", "libc", "memoffset", - "parking_lot 0.12.3", + "parking_lot 0.11.2", "portable-atomic", "pyo3-build-config", "pyo3-ffi", diff --git a/crates/arroyo-connectors/Cargo.toml b/crates/arroyo-connectors/Cargo.toml index 785fa6554..9c0bddc21 100644 --- a/crates/arroyo-connectors/Cargo.toml +++ b/crates/arroyo-connectors/Cargo.toml @@ -61,7 +61,7 @@ tokio-tungstenite = { version = "0.20.1", features = ["native-tls"] } reqwest = { version = "0.11.20", features = ["stream"] } # Redis -redis = { version = "0.26.0", features = ["default", "tokio-rustls-comp", "cluster-async", "connection-manager"] } +redis = { version = "0.26.1", features = ["default", "tokio-rustls-comp", "cluster-async", "connection-manager"] } # Fluvio fluvio = {version = "0.23", features = ["openssl"]} diff --git a/crates/arroyo-connectors/src/redis/mod.rs b/crates/arroyo-connectors/src/redis/mod.rs index 44b333759..30bae8c2f 100644 --- a/crates/arroyo-connectors/src/redis/mod.rs +++ b/crates/arroyo-connectors/src/redis/mod.rs @@ -122,7 +122,7 @@ async fn test_inner( .unwrap(); redis::cmd("PING") - .query_async(&mut connection) + .query_async::<()>(&mut connection) .await .map_err(|e| anyhow!("Received error sending PING command: {:?}", 
e))?; } @@ -138,7 +138,7 @@ async fn test_inner( .unwrap(); redis::cmd("PING") - .query_async(&mut connection) + .query_async::<()>(&mut connection) .await .map_err(|e| anyhow!("Received error sending PING command: {:?}", e))?; } diff --git a/crates/arroyo-formats/src/json/mod.rs b/crates/arroyo-formats/src/json/mod.rs index 9e5b07e1c..8539e99b7 100644 --- a/crates/arroyo-formats/src/json/mod.rs +++ b/crates/arroyo-formats/src/json/mod.rs @@ -1,55 +1,10 @@ -use arrow::datatypes::{Field, Fields, SchemaRef}; -use arrow_array::builder::{ArrayBuilder, StringBuilder}; +use arrow::datatypes::{Field, Fields}; use arrow_schema::DataType; -use arroyo_rpc::formats::JsonFormat; use serde_json::{json, Value}; use std::collections::HashMap; pub mod schema; -pub fn deserialize_slice_json( - schema: &SchemaRef, - buffer: &mut [Box<dyn ArrayBuilder>], - format: &JsonFormat, - msg: &[u8], -) -> Result<(), String> { - let msg = if format.confluent_schema_registry { - &msg[5..] - } else { - msg - }; - - if format.unstructured { - let (idx, _) = schema - .column_with_name("value") - .expect("no 'value' column for unstructured json format"); - let array = buffer[idx] - .as_any_mut() - .downcast_mut::<StringBuilder>() - .expect("'value' column has incorrect type"); - - if format.include_schema { - // we need to deserialize it to pull out the payload - let v: Value = serde_json::from_slice(msg) - .map_err(|e| format!("Failed to deserialize json: {:?}", e))?; - let payload = v.get("payload").ok_or_else(|| { - "`include_schema` set to true, but record does not have a payload field".to_string() - })?; - - array.append_value(serde_json::to_string(payload).unwrap()); - } else { - array.append_value( - String::from_utf8(msg.to_vec()).map_err(|_| "data is not valid UTF-8")?, - ); - }; - } else { - serde_json::from_slice(msg) - .map_err(|e| format!("Failed to deserialize JSON into schema: {:?}", e))?; - } - - Ok(()) -} - pub fn field_to_json_schema(field: &Field) -> Value { match field.data_type() { 
arrow::datatypes::DataType::Null => { diff --git a/docker/Dockerfile b/docker/Dockerfile index 5ab928e95..1f9e41a5e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -8,9 +8,12 @@ ARG PROFILE=release RUN apt-get update && \ apt-get -y install curl pkg-config unzip build-essential libssl-dev openssl \ - cmake clang wget postgresql postgresql-client supervisor python3 python-is-python3 sudo bash libsasl2-dev && \ + cmake clang wget postgresql postgresql-client supervisor sudo bash libsasl2-dev && \ cargo install refinery_cli +# Install Python (manually, as no bookworm package for 3.12) + + # Install node & pnpm RUN mkdir -p /etc/apt/keyrings && \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ @@ -57,6 +60,14 @@ RUN apt-get update && \ COPY --from=builder /arroyo ./ COPY --from=builder /usr/local/bin/protoc /usr/local/bin/protoc COPY --from=builder /app/include/google /usr/local/include/google +COPY --from=builder /app/python /python + +RUN cp -r /python/bin/* /usr/local/bin/ && \ + cp -r /python/include/* /usr/local/include/ && \ + cp -r /python/lib/* /usr/local/lib/ && \ + cp -r /python/share/* /usr/local/share/ && \ + rm -rf /python && \ + ldconfig ENV INSTALL_RUSTC=true \ INSTALL_CLANG=true \ @@ -76,6 +87,15 @@ ENV ARROYO__API__RUN_HTTP_PORT=5115 COPY --from=builder /arroyo ./ COPY --from=builder /usr/local/bin/protoc /usr/local/bin/protoc COPY --from=builder /app/include/google /usr/local/include/google +COPY --from=builder /app/python /python + +RUN cp -r /python/bin/* /usr/local/bin/ && \ + cp -r /python/include/* /usr/local/include/ && \ + cp -r /python/lib/* /usr/local/lib/ && \ + cp -r /python/share/* /usr/local/share/ && \ + rm -rf /python && \ + ldconfig + EXPOSE 5115 ENTRYPOINT [ "/app/arroyo" ] diff --git a/docker/install_deps.sh b/docker/install_deps.sh index bca2abe9c..2f746848c 100644 --- a/docker/install_deps.sh +++ b/docker/install_deps.sh @@ -7,14 +7,25 @@ echo 
env if [ "$(uname -m)" = "x86_64" ]; then export PROTO_ARCH="x86_64"; export MOLD_ARCH="x86_64"; + export PY_ARCH="x86_64"; elif [ "$(uname -m)" = "aarch64" ]; then export PROTO_ARCH="aarch_64"; export MOLD_ARCH="aarch64"; + export PY_ARCH="aarch64"; else echo "Unsupported architecture: $(uname -m)" exit 1; fi +# Install Python 3.12 (manually, as no bookworm package for 3.12) +curl -OL https://github.com/indygreg/python-build-standalone/releases/download/20240814/cpython-3.12.5+20240814-${PY_ARCH}-unknown-linux-gnu-install_only.tar.gz +tar xvfz cpython*.tar.gz +cp -r python/bin/* /usr/local/bin/ +cp -r python/include/* /usr/local/include/ +cp -r python/lib/* /usr/local/lib/ +cp -r python/share/* /usr/local/share/ +ldconfig + # Install mold curl -OL https://github.com/rui314/mold/releases/download/v1.11.0/mold-1.11.0-${MOLD_ARCH}-linux.tar.gz tar xvfz mold*.tar.gz