Skip to content

Commit

Permalink
add audio streaming & pdf examples (#483)
Browse files Browse the repository at this point in the history
* add audio streaming

* Add pdf examples

* Update samples/rest/text_generation.sh

* remove test.pdf

Change-Id: Icadde0849a0d358b605e7cfe6ff208d49d639dfb

* use alt=sse for all streaming examples

Change-Id: I6ee214edcc06827d1e73f7c1fdd3e380e7988896

---------

Co-authored-by: Mark Daoust <markdaoust@google.com>
  • Loading branch information
shilpakancharla and MarkDaoust authored Jul 23, 2024
1 parent f8b049f commit 695ee95
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 7 deletions.
4 changes: 2 additions & 2 deletions samples/rest/chat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:ge

echo "[START chat_streaming]"
# [START chat_streaming]
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY \
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY \
-H 'Content-Type: application/json' \
-X POST \
-d '{
Expand Down Expand Up @@ -53,7 +53,7 @@ else
B64FLAGS="-w0"
fi

curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY \
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY \
-H 'Content-Type: application/json' \
-X POST \
-d '{
Expand Down
161 changes: 156 additions & 5 deletions samples/rest/text_generation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ MEDIA_DIR=$(realpath ${SCRIPT_DIR}/../../third_party)
IMG_PATH=${MEDIA_DIR}/organ.jpg
AUDIO_PATH=${MEDIA_DIR}/sample.mp3
VIDEO_PATH=${MEDIA_DIR}/Big_Buck_Bunny.mp4

BASE_URL="https://generativelanguage.googleapis.com"
PDF_PATH=${MEDIA_DIR}/test.pdf

if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then
B64FLAGS="--input"
else
B64FLAGS="-w0"
fi

BASE_URL="https://generativelanguage.googleapis.com"

echo "[START text_gen_text_only_prompt]"
# [START text_gen_text_only_prompt]
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
Expand Down Expand Up @@ -57,7 +58,7 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:g

echo "[START text_gen_multimodal_one_image_prompt_streaming]"
# [START text_gen_multimodal_one_image_prompt_streaming]
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
Expand Down Expand Up @@ -125,6 +126,54 @@ echo
jq ".candidates[].content.parts[].text" response.json
# [END text_gen_multimodal_audio]

echo "[START text_gen_multimodal_audio_streaming]"
# [START text_gen_multimodal_audio_streaming]
# Use File API to upload audio data to API request.
MIME_TYPE=$(file -b --mime-type "${AUDIO_PATH}")
NUM_BYTES=$(wc -c < "${AUDIO_PATH}")
DISPLAY_NAME=AUDIO

tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload url is in the response headers dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
-D upload-header.tmp \
-H "X-Goog-Upload-Protocol: resumable" \
-H "X-Goog-Upload-Command: start" \
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
-H "Content-Type: application/json" \
-d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"

# Upload the actual bytes.
curl "${upload_url}" \
-H "Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Offset: 0" \
-H "X-Goog-Upload-Command: upload, finalize" \
--data-binary "@${AUDIO_PATH}" 2> /dev/null > file_info.json

file_uri=$(jq ".file.uri" file_info.json)
echo file_uri=$file_uri

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
"contents": [{
"parts":[
{"text": "Please describe this file."},
{"file_data":{"mime_type": "audio/mpeg", "file_uri": '$file_uri'}}]
}]
}' 2> /dev/null > response.json

cat response.json
echo
# [END text_gen_multimodal_audio_streaming]

echo "[START text_gen_multimodal_video_prompt]"
# [START text_gen_multimodal_video_prompt]
# Use File API to upload audio data to API request.
Expand Down Expand Up @@ -231,7 +280,7 @@ do
state=$(jq ".file.state" file_info.json)
done

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
Expand All @@ -244,4 +293,106 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:s

cat response.json
echo
# [END text_gen_multimodal_video_prompt_streaming]
# [END text_gen_multimodal_video_prompt_streaming]

echo "[START text_gen_multimodal_pdf]"
# [START text_gen_multimodal_pdf]
MIME_TYPE=$(file -b --mime-type "${PDF_PATH}")
NUM_BYTES=$(wc -c < "${PDF_PATH}")
DISPLAY_NAME=TEXT


echo $MIME_TYPE
tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload url is in the response headers dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
-D upload-header.tmp \
-H "X-Goog-Upload-Protocol: resumable" \
-H "X-Goog-Upload-Command: start" \
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
-H "Content-Type: application/json" \
-d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"

# Upload the actual bytes.
curl "${upload_url}" \
-H "Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Offset: 0" \
-H "X-Goog-Upload-Command: upload, finalize" \
--data-binary "@${PDF_PATH}" 2> /dev/null > file_info.json

file_uri=$(jq ".file.uri" file_info.json)
echo file_uri=$file_uri

# Now generate content using that file
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
"contents": [{
"parts":[
{"text": "Can you add a few more lines to this poem?"},
{"file_data":{"mime_type": "application/pdf", "file_uri": '$file_uri'}}]
}]
}' 2> /dev/null > response.json

cat response.json
echo

jq ".candidates[].content.parts[].text" response.json
# [END text_gen_multimodal_pdf]

echo "[START text_gen_multimodal_pdf_streaming]"
# [START text_gen_multimodal_pdf_streaming]
MIME_TYPE=$(file -b --mime-type "${PDF_PATH}")
NUM_BYTES=$(wc -c < "${PDF_PATH}")
DISPLAY_NAME=TEXT


echo $MIME_TYPE
tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload url is in the response headers dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
-D upload-header.tmp \
-H "X-Goog-Upload-Protocol: resumable" \
-H "X-Goog-Upload-Command: start" \
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
-H "Content-Type: application/json" \
-d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"

# Upload the actual bytes.
curl "${upload_url}" \
-H "Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Offset: 0" \
-H "X-Goog-Upload-Command: upload, finalize" \
--data-binary "@${PDF_PATH}" 2> /dev/null > file_info.json

file_uri=$(jq ".file.uri" file_info.json)
echo file_uri=$file_uri

# Now generate content using that file
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
"contents": [{
"parts":[
{"text": "Can you add a few more lines to this poem?"},
{"file_data":{"mime_type": "application/pdf", "file_uri": '$file_uri'}}]
}]
}' 2> /dev/null > response.json

cat response.json
echo
# [END text_gen_multimodal_pdf_streaming]

0 comments on commit 695ee95

Please sign in to comment.