requirements test

neonwatty · Jul 17, 2024 · d1602e0 · d1602e0
1 parent 0705bed
commit d1602e0
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 34 deletions.
diff --git a/transcript_downloader_walkthrough.ipynb b/transcript_downloader_walkthrough.ipynb
@@ -14,6 +14,7 @@
    "outputs": [],
    "source": [
     "import os\n",
+    "\n",
     "# if running in collab pull repo and install requirements\n",
     "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
     "    !git clone https://github.com/jermwatt/youtube_transcript_downloader.git\n",
@@ -97,15 +98,16 @@
    "source": [
     "import pandas as pd\n",
     "\n",
+    "\n",
     "def parse_input_file(input_file_path: str) -> list:\n",
     "    youtube_urls = []\n",
-    "    with open(input_file_path, 'r') as file:\n",
+    "    with open(input_file_path, \"r\") as file:\n",
     "        for line in file:\n",
     "            youtube_urls.append(line.strip())\n",
     "    return youtube_urls\n",
     "\n",
-    "def save_output(data: list,\n",
-    "                output_file_path: str) -> None:\n",
+    "\n",
+    "def save_output(data: list, output_file_path: str) -> None:\n",
     "    df = pd.DataFrame(data)\n",
     "    df.to_csv(output_file_path, index=False)"
    ]
@@ -134,10 +136,11 @@
    "source": [
     "import re\n",
     "\n",
+    "\n",
     "def is_valid_youtube_shorts_url(url: str) -> bool:\n",
     "    if not isinstance(url, str):\n",
-    "        return False \n",
-    "    pattern = r'^https://www\\.youtube\\.com/shorts/[A-Za-z0-9_-]{11}$'  # youtube vido ids are always 11 chars long\n",
+    "        return False\n",
+    "    pattern = r\"^https://www\\.youtube\\.com/shorts/[A-Za-z0-9_-]{11}$\"  # youtube video ids are always 11 chars long\n",
     "    return re.match(pattern, url) is not None"
    ]
   },
@@ -176,7 +179,8 @@
    "source": [
     "from typing import List, Dict\n",
     "from youtube_transcript_api import YouTubeTranscriptApi\n",
-    "    \n",
+    "\n",
+    "\n",
     "def get_single_transcript(youtube_url: str) -> dict:\n",
     "    try:\n",
     "        if is_valid_youtube_shorts_url(youtube_url):\n",
@@ -194,6 +198,7 @@
     "        print(f\"FAILURE: transcript pull for youtube_url - {youtube_url} - failed with exception {e}\")\n",
     "        return {}\n",
     "\n",
+    "\n",
     "def get_batch_transcripts(youtube_urls: List[str]) -> List[Dict]:\n",
     "    valid_urls = []\n",
     "    valid_vids = []\n",
@@ -298,10 +303,10 @@
     }
    ],
    "source": [
-    "# print out first few lines of input \n",
+    "# print out first few lines of input\n",
     "with open(\"data/input/test_input.txt\") as myfile:\n",
-    "    first_few_lines=myfile.readlines(1024)[0:3] \n",
-    "print(first_few_lines)\n"
+    "    first_few_lines = myfile.readlines(1024)[0:3]\n",
+    "print(first_few_lines)"
    ]
   },
   {

diff --git a/youtube_shorts_transcript_downloader/app.py b/youtube_shorts_transcript_downloader/app.py
@@ -4,33 +4,36 @@
 
 st.set_page_config(page_title="Youtube Shorts Transcript Downloader", layout="wide")
 st.title("YT Shorts Transcript Downloader")
-st.markdown(
-    "instructions: enter in urls separated by commas or upload a text file with one url per line"
-)
+st.markdown("instructions: enter in urls separated by commas or upload a text file with one url per line")
 
 
 base = st.container(border=True)
 with base:
     col1, sep_col, col2 = st.columns([5, 2, 5])
-    
+
     with col1:
-        text_urls = st.text_area("youtube shorts urls", value="", placeholder="enter urls separated by commas - for example: https://www.youtube.com/shorts/o7a9hx-Pqyo, https://www.youtube.com/shorts/xkAYLnIsfX4")
-
+        text_urls = st.text_area(
+            "youtube shorts urls",
+            value="",
+            placeholder="enter urls separated by commas - for example: https://www.youtube.com/shorts/o7a9hx-Pqyo, https://www.youtube.com/shorts/xkAYLnIsfX4",
+        )
+
     with col2:
         uploaded_file = st.file_uploader("Choose a File", type=["txt"])
-        
+
     col3, col4, col5 = st.columns([3, 2, 3])
     with col3:
         trans_button_val = st.button(label="fetch transcripts", type="primary")
     with col4:
         empty_container = st.container()
     with col5:
         placeholder = st.empty()
-        
+
 download_area = st.container()
 
 # https://www.youtube.com/shorts/o7a9hx-Pqyo, https://www.youtube.com/shorts/xkAYLnIsfX4
 
+
 @st.cache_data
 def convert_df(df: pd.DataFrame) -> "csv":
     # IMPORTANT: Cache the conversion to prevent computation on every rerun
@@ -62,10 +65,10 @@ def button_logic(youtube_short_urls: list) -> None:
         if len(text_urls.strip()) > 0:
             st.warning("you can enter urls manually or from file but not both", icon="⚠️")
             st.stop()
-    
+
     if uploaded_file.type == "text/plain":
         from io import StringIO
-        
+
         stringio = StringIO(uploaded_file.read().decode("utf-8"))
         for line in stringio:
             youtube_short_urls.append(line.strip())
@@ -78,14 +81,14 @@ def button_logic(youtube_short_urls: list) -> None:
         if uploaded_file is not None:
             st.warning("you can enter urls manually or from file but not both", icon="⚠️")
             st.stop()
-        
+
         try:
             text_urls_split = text_urls.split(",")
             text_urls_split = [v.strip() for v in text_urls_split]
             youtube_short_urls = text_urls_split
-        except:
-            st.warning("please check your manually entered urls", icon="⚠️") 
+        except:  # noqa E722
+            st.warning("please check your manually entered urls", icon="⚠️")
             st.stop()
-    
+
     with st.spinner(text="transcript pull in progress..."):
         button_logic(youtube_short_urls)
diff --git a/youtube_shorts_transcript_downloader/transcripts.py b/youtube_shorts_transcript_downloader/transcripts.py
@@ -5,8 +5,8 @@
 
 def is_valid_youtube_shorts_url(url: str) -> bool:
     if not isinstance(url, str):
-        return False 
-    pattern = r'^https://www\.youtube\.com/shorts/[A-Za-z0-9_-]{11}$'  # youtube vido ids are always 11 chars long
+        return False
+    pattern = r"^https://www\.youtube\.com/shorts/[A-Za-z0-9_-]{11}$"  # youtube video ids are always 11 chars long
     return re.match(pattern, url) is not None
 
 
@@ -24,9 +24,7 @@ def get_single_transcript(youtube_url: str) -> dict:
             print(f"FAILURE: youtube_url is not valid - {youtube_url}")
             return {}
     except Exception as e:
-        print(
-            f"FAILURE: transcript pull for youtube_url - {youtube_url} - failed with exception {e}"
-        )
+        print(f"FAILURE: transcript pull for youtube_url - {youtube_url} - failed with exception {e}")
         return {}
 
 
@@ -39,12 +37,9 @@ def get_batch_transcripts(youtube_urls: List[str]) -> List[Dict]:
             valid_urls.append(url)
             valid_vids.append(vid)
     try:
-        video_transcripts = YouTubeTranscriptApi.get_transcripts(
-            valid_vids, languages=["en"]
-        )[0]
-        print(YouTubeTranscriptApi.get_transcripts(
-            valid_vids, languages=["en"]))
-
+        video_transcripts = YouTubeTranscriptApi.get_transcripts(valid_vids, languages=["en"])[0]
+        print(YouTubeTranscriptApi.get_transcripts(valid_vids, languages=["en"]))
+
         entries = []
         for i in range(len(valid_urls)):
             entry = {}