update docs (mostly llm generated, but validated) #779

Open · wants to merge 2 commits into base: main
58 changes: 58 additions & 0 deletions docs/README.md
@@ -0,0 +1,58 @@
# Generating the documentation

To generate the documentation you first have to build it, which requires installing a few extra packages.

First, you need to install the project itself by running the following command at the root of the code repository:

```bash
pip install -e .
```

You also need to install two extra packages:

```bash
# `hf-doc-builder` to build the docs
pip install git+https://github.com/huggingface/doc-builder@main
# `watchdog` for live reloads
pip install watchdog
```

---
**NOTE**

You only need to generate the documentation to inspect it locally (for instance, if you're planning changes and
want to check how they look before committing). You don't have to commit the built documentation.

---

## Building the documentation

Once you have set up `doc-builder` and the additional packages with the pip install commands above,
you can generate the documentation by typing the following command:

```bash
doc-builder build autotrain docs/source/ --build_dir ~/tmp/test-build
```

You can set `--build_dir` to any temporary folder you prefer. The command will create it and generate the
MDX files that are rendered as the documentation on the main website. You can inspect them in your favorite
Markdown editor.

## Previewing the documentation

To preview the docs, run the following command:

```bash
doc-builder preview autotrain docs/source/
```

The docs will be viewable at [http://localhost:5173](http://localhost:5173). You can also preview the docs once you
have opened a PR: a bot will add a comment with a link to the documentation built from your changes.

---
**NOTE**

The `preview` command only works with existing doc files. When you add a completely new file, you need to update
`_toctree.yml` and restart the `preview` command (`Ctrl+C` to stop it, then run `doc-builder preview ...` again).

---
2 changes: 2 additions & 0 deletions docs/source/text_classification_params.mdx
@@ -1,5 +1,7 @@
# Text Classification & Regression Parameters

[[autodoc]] trainers.text_classification.params.TextClassificationParams

```
--batch-size BATCH_SIZE
Training batch size to use
115 changes: 104 additions & 11 deletions src/autotrain/app/api_routes.py
@@ -28,6 +28,23 @@


def create_api_base_model(base_class, class_name):
"""
Creates a new Pydantic model based on a given base class and class name,
excluding specified fields.

Args:
base_class (Type): The base Pydantic model class to extend.
class_name (str): The name of the new model class to create.

Returns:
Type: A new Pydantic model class with the specified modifications.

Notes:
- The function uses type hints from the base class to define the new model's fields.
- Certain fields are excluded from the new model based on the class name.
- The function supports different sets of hidden parameters for different class names.
- The new model's configuration is set to have no protected namespaces.
"""
annotations = get_type_hints(base_class)
if class_name in ("LLMSFTTrainingParamsAPI", "LLMRewardTrainingParamsAPI"):
more_hidden_params = [
@@ -206,6 +223,32 @@ class ExtractiveQuestionAnsweringColumnMapping(BaseModel):


class APICreateProjectModel(BaseModel):
"""
APICreateProjectModel is a Pydantic model that defines the schema for creating a project.

Attributes:
project_name (str): The name of the project.
task (Literal): The type of task for the project. Supported tasks include various LLM tasks,
image classification, dreambooth, seq2seq, token classification, text classification,
text regression, tabular classification, tabular regression, image regression, VLM tasks,
and extractive question answering.
base_model (str): The base model to be used for the project.
hardware (Literal): The type of hardware to be used for the project. Supported hardware options
include various configurations of spaces and local.
params (Union): The training parameters for the project. The type of parameters depends on the
task selected.
username (str): The username of the person creating the project.
column_mapping (Optional[Union]): The column mapping for the project. The type of column mapping
depends on the task selected.
hub_dataset (str): The dataset to be used for the project.
train_split (str): The training split of the dataset.
valid_split (Optional[str]): The validation split of the dataset.

Methods:
validate_column_mapping(cls, values): Validates the column mapping based on the task selected.
validate_params(cls, values): Validates the training parameters based on the task selected.
"""

project_name: str
task: Literal[
"llm:sft",
@@ -530,6 +573,18 @@ def validate_params(cls, values):


def api_auth(request: Request):
"""
Authenticates the API request using a Bearer token.

Args:
request (Request): The incoming HTTP request object.

Returns:
str: The verified Bearer token if authentication is successful.

Raises:
HTTPException: If the token is invalid, expired, or missing.
"""
authorization = request.headers.get("Authorization")
if authorization:
schema, _, token = authorization.partition(" ")
@@ -553,9 +608,24 @@ def api_auth(request: Request):
@api_router.post("/create_project", response_class=JSONResponse)
async def api_create_project(project: APICreateProjectModel, token: bool = Depends(api_auth)):
"""
This function is used to create a new project
:param project: APICreateProjectModel
:return: JSONResponse
Asynchronously creates a new project based on the provided parameters.

Args:
project (APICreateProjectModel): The model containing the project details and parameters.
token (bool, optional): The authentication token. Defaults to Depends(api_auth).

Returns:
dict: A dictionary containing a success message, the job ID of the created project, and a success status.

Raises:
HTTPException: If there is an error during project creation.

Notes:
- The function determines the hardware type based on the project hardware attribute.
- It logs the provided parameters and column mapping.
- It sets the appropriate parameters based on the task type.
- It updates the parameters with the provided ones and creates an AppParams instance.
- The function then creates an AutoTrainProject instance and initiates the project creation process.
"""
provided_params = project.params.model_dump()
if project.hardware == "local":
@@ -609,18 +679,28 @@ async def api_create_project(project: APICreateProjectModel, token: bool = Depen
@api_router.get("/version", response_class=JSONResponse)
async def api_version():
"""
This function is used to get the version of the API
:return: JSONResponse
Returns the current version of the API.

This asynchronous function retrieves the version of the API from the
__version__ variable and returns it in a dictionary.

Returns:
dict: A dictionary containing the API version.
"""
return {"version": __version__}


@api_router.get("/logs", response_class=JSONResponse)
async def api_logs(job_id: str, token: bool = Depends(api_auth)):
"""
This function is used to get the logs of a project
:param job_id: str
:return: JSONResponse
Fetch logs for a specific job.

Args:
job_id (str): The ID of the job for which logs are to be fetched.
token (bool, optional): Authentication token, defaults to the result of api_auth dependency.

Returns:
dict: A dictionary containing the logs, success status, and a message.
"""
# project = AutoTrainProject(job_id=job_id, token=token)
# logs = project.get_logs()
@@ -630,9 +710,22 @@ async def api_logs(job_id: str, token: bool = Depends(api_auth)):
@api_router.get("/stop_training", response_class=JSONResponse)
async def api_stop_training(job_id: str, token: bool = Depends(api_auth)):
"""
This function is used to stop the training of a project
:param job_id: str
:return: JSONResponse
Stops the training job with the given job ID.

This asynchronous function pauses the training job identified by the provided job ID.
It uses the Hugging Face API to pause the space associated with the job.

Args:
job_id (str): The ID of the job to stop.
token (bool, optional): The authentication token, provided by dependency injection.

Returns:
dict: A dictionary containing a message and a success flag. If the training job
was successfully stopped, the message indicates success and the success flag is True.
If there was an error, the message contains the error details and the success flag is False.

Raises:
Exception: If there is an error while attempting to stop the training job.
"""
hf_api = HfApi(token=token)
try:
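For reference, the header parsing that `api_auth` performs with `authorization.partition(" ")` can be sketched in isolation. This is a simplified stand-in: the function name is hypothetical, and the real route raises `HTTPException` and verifies the token against Hugging Face rather than merely parsing it.

```python
def parse_bearer_token(authorization):
    """Extract the token from an 'Authorization: Bearer <token>' header value.

    Mirrors the scheme/token split done via str.partition in api_auth;
    raises ValueError where the real code raises HTTPException.
    """
    if not authorization:
        raise ValueError("Missing Authorization header")
    # partition splits on the first space: "Bearer abc" -> ("Bearer", " ", "abc")
    scheme, _, token = authorization.partition(" ")
    if scheme.lower() != "bearer" or not token:
        raise ValueError("Invalid Authorization header")
    return token
```

In the actual route, the returned token would then be checked for validity before the request proceeds.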
10 changes: 10 additions & 0 deletions src/autotrain/app/app.py
@@ -26,6 +26,16 @@

@app.get("/")
async def forward_to_ui(request: Request):
"""
Forwards the incoming request to the UI endpoint.

Args:
request (Request): The incoming HTTP request.

Returns:
RedirectResponse: A response object that redirects to the UI endpoint,
including any query parameters from the original request.
"""
query_params = request.query_params
url = "/ui/"
if query_params:
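The query-parameter forwarding that `forward_to_ui` describes can be illustrated with the standard library. This is a sketch of the URL-building step only (the helper name is hypothetical; the real handler wraps the result in a `RedirectResponse`):

```python
from urllib.parse import urlencode


def build_ui_redirect(query_params):
    """Build the /ui/ redirect target, preserving any query parameters."""
    url = "/ui/"
    if query_params:
        # Append the original query string so it survives the redirect.
        url = f"{url}?{urlencode(query_params)}"
    return url
```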
30 changes: 30 additions & 0 deletions src/autotrain/app/db.py
@@ -2,6 +2,36 @@


class AutoTrainDB:
"""
A class to manage job records in a SQLite database.

Attributes:
-----------
db_path : str
The path to the SQLite database file.
conn : sqlite3.Connection
The SQLite database connection object.
c : sqlite3.Cursor
The SQLite database cursor object.

Methods:
--------
__init__(db_path):
Initializes the database connection and creates the jobs table if it does not exist.

create_jobs_table():
Creates the jobs table in the database if it does not exist.

add_job(pid):
Adds a new job with the given process ID (pid) to the jobs table.

get_running_jobs():
Retrieves a list of all running job process IDs (pids) from the jobs table.

delete_job(pid):
Deletes the job with the given process ID (pid) from the jobs table.
"""

def __init__(self, db_path):
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
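A minimal, self-contained sketch of the jobs-table operations the `AutoTrainDB` docstring lists. The class name and exact schema here are assumptions for illustration; the real class may differ in detail:

```python
import sqlite3


class JobsDB:
    """Toy version of the job-tracking database described above."""

    def __init__(self, db_path=":memory:"):
        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()
        # Create the jobs table if it does not already exist.
        self.c.execute("CREATE TABLE IF NOT EXISTS jobs (pid INTEGER PRIMARY KEY)")
        self.conn.commit()

    def add_job(self, pid):
        """Record a new job by its process ID."""
        self.c.execute("INSERT OR IGNORE INTO jobs (pid) VALUES (?)", (pid,))
        self.conn.commit()

    def get_running_jobs(self):
        """Return the pids of all recorded jobs."""
        return [row[0] for row in self.c.execute("SELECT pid FROM jobs")]

    def delete_job(self, pid):
        """Remove a job record by its process ID."""
        self.c.execute("DELETE FROM jobs WHERE pid = ?", (pid,))
        self.conn.commit()
```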
21 changes: 21 additions & 0 deletions src/autotrain/app/models.py
@@ -4,13 +4,34 @@


def get_sorted_models(hub_models):
"""
Filters and sorts a list of models based on their download count.

Args:
hub_models (list): A list of model objects. Each model object must have the attributes 'id', 'downloads', and 'private'.

Returns:
list: A list of model IDs sorted by their download count in descending order. Only includes models that are not private.
"""
hub_models = [{"id": m.id, "downloads": m.downloads} for m in hub_models if m.private is False]
hub_models = sorted(hub_models, key=lambda x: x["downloads"], reverse=True)
hub_models = [m["id"] for m in hub_models]
return hub_models
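
Applied to toy inputs, the filtering and sorting above behaves like this. The sample objects are stand-ins built with `SimpleNamespace`, since real entries come from `huggingface_hub`:

```python
from types import SimpleNamespace


def get_sorted_models(hub_models):
    # Same logic as above: drop private models, sort by downloads
    # (descending), and return only the model IDs.
    hub_models = [{"id": m.id, "downloads": m.downloads} for m in hub_models if m.private is False]
    hub_models = sorted(hub_models, key=lambda x: x["downloads"], reverse=True)
    return [m["id"] for m in hub_models]


models = [
    SimpleNamespace(id="a/small", downloads=10, private=False),
    SimpleNamespace(id="b/big", downloads=500, private=False),
    SimpleNamespace(id="c/secret", downloads=999, private=True),
]
# get_sorted_models(models) -> ["b/big", "a/small"]
```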


def _fetch_text_classification_models():
"""
Fetches and sorts text classification models from the Hugging Face model hub.

This function retrieves models for the tasks "fill-mask" and "text-classification"
from the Hugging Face model hub, sorts them by the number of downloads, and combines
them into a single list. Additionally, it fetches trending models based on the number
of likes in the past 7 days, sorts them, and places them at the beginning of the list
if they are not already included.

Returns:
list: A sorted list of model identifiers from the Hugging Face model hub.
"""
hub_models1 = list(
list_models(
task="fill-mask",