diff --git a/.gitignore b/.gitignore
index e0582af..9062184 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,4 +167,4 @@ config.yml
 hydra_outputs/
 .commit0*
 .agent*
-docs/analysis_*.md
\ No newline at end of file
+docs/analysis*.md
\ No newline at end of file
diff --git a/docs/analysis.md b/docs/analysis.md
deleted file mode 100644
index bc2a8f9..0000000
--- a/docs/analysis.md
+++ /dev/null
@@ -1,6 +0,0 @@
-
-| | Name | Summary | |
-|--|--------|----------|--|
-||[reference](/analysis_reference)|3628 / 33 ; duration: 18.66s||
-||[test-save-commit0](/analysis_test-save-commit0)|0 / 0 ; duration: 0.00s||
-||[model_name-claude-3-5-sonnet-20240620__run_tests-0__use_lint_info-0__use_spec_info-0](/analysis_model_name-claude-3-5-sonnet-20240620__run_tests-0__use_lint_info-0__use_spec_info-0)|0 / 0 ; duration: 0.00s||
\ No newline at end of file
diff --git a/docs/javascripts/tablesort.js b/docs/javascripts/tablesort.js
new file mode 100644
index 0000000..c916015
--- /dev/null
+++ b/docs/javascripts/tablesort.js
@@ -0,0 +1,6 @@
+document$.subscribe(function() {
+    var tables = document.querySelectorAll("article table:not([class])")
+    tables.forEach(function(table) {
+      new Tablesort(table)
+    })
+  })
\ No newline at end of file
diff --git a/docs/render_submissions.py b/docs/render_submissions.py
index 799bb16..e2c95c2 100644
--- a/docs/render_submissions.py
+++ b/docs/render_submissions.py
@@ -2,15 +2,19 @@
 import os
 import glob
 import ast
-from datasets import load_dataset
+import subprocess
 import json
 import shutil
 import argparse
+import pypdf
+import tqdm
+
+from datasets import load_dataset
 from transformers import AutoTokenizer
+
 from commit0.harness.constants import SPLIT
 from commit0.harness.utils import clone_repo
 from commit0.cli import write_commit0_dot_file
-import pypdf
 
 import logging
 
@@ -35,26 +39,34 @@ def get_pytest_info(path_to_logs, repo_name, branch_name):
             }
             report_file_path = os.path.join(path_to_logs, pytest_hash, "report.json")
             if not os.path.exists(report_file_path):
-                reason_for_failure = open(
+                if os.path.exists(
                     os.path.join(path_to_logs, pytest_hash, "test_output.txt")
-                ).read()
+                ):
+                    reason_for_failure = open(
+                        os.path.join(path_to_logs, pytest_hash, "test_output.txt")
+                    ).read()
+                else:
+                    reason_for_failure = "Unknown failure."
                 pytest_info[testname]["failed_to_run"] = reason_for_failure
                 return pytest_info
             pytest_report = json.load(open(report_file_path))
             pytest_summary = pytest_report["summary"]
             pytest_info[testname]["summary"] = pytest_summary
+            # TODO this is a hacky fix, should eventually do a check against true num collected
+            if pytest_summary["collected"] < 5:
+                reason_for_failure = "Pytest collection failure."
+                pytest_info[testname]["failed_to_run"] = reason_for_failure
+                return pytest_info
             pytest_info[testname]["duration"] = pytest_report["duration"]
             if "passed" not in pytest_summary:
                 pytest_summary["passed"] = 0
             for test in pytest_report["tests"]:
-                if test["outcome"] == "passed":
+                if test["outcome"] in {"passed", "skipped"}:
                     continue
                 if "longrepr" in test:
                     failure_string = test["longrepr"]
                 elif "???" in test:
                     failure_string = test["???"]["longrepr"]
-                elif test["outcome"] == "error":
-                    failure_string = test["setup"]["longrepr"]
                 elif "setup" in test and "longrepr" in test["setup"]:
                     failure_string = test["setup"]["longrepr"]
                 elif "call" in test and "longrepr" in test["call"]:
@@ -76,17 +88,6 @@ def get_pytest_info(path_to_logs, repo_name, branch_name):
 
 
 def get_coverage_info(path_to_logs, repo_name, branch_name):
-    # coverage_fp = open(os.path.join(path_to_logs, pytest_hash, "coverage.json"))
-    # for filename, file_coverage in json.load(coverage_fp)["files"].items():
-    #     if not any(relevant_function.startswith(filename) for relevant_function in relevant_functions):
-    #         continue
-    #     for funcname, func_coverage in file_coverage["functions"].items():
-    #         if f"{filename}::{funcname}" not in relevant_functions: continue
-    #         pycov_info[testname][f"{filename}::{funcname}"] = {
-    #             "implementation": submission_info["function_impls"][f"{filename}::{funcname}"],
-    #             "executed_lines": func_coverage["executed_lines"],
-    #             "executed_branches": func_coverage["executed_branches"]
-    #         }
     raise NotImplementedError
 
 
@@ -119,7 +120,7 @@ def get_blank_repo_metrics(
             print(f"{e}: Trouble opening {filename}")
             continue
 
-        filename = filename[len(blank_source_code_folder):].lstrip(" /")
+        filename = filename[len(blank_source_code_folder) :].lstrip(" /")
         try:
             code_tree = ast.parse(code)
         except Exception as e:
@@ -162,145 +163,212 @@ def get_blank_repo_metrics(
     return blank_repo_metrics
 
 
-def render_mds(subfolder="docs"):
-    all_submissions = {}
+leaderboard_header = """\n\n## Leaderboard ({split})
+| Name | Repos Resolved (/{num_repos}) | Total Tests Passed (/{total_num_tests}) | Test Duration (s) | Date | Analysis | Github |
+|------|:-------------------------:|:--------------------:|:--------------------:|:----------:|----|----|
+"""
-    method_repo_pytests = {}
-    for branch_name in glob.glob(os.path.join(analysis_files_path, "*")):
-        branch_name = os.path.basename(branch_name)
-        if branch_name in {"blank", "repos", "submission_repos"}:
-            continue
-        all_submissions[branch_name] = {}
-        for repo_file in glob.glob(
-            os.path.join(analysis_files_path, branch_name, "*.json")
-        ):
+submission_table_header = """# Submission Name: **{display_name}** (split: {split})
 
-            repo_metrics_output_file = os.path.join(
-                analysis_files_path, branch_name, repo_file
-            )
-            repo_metrics = json.load(open(repo_metrics_output_file))
-            repo_name = os.path.basename(repo_file[: -len(".json")])
-
-            all_submissions[branch_name][repo_name] = {}
-
-            method_repo_pytests[
-                f"{branch_name}_{repo_name}"
-            ] = f"# Submission Name: {branch_name}\n# Repository: {repo_name}"
-            if "pytest_results" in repo_metrics:
-                repo_metrics = repo_metrics["pytest_results"]
-            for pytest_group, pytest_info in repo_metrics.items():
-                pytest_group = os.path.basename(pytest_group.strip("/"))
-                patch_diff = (
-                    f"""\n\n### Patch diff\n```diff\n{pytest_info['patch_diff']}```"""
-                )
-                if "failed_to_run" in pytest_info:
-                    all_submissions[branch_name][repo_name][pytest_group] = {
-                        "failed_to_run": pytest_info["failed_to_run"]
-                    }
-                    method_repo_pytests[
-                        f"{branch_name}_{repo_name}"
-                    ] += f"""\n## Failed to run pytests\n```\n{pytest_info['failed_to_run']}\n```"""
-                else:
-                    all_submissions[branch_name][repo_name][pytest_group] = {
-                        "summary": pytest_info["summary"],
-                        "duration": pytest_info["duration"],
-                    }
-                    method_repo_pytests[
-                        f"{branch_name}_{repo_name}"
-                    ] += f"""\n## Pytest Summary: {pytest_group}
+| Repository | Resolved | Pass Rate | Test Duration (s) | Analysis | Github Link |
+|------------|---------|:-----:|:-----:|-----|-----|"""
+
+pytest_summary_table_header = """\n## Pytest Summary for test `{pytest_group}`
 | status   | count |
 |:---------|:-----:|
 """
-                    for category, count in pytest_info["summary"].items():
-                        if category not in {"duration"}:
-                            method_repo_pytests[
-                                f"{branch_name}_{repo_name}"
-                            ] += f"""| {category} | {count} |\n"""
-                        else:
-                            method_repo_pytests[
-                                f"{branch_name}_{repo_name}"
-                            ] += f"""| {category} | {float(count):.2f}s |\n"""
-
-                    method_repo_pytests[
-                        f"{branch_name}_{repo_name}"
-                    ] += f"\n## Failed pytest outputs: {pytest_group}\n\n"
-                    for testname, failure in pytest_info["failures"].items():
-                        shortened_testname = os.path.basename(testname)
-                        method_repo_pytests[f"{branch_name}_{repo_name}"] += (
-                            f"### {shortened_testname}\n\n<details><summary> {shortened_testname}</summary>"
-                            f"<pre>\n{failure['failure_string']}\n</pre>\n</details>\n"
-                        )
-            back_button = f"[back to {branch_name} summary]({f'analysis_{branch_name}'})\n\n"
-            with open(
-                os.path.join(subfolder, f"analysis_{branch_name}_{repo_name}.md"), "w"
-            ) as wf:
-                wf.write(
-                    back_button
-                    + method_repo_pytests[f"{branch_name}_{repo_name}"]
-                    + patch_diff
+
+def render_mds(overwrite_previous, subfolder="docs"):
+    leaderboard = {}
+
+    split_to_total_tests = {
+        "lite": 3628,
+        "all": 140926,
+    }  # hard-coded to skip running it later
+    for split in tqdm.tqdm(["lite", "all"]):
+        num_repos = len(SPLIT[split])
+        # total_num_tests = 0
+        # for repo_name in SPLIT[split]:
+        #     repo_tests = subprocess.run(['commit0', 'get-tests', repo_name], capture_output=True, text=True).stdout.strip()
+        #     total_num_tests += len(repo_tests.splitlines())
+        leaderboard[split] = leaderboard_header.format(
+            split=split,
+            num_repos=num_repos,
+            total_num_tests=split_to_total_tests[split],
+        )
+
+    for org_path in tqdm.tqdm(glob.glob(os.path.join(analysis_files_path, "*"))):
+        org_name = os.path.basename(org_path)
+        if org_name in {"blank", "repos", "submission_repos"}:
+            continue
+        for branch_path in glob.glob(os.path.join(org_path, "*.json")):
+            cum_tests_passed = 0
+            repos_resolved = 0
+            total_duration = 0.0
+            branch_metrics = json.load(open(branch_path))
+            submission_info = branch_metrics["submission_info"]
+            split = submission_info["split"]
+            org_name = submission_info["org_name"]
+            project_page_link = submission_info["project_page"]
+            display_name = submission_info["display_name"]
+            submission_date = submission_info["submission_date"]
+            branch_name = submission_info["branch"]
+            org_branch_filepath = os.path.join(
+                subfolder, f"analysis_{org_name}_{branch_name}.md"
+            )
+            write_submission = True
+            if os.path.exists(org_branch_filepath) and not overwrite_previous:
+                write_submission = False
+
+            if write_submission:
+                submission_page = submission_table_header.format(
+                    display_name=display_name, split=split
                 )
-    # Render general page. Has buttons to all methods
-    leaderboard = """
-| | Name | Summary | |
-|--|--------|----------|--|"""
-    # Render method page. Per method, buttons to all repos.
-    method_to_repos = {}
-    # Render method & repo page. Has "back" button.
-    for branch_name, branch_info in all_submissions.items():
-        cum_pytests = {"passed": 0}
-        method_to_repos[branch_name] = """
-| | Repository | Summary | |
-|-|------------|---------|-|"""
-        total_duration = 0.0
-        for repo_name, repo_test_info in branch_info.items():
-            for testname, test_info in repo_test_info.items():
-                if "failed_to_run" in test_info:
-                    summary_pytests_string = "failure"
-                else:
-                    total_duration += test_info["duration"]
-                    summary_pytests_string = (
-                        f"`{testname}`: {test_info['summary']['passed']} / "
-                        f"{test_info['summary']['collected']} ; duration: {test_info['duration']:.2f}s"
+            for repo_name, repo_pytest_results in branch_metrics.items():
+                if repo_name == "submission_info":
+                    continue
+                if write_submission:
+                    submission_repo_page = f"# **{display_name}**: {repo_name}"
+                    org_branch_repo_filepath = os.path.join(
+                        subfolder, f"analysis_{org_name}_{branch_name}_{repo_name}.md"
                     )
-                    for category, count in test_info["summary"].items():
-                        if category not in cum_pytests:
-                            cum_pytests[category] = 0
-                        if isinstance(count, int):
-                            cum_pytests[category] += int(count)
-                        elif isinstance(count, float):
-                            cum_pytests[category] += float(count)
-                method_to_repos[branch_name] += (
-                    f"\n||[{repo_name}]({f'analysis_{branch_name}_{repo_name}'})|"
-                    f"{summary_pytests_string}||"
-                )
-                break  # assume we ran all tests. will add functionality for checking diff tests later, as we need it.
-        summary_pytests_string = (
-            f"{cum_pytests['passed']} / {cum_pytests['collected']} ; duration: {total_duration:.2f}s"
-        )
-        leaderboard += f"\n||[{branch_name}]({f'analysis_{branch_name}'})|{summary_pytests_string}||"
-        back_button = f"[back to all submissions]({f'analysis'})\n\n"
-        with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), "w") as wf:
-            wf.write(back_button + "\n" + method_to_repos[branch_name])
-    with open(os.path.join(subfolder, "analysis.md"), "w") as wf:
-        wf.write(leaderboard)
+                if isinstance(repo_pytest_results, str):
+                    submission_repo_page = f"# **{display_name}**: {repo_name}\n\n## Failed to clone\n\n{repo_pytest_results}"
+                    org_branch_repo_filepath = os.path.join(
+                        subfolder, f"analysis_{org_name}_{branch_name}_{repo_name}.md"
+                    )
+                    github_hyperlink = (
+                        f"{project_page_link}/{repo_name}/tree/{branch_name}"
+                    )
+                    if branch_name == "reference":
+                        github_hyperlink = f"{project_page_link}/{repo_name}"
+                    submission_page = submission_table_header.format(
+                        display_name=display_name, split=split
+                    ) + (
+                        f"\n| {repo_name} | No; Failed to clone. | - | - | "
+                        f"[Analysis](/{f'analysis_{org_name}_{branch_name}_{repo_name}'}) | "
+                        f"[Github]({github_hyperlink}) |"
+                    )
+                    back_button = f"[back to {display_name} summary](/{f'analysis_{org_name}_{branch_name}'})\n\n"
+                    with open(org_branch_repo_filepath, "w") as wf:
+                        wf.write(back_button + submission_repo_page)
+                    continue
+
+                for pytest_group, pytest_info in repo_pytest_results.items():
+                    pytest_group = os.path.basename(pytest_group.strip("/"))
+                    patch_diff = f"""\n\n## Patch diff\n```diff\n{pytest_info['patch_diff']}```"""
+                    if "failed_to_run" in pytest_info:
+                        resolved = False
+                        if write_submission:
+                            submission_repo_page += (
+                                f"\n## Failed to run pytests for test `{pytest_group}`\n"
+                                f"```\n{pytest_info['failed_to_run']}\n```"
+                            )
+                        pytest_details = "Pytest failed"
+                        duration = "Failed."
+                    else:
+                        resolved = False
+                        if "passed" in pytest_info["summary"]:
+                            if "skipped" in pytest_info["summary"]:
+                                resolved = pytest_info["summary"]["passed"] + pytest_info["summary"]["skipped"] == pytest_info["summary"]["total"]
+                            else:
+                                resolved = pytest_info["summary"]["passed"] == pytest_info["summary"]["total"]
+                        if write_submission:
+                            submission_repo_page += pytest_summary_table_header.format(
+                                pytest_group=pytest_group
+                            )
+                            for category, count in pytest_info["summary"].items():
+                                if category not in {"duration"}:
+                                    submission_repo_page += (
+                                        f"""| {category} | {count} |\n"""
+                                    )
+                                else:
+                                    submission_repo_page += (
+                                        f"""| {category} | {float(count):.2f}s |\n"""
+                                    )
+
+                            submission_repo_page += "\n## Failed pytests:\n\n"
+                            for testname, failure in pytest_info["failures"].items():
+                                shortened_testname = os.path.basename(testname)
+                                submission_repo_page += (
+                                    f"### {shortened_testname}\n\n<details><summary> {shortened_testname}</summary>"
+                                    f"<pre>\n{failure['failure_string']}\n</pre>\n</details>\n"
+                                )
+                        cum_tests_passed += pytest_info["summary"]["passed"]
+                        total_duration += pytest_info["duration"]
+                        repos_resolved += int(resolved)
+                        if write_submission:
+                            pytest_details = f"{pytest_info['summary']['passed']} / {pytest_info['summary']['total']}"
+                            duration = f"{pytest_info['duration']:.2f}"
+                    break
+                if write_submission:
+                    github_hyperlink = (
+                        f"{project_page_link}/{repo_name}/tree/{branch_name}"
+                    )
+                    if branch_name == "reference":
+                        github_hyperlink = f"{project_page_link}/{repo_name}"
+                    submission_page += (
+                        f"\n| {repo_name} | {'Yes' if resolved else 'No'} | {pytest_details} | "
+                        f"{duration} | [Analysis](/{f'analysis_{org_name}_{branch_name}_{repo_name}'}) | "
+                        f"[Github]({github_hyperlink}) |"
+                    )
+                    back_button = f"[back to {display_name} summary](/{f'analysis_{org_name}_{branch_name}'})\n\n"
+                    with open(org_branch_repo_filepath, "w") as wf:
+                        wf.write(back_button + submission_repo_page + patch_diff)
+            if write_submission:
+                back_button = f"[back to all submissions](/{f'analysis'})\n\n"
+                with open(org_branch_filepath, "w") as wf:
+                    wf.write(back_button + "\n" + submission_page)
+            analysis_link = f"[Analysis](/{f'analysis_{org_name}_{branch_name}'})"
+            github_link = f"[Github]({project_page_link})"
+            leaderboard[split] += (
+                f"\n|{display_name}|"
+                f"{repos_resolved}|"
+                f"{cum_tests_passed}|"
+                f"{total_duration:.2f}|"
+                f"{submission_date}|"
+                f"{analysis_link}|"
+                f"{github_link}|"
+            )
+
+    leaderboard_filepath = os.path.join(subfolder, "analysis.md")
+    with open(leaderboard_filepath, "w") as wf:
+        wf.write(leaderboard["lite"] + "\n\n" + leaderboard["all"])
 
 
 def get_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--do_setup", action="store_true")
-    parser.add_argument("--get_blank_details", action="store_true")
-    parser.add_argument("--get_reference_details", action="store_true")
-    parser.add_argument("--keep_previous_eval", action="store_true")
-    parser.add_argument("--analyze_submissions", action="store_true")
+    parser.add_argument(
+        "--do_setup", action="store_true", help="Run commit0 setup with specified split"
+    )
+    parser.add_argument(
+        "--get_blank_details",
+        action="store_true",
+        help="Get difficulty metrics of blank repository",
+    )
+    parser.add_argument(
+        "--get_reference_details",
+        action="store_true",
+        help="Get pytest results from reference",
+    )
+    parser.add_argument(
+        "--analyze_submissions",
+        action="store_true",
+        help="Get pytest results from submissions with split",
+    )
     parser.add_argument("--render_webpages", action="store_true")
-
-    parser.add_argument("--split", type=str, default="lite")
+    parser.add_argument("--split", type=str, help="all or lite")
     parser.add_argument(
         "--tokenizer_name", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct"
    )
+    parser.add_argument(
+        "--overwrite_previous_eval",
+        action="store_true",
+        help="Overwrite cached pytest info"
+        # TODO add finer granularity so can specify which ones to overwrite
+    )
     return parser.parse_args()
@@ -320,14 +388,14 @@ def main(args):
             f"--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml"
         )
         branch_name = "blank"
-        if not args.keep_previous_eval:
+        if args.overwrite_previous_eval:
             if os.path.exists(os.path.join(analysis_files_path, branch_name)):
                 shutil.rmtree(os.path.join(analysis_files_path, branch_name))
         os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True)
         tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name)
         for example in dataset:
             repo_name = example["repo"].split("/")[-1]
-            if args.split != "all" and repo_name not in SPLIT[args.split]:
+            if repo_name not in SPLIT[args.split]:
                 continue
 
             repo_metrics_output_file = os.path.join(
@@ -350,73 +418,114 @@ def main(args):
             json.dump(repo_metrics, open(repo_metrics_output_file, "w"), indent=4)
 
     if args.get_reference_details:
+        branch_name = "reference"
+        org_name = f"commit0_{args.split}"
+        commit0_dot_file_path = os.path.join(
+            analysis_files_path, "repos", org_name, branch_name, ".commit0.yaml"
+        )
+        submission_repos_path = os.path.join(
+            analysis_files_path, "repos", org_name, branch_name
+        )
         if args.do_setup:
             os.system(
-                f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos "
-                f"--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml"
+                f"commit0 setup {args.split} --base-dir {submission_repos_path} "
+                f"--commit0-dot-file-path {commit0_dot_file_path}"
             )
-        branch_name = "reference"
-        os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True)
-        if not args.keep_previous_eval:
-            for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"):
-                if os.path.exists(os.path.join(repo_log_path, branch_name)):
-                    shutil.rmtree(os.path.join(repo_log_path, branch_name))
-        os.system(
-            "commit0 evaluate --reference "
-            f"--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml"
+        submission_metrics_output_file = os.path.join(
+            analysis_files_path, org_name, f"{branch_name}.json"
         )
+        submission_details = {
+            "submission_info": {
+                "org_name": org_name,
+                "branch": branch_name,
+                "display_name": "Reference (Gold)",
+                "submission_date": "NA",
+                "split": args.split,
+                "project_page": "https://github.com/commit-0",
+            }
+        }
+        os.makedirs(os.path.join(analysis_files_path, org_name), exist_ok=True)
+        need_re_eval = False
+        for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"):
+            if os.path.exists(os.path.join(repo_log_path, branch_name)):
+                if args.overwrite_previous_eval:
+                    shutil.rmtree(os.path.join(repo_log_path, branch_name))
+                else:
+                    need_re_eval = True
+        if args.overwrite_previous_eval or need_re_eval:
+            os.system(
+                "commit0 evaluate --reference "
+                f"--commit0-dot-file-path {commit0_dot_file_path}"
+            )
         # get coverage and pytest info for each repo
         for example in dataset:
             repo_name = example["repo"].split("/")[-1]
-            if args.split != "all" and repo_name not in SPLIT[args.split]:
+            if repo_name not in SPLIT[args.split]:
                 continue
-            repo_metrics_output_file = os.path.join(
-                analysis_files_path, branch_name, f"{repo_name}.json"
-            )
-
             path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}"
             pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name)
-            json.dump(pytest_results, open(repo_metrics_output_file, "w"), indent=4)
+            submission_details[repo_name] = pytest_results
+        json.dump(
+            submission_details, open(submission_metrics_output_file, "w"), indent=4
+        )
+        print(f"Saved pytest info to {submission_metrics_output_file}")
 
     if args.analyze_submissions:
-        commit0_dot_file_path = os.path.join(
-            analysis_files_path, "submission_repos", ".commit0.yaml"
-        )
-        if not args.keep_previous_eval:
-            for subfolder in glob.glob(os.path.join(analysis_files_path, "*")):
-                if os.path.basename(subfolder.rstrip("/")) not in {"blank", "reference", "repos", "submission_repos"}:
-                    try:
-                        print(f"Clearing {subfolder}")
-                        shutil.rmtree(subfolder)
-                    except Exception as e:
-                        print(f"{e}: when removing {subfolder}")
-
-        for submission in submission_dataset:
-            branch_name = submission["name"]
-            os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True)
-            if not args.keep_previous_eval:
-                for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"):
-                    if os.path.exists(os.path.join(repo_log_path, branch_name)):
-                        shutil.rmtree(os.path.join(repo_log_path, branch_name))
+        for submission in tqdm.tqdm(submission_dataset):
+            submission_details = {"submission_info": submission}
+            branch_name = submission["branch"]
+            org_name = submission["org_name"]
+            split = submission["split"]
+            if split != args.split:
+                continue
+            submission_metrics_output_file = os.path.join(
+                analysis_files_path, org_name, f"{branch_name}.json"
+            )
+            if (
+                os.path.exists(submission_metrics_output_file)
+                and not args.overwrite_previous_eval
+            ):
+                continue
+            submission_repos_path = os.path.join(
+                analysis_files_path, "submission_repos", org_name, branch_name
+            )
+            if os.path.exists(submission_repos_path):
+                shutil.rmtree(submission_repos_path)
+            os.makedirs(os.path.join(analysis_files_path, org_name), exist_ok=True)
+            commit0_dot_file_path = os.path.join(
+                analysis_files_path,
+                "submission_repos",
+                org_name,
+                branch_name,
+                ".commit0.yaml",
+            )
+            for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"):
+                if os.path.exists(os.path.join(repo_log_path, branch_name)):
+                    shutil.rmtree(os.path.join(repo_log_path, branch_name))
             for example in dataset:
                 repo_name = example["repo"].split("/")[-1]
-                if args.split != "all" and repo_name not in SPLIT[args.split]:
+                if split != "all" and repo_name not in SPLIT[split]:
                     continue
-                clone_url = f"https://github.com/test-save-commit0/{repo_name}.git"
+                clone_url = f"https://github.com/{org_name}/{repo_name}.git"
                 clone_dir = os.path.abspath(
-                    os.path.join(analysis_files_path, "submission_repos", repo_name)
+                    os.path.join(submission_repos_path, repo_name)
                 )
-                clone_repo(clone_url, clone_dir, branch_name, logger)
+                try:
+                    clone_repo(clone_url, clone_dir, branch_name, logger)
+                except Exception as e:
+                    submission_details[repo_name] = f"Error cloning: {e}"
+                    if os.path.exists(clone_dir):
+                        shutil.rmtree(clone_dir)
             # after successfully setup, write the commit0 dot file
             write_commit0_dot_file(
                 commit0_dot_file_path,
                 {
                     "dataset_name": commit0_dataset_name,
                     "dataset_split": "test",
-                    "repo_split": args.split,
-                    "base_dir": os.path.join(analysis_files_path, "submission_repos"),
+                    "repo_split": split,
+                    "base_dir": submission_repos_path,
                 },
             )
             # run pytests
@@ -426,22 +535,21 @@ def main(args):
             )
             for example in dataset:
                 repo_name = example["repo"].split("/")[-1]
-                if args.split != "all" and repo_name not in SPLIT[args.split]:
+                if split != "all" and repo_name not in SPLIT[split]:
+                    continue
+                if repo_name in submission_details:  # Failed to clone earlier, skip.
                     continue
-
-                repo_metrics_output_file = os.path.join(
-                    analysis_files_path, branch_name, f"{repo_name}.json"
-                )
-
                 path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}"
                 pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name)
-                json.dump(pytest_results, open(repo_metrics_output_file, "w"), indent=4)
+                submission_details[repo_name] = pytest_results
+            json.dump(
+                submission_details, open(submission_metrics_output_file, "w"), indent=4
+            )
+            print(f"Saved pytest info to {submission_metrics_output_file}")
 
-    if not args.keep_previous_eval:
-        for analysis_file in glob.glob("docs/analysis*.md"):
-            os.unlink(analysis_file)
     if args.render_webpages:
-        render_mds()
+        # Render only updated leaderboard and new submissions
+        render_mds(args.overwrite_previous_eval)
 
 
 main(get_args())
diff --git a/docs/update_submissions_dataset.py b/docs/update_submissions_dataset.py
new file mode 100644
index 0000000..134847e
--- /dev/null
+++ b/docs/update_submissions_dataset.py
@@ -0,0 +1,12 @@
+from datasets import Dataset
+
+submissions = {
+    "org_name": ["test-save-commit0", "commit0-lite-with-test", "commit0-lite-plain", "commit0-all-plain"],
+    "branch": ["baseline", "fillin", "fillin", "fillin"],
+    "display_name": ["Claude Sonnet 3.5 - Base", "Claude Sonnet 3.5 - Fill-in + Unit Test Feedback", "Claude Sonnet 3.5 - Fill-in", "Claude Sonnet 3.5 - Fill-in"],
+    "submission_date": ["09/25/2024", "09/25/2024", "09/25/2024", "09/25/2024"],
+    "split": ["lite", "lite", "lite", "all"],
+    "project_page": ["https://github.com/test-save-commit0", "https://github.com/commit0-lite-with-test", "https://github.com/commit0-lite-plain", "https://github.com/commit0-all-plain"]
+}
+
+Dataset.from_dict(submissions).push_to_hub("celinelee/commit0_submissions")
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 5c5f1f9..2a7d83b 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -6,7 +6,7 @@ nav:
   - Commit0: setupdist.md
   - Agent: agent.md
   - API: api.md
-  - Submission Analysis: analysis.md
+  - Leaderboard: analysis.md
 theme:
   name: material
   logo: "logo2.webp"
@@ -19,3 +19,7 @@ markdown_extensions:
   - pymdownx.snippets
   - pymdownx.highlight
   - pymdownx.superfences
+
+extra_javascript:
+  - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js
+  - javascripts/tablesort.js
\ No newline at end of file