From 5057eae8478801a698d892f207f265c6290808a0 Mon Sep 17 00:00:00 2001 From: Vanessasaurus <814322+vsoch@users.noreply.github.com> Date: Wed, 23 Mar 2022 21:03:44 -0600 Subject: [PATCH] adding basics for citelang gen (will be an action) (#9) * adding basics for citelang gen (will be an action) Signed-off-by: vsoch --- .github/workflows/main.yml | 1 + .github/workflows/test-action.yml | 27 ++++++ Dockerfile | 1 + README.md | 4 +- action/gen/action.yml | 39 +++++++++ citelang/client/__init__.py | 13 ++- citelang/client/gen.py | 26 ++++++ citelang/main/client.py | 126 ++++++---------------------- citelang/main/parser.py | 59 +++++++++++-- citelang/tests/test_client.sh | 4 + docs/getting_started/user-guide.rst | 21 +++++ examples/citelang.md | 31 +++++++ 12 files changed, 240 insertions(+), 112 deletions(-) create mode 100644 .github/workflows/test-action.yml create mode 100644 action/gen/action.yml create mode 100644 citelang/client/gen.py create mode 100644 examples/citelang.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d9d2ce4..80b6f8c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -56,6 +56,7 @@ jobs: - name: Run Tests env: + CITELANG_LIBRARIES_KEY: ${{ secrets.CITELANG_LIBRARIES_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | export PATH="/usr/share/miniconda/bin:$PATH" diff --git a/.github/workflows/test-action.yml b/.github/workflows/test-action.yml new file mode 100644 index 0000000..34f10a2 --- /dev/null +++ b/.github/workflows/test-action.yml @@ -0,0 +1,27 @@ +name: test-action + +on: + pull_request: [] + +jobs: + test-action: + name: Test CiteLang + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v2 + + - name: Install CiteLang + run: pip install -e . 
+ + - name: Test citelang gen + uses: ./action/gen + env: + CITELANG_LIBRARIES_KEY: ${{ secrets.CITELANG_LIBRARIES_KEY }} + with: + package: citelang + manager: pypi + outfile: citelang.md + + - name: View generated file + run: cat citelang.md diff --git a/Dockerfile b/Dockerfile index 7d5221a..7e8bce4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,3 +15,4 @@ RUN pip install ipython WORKDIR /code COPY . /code RUN pip install -e .[all] +ENTRYPOINT ["citelang"] diff --git a/README.md b/README.md index 3195e15..d359a99 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,10 @@ that when you publish your software, you should publish it to an appropriate pac ## TODO + - citelang should be able to parse requirements.txt or go.sum, etc. - add graphic summary - - add GitHub actions + - finish GitHub actions (render, docs, remember to mention to use token, etc.) + - add gha to run here! ## Contributors diff --git a/action/gen/action.yml b/action/gen/action.yml new file mode 100644 index 0000000..66d2366 --- /dev/null +++ b/action/gen/action.yml @@ -0,0 +1,39 @@ +name: "citelang gen action" +description: "Generate a citelang markdown for your package" +inputs: + manager: + description: the name of the package manager + required: true + package: + description: the name of the package + required: true + outfile: + description: the name of the output file. + required: true + default: citelang.md + args: + description: additional arguments / parameters for citelang. + required: false + default: "" + +runs: + using: "composite" + steps: + + - name: Install CiteLang + shell: bash + run: | + which citelang + retval=$? 
+ if [[ "${retval}" != "0" ]]; then + pip install citelang + fi + + - name: Generate CiteLang + env: + manager: ${{ inputs.manager }} + package: ${{ inputs.package }} + outfile: ${{ inputs.outfile }} + args: ${{ inputs.args }} + run: citelang gen ${manager} ${package} --outfile ${outfile} ${args} + shell: bash diff --git a/citelang/client/__init__.py b/citelang/client/__init__.py index 507c6b0..f8c622b 100644 --- a/citelang/client/__init__.py +++ b/citelang/client/__init__.py @@ -106,16 +106,23 @@ def get_parser(): credit = subparsers.add_parser( "credit", description="calculate dependency credit for a package." ) + gen = subparsers.add_parser( + "gen", description="Generate a credit markdown file for a package of choice." + ) + gen.add_argument("--outfile", "-o", help="Save to an output markdown file.") + render = subparsers.add_parser( "render", description="render a credit tree into your markdown." ) render.add_argument("filename", help="Markdown file to render software table into.") render.add_argument("--outfile", "-o", help="Save to an output json file.") - for command in [pkg, deps, graph, credit, badge]: + for command in [pkg, deps, graph, credit, badge, gen]: command.add_argument( "package", help="package manager and name to parse", nargs=2 ) + + for command in [pkg, deps, graph, credit, badge]: command.add_argument( "--json", dest="json", @@ -125,7 +132,7 @@ def get_parser(): ) command.add_argument("--outfile", "-o", help="Save to an output json file.") - for command in [graph, credit, badge, render]: + for command in [graph, credit, badge, render, gen]: command.add_argument( "--max-depth", help="maximum depth to parse tree (default is unset)" ) @@ -243,6 +250,8 @@ def help(return_code=0): from .credit import main elif args.command == "deps": from .deps import main + elif args.command == "gen": + from .gen import main elif args.command == "graph": from .graph import main elif args.command == "render": diff --git a/citelang/client/gen.py 
b/citelang/client/gen.py new file mode 100644 index 0000000..bc20498 --- /dev/null +++ b/citelang/client/gen.py @@ -0,0 +1,26 @@ +__author__ = "Vanessa Sochat" +__copyright__ = "Copyright 2022, Vanessa Sochat" +__license__ = "MPL 2.0" + +from citelang.main import Client +import citelang.utils as utils + + +def main(args, parser, extra, subparser): + + cli = Client(quiet=args.quiet, settings_file=args.settings_file) + result = cli.gen( + name=args.package[1], + manager=args.package[0], + use_cache=not args.no_cache, + max_depth=args.max_depth, + max_deps=args.max_depth, + min_credit=args.min_credit, + credit_split=args.credit_split, + ) + + if args.outfile: + utils.write_file(result.render(), args.outfile) + print("Saved to %s" % args.outfile) + else: + print(result.render()) diff --git a/citelang/main/client.py b/citelang/main/client.py index 3a753ac..6f5b7fa 100644 --- a/citelang/main/client.py +++ b/citelang/main/client.py @@ -24,24 +24,28 @@ def dependencies(self, manager, name, use_cache=True): pkg = package.Package(manager, name, client=self, use_cache=use_cache) return pkg.dependencies() - def render( - self, - filename, - use_cache=True, - max_depth=None, - max_deps=None, - min_credit=0.01, - credit_split=0.5, - ): + def gen(self, name, manager, *args, **kwargs): + """ + Generate a one off credit table for a named library + """ + # Generate a root for our graph + root = self._graph(name=name, manager=manager, **kwargs) + + # Prepare a parser that can generate a table + p = parser.Parser() + p.add_lib(name=name, manager=manager) + p.prepare_table({"%s:%s" % (manager, name): root}) + return p + + def render(self, filename, **kwargs): """ Given a filename with references, render the citelang table. """ p = parser.Parser(filename) - libs = p.parse() + p.parse() - # Derive each as a package credit tree roots = {} - for lib in libs: + for lib in p.libs: if "name" not in lib or "manager" not in lib: logger.warning("Skipping %s, missing name or manager." 
% lib) uid = "%s:%s" % (lib["manager"], lib["name"]) @@ -49,36 +53,9 @@ def render( version = lib.get("version") or lib.get("release") uid = "%s@%s" % (uid, version) lib["name"] = "%s@%s" % (lib["name"], version) - roots[uid] = self._graph( - manager=lib["manager"], - name=lib["name"], - use_cache=use_cache, - max_depth=max_depth, - max_deps=max_deps, - min_credit=min_credit, - credit_split=credit_split, - ) - - # Generate the table with multiple roots - flatten out credit - table = {} - - # Multiplier for credit depending on total packages - splitby = 1 / len(roots) - for lib, root in roots.items(): - manager = lib.split(":")[0] - for node in root.iternodes(): - if manager not in table: - table[manager] = {} - if node.name not in table[manager]: - table[manager][node.name] = { - "credit": 0, - "url": node.obj.homepage, - } - table[manager][node.name]["credit"] += node.credit * splitby - - # Add listing of packages and dependencies to parser - p.data = table - p.round_by = root.round_by + roots[uid] = self._graph(manager=lib["manager"], name=lib["name"], **kwargs) + + p.prepare_table(roots) return p def package_managers(self, use_cache=True): @@ -114,17 +91,7 @@ def check_manager(self, name, use_cache=True): managers = "\n".join(managers) logger.exit(f"{name} is not a known manager. Choices are:\n{managers}") - def graph( - self, - manager, - name, - use_cache=True, - max_depth=None, - max_deps=None, - min_credit=0.01, - credit_split=0.5, - fmt=None, - ): + def graph(self, fmt=None, *args, **kwargs): """ Generate a graph for a package. @@ -132,40 +99,14 @@ def graph( the main package gets 80%, and dependencies split 20%. We go up until the min credit 0.05 at which case we stop adding. 
""" - root = self._graph( - manager=manager, - name=name, - use_cache=use_cache, - max_depth=max_depth, - max_deps=max_deps, - min_credit=min_credit, - credit_split=credit_split, - ) + root = self._graph(*args, **kwargs) return results.Graph(root).graph(fmt=fmt) - def badge( - self, - manager, - name, - use_cache=True, - max_depth=None, - max_deps=None, - min_credit=0.01, - credit_split=0.5, - fmt=None, - ): + def badge(self, *args, **kwargs): """ Generate a badge for a package """ - root = self._graph( - manager=manager, - name=name, - use_cache=use_cache, - max_depth=max_depth, - max_deps=max_deps, - min_credit=min_credit, - credit_split=credit_split, - ) + root = self._graph(*args, **kwargs) return results.Badge(root) def _graph( @@ -294,28 +235,11 @@ def _graph( root.children_names = list(node_names) return root - def credit( - self, - manager, - name, - use_cache=True, - max_depth=None, - max_deps=None, - min_credit=0.05, - credit_split=0.5, - ): + def credit(self, *args, **kwargs): """ Get the credit root node, then do additional graph parsing. """ - root = self._graph( - manager=manager, - name=name, - use_cache=use_cache, - max_depth=max_depth, - max_deps=max_deps, - min_credit=min_credit, - credit_split=credit_split, - ) + root = self._graph(*args, **kwargs) return results.Tree(root) def package(self, manager, name, use_cache=True): diff --git a/citelang/main/parser.py b/citelang/main/parser.py index 2f922b3..e396d07 100644 --- a/citelang/main/parser.py +++ b/citelang/main/parser.py @@ -16,6 +16,13 @@ template_suffix = "\n> Note that credit values are rounded and expanded (so shared dependencies are represented as one record) and may not add to 1.0. Rounded values that hit zero are removed.\n" citation_regex = "\@([a-zA-Z0-9]+)\{(.*?)\}" +empty_content = """# Software Credit + + + + +- Generated by [CiteLang](https://github.com/vsoch/citelang) +""" class Parser: @@ -24,13 +31,19 @@ class Parser: a software graph per the user preferences. 
""" - def __init__(self, filename: str): - self.filename = os.path.abspath(filename) + def __init__(self, filename: str = None): self.data = {} + self.libs = [] self.round_by = 3 - if not os.path.exists(self.filename): - logger.exit(f"{filename} does not exist") - self.content = utils.read_file(self.filename) + self.filename = None + if filename: + self.filename = os.path.abspath(filename) + if not os.path.exists(self.filename): + logger.exit(f"{filename} does not exist") + + self.content = utils.read_file(self.filename) + else: + self.content = empty_content @property def start_block(self): @@ -60,7 +73,6 @@ def parse(self): Given a markdown file, return a list of parse packages and versions. """ self.check() - libs = [] for match in re.findall(citation_regex, self.content): if len(match) != 2: logger.warning( @@ -72,8 +84,39 @@ def parse(self): for y in [x.strip().split("=") for x in match[-1].split(",")] if len(y) == 2 } - libs.append({"manager": match[0], **args}) - return libs + self.libs.append({"manager": match[0], **args}) + + def prepare_table(self, roots): + """ + Given a set of roots, prepare table data. 
+ """ + # Generate the table with multiple roots - flatten out credit + table = {} + + # Multiplier for credit depending on total packages + splitby = 1 / len(roots) + for lib, root in roots.items(): + manager = lib.split(":")[0] + for node in root.iternodes(): + if manager not in table: + table[manager] = {} + if node.name not in table[manager]: + table[manager][node.name] = { + "credit": 0, + "url": node.obj.homepage, + } + table[manager][node.name]["credit"] += node.credit * splitby + + # Add listing of packages and dependencies to parser + self.data = table + self.round_by = root.round_by + return table + + def add_lib(self, manager, name, **args): + """ + Manually add a library (e.g., not reading from a pre-existing file) + """ + self.libs.append({"manager": manager, "name": name, **args}) def render(self): """ diff --git a/citelang/tests/test_client.sh b/citelang/tests/test_client.sh index 0a617a0..779c7c0 100755 --- a/citelang/tests/test_client.sh +++ b/citelang/tests/test_client.sh @@ -81,6 +81,10 @@ runTest 0 $output citelang --settings-file $settings render ../../examples/paper runTest 0 $output citelang --settings-file $settings render ../../examples/paper.md --outfile paper.md cat paper.md +echo +echo "#### Testing gen " +runTest 0 $output citelang --settings-file $settings gen pypi requests + echo echo "#### Testing cache " runTest 0 $output citelang --settings-file $settings cache --help diff --git a/docs/getting_started/user-guide.rst b/docs/getting_started/user-guide.rst index a7cfaa8..0740da3 100644 --- a/docs/getting_started/user-guide.rst +++ b/docs/getting_started/user-guide.rst @@ -496,9 +496,30 @@ This will print the result to the screen! To save to output file (overwrite the $ citelang render examples/paper.md --outfile examples/paper-render.md You can see an `example rendering here `_. + +Gen (generate) +============== + +If you just want to generate a markdown file for a piece of software, you can do: + + +.. 
code-block:: console + + $ citelang gen pypi requests + +And of course save to an output file: + +.. code-block:: console + + $ citelang gen pypi requests --outfile examples/citelang.md + + +And akin to credit or graph, you can change the credit threshold to introduce more dependencies. +You can see an `example rendering here <https://github.com/vsoch/citelang/blob/main/examples/citelang.md>`_. We are thinking about also generating a graphic to embed somewhere, and associated actions for both. Let us know if you have ideas! + ****** Python ****** diff --git a/examples/citelang.md b/examples/citelang.md new file mode 100644 index 0000000..274bb84 --- /dev/null +++ b/examples/citelang.md @@ -0,0 +1,31 @@ +# Software Credit + + +|Manager|Name|Credit| +|-------|----|------| +|pypi|[requests](https://requests.readthedocs.io)|0.5| +|pypi|[PySocks](https://github.com/Anorov/PySocks)|0.075| +|pypi|[idna](https://github.com/kjd/idna)|0.075| +|pypi|[certifi](https://certifiio.readthedocs.io/en/latest/)|0.075| +|pypi|[win-inet-pton](https://github.com/hickeroar/win_inet_pton)|0.071| +|pypi|[chardet](https://github.com/chardet/chardet)|0.071| +|pypi|[charset-normalizer](https://github.com/ousret/charset_normalizer)|0.036| +|pypi|[urllib3](https://urllib3.readthedocs.io/)|0.036| +|pypi|[unicodedata2](http://github.com/mikekap/unicodedata2)|0.018| +|pypi|[pytest-randomly](https://pypi.org/project/pytest-randomly)|0.004| +|pypi|[pytest-xdist](https://pypi.org/project/pytest-xdist)|0.004| +|pypi|[coverage](https://pypi.org/project/coverage)|0.004| +|pypi|[pytest](https://pypi.org/project/pytest)|0.004| +|pypi|[ipaddress](https://github.com/phihag/ipaddress)|0.004| +|pypi|[cryptography](https://github.com/pyca/cryptography)|0.004| +|pypi|[pyOpenSSL](https://pyopenssl.org/)|0.004| +|pypi|[brotlipy](https://pypi.org/project/brotlipy)|0.004| +|pypi|[brotli](https://pypi.org/project/brotli)|0.004| +|pypi|[brotlicffi](https://pypi.org/project/brotlicffi)|0.004| + + +> Note that credit values are rounded and expanded (so shared dependencies are 
represented as one record) and may not add to 1.0. Rounded values that hit zero are removed. + + + +- Generated by [CiteLang](https://github.com/vsoch/citelang)