Skip to content

Commit

Permalink
Merge pull request #111 from ExpandingMan/overhaul
Browse files Browse the repository at this point in the history
massive overhaul
  • Loading branch information
aviks authored Oct 19, 2022
2 parents f9793f3 + 9acf4b0 commit d75c3e7
Show file tree
Hide file tree
Showing 47 changed files with 2,491 additions and 1,361 deletions.
37 changes: 33 additions & 4 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,43 @@ on:
schedule:
- cron: 19 22 * * *
workflow_dispatch:
permissions:
contents: write
pull-requests: write
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
- name: Check if Julia is already available in the PATH
id: julia_in_path
run: which julia
continue-on-error: true
- name: Install Julia, but only if it is not already available in the PATH
uses: julia-actions/setup-julia@v1
with:
version: '1'
arch: ${{ runner.arch }}
if: steps.julia_in_path.outcome != 'success'
- name: "Add the General registry via Git"
run: |
import Pkg
ENV["JULIA_PKG_SERVER"] = ""
Pkg.Registry.add("General")
shell: julia --color=yes {0}
- name: "Install CompatHelper"
run: |
import Pkg
name = "CompatHelper"
uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
version = "3"
Pkg.add(; name, uuid, version)
shell: julia --color=yes {0}
- name: "Run CompatHelper"
run: |
import CompatHelper
CompatHelper.main()
shell: julia --color=yes {0}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
run: julia -e 'using CompatHelper; CompatHelper.main()'
# COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
15 changes: 15 additions & 0 deletions .github/workflows/TagBot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: TagBot
on:
issue_comment:
types:
- created
workflow_dispatch:
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.DOCUMENTER_KEY }}
50 changes: 28 additions & 22 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,42 +1,48 @@
name: CI
on:
push:
branches:
- master
tags: '*'
pull_request:
push:
branches: [main]
tags: ['*']
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.version == 'nightly' }}
strategy:
fail-fast: false
matrix:
version:
- '1.6'
- '1'
- 'nightly'
- '1' # automatically expands to the latest stable 1.x release of Julia
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
exclude:
# Remove some configurations from the build matrix to reduce CI time.
# See https://github.com/marketplace/actions/setup-julia-environment
# Don't test on all versions
- {os: 'macOS-latest', version: '1.6'}
- {os: 'macOS-latest', version: 'nightly'}
- {os: 'windows-latest', version: '1.6'}
- {os: 'windows-latest', version: 'nightly'}
include:
- os: windows-latest
version: '1'
arch: x64
steps:
- uses: actions/checkout@v1
- uses: julia-actions/setup-julia@latest
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
env:
JULIA_NUM_THREADS: 4
with:
coverage: false
docs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-docdeploy@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ deps.jl
deps
test/*.buffer
Manifest.toml
docs/build
15 changes: 9 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
name = "XGBoost"
uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
version = "1.5.2"
version = "2.0.0"

[deps]
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Term = "22787eb5-b846-44ae-b979-8e399b8463ab"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
XGBoost_jll = "a5c6f535-4255-5ca2-a466-0e519f119c46"

Expand All @@ -17,8 +21,7 @@ julia = "1.6"

[extras]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["SparseArrays", "Random", "Test"]
test = ["Random", "Test"]
78 changes: 8 additions & 70 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
XGBoost.jl
==========
# XGBoost.jl

[![Build Status](https://github.com/dmlc/XGBoost.jl/workflows/CI/badge.svg)](https://github.com/dmlc/XGBoost.jl/actions)
[![Latest Version](https://juliahub.com/docs/XGBoost/version.svg)](https://juliahub.com/ui/Packages/XGBoost/rSeEh/)
[![Pkg Eval](https://juliahub.com/docs/XGBoost/pkgeval.svg)](https://juliahub.com/ui/Packages/XGBoost/rSeEh/)
[![Dependents](https://juliahub.com/docs/XGBoost/deps.svg)](https://juliahub.com/ui/Packages/XGBoost/rSeEh/?t=2)
[![docs](https://img.shields.io/badge/docs-latest-blue?style=for-the-badge&logo=julia)](https://dmlc.github.io/XGBoost.jl/)

eXtreme Gradient Boosting in Julia
eXtreme Gradient Boosting in Julia.

## Abstract
This package is a Julia interface to [XGBoost](https://github.com/dmlc/xgboost).
Expand All @@ -17,76 +17,14 @@ gradient boosting packages. It supports various objective functions, including r
classification and ranking. The package is also designed to be extensible, so that users can
easily define their own objectives.

## Features
* Sparse feature format, it allows easy handling of missing values, and improve computation
efficiency.
* Advanced features, such as customized loss function, cross validation, see [demo folder](demo)
for walkthrough examples.
See the [documentation](https://dmlc.github.io/XGBoost.jl/) for more information.

## Installation
```julia
] add XGBoost
```
or
```julia
] develop "https://github.com/dmlc/XGBoost.jl.git"
] build XGBoost
```

By default, the package installs prebuilt binaries for XGBoost `v0.82.0` on Linux, MacOS and Windows. Only the linux version is built with OpenMP.


## Minimal examples

To show how XGBoost works, here is an example of dataset Mushroom

- Prepare Data

XGBoost support Julia ```Array```, ```SparseMatrixCSC```, libSVM format text and XGBoost binary
file as input. Here is an example of Mushroom classification. This example will use the function
```readlibsvm``` in [basic_walkthrough.jl](demo/basic_walkthrough.jl#L5). This function load libsvm
format text into Julia dense matrix.

```julia
using XGBoost

train_X, train_Y = readlibsvm("data/agaricus.txt.train", (6513, 126))
test_X, test_Y = readlibsvm("data/agaricus.txt.test", (1611, 126))

```

- Fit Model
```julia
num_round = 2
bst = xgboost(train_X, num_round, label = train_Y, eta = 1, max_depth = 2)
```

## Predict
```julia
pred = predict(bst, test_X)
print("test-error=", sum((pred .> 0.5) .!= test_Y) / float(size(pred)[1]), "\n")
```

## Cross-Validation
```julia
nfold = 5
param = ["max_depth" => 2,
"eta" => 1,
"objective" => "binary:logistic"]
metrics = ["auc"]
nfold_cv(train_X, num_round, nfold, label = train_Y, param = param, metrics = metrics)
```

## Feature Walkthrough
Check [demo](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/)

- [Basic walkthrough of features](demo/basic_walkthrough.jl)
- [Customize loss function, and evaluation metric](demo/custom_objective.jl)
- [Boosting from existing prediction](demo/boost_from_prediction.jl)
- [Predicting using first n trees](demo/predict_first_ntree.jl)
- [Generalized Linear Model](demo/generalized_linear_model.jl)
- [Cross validation](demo/cross_validation.jl)

This package uses [`XGBoost_jll`](https://github.com/JuliaBinaryWrappers/XGBoost_jll.jl) to provide
the `xgboost` binaries, which are installed automatically.

## Model Parameter Setting
Check [XGBoost Documentation](https://xgboost.readthedocs.io/en/latest/parameter.html)
## Preview
![](/assets/xgboost_demo_1.jpg)
3 changes: 3 additions & 0 deletions assets/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Test Data
This folder contains the processed example datasets used for testing.
Copyright of the dataset belongs to the original copyright holder.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added assets/xgboost_demo_1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 0 additions & 2 deletions data/README.md

This file was deleted.

14 changes: 0 additions & 14 deletions demo/README.md

This file was deleted.

106 changes: 0 additions & 106 deletions demo/basic_walkthrough.jl

This file was deleted.

Loading

0 comments on commit d75c3e7

Please sign in to comment.