Skip to content

Commit

Permalink
feat: built-in content blocking based on IPIP-383 (#10161)
Browse files Browse the repository at this point in the history
Fixes #8492

This introduces "nopfs" as a preloaded plugin into Kubo
with support for denylists from ipfs/specs#383

It automatically makes Kubo watch *.deny files found in:

- /etc/ipfs/denylists
- $XDG_CONFIG_HOME/ipfs/denylists
- $IPFS_PATH/denylists

* test: Gateway.NoFetch and GatewayOverLibp2p

adds missing tests for "no fetch" gateways one can expose,
in both cases the offline mode is done by passing custom
blockservice/exchange into path resolver, which means
global path resolver that has nopfs intercept is not used,
and the content blocking does not happen on these gateways.

* fix: use offline path resolvers where appropriate

this fixes the problem described in
#10161 (comment)
by adding explicit offline path resolvers that are backed
by offline exchange, and using them in NoFetch gateways
instead of the default online ones

---------

Co-authored-by: Henrique Dias <hacdias@gmail.com>
Co-authored-by: Marcin Rataj <lidel@lidel.org>
  • Loading branch information
3 people authored Oct 28, 2023
1 parent 4f303d3 commit a0f34b1
Show file tree
Hide file tree
Showing 19 changed files with 596 additions and 65 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Featureset
- [HTTP Kubo RPC API](https://docs.ipfs.tech/reference/kubo/rpc/) (`/api/v0`) to access and control the daemon
- [Command Line Interface](https://docs.ipfs.tech/reference/kubo/cli/) based on (`/api/v0`) RPC API
- [WebUI](https://github.com/ipfs/ipfs-webui/#readme) to manage the Kubo node
- [Content blocking](/docs/content-blocking.md) support for operators of public nodes

### Other implementations

Expand Down
16 changes: 11 additions & 5 deletions core/commands/dag/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,32 @@ import (
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
"github.com/ipfs/kubo/core/commands/cmdenv"
"github.com/ipfs/kubo/core/commands/cmdutils"

cmds "github.com/ipfs/go-ipfs-cmds"
gocar "github.com/ipld/go-car"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
)

func dagExport(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error {
c, err := cid.Decode(req.Arguments[0])
// Accept CID or a content path
p, err := cmdutils.PathOrCidPath(req.Arguments[0])
if err != nil {
return fmt.Errorf(
"unable to parse root specification (currently only bare CIDs are supported): %s",
err,
)
return err
}

api, err := cmdenv.GetApi(env, req)
if err != nil {
return err
}

// Resolve path and confirm the root block is available, fail fast if not
b, err := api.Block().Stat(req.Context, p)
if err != nil {
return err
}
c := b.Path().RootCid()

pipeR, pipeW := io.Pipe()

errCh := make(chan error, 2) // we only report the 1st error
Expand Down
58 changes: 31 additions & 27 deletions core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,35 +76,39 @@ type IpfsNode struct {
PNetFingerprint libp2p.PNetFingerprint `optional:"true"` // fingerprint of private network

// Services
Peerstore pstore.Peerstore `optional:"true"` // storage for other Peer instances
Blockstore bstore.GCBlockstore // the block store (lower level)
Filestore *filestore.Filestore `optional:"true"` // the filestore blockstore
BaseBlocks node.BaseBlocks // the raw blockstore, no filestore wrapping
GCLocker bstore.GCLocker // the locker used to protect the blockstore during gc
Blocks bserv.BlockService // the block service, get/add blocks.
DAG ipld.DAGService // the merkle dag service, get/add objects.
IPLDFetcherFactory fetcher.Factory `name:"ipldFetcher"` // fetcher that paths over the IPLD data model
UnixFSFetcherFactory fetcher.Factory `name:"unixfsFetcher"` // fetcher that interprets UnixFS data
Reporter *metrics.BandwidthCounter `optional:"true"`
Discovery mdns.Service `optional:"true"`
FilesRoot *mfs.Root
RecordValidator record.Validator
Peerstore pstore.Peerstore `optional:"true"` // storage for other Peer instances
Blockstore bstore.GCBlockstore // the block store (lower level)
Filestore *filestore.Filestore `optional:"true"` // the filestore blockstore
BaseBlocks node.BaseBlocks // the raw blockstore, no filestore wrapping
GCLocker bstore.GCLocker // the locker used to protect the blockstore during gc
Blocks bserv.BlockService // the block service, get/add blocks.
DAG ipld.DAGService // the merkle dag service, get/add objects.
IPLDFetcherFactory fetcher.Factory `name:"ipldFetcher"` // fetcher that paths over the IPLD data model
UnixFSFetcherFactory fetcher.Factory `name:"unixfsFetcher"` // fetcher that interprets UnixFS data
OfflineIPLDFetcherFactory fetcher.Factory `name:"offlineIpldFetcher"` // fetcher that paths over the IPLD data model without fetching new blocks
OfflineUnixFSFetcherFactory fetcher.Factory `name:"offlineUnixfsFetcher"` // fetcher that interprets UnixFS data without fetching new blocks
Reporter *metrics.BandwidthCounter `optional:"true"`
Discovery mdns.Service `optional:"true"`
FilesRoot *mfs.Root
RecordValidator record.Validator

// Online
PeerHost p2phost.Host `optional:"true"` // the network host (server+client)
Peering *peering.PeeringService `optional:"true"`
Filters *ma.Filters `optional:"true"`
Bootstrapper io.Closer `optional:"true"` // the periodic bootstrapper
Routing irouting.ProvideManyRouter `optional:"true"` // the routing system. recommend ipfs-dht
DNSResolver *madns.Resolver // the DNS resolver
IPLDPathResolver pathresolver.Resolver `name:"ipldPathResolver"` // The IPLD path resolver
UnixFSPathResolver pathresolver.Resolver `name:"unixFSPathResolver"` // The UnixFS path resolver
Exchange exchange.Interface // the block exchange + strategy (bitswap)
Namesys namesys.NameSystem // the name system, resolves paths to hashes
Provider provider.System // the value provider system
IpnsRepub *ipnsrp.Republisher `optional:"true"`
GraphExchange graphsync.GraphExchange `optional:"true"`
ResourceManager network.ResourceManager `optional:"true"`
PeerHost p2phost.Host `optional:"true"` // the network host (server+client)
Peering *peering.PeeringService `optional:"true"`
Filters *ma.Filters `optional:"true"`
Bootstrapper io.Closer `optional:"true"` // the periodic bootstrapper
Routing irouting.ProvideManyRouter `optional:"true"` // the routing system. recommend ipfs-dht
DNSResolver *madns.Resolver // the DNS resolver
IPLDPathResolver pathresolver.Resolver `name:"ipldPathResolver"` // The IPLD path resolver
UnixFSPathResolver pathresolver.Resolver `name:"unixFSPathResolver"` // The UnixFS path resolver
OfflineIPLDPathResolver pathresolver.Resolver `name:"offlineIpldPathResolver"` // The IPLD path resolver that uses only locally available blocks
OfflineUnixFSPathResolver pathresolver.Resolver `name:"offlineUnixFSPathResolver"` // The UnixFS path resolver that uses only locally available blocks
Exchange exchange.Interface // the block exchange + strategy (bitswap)
Namesys namesys.NameSystem // the name system, resolves paths to hashes
Provider provider.System // the value provider system
IpnsRepub *ipnsrp.Republisher `optional:"true"`
GraphExchange graphsync.GraphExchange `optional:"true"`
ResourceManager network.ResourceManager `optional:"true"`

PubSub *pubsub.PubSub `optional:"true"`
PSRouter *psrouter.PubsubValueStore `optional:"true"`
Expand Down
18 changes: 16 additions & 2 deletions core/corehttp/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ func Libp2pGatewayOption() ServeOption {
return func(n *core.IpfsNode, _ net.Listener, mux *http.ServeMux) (*http.ServeMux, error) {
bserv := blockservice.New(n.Blocks.Blockstore(), offline.Exchange(n.Blocks.Blockstore()))

backend, err := gateway.NewBlocksBackend(bserv)
backend, err := gateway.NewBlocksBackend(bserv,
// GatewayOverLibp2p only returns things that are in local blockstore
// (same as Gateway.NoFetch=true), we have to pass offline path resolver
gateway.WithResolver(n.OfflineUnixFSPathResolver),
)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -111,6 +115,8 @@ func newGatewayBackend(n *core.IpfsNode) (gateway.IPFSBackend, error) {
bserv := n.Blocks
var vsRouting routing.ValueStore = n.Routing
nsys := n.Namesys
pathResolver := n.UnixFSPathResolver

if cfg.Gateway.NoFetch {
bserv = blockservice.New(bserv.Blockstore(), offline.Exchange(bserv.Blockstore()))

Expand All @@ -130,9 +136,17 @@ func newGatewayBackend(n *core.IpfsNode) (gateway.IPFSBackend, error) {
if err != nil {
return nil, fmt.Errorf("error constructing namesys: %w", err)
}

// Gateway.NoFetch=true requires offline path resolver
// to avoid fetching missing blocks during path traversal
pathResolver = n.OfflineUnixFSPathResolver
}

backend, err := gateway.NewBlocksBackend(bserv, gateway.WithValueStore(vsRouting), gateway.WithNameSystem(nsys))
backend, err := gateway.NewBlocksBackend(bserv,
gateway.WithValueStore(vsRouting),
gateway.WithNameSystem(nsys),
gateway.WithResolver(pathResolver),
)
if err != nil {
return nil, err
}
Expand Down
51 changes: 32 additions & 19 deletions core/node/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/ipfs/boxo/blockservice"
blockstore "github.com/ipfs/boxo/blockstore"
exchange "github.com/ipfs/boxo/exchange"
offline "github.com/ipfs/boxo/exchange/offline"
"github.com/ipfs/boxo/fetcher"
bsfetcher "github.com/ipfs/boxo/fetcher/impl/blockservice"
"github.com/ipfs/boxo/filestore"
Expand All @@ -21,9 +22,6 @@ import (
format "github.com/ipfs/go-ipld-format"
"github.com/ipfs/go-unixfsnode"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/schema"
"go.uber.org/fx"

"github.com/ipfs/kubo/core/node/helpers"
Expand Down Expand Up @@ -87,43 +85,58 @@ func (s *syncDagService) Session(ctx context.Context) format.NodeGetter {
// FetchersOut allows injection of fetchers.
type FetchersOut struct {
fx.Out
IPLDFetcher fetcher.Factory `name:"ipldFetcher"`
UnixfsFetcher fetcher.Factory `name:"unixfsFetcher"`
IPLDFetcher fetcher.Factory `name:"ipldFetcher"`
UnixfsFetcher fetcher.Factory `name:"unixfsFetcher"`
OfflineIPLDFetcher fetcher.Factory `name:"offlineIpldFetcher"`
OfflineUnixfsFetcher fetcher.Factory `name:"offlineUnixfsFetcher"`
}

// FetchersIn allows using fetchers for other dependencies.
type FetchersIn struct {
fx.In
IPLDFetcher fetcher.Factory `name:"ipldFetcher"`
UnixfsFetcher fetcher.Factory `name:"unixfsFetcher"`
IPLDFetcher fetcher.Factory `name:"ipldFetcher"`
UnixfsFetcher fetcher.Factory `name:"unixfsFetcher"`
OfflineIPLDFetcher fetcher.Factory `name:"offlineIpldFetcher"`
OfflineUnixfsFetcher fetcher.Factory `name:"offlineUnixfsFetcher"`
}

// FetcherConfig returns a fetcher config that can build new fetcher instances
func FetcherConfig(bs blockservice.BlockService) FetchersOut {
ipldFetcher := bsfetcher.NewFetcherConfig(bs)
ipldFetcher.PrototypeChooser = dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) {
if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok {
return tlnkNd.LinkTargetNodePrototype(), nil
}
return basicnode.Prototype.Any, nil
})

ipldFetcher.PrototypeChooser = dagpb.AddSupportToChooser(bsfetcher.DefaultPrototypeChooser)
unixFSFetcher := ipldFetcher.WithReifier(unixfsnode.Reify)
return FetchersOut{IPLDFetcher: ipldFetcher, UnixfsFetcher: unixFSFetcher}

// Construct offline versions which we can safely use in contexts where
// path resolution should not fetch new blocks via exchange.
offlineBs := blockservice.New(bs.Blockstore(), offline.Exchange(bs.Blockstore()))
offlineIpldFetcher := bsfetcher.NewFetcherConfig(offlineBs)
offlineIpldFetcher.PrototypeChooser = dagpb.AddSupportToChooser(bsfetcher.DefaultPrototypeChooser)
offlineUnixFSFetcher := offlineIpldFetcher.WithReifier(unixfsnode.Reify)

return FetchersOut{
IPLDFetcher: ipldFetcher,
UnixfsFetcher: unixFSFetcher,
OfflineIPLDFetcher: offlineIpldFetcher,
OfflineUnixfsFetcher: offlineUnixFSFetcher,
}
}

// PathResolversOut allows injection of path resolvers
type PathResolversOut struct {
fx.Out
IPLDPathResolver pathresolver.Resolver `name:"ipldPathResolver"`
UnixFSPathResolver pathresolver.Resolver `name:"unixFSPathResolver"`
IPLDPathResolver pathresolver.Resolver `name:"ipldPathResolver"`
UnixFSPathResolver pathresolver.Resolver `name:"unixFSPathResolver"`
OfflineIPLDPathResolver pathresolver.Resolver `name:"offlineIpldPathResolver"`
OfflineUnixFSPathResolver pathresolver.Resolver `name:"offlineUnixFSPathResolver"`
}

// PathResolverConfig creates path resolvers with the given fetchers.
func PathResolverConfig(fetchers FetchersIn) PathResolversOut {
return PathResolversOut{
IPLDPathResolver: pathresolver.NewBasicResolver(fetchers.IPLDFetcher),
UnixFSPathResolver: pathresolver.NewBasicResolver(fetchers.UnixfsFetcher),
IPLDPathResolver: pathresolver.NewBasicResolver(fetchers.IPLDFetcher),
UnixFSPathResolver: pathresolver.NewBasicResolver(fetchers.UnixfsFetcher),
OfflineIPLDPathResolver: pathresolver.NewBasicResolver(fetchers.OfflineIPLDFetcher),
OfflineUnixFSPathResolver: pathresolver.NewBasicResolver(fetchers.OfflineUnixfsFetcher),
}
}

Expand Down
9 changes: 9 additions & 0 deletions docs/changelogs/v0.24.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- [Overview](#overview)
- [🔦 Highlights](#-highlights)
- [Support for content blocking](#support-for-content-blocking)
- [Gateway: the root of the CARs are no longer meaningful](#gateway-the-root-of-the-cars-are-no-longer-meaningful)
- [IPNS: improved publishing defaults](#ipns-improved-publishing-defaults)
- [IPNS: record TTL is used for caching](#ipns-record-ttl-is-used-for-caching)
Expand All @@ -16,6 +17,14 @@

### 🔦 Highlights

#### Support for content blocking

This Kubo release ships with built-in content-blocking subsystem [announced earlier this year](https://blog.ipfs.tech/2023-content-blocking-for-the-ipfs-stack/).
Content blocking is an opt-in decision made by the operator of `ipfs daemon`.
The official build does not ship with any denylists.

Learn more at [`/docs/content-blocking.md`](https://github.com/ipfs/kubo/blob/master/docs/content-blocking.md)

#### Gateway: the root of the CARs are no longer meaningful

When requesting a CAR from the gateway, the root of the CAR might no longer be
Expand Down
73 changes: 73 additions & 0 deletions docs/content-blocking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<h1 align="center">
<br>
<a href="#readme"><img src="https://github.com/ipfs-shipyard/nopfs/blob/41484a818e6542314f784da852fc41b76f2d48a6/logo.png?raw=true" alt="content blocking logo" title="content blocking in Kubo" width="200"></a>
<br>
Content Blocking in Kubo
<br>
</h1>

Kubo ships with built-in support for denylist format from [IPIP-383](https://github.com/ipfs/specs/pull/383).

## Default behavior

Official Kubo build does not ship with any denylists enabled by default.

Content blocking is an opt-in decision made by the operator of `ipfs daemon`.

## How to enable blocking

Place a `*.deny` file in one of directories:

- `$IPFS_PATH/denylists/` (`$HOME/.ipfs/denylists/` if `IPFS_PATH` is not set)
- `$XDG_CONFIG_HOME/ipfs/denylists/` (`$HOME/.config/ipfs/denylists/` if `XDG_CONFIG_HOME` is not set)
- `/etc/ipfs/denylists/` (global)

Files need to be present before starting the `ipfs daemon` in order to be watched for updates.

If a new denylist file is added, `ipfs daemon` needs to be restarted.

CLI and Gateway users will receive errors in response to request impacted by a blocklist:

```
Error: /ipfs/QmQvjk82hPkSaZsyJ8vNER5cmzKW7HyGX5XVusK7EAenCN is blocked and cannot be provided
```

End user is not informed about the exact reason, see [How to
debug](#how-to-debug) if you need to find out which line of which denylist
caused the request to be blocked.

## Denylist file format

[NOpfs](https://github.com/ipfs-shipyard/nopfs) supports the format from [IPIP-383](https://github.com/ipfs/specs/pull/383).

Clear-text rules are simple: just put content paths to block, one per line.
Paths with unicode and whitespace need to be percend-encoded:

```
/ipfs/QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR
/ipfs/bafybeihfg3d7rdltd43u3tfvncx7n5loqofbsobojcadtmokrljfthuc7y/927%20-%20Standards/927%20-%20Standards.png
```

Sensitive content paths can be double-hashed to block without revealing them.
Double-hashed list example: https://badbits.dwebops.pub/badbits.deny

See [IPIP-383](https://github.com/ipfs/specs/pull/383) for detailed format specification and more examples.

## How to suspend blocking without removing denylists

Set `IPFS_CONTENT_BLOCKING_DISABLE` environment variable to `true` and restart the daemon.


## How to debug

Debug logging of `nopfs` subsystem can be enabled with `GOLOG_LOG_LEVEL="nopfs=debug"`

All block events are logged as warnings on a separate level named `nopfs-blocks`.

To only log requests for blocked content set `GOLOG_LOG_LEVEL="nopfs-blocks=warn"`:

```
WARN (...) QmRFniDxwxoG2n4AcnGhRdjqDjCM5YeUcBE75K8WXmioH3: blocked (test.deny:9)
```


6 changes: 6 additions & 0 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ The above will replace implicit HTTP routers with single one, allowing for
inspection/debug of HTTP requests sent by Kubo via `while true ; do nc -l 7423; done`
or more advanced tools like [mitmproxy](https://docs.mitmproxy.org/stable/#mitmproxy).


## `IPFS_CONTENT_BLOCKING_DISABLE`

Disables the content-blocking subsystem. No denylists will be watched and no
content will be blocked.

## `LIBP2P_TCP_REUSEPORT`

Kubo tries to reuse the same source port for all connections to improve NAT
Expand Down
6 changes: 5 additions & 1 deletion docs/examples/kubo-as-a-library/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ go 1.20
replace github.com/ipfs/kubo => ./../../..

require (
github.com/ipfs/boxo v0.13.2-0.20231018081237-a50f784985dd
github.com/ipfs/boxo v0.13.2-0.20231028021353-182e86f5bb9b
github.com/ipfs/kubo v0.0.0-00010101000000-000000000000
github.com/libp2p/go-libp2p v0.31.0
github.com/multiformats/go-multiaddr v0.11.0
Expand Down Expand Up @@ -40,6 +40,7 @@ require (
github.com/facebookgo/atomicfile v0.0.0-20151019160806-2de1f203e7d5 // indirect
github.com/flynn/noise v1.0.0 // indirect
github.com/francoispqt/gojay v1.2.13 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.1 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand All @@ -60,6 +61,8 @@ require (
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/huin/goupnp v1.2.0 // indirect
github.com/ipfs-shipyard/nopfs v0.0.12-0.20231027223058-cde3b5ba964c // indirect
github.com/ipfs-shipyard/nopfs/ipfs v0.13.2-0.20231027223058-cde3b5ba964c // indirect
github.com/ipfs/bbloom v0.0.4 // indirect
github.com/ipfs/go-bitfield v1.1.0 // indirect
github.com/ipfs/go-block-format v0.2.0 // indirect
Expand Down Expand Up @@ -194,5 +197,6 @@ require (
google.golang.org/grpc v1.55.0 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/square/go-jose.v2 v2.5.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
lukechampine.com/blake3 v1.2.1 // indirect
)
Loading

0 comments on commit a0f34b1

Please sign in to comment.