diff --git a/cmd/monitor/streaming/main.go b/cmd/monitor/streaming/main.go index 52839783f2e..66823f100c5 100644 --- a/cmd/monitor/streaming/main.go +++ b/cmd/monitor/streaming/main.go @@ -39,7 +39,9 @@ import ( _ "github.com/erda-project/erda/modules/core/monitor/storekit/kafka/topic/initializer" _ "github.com/erda-project/erda/modules/monitor/notify/storage/notify-record" _ "github.com/erda-project/erda/modules/msp/apm/browser" - _ "github.com/erda-project/erda/modules/msp/apm/trace/storage" + _ "github.com/erda-project/erda/modules/msp/apm/trace/persist" + _ "github.com/erda-project/erda/modules/msp/apm/trace/storage/cassandra_v1" + _ "github.com/erda-project/erda/modules/msp/apm/trace/storage/elasticsearch" // providers _ "github.com/erda-project/erda-infra/providers" diff --git a/cmd/msp/main.go b/cmd/msp/main.go index 092609f9495..48be1f20089 100644 --- a/cmd/msp/main.go +++ b/cmd/msp/main.go @@ -23,10 +23,17 @@ import ( _ "github.com/erda-project/erda-infra/providers" _ "github.com/erda-project/erda-infra/providers/cassandra" _ "github.com/erda-project/erda-proto-go/core/monitor/alert/client" + _ "github.com/erda-project/erda-proto-go/core/monitor/event/client" _ "github.com/erda-project/erda-proto-go/core/monitor/metric/client" + _ "github.com/erda-project/erda-proto-go/oap/entity/client" _ "github.com/erda-project/erda-infra/providers/grpcclient" _ "github.com/erda-project/erda-proto-go/core/services/authentication/credentials/accesskey/client" + _ "github.com/erda-project/erda/modules/core/monitor/settings" + _ "github.com/erda-project/erda/modules/core/monitor/settings/retention-strategy" + _ "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/cleaner" + _ "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" + _ "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/retention-strategy" _ "github.com/erda-project/erda/modules/msp/apm/adapter" _ "github.com/erda-project/erda/modules/msp/apm/alert" _ "github.com/erda-project/erda/modules/msp/apm/checker/apis" @@ -39,10 +46,11 @@ import ( _ "github.com/erda-project/erda/modules/msp/apm/checker/task" _ "github.com/erda-project/erda/modules/msp/apm/checker/task/fetcher/fixed" _ "github.com/erda-project/erda/modules/msp/apm/checker/task/fetcher/scheduled" - _ "github.com/erda-project/erda/modules/msp/apm/exception" + _ "github.com/erda-project/erda/modules/msp/apm/exception/query" _ "github.com/erda-project/erda/modules/msp/apm/metric" _ "github.com/erda-project/erda/modules/msp/apm/notifygroup" - _ "github.com/erda-project/erda/modules/msp/apm/trace" + _ "github.com/erda-project/erda/modules/msp/apm/trace/query" + _ "github.com/erda-project/erda/modules/msp/apm/trace/storage/elasticsearch" _ "github.com/erda-project/erda/modules/msp/configcenter" _ "github.com/erda-project/erda/modules/msp/credential" _ "github.com/erda-project/erda/modules/msp/instance/permission" diff --git a/conf/monitor/streaming/span_index_template.json b/conf/monitor/streaming/span_index_template.json new file mode 100644 index 00000000000..e5edaebc2db --- /dev/null +++ b/conf/monitor/streaming/span_index_template.json @@ -0,0 +1,63 @@ +{ + "index_patterns": [ + "${ERDA_SPAN_INDEX_PREFIX:erda-spans-}*" + ], + "settings": { + "number_of_shards": ${SPAN_INDEX_SHARDS:1}, + "number_of_replicas": ${SPAN_INDEX_REPLICAS:1}, + "index": { + "refresh_interval": "15s", + "translog.durability": "async", + "translog.sync_interval": "15s", + "translog.flush_threshold_size": "1024mb" + } + 
}, + "mappings": { + "spans": { + "dynamic": "true", + "properties": { + "trace_id": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "span_id": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "parent_span_id": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "operation_name": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "start_time": { + "type": "date" + }, + "end_time": { + "type": "date" + }, + "tags": { + "type": "object" + } + } + } + } +} \ No newline at end of file diff --git a/conf/monitor/streaming/streaming.yaml b/conf/monitor/streaming/streaming.yaml index 4e4d08e4ea1..64551092ec8 100644 --- a/conf/monitor/streaming/streaming.yaml +++ b/conf/monitor/streaming/streaming.yaml @@ -282,6 +282,82 @@ metric-persist: generate_meta: true machine_summary: true +# elasticsearch for span +elasticsearch@span: + _enable: ${SPAN_ELASTICSEARCH_ENABLE:false} + urls: "${SPAN_ELASTICSEARCH_URL:http://localhost:9200}" + security: ${SPAN_ELASTICSEARCH_SECURITY_ENABLE:false} + username: "${SPAN_ELASTICSEARCH_SECURITY_USERNAME}" + password: "${SPAN_ELASTICSEARCH_SECURITY_PASSWORD}" + +elasticsearch.index.initializer@span: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + templates: + - name: "erda-spans" + path: "${CONFIG_PATH}/span_index_template.json" + +elasticsearch.index.loader@span: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + load_mode: "LoadFromElasticSearchOnly" + index_reload_interval: "1m" + match: + - prefix: "erda-spans-" + patterns: + - "-{number}" + - ".-{number}" + +elasticsearch.index.creator@span: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + patterns: + - first_index: "erda-spans--000001" + alias: "erda-spans--rollover" + - first_index: "erda-spans-.-000001" + alias: "erda-spans-.-rollover" + remove_conflicting_indices: true + +elasticsearch.index.rollover@span: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + check_interval: "30s" + body_file: "${CONFIG_PATH}/index_rollover.json" + patterns: + - index: "erda-spans--{number}" + alias: "erda-spans--rollover" + - index: "erda-spans-.-{number}" + alias: "erda-spans-.-rollover" + +storage-retention-strategy@span: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + load_from_database: true + ttl_reload_interval: "3m" + default_ttl: "${LOG_TTL:168h}" + +span-storage-elasticsearch: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + write_timeout: "1m" + index_type: "spans" + +span-persist: + _enable: ${WRITE_SPAN_TO_ES_ENABLE:false} + spot_input: + topics: "${SPOT_TRACE_TOPICS:spot-trace}" + group: "${TRACE_GROUP_ID:spot-monitor-trace-dev}" + parallelism: ${SPOT_SPOTSPAN_CONSUMERS:3} + options: + auto.offset.reset: "${KAFKA_AUTO_OFFSET_RESET:latest}" + auto.commit.interval.ms: "${KAFKA_AUTO_COMMIT_INTERVAL_MS:1000}" + oap_input: + topics: "${OAP_TRACE_TOPICS:msp-jaeger-trace}" + group: "${TRACE_GROUP_ID:spot-monitor-trace-dev}" + parallelism: ${SPOT_OAPSPAN_CONSUMERS:3} + options: + auto.offset.reset: "${KAFKA_AUTO_OFFSET_RESET:latest}" + auto.commit.interval.ms: "${KAFKA_AUTO_COMMIT_INTERVAL_MS:1000}" + id_keys: "${SPAN_ID_KEYS:TERMINUS_DEFINE_TAG,terminus_define_tag,MESOS_TASK_ID,mesos_task_id}" + read_timeout: "5s" + buffer_size: ${SPAN_BATCH_SIZE:50} + parallelism: ${SPAN_PERSIST_PARALLELISM:3} + print_invalid_span: false + browser-analytics: _enable: ${BROWSER_ENABLE:true} input: @@ -300,7 +376,7 @@ browser-analytics: ipdb: "${CONFIG_PATH}/ipdata.dat" trace-storage: - _enable: ${TRACE_ENABLE:true} + _enable: 
${WRITE_SPAN_TO_CASSANDRA_ENABLE:true} spot_input: topics: "${SPOT_TRACE_TOPICS:spot-trace}" group: "${TRACE_GROUP_ID:spot-monitor-trace-dev}" diff --git a/conf/msp/index_rollover_min.json b/conf/msp/index_rollover_min.json new file mode 100644 index 00000000000..f5cd05436fa --- /dev/null +++ b/conf/msp/index_rollover_min.json @@ -0,0 +1,5 @@ +{ + "conditions": { + "max_size": "${INDEX_ROLLOVER_MIN_SIZE:256mb}" + } +} \ No newline at end of file diff --git a/conf/msp/msp.yaml b/conf/msp/msp.yaml index f285593e8d5..8623bb941bc 100644 --- a/conf/msp/msp.yaml +++ b/conf/msp/msp.yaml @@ -49,6 +49,57 @@ cassandra: password: ${CASSANDRA_SECURITY_PASSWORD} timeout: "${CASSANDRA_TIMEOUT:3s}" +# span +elasticsearch@span: + _enable: ${SPAN_ELASTICSEARCH_ENABLE:false} + urls: "${SPAN_ES_URL:http://localhost:9200}" + security: ${SPAN_ES_SECURITY_ENABLE:false} + username: "${SPAN_ES_SECURITY_USERNAME}" + password: "${SPAN_ES_SECURITY_PASSWORD}" + +elasticsearch.index.loader@span: + _enable: ${QUERY_SPAN_FROM_ES_ENABLE:true} + load_mode: "LoadWithCache" + index_reload_interval: "1m" + query_index_time_range: true + cache_key_prefix: "es-index-span" + match: + - prefix: "erda-spans-" + patterns: + - "-{number}" + - ".-{number}" +storage-retention-strategy@span: + _enable: ${QUERY_SPAN_FROM_ES_ENABLE:true} + default_ttl: "${SPAN_TTL:168h}" + load_from_database: false + ttl_reload_interval: "3m" +elasticsearch.index.retention-strategy@span: + _enable: ${QUERY_SPAN_FROM_ES_ENABLE:true} + key_patterns: + - "erda-spans-.-{number}" +elasticsearch.index.cleaner@span: + _enable: ${QUERY_SPAN_FROM_ES_ENABLE:true} + check_interval: "30m" + print_only: true + disk_clean: + enable: ${SPAN_DISK_CLEAN_ENABLE:true} + check_interval: "1m" + high_disk_usage_percent: ${HIGH_DISK_USAGE_PERCENT:80} # disk usage percentage that triggers cleanup + low_disk_usage_percent: ${LOW_DISK_USAGE_PERCENT:70} # target disk usage percentage to clean down to once cleanup is triggered + min_indices_store: "${MIN_INDICES_STORE:10GB}" # minimum storage the indices are guaranteed to keep when disk usage is high + min_indices_store_percent: ${MIN_INDICES_STORE_PERCENT:10} # minimum percentage of total disk capacity the indices are guaranteed to keep when disk usage is high + rollover_body_file: "${CONFIG_PATH}/index_rollover_min.json" + rollover_alias_patterns: + - index: "erda-spans--{number}" + alias: "erda-spans--rollover" + - index: "erda-spans-.-{number}" + alias: "erda-spans-.-rollover" +span-storage-elasticsearch: + _enable: ${QUERY_SPAN_FROM_ES_ENABLE:true} + query_timeout: "1m" + read_page_size: 200 + + + i18n: common: - conf/common/i18n/common.yml @@ -70,6 +121,14 @@ grpc-client@erda.core.monitor.alert: addr: "${MONITOR_GRPC_ADDR:monitor:7080}" erda.core.monitor.alert-client: +grpc-client@erda.core.monitor.event: + addr: "${MONITOR_GRPC_ADDR:monitor:7080}" +# addr: "${MONITOR_GRPC_ADDR_LOCAL:localhost:7080}" +erda.core.monitor.event-client: + +grpc-client@erda.oap.entity: + addr: "${MONITOR_GRPC_ADDR:monitor:7080}" +erda.oap.entity-client: erda.msp.apm.alert: micro_service_filter_tags: "${MICRO_SERVICE_FILTER_TAGS:_metric_name,_metric_scope,_metric_scope_id}" @@ -126,13 +185,15 @@ erda.msp.apm.checker.task.plugins.http: erda.msp.apm.checker.task: default_periodic_worker_interval: "30s" -erda.msp.apm.trace: +erda.msp.apm.trace.query: + query_source: "${TRACE_QUERY_SOURCE:cassandra,elasticsearch}" cassandra: keyspace: name: "spot_prod" auto: false # auto generate keyspace -erda.msp.apm.exception: +erda.msp.apm.exception.query: + query_source: "${EXCEPTION_QUERY_SOURCE:cassandra,elasticsearch}" cassandra: keyspace: name: "spot_prod" diff --git a/modules/core/monitor/entity/persist/provider.go 
b/modules/core/monitor/entity/persist/provider.go index 6af09909942..56922977b9b 100644 --- a/modules/core/monitor/entity/persist/provider.go +++ b/modules/core/monitor/entity/persist/provider.go @@ -81,7 +81,8 @@ func (p *provider) Init(ctx servicehub.Context) error { func init() { servicehub.Register("entity-persist", &servicehub.Spec{ - ConfigFunc: func() interface{} { return &config{} }, + Dependencies: []string{"kafka.topic.initializer"}, + ConfigFunc: func() interface{} { return &config{} }, Creator: func() servicehub.Provider { return &provider{} }, diff --git a/modules/core/monitor/log/persist/v1/provider.go b/modules/core/monitor/log/persist/v1/provider.go index 44083a68a85..6beec9b8af4 100644 --- a/modules/core/monitor/log/persist/v1/provider.go +++ b/modules/core/monitor/log/persist/v1/provider.go @@ -96,7 +96,8 @@ func (p *provider) Run(ctx context.Context) error { func init() { servicehub.Register("log-persist-v1", &servicehub.Spec{ - ConfigFunc: func() interface{} { return &config{} }, + Dependencies: []string{"kafka.topic.initializer"}, + ConfigFunc: func() interface{} { return &config{} }, Creator: func() servicehub.Provider { return &provider{} }, diff --git a/modules/msp/apm/exception/erda-error/persist/consume.go b/modules/msp/apm/exception/erda-error/persist/consume.go new file mode 100644 index 00000000000..073a34c9f06 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/persist/consume.go @@ -0,0 +1,65 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package persist + +import ( + "encoding/json" + "time" + + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +func (p *provider) decodeError(key, value []byte, topic *string, timestamp time.Time) (interface{}, error) { + data := &exception.Erda_error{} + if err := json.Unmarshal(value, data); err != nil { + p.stats.DecodeError(value, err) + if p.Cfg.PrintInvalidError { + p.Log.Warnf("unknown format error data: %s", string(value)) + } else { + p.Log.Warnf("failed to decode error: %v", err) + } + return nil, err + } + + if err := p.validator.Validate(data); err != nil { + p.stats.ValidateError(data) + if p.Cfg.PrintInvalidError { + p.Log.Warnf("invalid error data: %s", string(value)) + } else { + p.Log.Warnf("invalid error: %v", err) + } + return nil, err + } + if err := p.metadata.Process(data); err != nil { + p.stats.MetadataError(data, err) + p.Log.Errorf("failed to process error metadata: %v", err) + } + return data, nil +} + +func (p *provider) handleReadError(err error) error { + p.Log.Errorf("failed to read error from kafka: %s", err) + return nil // return nil to continue read +} + +func (p *provider) handleWriteError(list []interface{}, err error) error { + p.Log.Errorf("failed to write error into storage: %s", err) + return nil // return nil to continue consume +} + +func (p *provider) confirmErrorHandler(err error) error { + p.Log.Errorf("failed to confirm error from kafka: %s", err) + return err // return error to exit +} diff --git a/modules/msp/apm/exception/erda-error/persist/metadata.go b/modules/msp/apm/exception/erda-error/persist/metadata.go new file mode 100644 index 00000000000..87faf0d1595 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/persist/metadata.go @@ -0,0 +1,34 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +type MetadataProcessor interface { + Process(data *exception.Erda_error) error +} + +func newMetadataProcessor(cfg *config) MetadataProcessor { + return NopMetadataProcessor +} + +type nopMetadataProcessor struct{} + +func (*nopMetadataProcessor) Process(data *exception.Erda_error) error { return nil } + +// NopMetadataProcessor . +var NopMetadataProcessor MetadataProcessor = &nopMetadataProcessor{} diff --git a/modules/msp/apm/exception/erda-error/persist/provider.go b/modules/msp/apm/exception/erda-error/persist/provider.go new file mode 100644 index 00000000000..b70ad821bc6 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/persist/provider.go @@ -0,0 +1,101 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "context" + "fmt" + "time" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/providers/kafka" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-error/storage" +) + +type ( + config struct { + Input kafka.BatchReaderConfig `file:"input"` + Parallelism int `file:"parallelism" default:"1"` + BufferSize int `file:"buffer_size" default:"1024"` + ReadTimeout time.Duration `file:"read_timeout" default:"5s"` + IDKeys []string `file:"id_keys"` + PrintInvalidError bool `file:"print_invalid_error" default:"false"` + } + provider struct { + Cfg *config + Log logs.Logger + Kafka kafka.Interface `autowired:"kafka"` + StorageWriter storage.Storage `autowired:"error-storage-writer"` + + storage storage.Storage + stats Statistics + validator Validator + metadata MetadataProcessor + } +) + +func (p *provider) Init(ctx servicehub.Context) (err error) { + + p.validator = newValidator(p.Cfg) + if runner, ok := p.validator.(servicehub.ProviderRunnerWithContext); ok { + ctx.AddTask(runner.Run, servicehub.WithTaskName("error validator")) + } + + p.metadata = newMetadataProcessor(p.Cfg) + if runner, ok := p.metadata.(servicehub.ProviderRunnerWithContext); ok { + ctx.AddTask(runner.Run, servicehub.WithTaskName("error metadata processor")) + } + + p.stats = sharedStatistics + + // add consumer task + for i := 0; i < p.Cfg.Parallelism; i++ { + //spot + ctx.AddTask(func(ctx context.Context) error { + r, err := p.Kafka.NewBatchReader(&p.Cfg.Input, kafka.WithReaderDecoder(p.decodeError)) + if err != nil { + return err + } + defer r.Close() + + w, err := p.StorageWriter.NewWriter(ctx) + if err != nil { + return err + } + defer w.Close() + return storekit.BatchConsume(ctx, r, w, &storekit.BatchConsumeOptions{ + BufferSize: p.Cfg.BufferSize, + ReadTimeout: p.Cfg.ReadTimeout, + ReadErrorHandler: p.handleReadError, + WriteErrorHandler: p.handleWriteError, + ConfirmErrorHandler: p.confirmErrorHandler, + Statistics: p.stats, + }) + }, servicehub.WithTaskName(fmt.Sprintf("spot-error-consumer(%d)", i))) + } + return nil +} + +func init() { + servicehub.Register("error-persist", &servicehub.Spec{ + Dependencies: []string{"kafka.topic.initializer"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { + return &provider{} + }, + }) +} diff --git a/modules/msp/apm/exception/erda-error/persist/statistics.go b/modules/msp/apm/exception/erda-error/persist/statistics.go new file mode 100644 index 00000000000..79ce43904ca --- /dev/null +++ b/modules/msp/apm/exception/erda-error/persist/statistics.go @@ -0,0 +1,160 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +// Statistics . +type Statistics interface { + storekit.ConsumeStatistics + + DecodeError(value []byte, err error) + ValidateError(data *exception.Erda_error) + MetadataError(data *exception.Erda_error, err error) +} + +type statistics struct { + readErrors prometheus.Counter + readBytes *prometheus.CounterVec + writeErrors *prometheus.CounterVec + confirmErrors *prometheus.CounterVec + success *prometheus.CounterVec + + decodeErrors prometheus.Counter + validateErrors *prometheus.CounterVec + metadataError *prometheus.CounterVec +} + +var sharedStatistics = newStatistics() + +func newStatistics() Statistics { + const subSystem = "error_persist" + s := &statistics{ + readErrors: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "read_errors", + Subsystem: subSystem, + }, + ), + readBytes: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "read_bytes", + Subsystem: subSystem, + }, distinguishingKeys, + ), + writeErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "write_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + confirmErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "confirm_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + success: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "success", + Subsystem: subSystem, + }, distinguishingKeys, + ), + decodeErrors: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "decode_errors", + Subsystem: subSystem, + }, + ), + validateErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "validate_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + } + + // only register once + prometheus.MustRegister( + s.readErrors, + s.readBytes, + s.writeErrors, + s.confirmErrors, + s.success, + s.decodeErrors, + s.validateErrors, + ) + return s +} + +func (s *statistics) ReadError(err error) { + s.readErrors.Inc() +} + +func (s *statistics) DecodeError(value []byte, err error) { + s.decodeErrors.Inc() +} + +func (s *statistics) WriteError(list []interface{}, err error) { + for _, item := range list { + s.writeErrors.WithLabelValues(getStatisticsLabels(item.(*exception.Erda_error))...).Inc() + } +} + +func (s *statistics) ConfirmError(list []interface{}, err error) { + for _, item := range list { + s.confirmErrors.WithLabelValues(getStatisticsLabels(item.(*exception.Erda_error))...).Inc() + } +} + +func (s *statistics) Success(list []interface{}) { + for _, item := range list { + s.success.WithLabelValues(getStatisticsLabels(item.(*exception.Erda_error))...).Inc() + } +} + +func (s *statistics) ValidateError(data *exception.Erda_error) { + s.validateErrors.WithLabelValues(getStatisticsLabels(data)...).Inc() +} + +func (*statistics) MetadataError(data *exception.Erda_error, err error) {} + +var distinguishingKeys = []string{ + "org_name", "cluster_name", + "scope", "scope_id", +} + +func getStatisticsLabels(data 
*exception.Erda_error) []string { + var scope, scopeID string + + if app, ok := data.Tags["application_name"]; ok { + scope = "app" + if project, ok := data.Tags["project_name"]; ok { + scopeID = project + "/" + app + } else { + scopeID = app + } + } + return []string{ + data.Tags["org_name"], + data.Tags["cluster_name"], + scope, scopeID, + } +} diff --git a/modules/msp/apm/exception/erda-error/persist/validate.go b/modules/msp/apm/exception/erda-error/persist/validate.go new file mode 100644 index 00000000000..b3740bc1148 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/persist/validate.go @@ -0,0 +1,56 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "errors" + + "github.com/erda-project/erda/bundle" + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +// Validator . +type Validator interface { + Validate(s *exception.Erda_error) error +} + +type nopValidator struct{} + +func (*nopValidator) Validate(*exception.Erda_error) error { return nil } + +// NopValidator . +var NopValidator Validator = &nopValidator{} + +func newValidator(cfg *config) Validator { + return &validator{ + bdl: bundle.New(bundle.WithCoreServices(), bundle.WithDOP()), + } +} + +type validator struct { + bdl *bundle.Bundle +} + +var ( + // ErrIDEmpty . + ErrIDEmpty = errors.New("id empty") +) + +func (v *validator) Validate(e *exception.Erda_error) error { + if len(e.ErrorId) <= 0 { + return ErrIDEmpty + } + return nil +} diff --git a/modules/msp/apm/exception/erda-error/storage/elasticsearch/iterator.go b/modules/msp/apm/exception/erda-error/storage/elasticsearch/iterator.go new file mode 100644 index 00000000000..fd9425cd3cb --- /dev/null +++ b/modules/msp/apm/exception/erda-error/storage/elasticsearch/iterator.go @@ -0,0 +1,274 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package elasticsearch + +import ( + "context" + "encoding/json" + "io" + "strconv" + "time" + + "github.com/olivere/elastic" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-error/storage" +) + +func (p *provider) getSearchSource(sel *storage.Selector) *elastic.SearchSource { + searchSource := elastic.NewSearchSource() + query := elastic.NewBoolQuery() + if len(sel.ErrorId) > 0 { + query = query.Filter(elastic.NewQueryStringQuery("error_id:" + sel.ErrorId)) + } + if len(sel.TerminusKey) > 0 { + query = query.Filter(elastic.NewQueryStringQuery("terminus_key:" + sel.TerminusKey)) + } + + return searchSource.Query(query) +} + +func (p *provider) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + // TODO check org + indices := p.Loader.Indices(ctx, sel.StartTime, sel.EndTime, loader.KeyPath{ + Recursive: true, + }) + return &scrollIterator{ + log: p.Log, + ctx: ctx, + sel: sel, + searchSource: p.getSearchSource(sel), + client: p.client, + timeout: p.Cfg.QueryTimeout, + pageSize: p.Cfg.ReadPageSize, + indices: indices, + }, nil +} + +type iteratorDir int8 + +const ( + iteratorInitial = iota + iteratorForward + iteratorBackward +) + +type scrollIterator struct { + log logs.Logger + ctx context.Context + sel *storage.Selector + searchSource *elastic.SearchSource + client *elastic.Client + timeout time.Duration + pageSize int + indices []string + + scrollIDs map[string]struct{} + lastScrollID string + dir iteratorDir + buffer []*exception.Erda_error + value *exception.Erda_error + size int64 + err error + closed bool +} + +func (it *scrollIterator) First() bool { + if it.checkClosed() { + return false + } + it.release() + it.fetch(iteratorForward) + return it.yield() +} + +func (it *scrollIterator) Last() bool { + if it.checkClosed() { + return false + } + it.release() + it.fetch(iteratorBackward) + return it.yield() +} + +func (it *scrollIterator) Next() bool { + if it.checkClosed() { + return false + } + if it.dir == iteratorBackward { + it.err = storekit.ErrOpNotSupported + return false + } + if it.yield() { + return true + } + it.fetch(iteratorForward) + return it.yield() +} + +func (it *scrollIterator) Prev() bool { + if it.checkClosed() { + return false + } + if it.dir == iteratorForward { + it.err = storekit.ErrOpNotSupported + return false + } + if it.yield() { + return true + } + it.fetch(iteratorBackward) + return it.yield() +} + +func (it *scrollIterator) Value() storekit.Data { return it.value } +func (it *scrollIterator) Error() error { + if it.err == io.EOF { + return nil + } + return it.err +} + +func (it *scrollIterator) release() (err error) { + var list []string + for id := range it.scrollIDs { + if len(id) > 0 { + list = append(list, id) + } + } + if len(list) > 0 { + _, err = it.client.ClearScroll(list...).Do(context.TODO()) + if err != nil { + it.log.Errorf("failed to clear scroll: %s", err) + } + } + it.scrollIDs, it.lastScrollID = nil, "" + it.buffer = nil + it.value = nil + return nil +} + +func (it *scrollIterator) fetch(dir iteratorDir) error { + + if len(it.indices) <= 0 { + it.err = io.EOF + return it.err + } + minutes := int64(it.timeout.Minutes()) + if minutes < 1 { + minutes = 1 + } + keepalive := strconv.FormatInt(minutes, 10) + "m" + + it.dir = dir + it.buffer = nil + for 
it.err == nil && len(it.buffer) <= 0 { + func() error { + // do query + ctx, cancel := context.WithTimeout(it.ctx, it.timeout) + defer cancel() + var resp *elastic.SearchResult + if len(it.lastScrollID) <= 0 { + var ascending bool + if it.dir != iteratorBackward { + ascending = true + } + + resp, it.err = it.client.Scroll(it.indices...).KeepAlive(keepalive). + IgnoreUnavailable(true).AllowNoIndices(true). + SearchSource(it.searchSource).Size(it.pageSize).Sort("timestamp", ascending).Do(ctx) + if it.err != nil { + return it.err + } + } else { + resp, it.err = it.client.Scroll(it.indices...).ScrollId(it.lastScrollID).KeepAlive(keepalive). + IgnoreUnavailable(true).AllowNoIndices(true). + Size(it.pageSize).Do(ctx) + if it.err != nil { + return it.err + } + } + + // save scrollID + if it.scrollIDs == nil { + it.scrollIDs = make(map[string]struct{}) + } + if resp != nil { + it.scrollIDs[resp.ScrollId] = struct{}{} + it.lastScrollID = resp.ScrollId + } + if resp == nil || resp.Hits == nil || len(resp.Hits.Hits) <= 0 { + it.err = io.EOF + return it.err + } + + // parse result + it.buffer = parseHits(resp.Hits.Hits) + it.size = resp.Hits.TotalHits + return nil + }() + } + return nil +} + +func (it *scrollIterator) yield() bool { + if len(it.buffer) > 0 { + it.value = it.buffer[0] + it.buffer = it.buffer[1:] + return true + } + return false +} + +func (it *scrollIterator) Close() error { + it.closed = true + it.release() + return nil +} + +func (it *scrollIterator) checkClosed() bool { + if it.closed { + if it.err == nil { + it.err = storekit.ErrIteratorClosed + } + return true + } + return false +} + +func parseHits(hits []*elastic.SearchHit) (list []*exception.Erda_error) { + for _, hit := range hits { + if hit.Source == nil { + continue + } + data, err := parseData(*hit.Source) + if err != nil { + continue + } + list = append(list, data) + } + return list +} + +func parseData(bytes []byte) (*exception.Erda_error, error) { + var data exception.Erda_error + err := json.Unmarshal(bytes, &data) + if err != nil { + return nil, err + } + return &data, nil +} diff --git a/modules/msp/apm/exception/erda-error/storage/elasticsearch/provider.go b/modules/msp/apm/exception/erda-error/storage/elasticsearch/provider.go new file mode 100644 index 00000000000..cb6ce0a4044 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/storage/elasticsearch/provider.go @@ -0,0 +1,105 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package elasticsearch + +import ( + "context" + "fmt" + "time" + + "github.com/olivere/elastic" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/providers/elasticsearch" + "github.com/erda-project/erda/modules/core/monitor/settings/retention-strategy" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/creator" + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-error/storage" +) + +type ( + config struct { + QueryTimeout time.Duration `file:"query_timeout" default:"1m"` + WriteTimeout time.Duration `file:"write_timeout" default:"1m"` + ReadPageSize int `file:"read_page_size" default:"1024"` + IndexType string `file:"index_type" default:"errors"` + } + provider struct { + Cfg *config + Log logs.Logger + ES1 elasticsearch.Interface `autowired:"elasticsearch@error" optional:"true"` + ES2 elasticsearch.Interface `autowired:"elasticsearch" optional:"true"` + Loader loader.Interface `autowired:"elasticsearch.index.loader@error"` + Creator creator.Interface `autowired:"elasticsearch.index.creator@error" optional:"true"` + Retention retention.Interface `autowired:"storage-retention-strategy@error" optional:"true"` + es elasticsearch.Interface + client *elastic.Client + queryTimeout string + } +) + +func (p *provider) Init(ctx servicehub.Context) (err error) { + if p.ES1 != nil { + p.es = p.ES1 + } else if p.ES2 != nil { + p.es = p.ES2 + } else { + return fmt.Errorf("elasticsearch is required") + } + p.client = p.es.Client() + if p.Retention != nil { + ctx.AddTask(func(c context.Context) error { + p.Retention.Loading(ctx) + return nil + }) + } + return nil +} + +var _ storage.Storage = (*provider)(nil) + +func (p *provider) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + if p.Creator == nil || p.Retention == nil { + return nil, fmt.Errorf("elasticsearch.index.creator@error and storage-retention-strategy@error is required for Writer") + } + w := p.es.NewWriter(&elasticsearch.WriteOptions{ + Timeout: p.Cfg.WriteTimeout, + Enc: func(val interface{}) (index, id, typ string, body interface{}, err error) { + data := val.(*exception.Erda_error) + var wait <-chan error + wait, index = p.Creator.Ensure(data.Tags["org_name"]) + if wait != nil { + select { + case <-wait: + case <-ctx.Done(): + return "", "", "", nil, storekit.ErrExitConsume + } + } + return index, data.ErrorId, p.Cfg.IndexType, data, nil + }, + }) + return w, nil +} + +func init() { + servicehub.Register("error-storage-elasticsearch", &servicehub.Spec{ + Services: []string{"error-storage-elasticsearch-reader", "error-storage-writer"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { return &provider{} }, + }) +} diff --git a/modules/msp/apm/exception/erda-error/storage/elasticsearch/statistics.go b/modules/msp/apm/exception/erda-error/storage/elasticsearch/statistics.go new file mode 100644 index 00000000000..21bdc58a883 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/storage/elasticsearch/statistics.go @@ -0,0 +1,46 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package elasticsearch + +// +//import ( +// "context" +// "fmt" +// "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" +// "github.com/erda-project/erda/modules/msp/apm/exception/erda-error/storage" +//) +// +//func (p *provider) Count(ctx context.Context, sel *storage.Selector) int64 { +// indices := p.Loader.Indices(ctx, sel.StartTime, sel.EndTime, loader.KeyPath{ +// Recursive: true, +// }) +// fmt.Println(indices) +// +// if len(indices) <= 0 { +// return 0 +// } +// +// // do query +// ctx, cancel := context.WithTimeout(ctx, p.Cfg.QueryTimeout) +// defer cancel() +// +// count, err := p.client.Count(indices...). +// IgnoreUnavailable(true).AllowNoIndices(true).Q("timestamp:[" + string(sel.StartTime) + " TO " + string(sel.EndTime) + "] AND error_id:" + sel.ErrorId).Do(ctx) +// if err != nil { +// return 0 +// } +// +// return count +//} diff --git a/modules/msp/apm/exception/erda-error/storage/storage.go b/modules/msp/apm/exception/erda-error/storage/storage.go new file mode 100644 index 00000000000..f136392c207 --- /dev/null +++ b/modules/msp/apm/exception/erda-error/storage/storage.go @@ -0,0 +1,38 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "context" + + "github.com/erda-project/erda/modules/core/monitor/storekit" +) + +type ( + // Selector . + Selector struct { + StartTime int64 + EndTime int64 + TerminusKey string + ErrorId string + } + + // Storage . + Storage interface { + NewWriter(ctx context.Context) (storekit.BatchWriter, error) + Iterator(ctx context.Context, sel *Selector) (storekit.Iterator, error) + //Count(ctx context.Context, sel *Selector) int64 + } +) diff --git a/modules/msp/apm/exception/erda-event/persist/consume.go b/modules/msp/apm/exception/erda-event/persist/consume.go new file mode 100644 index 00000000000..78f48199b78 --- /dev/null +++ b/modules/msp/apm/exception/erda-event/persist/consume.go @@ -0,0 +1,65 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package persist + +import ( + "encoding/json" + "time" + + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +func (p *provider) decodeEvent(key, value []byte, topic *string, timestamp time.Time) (interface{}, error) { + data := &exception.Erda_event{} + if err := json.Unmarshal(value, data); err != nil { + p.stats.DecodeError(value, err) + if p.Cfg.PrintInvalidEvent { + p.Log.Warnf("unknown format event data: %s", string(value)) + } else { + p.Log.Warnf("failed to decode event: %v", err) + } + return nil, err + } + + if err := p.validator.Validate(data); err != nil { + p.stats.ValidateError(data) + if p.Cfg.PrintInvalidEvent { + p.Log.Warnf("invalid event data: %s", string(value)) + } else { + p.Log.Warnf("invalid event: %v", err) + } + return nil, err + } + if err := p.metadata.Process(data); err != nil { + p.stats.MetadataError(data, err) + p.Log.Errorf("failed to process event metadata: %v", err) + } + return data, nil +} + +func (p *provider) handleReadError(err error) error { + p.Log.Errorf("failed to read event from kafka: %s", err) + return nil // return nil to continue read +} + +func (p *provider) handleWriteError(list []interface{}, err error) error { + p.Log.Errorf("failed to write event into storage: %s", err) + return nil // return nil to continue consume +} + +func (p *provider) confirmErrorHandler(err error) error { + p.Log.Errorf("failed to confirm event from kafka: %s", err) + return err // return error to exit +} diff --git a/modules/msp/apm/exception/erda-event/persist/metadata.go b/modules/msp/apm/exception/erda-event/persist/metadata.go new file mode 100644 index 00000000000..9d00fcd21d6 --- /dev/null +++ b/modules/msp/apm/exception/erda-event/persist/metadata.go @@ -0,0 +1,34 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +type MetadataProcessor interface { + Process(data *exception.Erda_event) error +} + +func newMetadataProcessor(cfg *config) MetadataProcessor { + return NopMetadataProcessor +} + +type nopMetadataProcessor struct{} + +func (*nopMetadataProcessor) Process(data *exception.Erda_event) error { return nil } + +// NopMetadataProcessor . +var NopMetadataProcessor MetadataProcessor = &nopMetadataProcessor{} diff --git a/modules/msp/apm/exception/erda-event/persist/provider.go b/modules/msp/apm/exception/erda-event/persist/provider.go new file mode 100644 index 00000000000..b60a66fea80 --- /dev/null +++ b/modules/msp/apm/exception/erda-event/persist/provider.go @@ -0,0 +1,101 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "context" + "fmt" + "time" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/providers/kafka" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-event/storage" +) + +type ( + config struct { + Input kafka.BatchReaderConfig `file:"input"` + Parallelism int `file:"parallelism" default:"1"` + BufferSize int `file:"buffer_size" default:"1024"` + ReadTimeout time.Duration `file:"read_timeout" default:"5s"` + IDKeys []string `file:"id_keys"` + PrintInvalidEvent bool `file:"print_invalid_event" default:"false"` + } + provider struct { + Cfg *config + Log logs.Logger + Kafka kafka.Interface `autowired:"kafka"` + StorageWriter storage.Storage `autowired:"error-event-storage-writer"` + + storage storage.Storage + stats Statistics + validator Validator + metadata MetadataProcessor + } +) + +func (p *provider) Init(ctx servicehub.Context) (err error) { + + p.validator = newValidator(p.Cfg) + if runner, ok := p.validator.(servicehub.ProviderRunnerWithContext); ok { + ctx.AddTask(runner.Run, servicehub.WithTaskName("event validator")) + } + + p.metadata = newMetadataProcessor(p.Cfg) + if runner, ok := p.metadata.(servicehub.ProviderRunnerWithContext); ok { + ctx.AddTask(runner.Run, servicehub.WithTaskName("event metadata processor")) + } + + p.stats = sharedStatistics + + // add consumer task + for i := 0; i < p.Cfg.Parallelism; i++ { + //spot + ctx.AddTask(func(ctx context.Context) error { + r, err := p.Kafka.NewBatchReader(&p.Cfg.Input, kafka.WithReaderDecoder(p.decodeEvent)) + if err != nil { + return err + } + defer r.Close() + + w, err := p.StorageWriter.NewWriter(ctx) + if err != nil { + return err + } + defer w.Close() + return storekit.BatchConsume(ctx, r, w, &storekit.BatchConsumeOptions{ + BufferSize: p.Cfg.BufferSize, + ReadTimeout: p.Cfg.ReadTimeout, + ReadErrorHandler: p.handleReadError, + WriteErrorHandler: p.handleWriteError, + ConfirmErrorHandler: p.confirmErrorHandler, + Statistics: p.stats, + }) + }, servicehub.WithTaskName(fmt.Sprintf("spot-error-event-consumer(%d)", i))) + } + return nil +} + +func init() { + servicehub.Register("error-event-persist", &servicehub.Spec{ + Dependencies: []string{"kafka.topic.initializer"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { + return &provider{} + }, + }) +} diff --git a/modules/msp/apm/exception/erda-event/persist/statistics.go b/modules/msp/apm/exception/erda-event/persist/statistics.go new file mode 100644 index 00000000000..98ba7ef2cc5 --- /dev/null +++ b/modules/msp/apm/exception/erda-event/persist/statistics.go @@ -0,0 +1,160 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +// Statistics . +type Statistics interface { + storekit.ConsumeStatistics + + DecodeError(value []byte, err error) + ValidateError(data *exception.Erda_event) + MetadataError(data *exception.Erda_event, err error) +} + +type statistics struct { + readErrors prometheus.Counter + readBytes *prometheus.CounterVec + writeErrors *prometheus.CounterVec + confirmErrors *prometheus.CounterVec + success *prometheus.CounterVec + + decodeErrors prometheus.Counter + validateErrors *prometheus.CounterVec + metadataError *prometheus.CounterVec +} + +var sharedStatistics = newStatistics() + +func newStatistics() Statistics { + const subSystem = "error_event_persist" + s := &statistics{ + readErrors: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "read_errors", + Subsystem: subSystem, + }, + ), + readBytes: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "read_bytes", + Subsystem: subSystem, + }, distinguishingKeys, + ), + writeErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "write_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + confirmErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "confirm_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + success: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "success", + Subsystem: subSystem, + }, distinguishingKeys, + ), + decodeErrors: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "decode_errors", + Subsystem: subSystem, + }, + ), + validateErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "validate_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + } + + // only register once + prometheus.MustRegister( + s.readErrors, + s.readBytes, + s.writeErrors, + s.confirmErrors, + s.success, + s.decodeErrors, + s.validateErrors, + ) + return s +} + +func (s *statistics) ReadError(err error) { + s.readErrors.Inc() +} + +func (s *statistics) DecodeError(value []byte, err error) { + s.decodeErrors.Inc() +} + +func (s *statistics) WriteError(list []interface{}, err error) { + for _, item := range list { + s.writeErrors.WithLabelValues(getStatisticsLabels(item.(*exception.Erda_event))...).Inc() + } +} + +func (s *statistics) ConfirmError(list []interface{}, err error) { + for _, item := range list { + s.confirmErrors.WithLabelValues(getStatisticsLabels(item.(*exception.Erda_event))...).Inc() + } +} + +func (s *statistics) Success(list []interface{}) { + for _, item := range list { + s.success.WithLabelValues(getStatisticsLabels(item.(*exception.Erda_event))...).Inc() + } +} + +func (s *statistics) ValidateError(data *exception.Erda_event) { + s.validateErrors.WithLabelValues(getStatisticsLabels(data)...).Inc() +} + +func (*statistics) MetadataError(data *exception.Erda_event, err error) {} + +var distinguishingKeys = []string{ + "org_name", "cluster_name", + "scope", "scope_id", +} + +func 
getStatisticsLabels(data *exception.Erda_event) []string { + var scope, scopeID string + + if app, ok := data.Tags["application_name"]; ok { + scope = "app" + if project, ok := data.Tags["project_name"]; ok { + scopeID = project + "/" + app + } else { + scopeID = app + } + } + return []string{ + data.Tags["org_name"], + data.Tags["cluster_name"], + scope, scopeID, + } +} diff --git a/modules/msp/apm/exception/erda-event/persist/validate.go b/modules/msp/apm/exception/erda-event/persist/validate.go new file mode 100644 index 00000000000..7d3f3a98674 --- /dev/null +++ b/modules/msp/apm/exception/erda-event/persist/validate.go @@ -0,0 +1,56 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "errors" + + "github.com/erda-project/erda/bundle" + "github.com/erda-project/erda/modules/msp/apm/exception" +) + +// Validator . +type Validator interface { + Validate(s *exception.Erda_event) error +} + +type nopValidator struct{} + +func (*nopValidator) Validate(*exception.Erda_event) error { return nil } + +// NopValidator . +var NopValidator Validator = &nopValidator{} + +func newValidator(cfg *config) Validator { + return &validator{ + bdl: bundle.New(bundle.WithCoreServices(), bundle.WithDOP()), + } +} + +type validator struct { + bdl *bundle.Bundle +} + +var ( + // ErrIDEmpty . + ErrIDEmpty = errors.New("id empty") +) + +func (v *validator) Validate(e *exception.Erda_event) error { + if len(e.EventId) <= 0 { + return ErrIDEmpty + } + return nil +} diff --git a/modules/msp/apm/exception/erda-event/storage/elasticsearch/iterator.go b/modules/msp/apm/exception/erda-event/storage/elasticsearch/iterator.go new file mode 100644 index 00000000000..94a3adb268e --- /dev/null +++ b/modules/msp/apm/exception/erda-event/storage/elasticsearch/iterator.go @@ -0,0 +1,277 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package elasticsearch + +import ( + "context" + "encoding/json" + "io" + "strconv" + "time" + + "github.com/olivere/elastic" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-event/storage" +) + +func (p *provider) getSearchSource(sel *storage.Selector) *elastic.SearchSource { + searchSource := elastic.NewSearchSource() + query := elastic.NewBoolQuery() + if len(sel.ErrorId) > 0 { + query = query.Filter(elastic.NewQueryStringQuery("error_id:" + sel.ErrorId)) + } + if len(sel.EventId) > 0 { + query = query.Filter(elastic.NewQueryStringQuery("event_id:" + sel.EventId)) + } + if len(sel.TerminusKey) > 0 { + query = query.Filter(elastic.NewQueryStringQuery("tags.terminus_key:" + sel.TerminusKey)) + } + + return searchSource.Query(query) +} + +func (p *provider) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + // TODO check org + indices := p.Loader.Indices(ctx, sel.StartTime, sel.EndTime, loader.KeyPath{ + Recursive: true, + }) + return &scrollIterator{ + log: p.Log, + ctx: ctx, + sel: sel, + searchSource: p.getSearchSource(sel), + client: p.client, + timeout: p.Cfg.QueryTimeout, + pageSize: p.Cfg.ReadPageSize, + indices: indices, + }, nil +} + +type iteratorDir int8 + +const ( + iteratorInitial = iota + iteratorForward + iteratorBackward +) + +type scrollIterator struct { + log logs.Logger + ctx context.Context + sel *storage.Selector + searchSource *elastic.SearchSource + client *elastic.Client + timeout time.Duration + pageSize int + indices []string + + scrollIDs map[string]struct{} + lastScrollID string + dir iteratorDir + buffer []*exception.Erda_event + value *exception.Erda_event + size int64 + err error + closed bool +} + +func (it *scrollIterator) First() bool { + if it.checkClosed() { + return false + } + it.release() + it.fetch(iteratorForward) + return it.yield() +} + +func (it *scrollIterator) Last() bool { + if it.checkClosed() { + return false + } + it.release() + it.fetch(iteratorBackward) + return it.yield() +} + +func (it *scrollIterator) Next() bool { + if it.checkClosed() { + return false + } + if it.dir == iteratorBackward { + it.err = storekit.ErrOpNotSupported + return false + } + if it.yield() { + return true + } + it.fetch(iteratorForward) + return it.yield() +} + +func (it *scrollIterator) Prev() bool { + if it.checkClosed() { + return false + } + if it.dir == iteratorForward { + it.err = storekit.ErrOpNotSupported + return false + } + if it.yield() { + return true + } + it.fetch(iteratorBackward) + return it.yield() +} + +func (it *scrollIterator) Value() storekit.Data { return it.value } +func (it *scrollIterator) Error() error { + if it.err == io.EOF { + return nil + } + return it.err +} + +func (it *scrollIterator) release() (err error) { + var list []string + for id := range it.scrollIDs { + if len(id) > 0 { + list = append(list, id) + } + } + if len(list) > 0 { + _, err = it.client.ClearScroll(list...).Do(context.TODO()) + if err != nil { + it.log.Errorf("failed to clear scroll: %s", err) + } + } + it.scrollIDs, it.lastScrollID = nil, "" + it.buffer = nil + it.value = nil + return nil +} + +func (it *scrollIterator) fetch(dir iteratorDir) error { + + if len(it.indices) <= 0 { + it.err = io.EOF + return it.err + } + minutes := int64(it.timeout.Minutes()) + if minutes 
< 1 { + minutes = 1 + } + keepalive := strconv.FormatInt(minutes, 10) + "m" + + it.dir = dir + it.buffer = nil + for it.err == nil && len(it.buffer) <= 0 { + func() error { + // do query + ctx, cancel := context.WithTimeout(it.ctx, it.timeout) + defer cancel() + var resp *elastic.SearchResult + if len(it.lastScrollID) <= 0 { + var ascending bool + if it.dir != iteratorBackward { + ascending = true + } + + resp, it.err = it.client.Scroll(it.indices...).KeepAlive(keepalive). + IgnoreUnavailable(true).AllowNoIndices(true). + SearchSource(it.searchSource).Size(it.pageSize).Sort("timestamp", ascending).Do(ctx) + if it.err != nil { + return it.err + } + } else { + resp, it.err = it.client.Scroll(it.indices...).ScrollId(it.lastScrollID).KeepAlive(keepalive). + IgnoreUnavailable(true).AllowNoIndices(true). + Size(it.pageSize).Do(ctx) + if it.err != nil { + return it.err + } + } + + // save scrollID + if it.scrollIDs == nil { + it.scrollIDs = make(map[string]struct{}) + } + if resp != nil { + it.scrollIDs[resp.ScrollId] = struct{}{} + it.lastScrollID = resp.ScrollId + } + if resp == nil || resp.Hits == nil || len(resp.Hits.Hits) <= 0 { + it.err = io.EOF + return it.err + } + + // parse result + it.buffer = parseHits(resp.Hits.Hits) + it.size = resp.Hits.TotalHits + return nil + }() + } + return nil +} + +func (it *scrollIterator) yield() bool { + if len(it.buffer) > 0 { + it.value = it.buffer[0] + it.buffer = it.buffer[1:] + return true + } + return false +} + +func (it *scrollIterator) Close() error { + it.closed = true + it.release() + return nil +} + +func (it *scrollIterator) checkClosed() bool { + if it.closed { + if it.err == nil { + it.err = storekit.ErrIteratorClosed + } + return true + } + return false +} + +func parseHits(hits []*elastic.SearchHit) (list []*exception.Erda_event) { + for _, hit := range hits { + if hit.Source == nil { + continue + } + data, err := parseData(*hit.Source) + if err != nil { + continue + } + list = append(list, data) + } + return list +} + +func parseData(bytes []byte) (*exception.Erda_event, error) { + var data exception.Erda_event + err := json.Unmarshal(bytes, &data) + if err != nil { + return nil, err + } + return &data, nil +} diff --git a/modules/msp/apm/exception/erda-event/storage/elasticsearch/provider.go b/modules/msp/apm/exception/erda-event/storage/elasticsearch/provider.go new file mode 100644 index 00000000000..c349d41d7db --- /dev/null +++ b/modules/msp/apm/exception/erda-event/storage/elasticsearch/provider.go @@ -0,0 +1,105 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package elasticsearch + +import ( + "context" + "fmt" + "time" + + "github.com/olivere/elastic" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/providers/elasticsearch" + "github.com/erda-project/erda/modules/core/monitor/settings/retention-strategy" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/creator" + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-event/storage" +) + +type ( + config struct { + QueryTimeout time.Duration `file:"query_timeout" default:"1m"` + WriteTimeout time.Duration `file:"write_timeout" default:"1m"` + ReadPageSize int `file:"read_page_size" default:"1024"` + IndexType string `file:"index_type" default:"error-events"` + } + provider struct { + Cfg *config + Log logs.Logger + ES1 elasticsearch.Interface `autowired:"elasticsearch@error_event" optional:"true"` + ES2 elasticsearch.Interface `autowired:"elasticsearch" optional:"true"` + Loader loader.Interface `autowired:"elasticsearch.index.loader@error_event"` + Creator creator.Interface `autowired:"elasticsearch.index.creator@error_event" optional:"true"` + Retention retention.Interface `autowired:"storage-retention-strategy@error_event" optional:"true"` + client *elastic.Client + es elasticsearch.Interface + queryTimeout string + } +) + +func (p *provider) Init(ctx servicehub.Context) (err error) { + if p.ES1 != nil { + p.es = p.ES1 + } else if p.ES2 != nil { + p.es = p.ES2 + } else { + return fmt.Errorf("elasticsearch is required") + } + p.client = p.es.Client() + if p.Retention != nil { + ctx.AddTask(func(c context.Context) error { + p.Retention.Loading(ctx) + return nil + }) + } + return nil +} + +var _ storage.Storage = (*provider)(nil) + +func (p *provider) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + if p.Creator == nil || p.Retention == nil { + return nil, fmt.Errorf("elasticsearch.index.creator@error_event and storage-retention-strategy@error_event is required for Writer") + } + w := p.es.NewWriter(&elasticsearch.WriteOptions{ + Timeout: p.Cfg.WriteTimeout, + Enc: func(val interface{}) (index, id, typ string, body interface{}, err error) { + data := val.(*exception.Erda_event) + var wait <-chan error + wait, index = p.Creator.Ensure(data.Tags["org_name"]) + if wait != nil { + select { + case <-wait: + case <-ctx.Done(): + return "", "", "", nil, storekit.ErrExitConsume + } + } + return index, data.EventId, p.Cfg.IndexType, data, nil + }, + }) + return w, nil +} + +func init() { + servicehub.Register("error-event-storage-elasticsearch", &servicehub.Spec{ + Services: []string{"error-event-storage-elasticsearch-reader", "error-event-storage-writer"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { return &provider{} }, + }) +} diff --git a/modules/msp/apm/exception/erda-event/storage/elasticsearch/statistics.go b/modules/msp/apm/exception/erda-event/storage/elasticsearch/statistics.go new file mode 100644 index 00000000000..69e481b604f --- /dev/null +++ b/modules/msp/apm/exception/erda-event/storage/elasticsearch/statistics.go @@ -0,0 +1,45 @@ +// Copyright (c) 2021 Terminus, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package elasticsearch + +import ( + "context" + "strconv" + + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-event/storage" +) + +func (p *provider) Count(ctx context.Context, sel *storage.Selector) int64 { + indices := p.Loader.Indices(ctx, sel.StartTime, sel.EndTime, loader.KeyPath{ + Recursive: true, + }) + + if len(indices) <= 0 { + return 0 + } + + // do query + ctx, cancel := context.WithTimeout(ctx, p.Cfg.QueryTimeout) + defer cancel() + + count, err := p.client.Count(indices...). + IgnoreUnavailable(true).AllowNoIndices(true).Q("timestamp:[" + strconv.FormatInt(sel.StartTime, 10) + " TO " + strconv.FormatInt(sel.EndTime, 10) + "] AND error_id:" + sel.ErrorId).Do(ctx) + if err != nil { + return 0 + } + + return count +} diff --git a/modules/msp/apm/exception/erda-event/storage/storage.go b/modules/msp/apm/exception/erda-event/storage/storage.go new file mode 100644 index 00000000000..d7a773cf0a5 --- /dev/null +++ b/modules/msp/apm/exception/erda-event/storage/storage.go @@ -0,0 +1,39 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "context" + + "github.com/erda-project/erda/modules/core/monitor/storekit" +) + +type ( + // Selector . + Selector struct { + StartTime int64 + EndTime int64 + EventId string + ErrorId string + TerminusKey string + } + + // Storage . + Storage interface { + NewWriter(ctx context.Context) (storekit.BatchWriter, error) + Iterator(ctx context.Context, sel *Selector) (storekit.Iterator, error) + Count(ctx context.Context, sel *Selector) int64 + } +) diff --git a/modules/msp/apm/exception/erda_error.go b/modules/msp/apm/exception/erda_error.go new file mode 100644 index 00000000000..ad12d2cd395 --- /dev/null +++ b/modules/msp/apm/exception/erda_error.go @@ -0,0 +1,24 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package exception + +type Erda_error struct { + TerminusKey string `json:"terminus_key"` + ApplicationId string `json:"application_id"` + ServiceName string `json:"service_name"` + ErrorId string `json:"error_id"` + Timestamp int64 `json:"timestamp"` + Tags map[string]string `json:"tags"` +} diff --git a/modules/msp/apm/exception/erda_event.go b/modules/msp/apm/exception/erda_event.go new file mode 100644 index 00000000000..9c755ffe416 --- /dev/null +++ b/modules/msp/apm/exception/erda_event.go @@ -0,0 +1,27 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exception + +type Erda_event struct { + EventId string `json:"event_id"` + Timestamp int64 `json:"timestamp"` + RequestId string `json:"request_id"` + ErrorId string `json:"error_id"` + Stacks []string `json:"stacks"` + Tags map[string]string `json:"tags"` + MetaData map[string]string `json:"meta_data"` + RequestContext map[string]string `json:"request_context"` + RequestHeaders map[string]string `json:"request_headers"` +} diff --git a/modules/msp/apm/exception/exception.service.go b/modules/msp/apm/exception/exception.service.go deleted file mode 100644 index f26caef4488..00000000000 --- a/modules/msp/apm/exception/exception.service.go +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2021 Terminus, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package exception - -import ( - context "context" - "encoding/json" - "time" - - "github.com/recallsong/go-utils/conv" - "google.golang.org/protobuf/types/known/structpb" - - pb "github.com/erda-project/erda-proto-go/msp/apm/exception/pb" -) - -type exceptionService struct { - p *provider -} - -func (s *exceptionService) GetExceptions(ctx context.Context, req *pb.GetExceptionsRequest) (*pb.GetExceptionsResponse, error) { - - iter := s.p.cassandraSession.Session().Query("SELECT * FROM error_description_v2 where terminus_key=? 
ALLOW FILTERING", req.ScopeID).Iter() - - var exceptions []*pb.Exception - for { - row := make(map[string]interface{}) - if !iter.MapScan(row) { - break - } - exception := pb.Exception{} - tags := row["tags"].(map[string]string) - exception.Id = row["error_id"].(string) - exception.ScopeID = conv.ToString(row["terminus_key"]) - exception.ClassName = conv.ToString(tags["class"]) - exception.Method = conv.ToString(tags["method"]) - exception.Type = conv.ToString(tags["type"]) - exception.ExceptionMessage = conv.ToString(tags["exception_message"]) - exception.File = conv.ToString(tags["file"]) - exception.ServiceName = conv.ToString(tags["service_name"]) - exception.ApplicationID = conv.ToString(tags["application_id"]) - exception.RuntimeID = conv.ToString(tags["runtime_id"]) - layout := "2006-01-02 15:04:05" - - stat := "SELECT timestamp,count FROM error_count WHERE error_id= ? AND timestamp >= ? AND timestamp <= ? ORDER BY timestamp ASC" - iterCount := s.p.cassandraSession.Session().Query(stat, exception.Id, req.StartTime*1e6, req.EndTime*1e6).Iter() - count := int64(0) - index := 0 - for { - rowCount := make(map[string]interface{}) - if !iterCount.MapScan(rowCount) { - break - } - if index == 0 { - exception.CreateTime = time.Unix(conv.ToInt64(rowCount["timestamp"], 0)/1e9, 10).Format(layout) - } - count += conv.ToInt64(rowCount["count"], 0) - index++ - if index == iterCount.NumRows() { - exception.UpdateTime = time.Unix(conv.ToInt64(rowCount["timestamp"], 0)/1e9, 10).Format(layout) - } - } - exception.EventCount = count - if exception.EventCount > 0 { - exceptions = append(exceptions, &exception) - } - } - - return &pb.GetExceptionsResponse{Data: exceptions}, nil -} - -func (s *exceptionService) GetExceptionEventIds(ctx context.Context, req *pb.GetExceptionEventIdsRequest) (*pb.GetExceptionEventIdsResponse, error) { - iter := s.p.cassandraSession.Session().Query("SELECT event_id FROM error_event_mapping WHERE error_id= ? 
limit ?", req.ExceptionID, 999).Iter() - - var data []string - for { - row := make(map[string]interface{}) - if !iter.MapScan(row) { - break - } - data = append(data, conv.ToString(row["event_id"])) - } - return &pb.GetExceptionEventIdsResponse{Data: data}, nil -} - -func (s *exceptionService) GetExceptionEvent(ctx context.Context, req *pb.GetExceptionEventRequest) (*pb.GetExceptionEventResponse, error) { - iter := s.p.cassandraSession.Session().Query("SELECT * FROM error_events WHERE event_id = ?", req.ExceptionEventID).Iter() - event := pb.ExceptionEvent{} - for { - row := make(map[string]interface{}) - if !iter.MapScan(row) { - break - } - event.Tags = row["tags"].(map[string]string) - if conv.ToString(event.Tags["terminus_key"]) != req.ScopeID { - continue - } - event.Id = conv.ToString(row["event_id"]) - event.ExceptionID = conv.ToString(row["error_id"]) - event.RequestID = conv.ToString(row["request_id"]) - event.RequestSampled = conv.ToBool(event.Tags["request_sampled"], false) - event.Metadata = row["meta_data"].(map[string]string) - event.RequestContext = row["request_context"].(map[string]string) - event.RequestHeaders = row["request_headers"].(map[string]string) - event.Timestamp = row["timestamp"].(int64) / int64(time.Millisecond) - var stacks []*pb.Stacks - for _, info := range row["stacks"].([]string) { - var stack pb.Stacks - var stackMap map[string]*structpb.Value - if err := json.Unmarshal([]byte(info), &stackMap); err != nil { - continue - } - stack.Stack = stackMap - stacks = append(stacks, &stack) - } - event.Stacks = stacks - } - return &pb.GetExceptionEventResponse{Data: &event}, nil -} diff --git a/modules/msp/apm/exception/query/error_event_list_storage.go b/modules/msp/apm/exception/query/error_event_list_storage.go new file mode 100644 index 00000000000..6c8b32d2402 --- /dev/null +++ b/modules/msp/apm/exception/query/error_event_list_storage.go @@ -0,0 +1,110 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "context" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-event/storage" +) + +// ErrorEvent . +type ErrorEvent = interface{} + +// ErrorEventListIterator . +type ErrorEventListIterator struct { + list []ErrorEvent + i int + data ErrorEvent +} + +// NewErrorEventErrorListIterator . +func NewErrorEventErrorListIterator(list ...ErrorEvent) storekit.Iterator { + return &ErrorEventListIterator{list: list, i: -1} +} + +// First . +func (it *ErrorEventListIterator) First() bool { + if len(it.list) <= 0 { + return false + } + it.i = 0 + it.data = it.list[it.i] + return true +} + +// Last . +func (it *ErrorEventListIterator) Last() bool { + if len(it.list) <= 0 { + return false + } + it.i = len(it.list) - 1 + it.data = it.list[it.i] + return true + +} + +// Next . 
+func (it *ErrorEventListIterator) Next() bool { + if it.i < 0 { + return it.First() + } + if it.i >= len(it.list)-1 { + return false + } + it.i++ + it.data = it.list[it.i] + return true +} + +// Prev . +func (it *ErrorEventListIterator) Prev() bool { + if it.i < 0 { + return it.Last() + } + if it.i <= 0 { + return false + } + it.i-- + it.data = it.list[it.i] + return true +} + +// Value . +func (it *ErrorEventListIterator) Value() ErrorEvent { return it.data } + +// Error . +func (it *ErrorEventListIterator) Error() error { return nil } + +// Close . +func (it *ErrorEventListIterator) Close() error { return nil } + +type errorEventListStorage struct { + exceptionEvent *exception.Erda_event +} + +func (s *errorEventListStorage) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + return storekit.DefaultNopWriter, nil +} + +func (s *errorEventListStorage) Count(ctx context.Context, sel *storage.Selector) int64 { + return int64(1) +} + +func (s *errorEventListStorage) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + return NewErrorEventErrorListIterator(s.exceptionEvent), nil +} diff --git a/modules/msp/apm/exception/query/error_list_storage.go b/modules/msp/apm/exception/query/error_list_storage.go new file mode 100644 index 00000000000..5ad650ead64 --- /dev/null +++ b/modules/msp/apm/exception/query/error_list_storage.go @@ -0,0 +1,110 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "context" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/modules/msp/apm/exception/erda-error/storage" +) + +// Error . +type Error = interface{} + +// ErrorListIterator . +type ErrorListIterator struct { + list []Error + i int + data Error +} + +// NewErrorListIterator . +func NewErrorListIterator(list ...Error) storekit.Iterator { + return &ErrorListIterator{list: list, i: -1} +} + +// First . +func (it *ErrorListIterator) First() bool { + if len(it.list) <= 0 { + return false + } + it.i = 0 + it.data = it.list[it.i] + return true +} + +// Last . +func (it *ErrorListIterator) Last() bool { + if len(it.list) <= 0 { + return false + } + it.i = len(it.list) - 1 + it.data = it.list[it.i] + return true + +} + +// Next . +func (it *ErrorListIterator) Next() bool { + if it.i < 0 { + return it.First() + } + if it.i >= len(it.list)-1 { + return false + } + it.i++ + it.data = it.list[it.i] + return true +} + +// Prev . +func (it *ErrorListIterator) Prev() bool { + if it.i < 0 { + return it.Last() + } + if it.i <= 0 { + return false + } + it.i-- + it.data = it.list[it.i] + return true +} + +// Value . +func (it *ErrorListIterator) Value() Error { return it.data } + +// Error . +func (it *ErrorListIterator) Error() error { return nil } + +// Close . 
+func (it *ErrorListIterator) Close() error { return nil } + +type errorListStorage struct { + exception *exception.Erda_error +} + +func (s *errorListStorage) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + return storekit.DefaultNopWriter, nil +} + +//func (s *errorListStorage) Count(ctx context.Context, traceId string) int64 { +// return int64(1) +//} + +func (s *errorListStorage) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + return NewErrorListIterator(s.exception), nil +} diff --git a/modules/msp/apm/exception/query/exception.service.go b/modules/msp/apm/exception/query/exception.service.go new file mode 100644 index 00000000000..d096b429e01 --- /dev/null +++ b/modules/msp/apm/exception/query/exception.service.go @@ -0,0 +1,393 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "context" + "encoding/json" + "strings" + "time" + + "github.com/gocql/gocql" + "github.com/recallsong/go-utils/conv" + "google.golang.org/protobuf/types/known/structpb" + + eventpb "github.com/erda-project/erda-proto-go/core/monitor/event/pb" + metricpb "github.com/erda-project/erda-proto-go/core/monitor/metric/pb" + "github.com/erda-project/erda-proto-go/msp/apm/exception/pb" + entitypb "github.com/erda-project/erda-proto-go/oap/entity/pb" + "github.com/erda-project/erda/modules/msp/apm/exception" + "github.com/erda-project/erda/pkg/common/errors" +) + +type exceptionService struct { + p *provider + Metric metricpb.MetricServiceServer + Event eventpb.EventQueryServiceServer + Entity entitypb.EntityServiceServer +} + +func (s *exceptionService) GetExceptions(ctx context.Context, req *pb.GetExceptionsRequest) (*pb.GetExceptionsResponse, error) { + var exceptions []*pb.Exception + + if strings.Contains(s.p.Cfg.QuerySource, "cassandra") { + // do cassandra query + exceptionsFromCassandra := fetchErdaErrorFromCassandra(ctx, s.Metric, s.p.cassandraSession.Session(), req) + for _, exception := range exceptionsFromCassandra { + exceptions = append(exceptions, exception) + } + } + + if strings.Contains(s.p.Cfg.QuerySource, "elasticsearch") { + // do es query + conditions := map[string]string{ + "terminusKey": req.ScopeID, + } + + entityReq := &entitypb.ListEntitiesRequest{ + Type: "error_exception", + Labels: conditions, + Limit: int64(1000), + } + exceptionsFromElasticsearch, _ := fetchErdaErrorFromES(ctx, s.Event, s.Entity, entityReq, req.StartTime, req.EndTime) + for _, exception := range exceptionsFromElasticsearch { + exceptions = append(exceptions, exception) + } + } + + return &pb.GetExceptionsResponse{Data: exceptions}, nil +} + +func (s *exceptionService) GetExceptionEventIds(ctx context.Context, req *pb.GetExceptionEventIdsRequest) (*pb.GetExceptionEventIdsResponse, error) { + var data []string + + if strings.Contains(s.p.Cfg.QuerySource, "cassandra") { + // do cassandra query + iter := s.p.cassandraSession.Session().Query("SELECT event_id FROM error_event_mapping WHERE 
error_id= ? limit ?", req.ExceptionID, 999).Iter() + + for { + row := make(map[string]interface{}) + if !iter.MapScan(row) { + break + } + data = append(data, conv.ToString(row["event_id"])) + } + } + if strings.Contains(s.p.Cfg.QuerySource, "elasticsearch") { + //do es query + tags := map[string]string{ + "terminusKey": req.ScopeID, + } + eventReq := &eventpb.GetEventsRequest{ + RelationId: req.ExceptionID, + RelationType: "exception", + Tags: tags, + PageNo: 1, + PageSize: 999, + Start: time.Now().Add(-time.Hour * 24 * 7).UnixNano(), + End: time.Now().UnixNano(), + } + + items, err := fetchErdaEventFromES(ctx, s.Event, eventReq) + if err != nil { + return nil, errors.NewInternalServerError(err) + } + + for _, value := range items { + data = append(data, value.EventId) + } + } + + return &pb.GetExceptionEventIdsResponse{Data: data}, nil +} + +func (s *exceptionService) GetExceptionEvent(ctx context.Context, req *pb.GetExceptionEventRequest) (*pb.GetExceptionEventResponse, error) { + event := pb.ExceptionEvent{} + + if strings.Contains(s.p.Cfg.QuerySource, "elasticsearch") { + // do es query + + tags := map[string]string{ + "terminusKey": req.ScopeID, + } + eventReq := &eventpb.GetEventsRequest{ + EventId: req.ExceptionEventID, + Tags: tags, + PageNo: 1, + PageSize: 999, + Start: time.Now().Add(-time.Hour * 24 * 7).UnixNano(), + End: time.Now().UnixNano(), + Debug: true, + } + items, err := fetchErdaEventFromES(ctx, s.Event, eventReq) + if err != nil { + return nil, errors.NewInternalServerError(err) + } + + if len(items) > 0 { + item := items[0] + event.Tags = item.Tags + event.Id = item.EventId + event.ExceptionID = item.ErrorId + event.RequestID = item.RequestId + event.RequestSampled = conv.ToBool(event.Tags["request_sampled"], false) + event.Metadata = item.MetaData + event.RequestContext = item.RequestContext + event.RequestHeaders = item.RequestHeaders + event.Timestamp = item.Timestamp / int64(time.Millisecond) + var stacks []*pb.Stacks + for _, info := range item.Stacks { + var stack pb.Stacks + var stackMap map[string]*structpb.Value + if err := json.Unmarshal([]byte(info), &stackMap); err != nil { + continue + } + + stack.Stack = stackMap + stacks = append(stacks, &stack) + } + event.Stacks = stacks + } + } + + if strings.Contains(s.p.Cfg.QuerySource, "cassandra") && len(event.Id) <= 0 { + // do cassandra query + event = fetchErdaEventFromCassandra(s.p.cassandraSession.Session(), req) + } + + return &pb.GetExceptionEventResponse{Data: &event}, nil +} + +func fetchErdaErrorFromCassandra(ctx context.Context, metric metricpb.MetricServiceServer, session *gocql.Session, req *pb.GetExceptionsRequest) []*pb.Exception { + iter := session.Query("SELECT * FROM error_description_v2 where terminus_key=? 
ALLOW FILTERING", req.ScopeID).Iter() + + var exceptions []*pb.Exception + for { + row := make(map[string]interface{}) + if !iter.MapScan(row) { + break + } + exception := pb.Exception{} + tags := row["tags"].(map[string]string) + exception.Id = row["error_id"].(string) + exception.ScopeID = conv.ToString(row["terminus_key"]) + exception.ClassName = conv.ToString(tags["class"]) + exception.Method = conv.ToString(tags["method"]) + exception.Type = conv.ToString(tags["type"]) + exception.ExceptionMessage = conv.ToString(tags["exception_message"]) + exception.File = conv.ToString(tags["file"]) + exception.ServiceName = conv.ToString(tags["service_name"]) + exception.ApplicationID = conv.ToString(tags["application_id"]) + exception.RuntimeID = conv.ToString(tags["runtime_id"]) + + fetchErdaErrorEventCount(ctx, metric, session, req, &exception) + if exception.EventCount > 0 { + exceptions = append(exceptions, &exception) + } + } + return exceptions +} + +func fetchErdaErrorEventCount(ctx context.Context, metric metricpb.MetricServiceServer, session *gocql.Session, req *pb.GetExceptionsRequest, exception *pb.Exception) { + layout := "2006-01-02 15:04:05" + count := int64(0) + + stat := "SELECT timestamp,count FROM error_count WHERE error_id= ? AND timestamp >= ? AND timestamp <= ? ORDER BY timestamp ASC" + iterCount := session.Query(stat, exception.Id, req.StartTime*1e6, req.EndTime*1e6).Iter() + index := 0 + for { + rowCount := make(map[string]interface{}) + if !iterCount.MapScan(rowCount) { + break + } + if index == 0 { + exception.CreateTime = time.Unix(conv.ToInt64(rowCount["timestamp"], 0)/1e9, 10).Format(layout) + } + count += conv.ToInt64(rowCount["count"], 0) + index++ + if index == iterCount.NumRows() { + exception.UpdateTime = time.Unix(conv.ToInt64(rowCount["timestamp"], 0)/1e9, 10).Format(layout) + } + } + + metricreq := &metricpb.QueryWithInfluxFormatRequest{ + Start: conv.ToString(req.StartTime * 1e6), // or timestamp + End: conv.ToString(req.EndTime * 1e6), // or timestamp + Statement: `SELECT timestamp, count::field FROM error_count WHERE error_id::tag=$error_id ORDER BY timestamp`, + Params: map[string]*structpb.Value{ + "error_id": structpb.NewStringValue(exception.Id), + }, + } + + resp, err := metric.QueryWithInfluxFormat(ctx, metricreq) + if err != nil { + exception.UpdateTime = exception.CreateTime + } else { + rows := resp.Results[0].Series[0].Rows + for index, row := range rows { + if index == 0 && exception.CreateTime == "" { + exception.CreateTime = time.Unix(int64(row.Values[0].GetNumberValue())/1e9, 10).Format(layout) + } + if index == len(rows)-1 { + exception.UpdateTime = time.Unix(int64(row.Values[0].GetNumberValue())/1e9, 10).Format(layout) + } + count = count + int64(row.Values[1].GetNumberValue()) + } + } + + exception.EventCount = count +} + +func fetchErdaEventFromCassandra(session *gocql.Session, req *pb.GetExceptionEventRequest) pb.ExceptionEvent { + iter := session.Query("SELECT * FROM error_events WHERE event_id = ?", req.ExceptionEventID).Iter() + event := pb.ExceptionEvent{} + for { + row := make(map[string]interface{}) + if !iter.MapScan(row) { + break + } + event.Tags = row["tags"].(map[string]string) + if conv.ToString(event.Tags["terminus_key"]) != req.ScopeID { + continue + } + event.Id = conv.ToString(row["event_id"]) + event.ExceptionID = conv.ToString(row["error_id"]) + event.RequestID = conv.ToString(row["request_id"]) + event.RequestSampled = conv.ToBool(event.Tags["request_sampled"], false) + event.Metadata = 
row["meta_data"].(map[string]string) + event.RequestContext = row["request_context"].(map[string]string) + event.RequestHeaders = row["request_headers"].(map[string]string) + event.Timestamp = row["timestamp"].(int64) / int64(time.Millisecond) + var stacks []*pb.Stacks + for _, info := range row["stacks"].([]string) { + var stack pb.Stacks + var stackMap map[string]*structpb.Value + if err := json.Unmarshal([]byte(info), &stackMap); err != nil { + continue + } + stack.Stack = stackMap + stacks = append(stacks, &stack) + } + event.Stacks = stacks + } + return event +} + +func fetchErdaErrorFromES(ctx context.Context, Event eventpb.EventQueryServiceServer, Entity entitypb.EntityServiceServer, req *entitypb.ListEntitiesRequest, startTime int64, endTime int64) (exceptions []*pb.Exception, err error) { + listEntity, err := Entity.ListEntities(ctx, req) + if err != nil { + return nil, errors.NewInternalServerError(err) + } + + for _, value := range listEntity.Data.List { + exception := pb.Exception{} + tags := value.Values + exception.Id = value.Key + exception.ScopeID = value.Labels["terminusKey"] + exception.ClassName = tags["class"].GetStringValue() + exception.Method = tags["method"].GetStringValue() + exception.Type = tags["type"].GetStringValue() + exception.ExceptionMessage = tags["exception_message"].GetStringValue() + exception.File = tags["file"].GetStringValue() + exception.ServiceName = tags["service_name"].GetStringValue() + exception.ApplicationID = tags["application_id"].GetStringValue() + exception.RuntimeID = tags["runtime_id"].GetStringValue() + layout := "2006-01-02 15:04:05" + + eventReq := &eventpb.GetEventsRequest{ + RelationId: value.Key, + RelationType: "exception", + Start: startTime * 1e6, + End: endTime * 1e6, + PageNo: 1, + PageSize: 10000, + Debug: false, + } + items, err := fetchErdaEventFromES(ctx, Event, eventReq) + if err != nil { + return nil, errors.NewInternalServerError(err) + } + count := int64(0) + for index, item := range items { + if index == 0 { + exception.CreateTime = time.Unix(item.Timestamp/1e9, 10).Format(layout) + } + if index == len(items)-1 { + exception.UpdateTime = time.Unix(item.Timestamp/1e9, 10).Format(layout) + } + count++ + } + exception.EventCount = count + if exception.EventCount > 0 { + exceptions = append(exceptions, &exception) + } + } + + return exceptions, nil +} + +type ErdaErrors []*exception.Erda_error + +func (s ErdaErrors) Len() int { return len(s) } +func (s ErdaErrors) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s ErdaErrors) Less(i, j int) bool { + return s[i].Timestamp < s[j].Timestamp +} + +type ErdaEvents []*exception.Erda_event + +func (s ErdaEvents) Len() int { return len(s) } +func (s ErdaEvents) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s ErdaEvents) Less(i, j int) bool { + return s[i].Timestamp < s[j].Timestamp +} + +func fetchErdaEventFromES(ctx context.Context, Event eventpb.EventQueryServiceServer, req *eventpb.GetEventsRequest) (list []*exception.Erda_event, err error) { + eventsResp, err := Event.GetEvents(ctx, req) + if err != nil { + return nil, errors.NewInternalServerError(err) + } + for _, item := range eventsResp.Data.Items { + erdaEvent := &exception.Erda_event{} + erdaEvent.EventId = item.EventID + erdaEvent.Timestamp = int64(item.TimeUnixNano) + erdaEvent.ErrorId = item.Relations.ResID + erdaEvent.RequestId = item.Attributes["requestId"] + + var stacks []string + json.Unmarshal([]byte(item.Message), &stacks) + erdaEvent.Stacks = stacks + + tagsMap := make(map[string]string) + 
json.Unmarshal([]byte(item.Attributes["tags"]), &tagsMap) + erdaEvent.Tags = tagsMap + + requestContextMap := make(map[string]string) + json.Unmarshal([]byte(item.Attributes["requestContext"]), &requestContextMap) + erdaEvent.RequestContext = requestContextMap + + requestHeadersMap := make(map[string]string) + json.Unmarshal([]byte(item.Attributes["requestHeaders"]), &requestHeadersMap) + erdaEvent.RequestHeaders = requestHeadersMap + + metaDataMap := make(map[string]string) + json.Unmarshal([]byte(item.Attributes["metaData"]), &metaDataMap) + erdaEvent.MetaData = metaDataMap + + list = append(list, erdaEvent) + } + + return list, nil +} diff --git a/modules/msp/apm/exception/exception.service_test.go b/modules/msp/apm/exception/query/exception.service_test.go similarity index 62% rename from modules/msp/apm/exception/exception.service_test.go rename to modules/msp/apm/exception/query/exception.service_test.go index 7d5759047ee..7ac016176d2 100644 --- a/modules/msp/apm/exception/exception.service_test.go +++ b/modules/msp/apm/exception/query/exception.service_test.go @@ -12,15 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -package exception +package query import ( - context "context" - reflect "reflect" - testing "testing" + "context" + "reflect" + "testing" - servicehub "github.com/erda-project/erda-infra/base/servicehub" - pb "github.com/erda-project/erda-proto-go/msp/apm/exception/pb" + "github.com/golang/mock/gomock" + + "github.com/erda-project/erda-infra/base/servicehub" + eventpb "github.com/erda-project/erda-proto-go/core/monitor/event/pb" + "github.com/erda-project/erda-proto-go/msp/apm/exception/pb" + commonPb "github.com/erda-project/erda-proto-go/oap/common/pb" + entitypb "github.com/erda-project/erda-proto-go/oap/entity/pb" + oapPb "github.com/erda-project/erda-proto-go/oap/event/pb" ) func Test_exceptionService_GetExceptions(t *testing.T) { @@ -193,3 +199,73 @@ func Test_exceptionService_GetExceptionEvent(t *testing.T) { }) } } + +//go:generate mockgen -destination=./mock_event_query_grpc.go -package query -source=../../../../../api/proto-go/core/monitor/event/pb/event_query_grpc.pb.go EventQueryServiceServer +//go:generate mockgen -destination=./mock_entity_query_grpc.go -package query -source=../../../../../api/proto-go/oap/entity/pb/entity_grpc.pb.go EntityServiceServer +func TestExceptionService_fetchErdaErrorFromES(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + entityGrpcServer := NewMockEntityServiceServer(ctrl) + exceptionEntity := entitypb.Entity{ + Id: "error_exception/0f82da3be2e1c7070c269471fa7aa4a5", + Type: "error_exception", + Key: "0f82da3be2e1c7070c269471fa7aa4a5", + Values: nil, + Labels: nil, + CreateTimeUnixNano: 1635845334935110000, + UpdateTimeUnixNano: 1635851825720883700, + } + entityList := entitypb.EntityList{ + List: []*entitypb.Entity{&exceptionEntity}, + Total: 1, + } + listEntitiesResponse := entitypb.ListEntitiesResponse{ + Data: &entityList, + } + entityGrpcServer.EXPECT().ListEntities(gomock.Any(), gomock.Any()).Return(&listEntitiesResponse, nil) + + eventGrpcServer := NewMockEventQueryServiceServer(ctrl) + att := make(map[string]string) + att["terminusKey"] = "fc1f8c074e46a9df505a15c1a94d62cc" + spanEvent := oapPb.Event{ + EventID: "335415fe-0c9f-4905-ab7f-434032a5c3ab", + Severity: "", + Name: "exception", + Kind: 0, + TimeUnixNano: 1635845334935610000, + Relations: &commonPb.Relation{ + TraceID: "", + ResID: "0f82da3be2e1c7070c269471fa7aa4a5", + 
ResType: "exception", + ResourceKeys: nil, + }, + Attributes: att, + Message: "", + } + eventsResult := eventpb.GetEventsResult{ + Items: []*oapPb.Event{&spanEvent}, + } + eventsResponse := eventpb.GetEventsResponse{ + Data: &eventsResult, + } + eventGrpcServer.EXPECT().GetEvents(gomock.Any(), gomock.Any()).Return(&eventsResponse, nil) + + conditions := map[string]string{ + "terminusKey": "fc1f8c074e46a9df505a15c1a94d62cc", + } + entityReq := &entitypb.ListEntitiesRequest{ + Type: "error_exception", + Labels: conditions, + Limit: int64(1000), + } + + items, err := fetchErdaErrorFromES(context.Background(), eventGrpcServer, entityGrpcServer, entityReq, 1635845334935, 1635851825720) + + if err != nil { + t.Errorf("should not throw error") + } + if items == nil || len(items) != 1 { + t.Errorf("assert result failed") + } + +} diff --git a/modules/msp/apm/exception/query/mock_entity_query_grpc.go b/modules/msp/apm/exception/query/mock_entity_query_grpc.go new file mode 100644 index 00000000000..5a730477466 --- /dev/null +++ b/modules/msp/apm/exception/query/mock_entity_query_grpc.go @@ -0,0 +1,200 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ../../../../../api/proto-go/oap/entity/pb/entity_grpc.pb.go + +// Package query is a generated GoMock package. +package query + +import ( + context "context" + reflect "reflect" + + pb "github.com/erda-project/erda-proto-go/oap/entity/pb" + gomock "github.com/golang/mock/gomock" + grpc "google.golang.org/grpc" +) + +// MockEntityServiceClient is a mock of EntityServiceClient interface. +type MockEntityServiceClient struct { + ctrl *gomock.Controller + recorder *MockEntityServiceClientMockRecorder +} + +// MockEntityServiceClientMockRecorder is the mock recorder for MockEntityServiceClient. +type MockEntityServiceClientMockRecorder struct { + mock *MockEntityServiceClient +} + +// NewMockEntityServiceClient creates a new mock instance. +func NewMockEntityServiceClient(ctrl *gomock.Controller) *MockEntityServiceClient { + mock := &MockEntityServiceClient{ctrl: ctrl} + mock.recorder = &MockEntityServiceClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockEntityServiceClient) EXPECT() *MockEntityServiceClientMockRecorder { + return m.recorder +} + +// GetEntity mocks base method. +func (m *MockEntityServiceClient) GetEntity(ctx context.Context, in *pb.GetEntityRequest, opts ...grpc.CallOption) (*pb.GetEntityResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "GetEntity", varargs...) + ret0, _ := ret[0].(*pb.GetEntityResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetEntity indicates an expected call of GetEntity. +func (mr *MockEntityServiceClientMockRecorder) GetEntity(ctx, in interface{}, opts ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetEntity", reflect.TypeOf((*MockEntityServiceClient)(nil).GetEntity), varargs...) +} + +// ListEntities mocks base method. +func (m *MockEntityServiceClient) ListEntities(ctx context.Context, in *pb.ListEntitiesRequest, opts ...grpc.CallOption) (*pb.ListEntitiesResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "ListEntities", varargs...) 
+ ret0, _ := ret[0].(*pb.ListEntitiesResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListEntities indicates an expected call of ListEntities. +func (mr *MockEntityServiceClientMockRecorder) ListEntities(ctx, in interface{}, opts ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListEntities", reflect.TypeOf((*MockEntityServiceClient)(nil).ListEntities), varargs...) +} + +// RemoveEntity mocks base method. +func (m *MockEntityServiceClient) RemoveEntity(ctx context.Context, in *pb.RemoveEntityRequest, opts ...grpc.CallOption) (*pb.RemoveEntityResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "RemoveEntity", varargs...) + ret0, _ := ret[0].(*pb.RemoveEntityResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// RemoveEntity indicates an expected call of RemoveEntity. +func (mr *MockEntityServiceClientMockRecorder) RemoveEntity(ctx, in interface{}, opts ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveEntity", reflect.TypeOf((*MockEntityServiceClient)(nil).RemoveEntity), varargs...) +} + +// SetEntity mocks base method. +func (m *MockEntityServiceClient) SetEntity(ctx context.Context, in *pb.SetEntityRequest, opts ...grpc.CallOption) (*pb.SetEntityResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "SetEntity", varargs...) + ret0, _ := ret[0].(*pb.SetEntityResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// SetEntity indicates an expected call of SetEntity. +func (mr *MockEntityServiceClientMockRecorder) SetEntity(ctx, in interface{}, opts ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetEntity", reflect.TypeOf((*MockEntityServiceClient)(nil).SetEntity), varargs...) +} + +// MockEntityServiceServer is a mock of EntityServiceServer interface. +type MockEntityServiceServer struct { + ctrl *gomock.Controller + recorder *MockEntityServiceServerMockRecorder +} + +// MockEntityServiceServerMockRecorder is the mock recorder for MockEntityServiceServer. +type MockEntityServiceServerMockRecorder struct { + mock *MockEntityServiceServer +} + +// NewMockEntityServiceServer creates a new mock instance. +func NewMockEntityServiceServer(ctrl *gomock.Controller) *MockEntityServiceServer { + mock := &MockEntityServiceServer{ctrl: ctrl} + mock.recorder = &MockEntityServiceServerMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockEntityServiceServer) EXPECT() *MockEntityServiceServerMockRecorder { + return m.recorder +} + +// GetEntity mocks base method. +func (m *MockEntityServiceServer) GetEntity(arg0 context.Context, arg1 *pb.GetEntityRequest) (*pb.GetEntityResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetEntity", arg0, arg1) + ret0, _ := ret[0].(*pb.GetEntityResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetEntity indicates an expected call of GetEntity. 
+func (mr *MockEntityServiceServerMockRecorder) GetEntity(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetEntity", reflect.TypeOf((*MockEntityServiceServer)(nil).GetEntity), arg0, arg1) +} + +// ListEntities mocks base method. +func (m *MockEntityServiceServer) ListEntities(arg0 context.Context, arg1 *pb.ListEntitiesRequest) (*pb.ListEntitiesResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListEntities", arg0, arg1) + ret0, _ := ret[0].(*pb.ListEntitiesResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListEntities indicates an expected call of ListEntities. +func (mr *MockEntityServiceServerMockRecorder) ListEntities(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListEntities", reflect.TypeOf((*MockEntityServiceServer)(nil).ListEntities), arg0, arg1) +} + +// RemoveEntity mocks base method. +func (m *MockEntityServiceServer) RemoveEntity(arg0 context.Context, arg1 *pb.RemoveEntityRequest) (*pb.RemoveEntityResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemoveEntity", arg0, arg1) + ret0, _ := ret[0].(*pb.RemoveEntityResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// RemoveEntity indicates an expected call of RemoveEntity. +func (mr *MockEntityServiceServerMockRecorder) RemoveEntity(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveEntity", reflect.TypeOf((*MockEntityServiceServer)(nil).RemoveEntity), arg0, arg1) +} + +// SetEntity mocks base method. +func (m *MockEntityServiceServer) SetEntity(arg0 context.Context, arg1 *pb.SetEntityRequest) (*pb.SetEntityResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetEntity", arg0, arg1) + ret0, _ := ret[0].(*pb.SetEntityResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// SetEntity indicates an expected call of SetEntity. +func (mr *MockEntityServiceServerMockRecorder) SetEntity(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetEntity", reflect.TypeOf((*MockEntityServiceServer)(nil).SetEntity), arg0, arg1) +} diff --git a/modules/msp/apm/exception/query/mock_event_query_grpc.go b/modules/msp/apm/exception/query/mock_event_query_grpc.go new file mode 100644 index 00000000000..3b2d4eae5eb --- /dev/null +++ b/modules/msp/apm/exception/query/mock_event_query_grpc.go @@ -0,0 +1,95 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ../../../../../api/proto-go/core/monitor/event/pb/event_query_grpc.pb.go + +// Package query is a generated GoMock package. +package query + +import ( + context "context" + reflect "reflect" + + pb "github.com/erda-project/erda-proto-go/core/monitor/event/pb" + gomock "github.com/golang/mock/gomock" + grpc "google.golang.org/grpc" +) + +// MockEventQueryServiceClient is a mock of EventQueryServiceClient interface. +type MockEventQueryServiceClient struct { + ctrl *gomock.Controller + recorder *MockEventQueryServiceClientMockRecorder +} + +// MockEventQueryServiceClientMockRecorder is the mock recorder for MockEventQueryServiceClient. +type MockEventQueryServiceClientMockRecorder struct { + mock *MockEventQueryServiceClient +} + +// NewMockEventQueryServiceClient creates a new mock instance. 
+func NewMockEventQueryServiceClient(ctrl *gomock.Controller) *MockEventQueryServiceClient { + mock := &MockEventQueryServiceClient{ctrl: ctrl} + mock.recorder = &MockEventQueryServiceClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockEventQueryServiceClient) EXPECT() *MockEventQueryServiceClientMockRecorder { + return m.recorder +} + +// GetEvents mocks base method. +func (m *MockEventQueryServiceClient) GetEvents(ctx context.Context, in *pb.GetEventsRequest, opts ...grpc.CallOption) (*pb.GetEventsResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "GetEvents", varargs...) + ret0, _ := ret[0].(*pb.GetEventsResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetEvents indicates an expected call of GetEvents. +func (mr *MockEventQueryServiceClientMockRecorder) GetEvents(ctx, in interface{}, opts ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetEvents", reflect.TypeOf((*MockEventQueryServiceClient)(nil).GetEvents), varargs...) +} + +// MockEventQueryServiceServer is a mock of EventQueryServiceServer interface. +type MockEventQueryServiceServer struct { + ctrl *gomock.Controller + recorder *MockEventQueryServiceServerMockRecorder +} + +// MockEventQueryServiceServerMockRecorder is the mock recorder for MockEventQueryServiceServer. +type MockEventQueryServiceServerMockRecorder struct { + mock *MockEventQueryServiceServer +} + +// NewMockEventQueryServiceServer creates a new mock instance. +func NewMockEventQueryServiceServer(ctrl *gomock.Controller) *MockEventQueryServiceServer { + mock := &MockEventQueryServiceServer{ctrl: ctrl} + mock.recorder = &MockEventQueryServiceServerMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockEventQueryServiceServer) EXPECT() *MockEventQueryServiceServerMockRecorder { + return m.recorder +} + +// GetEvents mocks base method. +func (m *MockEventQueryServiceServer) GetEvents(arg0 context.Context, arg1 *pb.GetEventsRequest) (*pb.GetEventsResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetEvents", arg0, arg1) + ret0, _ := ret[0].(*pb.GetEventsResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetEvents indicates an expected call of GetEvents. +func (mr *MockEventQueryServiceServerMockRecorder) GetEvents(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetEvents", reflect.TypeOf((*MockEventQueryServiceServer)(nil).GetEvents), arg0, arg1) +} diff --git a/modules/msp/apm/exception/provider.go b/modules/msp/apm/exception/query/provider.go similarity index 65% rename from modules/msp/apm/exception/provider.go rename to modules/msp/apm/exception/query/provider.go index 1d9d24234bf..95ce0963175 100644 --- a/modules/msp/apm/exception/provider.go +++ b/modules/msp/apm/exception/query/provider.go @@ -12,21 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
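// ---------------------------------------------------------------------------
// Illustrative note (not part of this change): the query_source setting added
// to the config below decides which backends the exception query service
// reads from; the service gates each path with strings.Contains. The sample
// value "cassandra,elasticsearch" is an assumption for illustration only.
//
//	cfg := &config{QuerySource: "cassandra,elasticsearch"}
//	readCassandra := strings.Contains(cfg.QuerySource, "cassandra")         // true
//	readElasticsearch := strings.Contains(cfg.QuerySource, "elasticsearch") // true
// ---------------------------------------------------------------------------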
-package exception +package query import ( "fmt" - logs "github.com/erda-project/erda-infra/base/logs" - servicehub "github.com/erda-project/erda-infra/base/servicehub" - transport "github.com/erda-project/erda-infra/pkg/transport" + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/pkg/transport" "github.com/erda-project/erda-infra/providers/cassandra" - pb "github.com/erda-project/erda-proto-go/msp/apm/exception/pb" + eventpb "github.com/erda-project/erda-proto-go/core/monitor/event/pb" + metricpb "github.com/erda-project/erda-proto-go/core/monitor/metric/pb" + "github.com/erda-project/erda-proto-go/msp/apm/exception/pb" + entitypb "github.com/erda-project/erda-proto-go/oap/entity/pb" "github.com/erda-project/erda/pkg/common/apis" ) type config struct { - Cassandra cassandra.SessionConfig `file:"cassandra"` + Cassandra cassandra.SessionConfig `file:"cassandra"` + QuerySource string `file:"query_source"` } // +provider @@ -37,6 +41,9 @@ type provider struct { Cassandra cassandra.Interface `autowired:"cassandra"` exceptionService *exceptionService cassandraSession *cassandra.Session + Metric metricpb.MetricServiceServer `autowired:"erda.core.monitor.metric.MetricService"` + Entity entitypb.EntityServiceServer `autowired:"erda.oap.entity.EntityService"` + Event eventpb.EventQueryServiceServer `autowired:"erda.core.monitor.event.EventQueryService"` } func (p *provider) Init(ctx servicehub.Context) error { @@ -45,7 +52,12 @@ func (p *provider) Init(ctx servicehub.Context) error { return fmt.Errorf("fail to create cassandra session: %s", err) } p.cassandraSession = session - p.exceptionService = &exceptionService{p} + p.exceptionService = &exceptionService{ + p: p, + Metric: p.Metric, + Entity: p.Entity, + Event: p.Event, + } if p.Register != nil { pb.RegisterExceptionServiceImp(p.Register, p.exceptionService, apis.Options()) } @@ -61,7 +73,7 @@ func (p *provider) Provide(ctx servicehub.DependencyContext, args ...interface{} } func init() { - servicehub.Register("erda.msp.apm.exception", &servicehub.Spec{ + servicehub.Register("erda.msp.apm.exception.query", &servicehub.Spec{ Services: pb.ServiceNames(), Types: pb.Types(), OptionalDependencies: []string{"service-register"}, diff --git a/modules/msp/apm/trace/persist/consume.go b/modules/msp/apm/trace/persist/consume.go new file mode 100644 index 00000000000..b5890b5210a --- /dev/null +++ b/modules/msp/apm/trace/persist/consume.go @@ -0,0 +1,195 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
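// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this change): a minimal spot-span metric
// that the decodeSpotSpan/metricToSpan functions below would accept. The
// required tags (trace_id, span_id, operation_name) and fields (start_time,
// end_time, in nanoseconds) follow the checks in metricToSpan; parent_span_id
// is optional and the literal values here are made up.
//
//	span, err := metricToSpan(&metrics.Metric{
//		Tags: map[string]string{
//			"trace_id":       "5f3c0a7b",
//			"span_id":        "a1b2c3d4",
//			"parent_span_id": "",
//			"operation_name": "GET /api/health",
//		},
//		Fields: map[string]interface{}{
//			"start_time": int64(1635845334935610000),
//			"end_time":   int64(1635845334999999999),
//		},
//	})
// ---------------------------------------------------------------------------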
+ +package persist + +import ( + "encoding/json" + "errors" + "fmt" + "strconv" + "time" + + oap "github.com/erda-project/erda-proto-go/oap/trace/pb" + metrics "github.com/erda-project/erda/modules/core/monitor/metric" + "github.com/erda-project/erda/modules/msp/apm/trace" +) + +func (p *provider) decodeSpotSpan(key, value []byte, topic *string, timestamp time.Time) (interface{}, error) { + data := &metrics.Metric{} + if err := json.Unmarshal(value, data); err != nil { + p.stats.DecodeError(value, err) + if p.Cfg.PrintInvalidSpan { + p.Log.Warnf("unknown format spot span data: %s", string(value)) + } else { + p.Log.Warnf("failed to decode spot span: %v", err) + } + return nil, err + } + + span, _ := metricToSpan(data) + + if err := p.validator.Validate(span); err != nil { + p.stats.ValidateError(span) + if p.Cfg.PrintInvalidSpan { + p.Log.Warnf("invalid spot span data: %s", string(value)) + } else { + p.Log.Warnf("invalid spot span: %v", err) + } + return nil, err + } + if err := p.metadata.Process(span); err != nil { + p.stats.MetadataError(span, err) + p.Log.Errorf("failed to process spot span metadata: %v", err) + } + return span, nil +} + +func (p *provider) decodeOapSpan(key, value []byte, topic *string, timestamp time.Time) (interface{}, error) { + data := &oap.Span{} + if err := json.Unmarshal(value, data); err != nil { + p.stats.DecodeError(value, err) + if p.Cfg.PrintInvalidSpan { + p.Log.Warnf("unknown format oap span data: %s", string(value)) + } else { + p.Log.Warnf("failed to decode oap span: %v", err) + } + return nil, err + } + + span := &trace.Span{ + OperationName: data.Name, + StartTime: int64(data.StartTimeUnixNano), + EndTime: int64(data.EndTimeUnixNano), + TraceId: data.TraceID, + SpanId: data.SpanID, + ParentSpanId: data.ParentSpanID, + Tags: data.Attributes, + } + + if err := p.validator.Validate(span); err != nil { + p.stats.ValidateError(span) + if p.Cfg.PrintInvalidSpan { + p.Log.Warnf("invalid oap span data: %s", string(value)) + } else { + p.Log.Warnf("invalid oap span: %v", err) + } + return nil, err + } + if err := p.metadata.Process(span); err != nil { + p.stats.MetadataError(span, err) + p.Log.Errorf("failed to process oap span metadata: %v", err) + } + return span, nil +} + +func (p *provider) handleReadError(err error) error { + p.Log.Errorf("failed to read spans from kafka: %s", err) + return nil // return nil to continue read +} + +func (p *provider) handleWriteError(list []interface{}, err error) error { + p.Log.Errorf("failed to write into storage: %s", err) + return nil // return nil to continue consume +} + +func (p *provider) confirmErrorHandler(err error) error { + p.Log.Errorf("failed to confirm span from kafka: %s", err) + return err // return error to exit +} + +// metricToSpan . 
+func metricToSpan(metric *metrics.Metric) (*trace.Span, error) { + var span trace.Span + span.Tags = metric.Tags + + traceID, ok := metric.Tags["trace_id"] + if !ok { + return nil, errors.New("trace_id cannot be null") + } + span.TraceId = traceID + + spanID, ok := metric.Tags["span_id"] + if !ok { + return nil, errors.New("span_id cannot be null") + } + span.SpanId = spanID + + parentSpanID, _ := metric.Tags["parent_span_id"] + span.ParentSpanId = parentSpanID + + opName, ok := metric.Tags["operation_name"] + if !ok { + return nil, errors.New("operation_name cannot be null") + } + span.OperationName = opName + + value, ok := metric.Fields["start_time"] + if !ok { + return nil, errors.New("start_time cannot be null") + } + startTime, err := toInt64(value) + if err != nil { + return nil, fmt.Errorf("invalid start_time: %s", value) + } + span.StartTime = startTime + + value, ok = metric.Fields["end_time"] + if !ok { + return nil, errors.New("end_time cannot be null") + } + endTime, err := toInt64(value) + if err != nil { + return nil, fmt.Errorf("invalid end_time: %s", value) + } + span.EndTime = endTime + return &span, nil +} + +// toInt64 . +func toInt64(obj interface{}) (int64, error) { + switch val := obj.(type) { + case int: + return int64(val), nil + case int8: + return int64(val), nil + case int16: + return int64(val), nil + case int32: + return int64(val), nil + case int64: + return val, nil + case uint: + return int64(val), nil + case uint8: + return int64(val), nil + case uint16: + return int64(val), nil + case uint32: + return int64(val), nil + case uint64: + return int64(val), nil + case float32: + return int64(val), nil + case float64: + return int64(val), nil + case string: + v, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return 0, err + } + return v, nil + } + return 0, fmt.Errorf("invalid type") +} diff --git a/modules/msp/apm/trace/persist/metadata.go b/modules/msp/apm/trace/persist/metadata.go new file mode 100644 index 00000000000..66f23f46ffb --- /dev/null +++ b/modules/msp/apm/trace/persist/metadata.go @@ -0,0 +1,35 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "github.com/erda-project/erda/modules/msp/apm/trace" +) + +// MetadataProcessor . +type MetadataProcessor interface { + Process(data *trace.Span) error +} + +func newMetadataProcessor(cfg *config) MetadataProcessor { + return NopMetadataProcessor +} + +type nopMetadataProcessor struct{} + +func (*nopMetadataProcessor) Process(data *trace.Span) error { return nil } + +// NopMetadataProcessor . +var NopMetadataProcessor MetadataProcessor = &nopMetadataProcessor{} diff --git a/modules/msp/apm/trace/persist/provider.go b/modules/msp/apm/trace/persist/provider.go new file mode 100644 index 00000000000..20034a89b4b --- /dev/null +++ b/modules/msp/apm/trace/persist/provider.go @@ -0,0 +1,125 @@ +// Copyright (c) 2021 Terminus, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "context" + "fmt" + "time" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/providers/kafka" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" +) + +type ( + config struct { + SpotInput kafka.BatchReaderConfig `file:"spot_input"` + OapInput kafka.BatchReaderConfig `file:"oap_input"` + Parallelism int `file:"parallelism" default:"1"` + BufferSize int `file:"buffer_size" default:"1024"` + ReadTimeout time.Duration `file:"read_timeout" default:"5s"` + IDKeys []string `file:"id_keys"` + PrintInvalidSpan bool `file:"print_invalid_span" default:"false"` + } + provider struct { + Cfg *config + Log logs.Logger + Kafka kafka.Interface `autowired:"kafka"` + StorageWriter storage.Storage `autowired:"span-storage-writer"` + + storage storage.Storage + stats Statistics + validator Validator + metadata MetadataProcessor + } +) + +func (p *provider) Init(ctx servicehub.Context) (err error) { + + p.validator = newValidator(p.Cfg) + if runner, ok := p.validator.(servicehub.ProviderRunnerWithContext); ok { + ctx.AddTask(runner.Run, servicehub.WithTaskName("span validator")) + } + + p.metadata = newMetadataProcessor(p.Cfg) + if runner, ok := p.metadata.(servicehub.ProviderRunnerWithContext); ok { + ctx.AddTask(runner.Run, servicehub.WithTaskName("span metadata processor")) + } + + p.stats = sharedStatistics + + // add consumer task + for i := 0; i < p.Cfg.Parallelism; i++ { + //spot + ctx.AddTask(func(ctx context.Context) error { + r, err := p.Kafka.NewBatchReader(&p.Cfg.SpotInput, kafka.WithReaderDecoder(p.decodeSpotSpan)) + if err != nil { + return err + } + defer r.Close() + + w, err := p.StorageWriter.NewWriter(ctx) + if err != nil { + return err + } + defer w.Close() + return storekit.BatchConsume(ctx, r, w, &storekit.BatchConsumeOptions{ + BufferSize: p.Cfg.BufferSize, + ReadTimeout: p.Cfg.ReadTimeout, + ReadErrorHandler: p.handleReadError, + WriteErrorHandler: p.handleWriteError, + ConfirmErrorHandler: p.confirmErrorHandler, + Statistics: p.stats, + }) + }, servicehub.WithTaskName(fmt.Sprintf("spotspan-consumer(%d)", i))) + + //oap + ctx.AddTask(func(ctx context.Context) error { + r, err := p.Kafka.NewBatchReader(&p.Cfg.OapInput, kafka.WithReaderDecoder(p.decodeOapSpan)) + if err != nil { + return err + } + defer r.Close() + + w, err := p.StorageWriter.NewWriter(ctx) + if err != nil { + return err + } + defer w.Close() + return storekit.BatchConsume(ctx, r, w, &storekit.BatchConsumeOptions{ + BufferSize: p.Cfg.BufferSize, + ReadTimeout: p.Cfg.ReadTimeout, + ReadErrorHandler: p.handleReadError, + WriteErrorHandler: p.handleWriteError, + ConfirmErrorHandler: p.confirmErrorHandler, + Statistics: p.stats, + }) + }, servicehub.WithTaskName(fmt.Sprintf("oapspan-consumer(%d)", i))) + } + return nil +} + +func init() { + 
servicehub.Register("span-persist", &servicehub.Spec{ + Dependencies: []string{"kafka.topic.initializer"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { + return &provider{} + }, + }) +} diff --git a/modules/msp/apm/trace/persist/statistics.go b/modules/msp/apm/trace/persist/statistics.go new file mode 100644 index 00000000000..3ad12374e00 --- /dev/null +++ b/modules/msp/apm/trace/persist/statistics.go @@ -0,0 +1,163 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/trace" +) + +// Statistics . +type Statistics interface { + storekit.ConsumeStatistics + + DecodeError(value []byte, err error) + ValidateError(data *trace.Span) + MetadataError(data *trace.Span, err error) +} + +type statistics struct { + readErrors prometheus.Counter + readBytes *prometheus.CounterVec + writeErrors *prometheus.CounterVec + confirmErrors *prometheus.CounterVec + success *prometheus.CounterVec + + decodeErrors prometheus.Counter + validateErrors *prometheus.CounterVec + metadataError *prometheus.CounterVec +} + +var sharedStatistics = newStatistics() + +func newStatistics() Statistics { + const subSystem = "span_persist" + s := &statistics{ + readErrors: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "read_errors", + Subsystem: subSystem, + }, + ), + readBytes: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "read_bytes", + Subsystem: subSystem, + }, distinguishingKeys, + ), + writeErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "write_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + confirmErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "confirm_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + success: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "success", + Subsystem: subSystem, + }, distinguishingKeys, + ), + decodeErrors: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "decode_errors", + Subsystem: subSystem, + }, + ), + validateErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "validate_errors", + Subsystem: subSystem, + }, distinguishingKeys, + ), + } + + // only register once + prometheus.MustRegister( + s.readErrors, + s.readBytes, + s.writeErrors, + s.confirmErrors, + s.success, + s.decodeErrors, + s.validateErrors, + ) + return s +} + +func (s *statistics) ReadError(err error) { + s.readErrors.Inc() +} + +func (s *statistics) DecodeError(value []byte, err error) { + s.decodeErrors.Inc() +} + +func (s *statistics) WriteError(list []interface{}, err error) { + for _, item := range list { + s.writeErrors.WithLabelValues(getStatisticsLabels(item.(*trace.Span))...).Inc() + } +} + +func (s *statistics) ConfirmError(list []interface{}, err error) { + for _, item := range list { + 
s.confirmErrors.WithLabelValues(getStatisticsLabels(item.(*trace.Span))...).Inc() + } +} + +func (s *statistics) Success(list []interface{}) { + for _, item := range list { + s.success.WithLabelValues(getStatisticsLabels(item.(*trace.Span))...).Inc() + } +} + +func (s *statistics) ValidateError(data *trace.Span) { + s.validateErrors.WithLabelValues(getStatisticsLabels(data)...).Inc() +} + +func (*statistics) MetadataError(data *trace.Span, err error) {} + +var distinguishingKeys = []string{ + "span_kind", "span_layer", + "org_name", "cluster_name", + "scope", "scope_id", +} + +func getStatisticsLabels(data *trace.Span) []string { + var scope, scopeID string + + if app, ok := data.Tags["application_name"]; ok { + scope = "app" + if project, ok := data.Tags["project_name"]; ok { + scopeID = project + "/" + app + } else { + scopeID = app + } + } + return []string{ + data.Tags["span_kind"], + data.Tags["span_layer"], + data.Tags["org_name"], + data.Tags["cluster_name"], + scope, scopeID, + } +} diff --git a/modules/msp/apm/trace/persist/validate.go b/modules/msp/apm/trace/persist/validate.go new file mode 100644 index 00000000000..85ef2e17be8 --- /dev/null +++ b/modules/msp/apm/trace/persist/validate.go @@ -0,0 +1,56 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package persist + +import ( + "errors" + + "github.com/erda-project/erda/bundle" + "github.com/erda-project/erda/modules/msp/apm/trace" +) + +// Validator . +type Validator interface { + Validate(s *trace.Span) error +} + +type nopValidator struct{} + +func (*nopValidator) Validate(*trace.Span) error { return nil } + +// NopValidator . +var NopValidator Validator = &nopValidator{} + +func newValidator(cfg *config) Validator { + return &validator{ + bdl: bundle.New(bundle.WithCoreServices(), bundle.WithDOP()), + } +} + +type validator struct { + bdl *bundle.Bundle +} + +var ( + // ErrIDEmpty . + ErrIDEmpty = errors.New("id empty") +) + +func (v *validator) Validate(s *trace.Span) error { + if len(s.TraceId) <= 0 { + return ErrIDEmpty + } + return nil +} diff --git a/modules/msp/apm/trace/query/mock_storage.go b/modules/msp/apm/trace/query/mock_storage.go new file mode 100644 index 00000000000..ab1429aa743 --- /dev/null +++ b/modules/msp/apm/trace/query/mock_storage.go @@ -0,0 +1,81 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ../storage/storage.go + +// Package query is a generated GoMock package. +package query + +import ( + context "context" + reflect "reflect" + + storekit "github.com/erda-project/erda/modules/core/monitor/storekit" + storage "github.com/erda-project/erda/modules/msp/apm/trace/storage" + gomock "github.com/golang/mock/gomock" +) + +// MockStorage is a mock of Storage interface. +type MockStorage struct { + ctrl *gomock.Controller + recorder *MockStorageMockRecorder +} + +// MockStorageMockRecorder is the mock recorder for MockStorage. +type MockStorageMockRecorder struct { + mock *MockStorage +} + +// NewMockStorage creates a new mock instance. 
+func NewMockStorage(ctrl *gomock.Controller) *MockStorage { + mock := &MockStorage{ctrl: ctrl} + mock.recorder = &MockStorageMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockStorage) EXPECT() *MockStorageMockRecorder { + return m.recorder +} + +// Count mocks base method. +func (m *MockStorage) Count(ctx context.Context, traceId string) int64 { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Count", ctx, traceId) + ret0, _ := ret[0].(int64) + return ret0 +} + +// Count indicates an expected call of Count. +func (mr *MockStorageMockRecorder) Count(ctx, traceId interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Count", reflect.TypeOf((*MockStorage)(nil).Count), ctx, traceId) +} + +// Iterator mocks base method. +func (m *MockStorage) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Iterator", ctx, sel) + ret0, _ := ret[0].(storekit.Iterator) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Iterator indicates an expected call of Iterator. +func (mr *MockStorageMockRecorder) Iterator(ctx, sel interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Iterator", reflect.TypeOf((*MockStorage)(nil).Iterator), ctx, sel) +} + +// NewWriter mocks base method. +func (m *MockStorage) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "NewWriter", ctx) + ret0, _ := ret[0].(storekit.BatchWriter) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// NewWriter indicates an expected call of NewWriter. +func (mr *MockStorageMockRecorder) NewWriter(ctx interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewWriter", reflect.TypeOf((*MockStorage)(nil).NewWriter), ctx) +} diff --git a/modules/msp/apm/trace/provider.go b/modules/msp/apm/trace/query/provider.go similarity index 87% rename from modules/msp/apm/trace/provider.go rename to modules/msp/apm/trace/query/provider.go index 6f78398c9c5..a889a70556c 100644 --- a/modules/msp/apm/trace/provider.go +++ b/modules/msp/apm/trace/query/provider.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package trace +package query import ( "fmt" @@ -27,11 +27,13 @@ import ( metricpb "github.com/erda-project/erda-proto-go/core/monitor/metric/pb" "github.com/erda-project/erda-proto-go/msp/apm/trace/pb" "github.com/erda-project/erda/modules/msp/apm/trace/db" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" "github.com/erda-project/erda/pkg/common/apis" ) type config struct { - Cassandra cassandra.SessionConfig `file:"cassandra"` + Cassandra cassandra.SessionConfig `file:"cassandra"` + QuerySource string `file:"query_source"` } // +provider @@ -44,6 +46,7 @@ type provider struct { Metric metricpb.MetricServiceServer `autowired:"erda.core.monitor.metric.MetricService"` DB *gorm.DB `autowired:"mysql-client"` Cassandra cassandra.Interface `autowired:"cassandra"` + StorageReader storage.Storage `autowired:"span-storage-elasticsearch-reader"` cassandraSession *cassandra.Session } @@ -59,6 +62,7 @@ func (p *provider) Init(ctx servicehub.Context) error { p: p, i18n: p.I18n, traceRequestHistoryDB: &db.TraceRequestHistoryDB{DB: p.DB}, + StorageReader: p.StorageReader, } if p.Register != nil { pb.RegisterTraceServiceImp(p.Register, p.traceService, apis.Options()) @@ -75,7 +79,7 @@ func (p *provider) Provide(ctx servicehub.DependencyContext, args ...interface{} } func init() { - servicehub.Register("erda.msp.apm.trace", &servicehub.Spec{ + servicehub.Register("erda.msp.apm.trace.query", &servicehub.Spec{ Services: pb.ServiceNames(), Types: pb.Types(), OptionalDependencies: []string{"service-register"}, diff --git a/modules/msp/apm/trace/query/span_list_storage.go b/modules/msp/apm/trace/query/span_list_storage.go new file mode 100644 index 00000000000..41e1c4d0b9f --- /dev/null +++ b/modules/msp/apm/trace/query/span_list_storage.go @@ -0,0 +1,110 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "context" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/trace" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" +) + +// Data . +type Data = interface{} + +// ListIterator . +type ListIterator struct { + list []Data + i int + data Data +} + +// NewListIterator . +func NewListIterator(list ...Data) storekit.Iterator { + return &ListIterator{list: list, i: -1} +} + +// First . +func (it *ListIterator) First() bool { + if len(it.list) <= 0 { + return false + } + it.i = 0 + it.data = it.list[it.i] + return true +} + +// Last . +func (it *ListIterator) Last() bool { + if len(it.list) <= 0 { + return false + } + it.i = len(it.list) - 1 + it.data = it.list[it.i] + return true + +} + +// Next . +func (it *ListIterator) Next() bool { + if it.i < 0 { + return it.First() + } + if it.i >= len(it.list)-1 { + return false + } + it.i++ + it.data = it.list[it.i] + return true +} + +// Prev . 
+func (it *ListIterator) Prev() bool { + if it.i < 0 { + return it.Last() + } + if it.i <= 0 { + return false + } + it.i-- + it.data = it.list[it.i] + return true +} + +// Value . +func (it *ListIterator) Value() Data { return it.data } + +// Error . +func (it *ListIterator) Error() error { return nil } + +// Close . +func (it *ListIterator) Close() error { return nil } + +type listStorage struct { + span *trace.Span +} + +func (s *listStorage) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + return storekit.DefaultNopWriter, nil +} + +func (s *listStorage) Count(ctx context.Context, traceId string) int64 { + return int64(1) +} + +func (s *listStorage) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + return NewListIterator(s.span), nil +} diff --git a/modules/msp/apm/trace/trace.service.go b/modules/msp/apm/trace/query/trace.service.go similarity index 88% rename from modules/msp/apm/trace/trace.service.go rename to modules/msp/apm/trace/query/trace.service.go index b4d352ee60c..c5445654670 100644 --- a/modules/msp/apm/trace/trace.service.go +++ b/modules/msp/apm/trace/query/trace.service.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package trace +package query import ( "bytes" @@ -24,11 +24,13 @@ import ( "math" "net/http" "net/url" + "sort" "strconv" "strings" "time" "github.com/asaskevich/govalidator" + "github.com/gocql/gocql" uuid "github.com/satori/go.uuid" "google.golang.org/protobuf/types/known/structpb" @@ -36,19 +38,21 @@ import ( "github.com/erda-project/erda-infra/providers/i18n" metricpb "github.com/erda-project/erda-proto-go/core/monitor/metric/pb" "github.com/erda-project/erda-proto-go/msp/apm/trace/pb" + "github.com/erda-project/erda/modules/msp/apm/trace" "github.com/erda-project/erda/modules/msp/apm/trace/core/common" "github.com/erda-project/erda/modules/msp/apm/trace/core/debug" "github.com/erda-project/erda/modules/msp/apm/trace/core/query" "github.com/erda-project/erda/modules/msp/apm/trace/db" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" "github.com/erda-project/erda/pkg/common/apis" "github.com/erda-project/erda/pkg/common/errors" - mathpkg "github.com/erda-project/erda/pkg/math" ) type traceService struct { p *provider i18n i18n.Translator traceRequestHistoryDB *db.TraceRequestHistoryDB + StorageReader storage.Storage } var EventFieldSet = set.NewSet("error", "stack", "event", "message", "error_kind", "error_object") @@ -78,8 +82,52 @@ func (s *traceService) GetSpans(ctx context.Context, req *pb.GetSpansRequest) (* if req.Limit <= 0 || req.Limit > 10000 { req.Limit = 10000 } - iter := s.p.cassandraSession.Session().Query("SELECT * FROM spans WHERE trace_id = ? 
limit ?", req.TraceID, req.Limit).Iter()
 	spanTree := make(query.SpanTree)
+	var spans []*pb.Span
+
+	if strings.Contains(s.p.Cfg.QuerySource, "cassandra") {
+		// do cassandra query
+		cassandraSpans := s.fetchSpanFromCassandra(s.p.cassandraSession.Session(), req.TraceID, req.Limit)
+		for _, span := range cassandraSpans {
+			spans = append(spans, span)
+		}
+	}
+
+	if strings.Contains(s.p.Cfg.QuerySource, "elasticsearch") {
+		// do es query
+		elasticsearchSpans, _ := fetchSpanFromES(ctx, s.StorageReader, storage.Selector{
+			TraceId: req.TraceID,
+		}, true, int(req.GetLimit()))
+		for _, value := range elasticsearchSpans {
+			var span pb.Span
+			span.Id = value.SpanId
+			span.TraceId = value.TraceId
+			span.OperationName = value.OperationName
+			span.ParentSpanId = value.ParentSpanId
+			span.StartTime = value.StartTime
+			span.EndTime = value.EndTime
+			span.Tags = value.Tags
+			spans = append(spans, &span)
+		}
+	}
+
+	sort.Sort(Spans(spans))
+	for _, span := range spans {
+		if len(spanTree) >= int(req.GetLimit()) {
+			break
+		}
+		spanTree[span.Id] = span
+	}
+
+	response, err := s.handleSpanResponse(spanTree)
+	if err != nil {
+		return nil, errors.NewInternalServerError(err)
+	}
+	return response, nil
+}
+func (s *traceService) fetchSpanFromCassandra(session *gocql.Session, traceId string, limit int64) []*pb.Span {
+	iter := session.Query("SELECT * FROM spans WHERE trace_id = ? limit ?", traceId, limit).Iter()
+	var items []*pb.Span
 	for {
 		row := make(map[string]interface{})
 		if !iter.MapScan(row) {
@@ -93,14 +141,54 @@ func (s *traceService) GetSpans(ctx context.Context, req *pb.GetSpansRequest) (*
 		span.StartTime = row["start_time"].(int64)
 		span.EndTime = row["end_time"].(int64)
 		span.Tags = row["tags"].(map[string]string)
-		spanTree[span.Id] = &span
+		items = append(items, &span)
 	}
+	return items
+}
 
-	response, err := s.handleSpanResponse(spanTree)
+func fetchSpanFromES(ctx context.Context, storage storage.Storage, sel storage.Selector, forward bool, limit int) (list []*trace.Span, err error) {
+	it, err := storage.Iterator(ctx, &sel)
 	if err != nil {
 		return nil, errors.NewInternalServerError(err)
 	}
-	return response, nil
+	defer it.Close()
+
+	if forward {
+		for it.Next() {
+			span, ok := it.Value().(*trace.Span)
+			if !ok {
+				continue
+			}
+			list = append(list, span)
+			if len(list) >= limit {
+				break
+			}
+		}
+	} else {
+		for it.Prev() {
+			span, ok := it.Value().(*trace.Span)
+			if !ok {
+				continue
+			}
+			list = append(list, span)
+			if len(list) >= limit {
+				break
+			}
+		}
+	}
+	if it.Error() != nil {
+		// wrap the iterator error itself; err is always nil at this point
+		return nil, errors.NewInternalServerError(it.Error())
+	}
+
+	return list, it.Error()
+}
+
+type Spans []*pb.Span
+
+func (s Spans) Len() int      { return len(s) }
+func (s Spans) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s Spans) Less(i, j int) bool {
+	return s[i].StartTime < s[j].StartTime
 }
 
 func getSpanProcessAnalysisDashboard(metricType string) string {
@@ -229,21 +317,28 @@ func (s *traceService) handleSpanResponse(spanTree query.SpanTree) (*pb.GetSpans
 		if traceEndTime == 0 || traceEndTime < span.EndTime {
 			traceEndTime = span.EndTime
 		}
-		span.Duration = mathpkg.AbsInt64(span.EndTime - span.StartTime)
-		span.SelfDuration = mathpkg.AbsInt64(span.Duration - childSpanDuration(id, spanTree))
+		span.Duration = positiveInt64(span.EndTime - span.StartTime)
+		span.SelfDuration = positiveInt64(span.Duration - childSpanDuration(id, spanTree))
 		spans = append(spans, span)
 	}
 	serviceCount := int64(len(services))
-	return &pb.GetSpansResponse{Spans: spans, ServiceCount: serviceCount, Depth: depth, Duration: mathpkg.AbsInt64(traceEndTime - traceStartTime), SpanCount: spanCount}, nil
+	return &pb.GetSpansResponse{Spans: spans, ServiceCount: serviceCount, Depth: depth, Duration: positiveInt64(traceEndTime - traceStartTime), SpanCount: spanCount}, nil
+}
+
+func positiveInt64(v int64) int64 {
+	if v > 0 {
+		return v
+	}
+	return 0
+}
 
 func childSpanDuration(id string, spanTree query.SpanTree) int64 {
 	duration := int64(0)
 	for _, span := range spanTree {
 		if span.ParentSpanId == id {
-			duration += span.EndTime - span.StartTime
+			duration += positiveInt64(span.EndTime - span.StartTime)
 		}
 	}
 	return duration
@@ -261,9 +356,19 @@ func calculateDepth(depth int64, span *pb.Span, spanTree query.SpanTree) int64 {
 }
 
 func (s *traceService) GetSpanCount(ctx context.Context, traceID string) (int64, error) {
-	count := 0
-	s.p.cassandraSession.Session().Query("SELECT COUNT(trace_id) FROM spans WHERE trace_id = ?", traceID).Iter().Scan(&count)
-	return int64(count), nil
+	var cassandraCount, elasticsearchCount int64
+
+	if strings.Contains(s.p.Cfg.QuerySource, "cassandra") {
+		// do cassandra query
+		s.p.cassandraSession.Session().Query("SELECT COUNT(trace_id) FROM spans WHERE trace_id = ?", traceID).Iter().Scan(&cassandraCount)
+	}
+
+	if strings.Contains(s.p.Cfg.QuerySource, "elasticsearch") {
+		// do elasticsearch query
+		elasticsearchCount = s.StorageReader.Count(ctx, traceID)
+	}
+
+	return cassandraCount + elasticsearchCount, nil
 }
 
 func (s *traceService) GetTraces(ctx context.Context, req *pb.GetTracesRequest) (*pb.GetTracesResponse, error) {
diff --git a/modules/msp/apm/trace/trace.service_test.go b/modules/msp/apm/trace/query/trace.service_test.go
similarity index 96%
rename from modules/msp/apm/trace/trace.service_test.go
rename to modules/msp/apm/trace/query/trace.service_test.go
index b3f5926389f..de43e777ffa 100644
--- a/modules/msp/apm/trace/trace.service_test.go
+++ b/modules/msp/apm/trace/query/trace.service_test.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package trace +package query import ( "context" @@ -30,12 +30,15 @@ import ( "github.com/erda-project/erda-infra/providers/i18n" metricpb "github.com/erda-project/erda-proto-go/core/monitor/metric/pb" "github.com/erda-project/erda-proto-go/msp/apm/trace/pb" + "github.com/erda-project/erda/modules/msp/apm/trace" "github.com/erda-project/erda/modules/msp/apm/trace/core/common" "github.com/erda-project/erda/modules/msp/apm/trace/core/debug" "github.com/erda-project/erda/modules/msp/apm/trace/core/query" "github.com/erda-project/erda/modules/msp/apm/trace/db" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" ) +//go:generate mockgen -destination=./mock_storage.go -package query -source=../storage/storage.go Storage func Test_traceService_GetSpans(t *testing.T) { type args struct { ctx context.Context @@ -93,6 +96,50 @@ func Test_traceService_GetSpans(t *testing.T) { } } +func Test_traceService_fetchSpanFromES(t *testing.T) { + s1 := &trace.Span{ + TraceId: "s1TraceId", + SpanId: "s1SpanId", + ParentSpanId: "s1ParentSpanId", + OperationName: "s1OperationName", + StartTime: 1, + EndTime: 1, + Tags: map[string]string{"tagk.s1a": "tagv.s1a", "tagk.s1b": "tagv.s1b"}, + } + ss := &listStorage{ + span: s1, + } + + tests := []struct { + name string + ctx context.Context + storage storage.Storage + sel storage.Selector + forward bool + limit int + want []*trace.Span + }{{ + "case 1", + context.TODO(), + ss, + storage.Selector{TraceId: "s1TraceId"}, + true, + 1, + []*trace.Span{s1}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + if got, err := fetchSpanFromES(tt.ctx, ss, tt.sel, tt.forward, tt.limit); !reflect.DeepEqual(got, tt.want) || err != nil { + t.Errorf("fetchSpanFromES() = %v, want %v", got, tt.want) + } + }) + } + +} + func Test_traceService_GetTraces(t *testing.T) { type args struct { ctx context.Context diff --git a/modules/msp/apm/trace/span.go b/modules/msp/apm/trace/span.go new file mode 100644 index 00000000000..cbbe0db8de2 --- /dev/null +++ b/modules/msp/apm/trace/span.go @@ -0,0 +1,25 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package trace + +type Span struct { + TraceId string `json:"trace_id"` + SpanId string `json:"span_id"` + ParentSpanId string `json:"parent_span_id"` + OperationName string `json:"operation_name"` + StartTime int64 `json:"start_time"` + EndTime int64 `json:"end_time"` + Tags map[string]string `json:"tags"` +} diff --git a/modules/msp/apm/trace/storage/consumer.go b/modules/msp/apm/trace/storage/cassandra_v1/consumer.go similarity index 86% rename from modules/msp/apm/trace/storage/consumer.go rename to modules/msp/apm/trace/storage/cassandra_v1/consumer.go index 3cf329b2187..9631df19787 100644 --- a/modules/msp/apm/trace/storage/consumer.go +++ b/modules/msp/apm/trace/storage/cassandra_v1/consumer.go @@ -12,16 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package storage +package cassandra_v1 import ( + "bytes" "encoding/json" "errors" "fmt" + "hash/fnv" "strconv" "time" "github.com/gocql/gocql" + "github.com/recallsong/go-utils/reflectx" "github.com/erda-project/erda-infra/providers/cassandra" oap "github.com/erda-project/erda-proto-go/oap/trace/pb" @@ -81,11 +84,14 @@ func (p *provider) getStatement(data interface{}) (string, []interface{}, error) if !ok { return "", nil, fmt.Errorf("value %#v must be Span", data) } + + // PRIMARY KEY is (trace_id, start_time), avoid the same start_time in the same trace_id + startTime, endTime := getTimeRange(span) const cql = `INSERT INTO spans (trace_id, start_time, end_time, operation_name, parent_span_id, span_id, tags) VALUES (?, ?, ?, ?, ?, ?, ?) USING TTL ?;` return cql, []interface{}{ span.TraceID, - span.StartTime, - span.EndTime, + startTime, + endTime, span.OperationName, span.ParentSpanID, span.SpanID, @@ -94,18 +100,39 @@ func (p *provider) getStatement(data interface{}) (string, []interface{}, error) }, nil } +const millisecond = int64(time.Millisecond) +const timeTailMask = millisecond / 10 + +func getTimeRange(span *monitor.Span) (int64, int64) { + startTime, endTime := span.StartTime, span.EndTime + if startTime%millisecond == 0 { + tail := int64(convertToIntID(span.SpanID)) % timeTailMask + startTime = startTime + tail + endTime = endTime + tail + } + return startTime, endTime +} + +func convertToIntID(id string) uint32 { + hash := fnv.New32() + hash.Write(reflectx.StringToBytes(id)) + return hash.Sum32() +} + func (p *provider) spotSpanConsumer(key []byte, value []byte, topic *string, timestamp time.Time) error { // write spot span to cassandra metric := &metrics.Metric{} - if err := json.Unmarshal(value, metric); err != nil { + dec := json.NewDecoder(bytes.NewReader(value)) + dec.UseNumber() + if err := dec.Decode(&metric); err != nil { return err } span, err := metricToSpan(metric) if err != nil { return err } - //metric = toSpan(span) - //err = p.output.kafka.Write(metric) + // metric = toSpan(span) + // err = p.output.kafka.Write(metric) if err != nil { p.Log.Errorf("fail to push kafka: %s", err) return err @@ -209,6 +236,8 @@ func toInt64(obj interface{}) (int64, error) { return int64(val), nil case float64: return int64(val), nil + case json.Number: + return val.Int64() case string: v, err := strconv.ParseInt(val, 10, 64) if err != nil { diff --git a/modules/msp/apm/trace/storage/consumer_test.go b/modules/msp/apm/trace/storage/cassandra_v1/consumer_test.go similarity index 62% rename from modules/msp/apm/trace/storage/consumer_test.go rename to modules/msp/apm/trace/storage/cassandra_v1/consumer_test.go index 9daf1c0fe3f..c80ed42382a 100644 --- a/modules/msp/apm/trace/storage/consumer_test.go +++ b/modules/msp/apm/trace/storage/cassandra_v1/consumer_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package storage +package cassandra_v1 import ( "fmt" @@ -23,6 +23,7 @@ import ( "github.com/stretchr/testify/assert" metrics "github.com/erda-project/erda/modules/core/monitor/metric" + "github.com/erda-project/erda/modules/pkg/monitor" ) // TestMetricToSpan . 
@@ -144,3 +145,78 @@ func TestToInt64(t *testing.T) { } assert.Equal(t, num, int64(10)) } + +func Test_getTimeRange(t *testing.T) { + tests := []struct { + name string + span *monitor.Span + wantStartTime int64 + wantEndTime int64 + }{ + { + span: &monitor.Span{ + SpanID: "bc703bc4-9ba4-40d5-a092-533183290cb0", + StartTime: 1635906581184000000, + EndTime: 1635906581186000000, + }, + wantStartTime: 1635906581184064675, + wantEndTime: 1635906581186064675, + }, + { + span: &monitor.Span{ + SpanID: "165c3f71-730b-4843-8da7-d000b08575b4", + StartTime: 1635906581185000000, + EndTime: 1635906581185000000, + }, + wantStartTime: 1635906581185019809, + wantEndTime: 1635906581185019809, + }, + { + span: &monitor.Span{ + SpanID: "dc76bc0a-40f3-4dbc-9f26-962fb3bd7556", + StartTime: 1635906581232000000, + EndTime: 1635906581237000000, + }, + wantStartTime: 1635906581232019363, + wantEndTime: 1635906581237019363, + }, + { + span: &monitor.Span{ + SpanID: "314287dd-bdaf-4ea3-9caa-035655b82355", + StartTime: 1635906581232000000, + EndTime: 1635906581237000000, + }, + wantStartTime: 1635906581232043715, + wantEndTime: 1635906581237043715, + }, + { + span: &monitor.Span{ + SpanID: "314287dd-bdaf-4ea3-9caa-035655b82355", + StartTime: 1635906581232000001, + EndTime: 1635906581237000001, + }, + wantStartTime: 1635906581232000001, + wantEndTime: 1635906581237000001, + }, + { + span: &monitor.Span{ + SpanID: "314287dd-bdaf-4ea3-9caa-035655b82355", + StartTime: 100 * millisecond, + EndTime: 200 * millisecond, + }, + wantStartTime: 100043715, + wantEndTime: 200043715, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + startTime, endTime := getTimeRange(tt.span) + if startTime != tt.wantStartTime { + t.Errorf("getTimePair() got startTime = %v, want %v", startTime, tt.wantStartTime) + } + if endTime != tt.wantEndTime { + t.Errorf("getTimePair() got endTime = %v, want %v", endTime, tt.wantEndTime) + } + }) + } +} diff --git a/modules/msp/apm/trace/storage/provider.go b/modules/msp/apm/trace/storage/cassandra_v1/provider.go similarity index 99% rename from modules/msp/apm/trace/storage/provider.go rename to modules/msp/apm/trace/storage/cassandra_v1/provider.go index ac2aeb473e2..67107d1519e 100644 --- a/modules/msp/apm/trace/storage/provider.go +++ b/modules/msp/apm/trace/storage/cassandra_v1/provider.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package storage +package cassandra_v1 import ( "fmt" diff --git a/modules/msp/apm/trace/storage/cassandra_v2/convert.go b/modules/msp/apm/trace/storage/cassandra_v2/convert.go new file mode 100644 index 00000000000..dcd95956769 --- /dev/null +++ b/modules/msp/apm/trace/storage/cassandra_v2/convert.go @@ -0,0 +1,38 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cassandra_v2 + +import "github.com/erda-project/erda-proto-go/msp/apm/trace/pb" + +func convertToPbSpans(list []*SavedSpan) []interface{} { + spans := make([]interface{}, 0, len(list)) + for _, log := range list { + data := wrapToPbSpan(log) + spans = append(spans, data) + } + return spans +} + +func wrapToPbSpan(ss *SavedSpan) *pb.Span { + return &pb.Span{ + Id: ss.SpanId, + TraceId: ss.TraceId, + OperationName: ss.OperationName, + ParentSpanId: ss.ParentSpanId, + StartTime: ss.StartTime, + EndTime: ss.EndTime, + Tags: ss.Tags, + } +} diff --git a/modules/msp/apm/trace/storage/cassandra_v2/iterator.go b/modules/msp/apm/trace/storage/cassandra_v2/iterator.go new file mode 100644 index 00000000000..b5e17e7742a --- /dev/null +++ b/modules/msp/apm/trace/storage/cassandra_v2/iterator.go @@ -0,0 +1,181 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cassandra_v2 + +import ( + "context" + "io" + + "github.com/scylladb/gocqlx/qb" + + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" +) + +func (p *provider) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) { + var cmps []qb.Cmp + values := make(qb.M) + cmps = append(cmps, qb.Eq("trace_id")) + values["trace_id"] = sel.TraceId + table := DefaultSpanTable + + return &spanIterator{ + ctx: ctx, + sel: sel, + queryFunc: p.queryFunc, + table: table, + cmps: cmps, + values: values, + pageSize: uint(p.Cfg.ReadPageSize), + }, nil + +} + +type iteratorDir int8 + +const ( + iteratorInitial = iota + iteratorForward + iteratorBackward +) + +type spanIterator struct { + ctx context.Context + sel *storage.Selector + queryFunc func(builder *qb.SelectBuilder, binding qb.M, dest interface{}) error + + table string + cmps []qb.Cmp + values qb.M + pageSize uint + + dir iteratorDir + + buffer []interface{} + value interface{} + err error + closed bool +} + +func (it *spanIterator) First() bool { + if it.checkClosed() { + return false + } + it.fetch(iteratorForward) + return it.yield() +} + +func (it *spanIterator) Last() bool { + if it.checkClosed() { + return false + } + it.fetch(iteratorBackward) + return it.yield() +} + +func (it *spanIterator) Next() bool { + if it.checkClosed() { + return false + } + if it.dir == iteratorBackward { + it.err = storekit.ErrOpNotSupported + return false + } + if it.yield() { + return true + } + it.fetch(iteratorForward) + return it.yield() +} + +func (it *spanIterator) Prev() bool { + if it.checkClosed() { + return false + } + if it.dir == iteratorForward { + it.err = storekit.ErrOpNotSupported + return false + } + if it.yield() { + return true + } + it.fetch(iteratorBackward) + return it.yield() +} + +func (it *spanIterator) Value() storekit.Data { return it.value } +func (it *spanIterator) Error() error { + if it.err == io.EOF { + return nil + } + return it.err +} + +func (it *spanIterator) yield() bool { + if len(it.buffer) > 0 { + it.value = it.buffer[0] + 
it.buffer = it.buffer[1:]
+		return true
+	}
+	return false
+}
+
+func (it *spanIterator) Close() error {
+	it.closed = true
+	return nil
+}
+
+func (it *spanIterator) checkClosed() bool {
+	if it.closed {
+		if it.err == nil {
+			it.err = storekit.ErrIteratorClosed
+		}
+		return true
+	}
+	select {
+	case <-it.ctx.Done():
+		if it.err == nil {
+			it.err = storekit.ErrIteratorClosed
+		}
+		return true
+	default:
+	}
+	return false
+}
+
+func (it *spanIterator) fetch(dir iteratorDir) error {
+	it.buffer = nil
+	order := qb.ASC
+	it.dir = dir
+	if it.dir == iteratorBackward {
+		order = qb.DESC
+	}
+	for it.err == nil && len(it.buffer) <= 0 {
+		var spans []*SavedSpan
+
+		builder := qb.Select(it.table).Where(it.cmps...).
+			OrderBy("start_time", order).Limit(uint(it.pageSize))
+		if err := it.queryFunc(builder, it.values, &spans); err != nil {
+			it.err = err // surface query failures through Error() instead of dropping them
+			return it.err
+		}
+		it.buffer = convertToPbSpans(spans)
+		// there is no paging state, so at most one page (read_page_size rows) is read;
+		// mark EOF so an exhausted or empty buffer does not re-run the same query.
+		it.err = io.EOF
+	}
+	return nil
+}
diff --git a/modules/msp/apm/trace/storage/cassandra_v2/model.go b/modules/msp/apm/trace/storage/cassandra_v2/model.go
new file mode 100644
index 00000000000..8ddb552e92f
--- /dev/null
+++ b/modules/msp/apm/trace/storage/cassandra_v2/model.go
@@ -0,0 +1,28 @@
+// Copyright (c) 2021 Terminus, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cassandra_v2
+
+type SavedSpan struct {
+	TraceId       string            `db:"trace_id"`
+	SpanId        string            `db:"span_id"`
+	ParentSpanId  string            `db:"parent_span_id"`
+	OperationName string            `db:"operation_name"`
+	StartTime     int64             `db:"start_time"`
+	EndTime       int64             `db:"end_time"`
+	Tags          map[string]string `db:"tags"`
+}
+
+// DefaultSpanTable .
+var DefaultSpanTable = "spot_prod.spans"
diff --git a/modules/msp/apm/trace/storage/cassandra_v2/provider.go b/modules/msp/apm/trace/storage/cassandra_v2/provider.go
new file mode 100644
index 00000000000..631d9179699
--- /dev/null
+++ b/modules/msp/apm/trace/storage/cassandra_v2/provider.go
@@ -0,0 +1,70 @@
+// Copyright (c) 2021 Terminus, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package cassandra_v2 + +import ( + "context" + "fmt" + + "github.com/scylladb/gocqlx" + "github.com/scylladb/gocqlx/qb" + + "github.com/erda-project/erda-infra/base/logs" + "github.com/erda-project/erda-infra/base/servicehub" + "github.com/erda-project/erda-infra/providers/cassandra" + "github.com/erda-project/erda/modules/core/monitor/storekit" + "github.com/erda-project/erda/modules/msp/apm/trace/storage" +) + +type ( + config struct { + Cassandra cassandra.SessionConfig `file:"cassandra"` + ReadPageSize int `file:"read_page_size" default:"1024"` + } + provider struct { + Cfg *config + Log logs.Logger + Cassandra cassandra.Interface `autowired:"cassandra"` + + queryFunc func(builder *qb.SelectBuilder, binding qb.M, dest interface{}) error + } +) + +func (p *provider) Init(ctx servicehub.Context) (err error) { + session, err := p.Cassandra.NewSession(&p.Cfg.Cassandra) + if err != nil { + return fmt.Errorf("fail to create cassandra session: %s", err) + } + p.queryFunc = func(builder *qb.SelectBuilder, binding qb.M, dest interface{}) error { + stmt, names := builder.ToCql() + cql := gocqlx.Query(session.Session().Query(stmt), names).BindMap(binding) + return cql.SelectRelease(dest) + } + return nil +} + +var _ storage.Storage = (*provider)(nil) + +func (p *provider) NewWriter(ctx context.Context) (storekit.BatchWriter, error) { + return nil, storekit.ErrOpNotSupported +} + +func init() { + servicehub.Register("span-storage-cassandra", &servicehub.Spec{ + Services: []string{"span-storage-cassandra-reader"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { return &provider{} }, + }) +} diff --git a/modules/msp/apm/trace/storage/cassandra_v2/statistics.go b/modules/msp/apm/trace/storage/cassandra_v2/statistics.go new file mode 100644 index 00000000000..0718a5dc6fb --- /dev/null +++ b/modules/msp/apm/trace/storage/cassandra_v2/statistics.go @@ -0,0 +1,37 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cassandra_v2 + +import ( + "context" + + "github.com/scylladb/gocqlx/qb" +) + +func (p *provider) Count(ctx context.Context, traceId string) int64 { + var count int64 + + var cmps []qb.Cmp + values := make(qb.M) + cmps = append(cmps, qb.Eq("trace_id")) + values["trace_id"] = traceId + + builder := qb.Select(DefaultSpanTable).Where(cmps...).Count("trace_id") + err := p.queryFunc(builder, values, &count) + if err != nil { + return 0 + } + return count +} diff --git a/modules/msp/apm/trace/storage/elasticsearch/iterator.go b/modules/msp/apm/trace/storage/elasticsearch/iterator.go new file mode 100644 index 00000000000..0445b364760 --- /dev/null +++ b/modules/msp/apm/trace/storage/elasticsearch/iterator.go @@ -0,0 +1,268 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package elasticsearch
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"strconv"
+	"time"
+
+	"github.com/olivere/elastic"
+
+	"github.com/erda-project/erda-infra/base/logs"
+	"github.com/erda-project/erda/modules/core/monitor/storekit"
+	"github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader"
+	"github.com/erda-project/erda/modules/msp/apm/trace"
+	"github.com/erda-project/erda/modules/msp/apm/trace/storage"
+)
+
+func (p *provider) getSearchSource(sel *storage.Selector) *elastic.SearchSource {
+	searchSource := elastic.NewSearchSource()
+	query := elastic.NewBoolQuery().Filter(elastic.NewQueryStringQuery("trace_id.raw:" + sel.TraceId))
+	return searchSource.Query(query)
+}
+
+func (p *provider) Iterator(ctx context.Context, sel *storage.Selector) (storekit.Iterator, error) {
+	// TODO check org
+	indices := p.Loader.Indices(ctx, time.Now().Add(-time.Hour*24*7).UnixNano(), time.Now().UnixNano(), loader.KeyPath{
+		Recursive: true,
+	})
+	return &scrollIterator{
+		log:          p.Log, // used by release() when clearing scroll contexts
+		ctx:          ctx,
+		sel:          sel,
+		searchSource: p.getSearchSource(sel),
+		client:       p.client,
+		timeout:      p.Cfg.QueryTimeout,
+		pageSize:     p.Cfg.ReadPageSize,
+		indices:      indices,
+	}, nil
+}
+
+type iteratorDir int8
+
+const (
+	iteratorInitial = iota
+	iteratorForward
+	iteratorBackward
+)
+
+type scrollIterator struct {
+	log          logs.Logger
+	ctx          context.Context
+	sel          *storage.Selector
+	searchSource *elastic.SearchSource
+	client       *elastic.Client
+	timeout      time.Duration
+	pageSize     int
+	indices      []string
+
+	scrollIDs    map[string]struct{}
+	lastScrollID string
+	dir          iteratorDir
+	buffer       []*trace.Span
+	value        *trace.Span
+	size         int64
+	err          error
+	closed       bool
+}
+
+func (it *scrollIterator) First() bool {
+	if it.checkClosed() {
+		return false
+	}
+	it.release()
+	it.fetch(iteratorForward)
+	return it.yield()
+}
+
+func (it *scrollIterator) Last() bool {
+	if it.checkClosed() {
+		return false
+	}
+	it.release()
+	it.fetch(iteratorBackward)
+	return it.yield()
+}
+
+func (it *scrollIterator) Next() bool {
+	if it.checkClosed() {
+		return false
+	}
+	if it.dir == iteratorBackward {
+		it.err = storekit.ErrOpNotSupported
+		return false
+	}
+	if it.yield() {
+		return true
+	}
+	it.fetch(iteratorForward)
+	return it.yield()
+}
+
+func (it *scrollIterator) Prev() bool {
+	if it.checkClosed() {
+		return false
+	}
+	if it.dir == iteratorForward {
+		it.err = storekit.ErrOpNotSupported
+		return false
+	}
+	if it.yield() {
+		return true
+	}
+	it.fetch(iteratorBackward)
+	return it.yield()
+}
+
+func (it *scrollIterator) Value() storekit.Data { return it.value }
+func (it *scrollIterator) Error() error {
+	if it.err == io.EOF {
+		return nil
+	}
+	return it.err
+}
+
+func (it *scrollIterator) release() (err error) {
+	var list []string
+	for id := range it.scrollIDs {
+		if len(id) > 0 {
+			list = append(list, id)
+		}
+	}
+	if len(list) > 0 {
+		_, err = it.client.ClearScroll(list...).Do(context.TODO())
+		if err != nil {
+			it.log.Errorf("failed to clear scroll: %s", err)
+		}
+	}
+	it.scrollIDs, it.lastScrollID = nil, ""
+	it.buffer = nil
+	it.value = nil
+	return nil
+}
+
+func (it *scrollIterator) fetch(dir 
iteratorDir) error { + + if len(it.indices) <= 0 { + it.err = io.EOF + return it.err + } + minutes := int64(it.timeout.Minutes()) + if minutes < 1 { + minutes = 1 + } + keepalive := strconv.FormatInt(minutes, 10) + "m" + + it.dir = dir + it.buffer = nil + for it.err == nil && len(it.buffer) <= 0 { + func() error { + // do query + ctx, cancel := context.WithTimeout(it.ctx, it.timeout) + defer cancel() + var resp *elastic.SearchResult + if len(it.lastScrollID) <= 0 { + var ascending bool + if it.dir != iteratorBackward { + ascending = true + } + + resp, it.err = it.client.Scroll(it.indices...).KeepAlive(keepalive). + IgnoreUnavailable(true).AllowNoIndices(true). + SearchSource(it.searchSource).Size(it.pageSize).Sort("start_time", ascending).Do(ctx) + if it.err != nil { + return it.err + } + } else { + resp, it.err = it.client.Scroll(it.indices...).ScrollId(it.lastScrollID).KeepAlive(keepalive). + IgnoreUnavailable(true).AllowNoIndices(true). + Size(it.pageSize).Do(ctx) + if it.err != nil { + return it.err + } + } + + // save scrollID + if it.scrollIDs == nil { + it.scrollIDs = make(map[string]struct{}) + } + if resp != nil { + it.scrollIDs[resp.ScrollId] = struct{}{} + it.lastScrollID = resp.ScrollId + } + if resp == nil || resp.Hits == nil || len(resp.Hits.Hits) <= 0 { + it.err = io.EOF + return it.err + } + + // parse result + it.buffer = parseHits(resp.Hits.Hits) + it.size = resp.Hits.TotalHits + return nil + }() + } + return nil +} + +func (it *scrollIterator) yield() bool { + if len(it.buffer) > 0 { + it.value = it.buffer[0] + it.buffer = it.buffer[1:] + return true + } + return false +} + +func (it *scrollIterator) Close() error { + it.closed = true + it.release() + return nil +} + +func (it *scrollIterator) checkClosed() bool { + if it.closed { + if it.err == nil { + it.err = storekit.ErrIteratorClosed + } + return true + } + return false +} + +func parseHits(hits []*elastic.SearchHit) (list []*trace.Span) { + for _, hit := range hits { + if hit.Source == nil { + continue + } + data, err := parseData(*hit.Source) + if err != nil { + continue + } + list = append(list, data) + } + return list +} + +func parseData(bytes []byte) (*trace.Span, error) { + var data trace.Span + err := json.Unmarshal(bytes, &data) + if err != nil { + return nil, err + } + return &data, nil +} diff --git a/modules/msp/apm/trace/storage/elasticsearch/provider.go b/modules/msp/apm/trace/storage/elasticsearch/provider.go new file mode 100644 index 00000000000..342490220c2 --- /dev/null +++ b/modules/msp/apm/trace/storage/elasticsearch/provider.go @@ -0,0 +1,123 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package elasticsearch
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/olivere/elastic"
+
+	"github.com/erda-project/erda-infra/base/logs"
+	"github.com/erda-project/erda-infra/base/servicehub"
+	"github.com/erda-project/erda-infra/providers/elasticsearch"
+	retention "github.com/erda-project/erda/modules/core/monitor/settings/retention-strategy"
+	"github.com/erda-project/erda/modules/core/monitor/storekit"
+	"github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/creator"
+	"github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader"
+	"github.com/erda-project/erda/modules/msp/apm/trace"
+	"github.com/erda-project/erda/modules/msp/apm/trace/storage"
+)
+
+type (
+	config struct {
+		QueryTimeout time.Duration `file:"query_timeout" default:"1m"`
+		WriteTimeout time.Duration `file:"write_timeout" default:"1m"`
+		ReadPageSize int           `file:"read_page_size" default:"1024"`
+		IndexType    string        `file:"index_type" default:"spans"`
+	}
+	provider struct {
+		Cfg       *config
+		Log       logs.Logger
+		ES1       elasticsearch.Interface `autowired:"elasticsearch@span" optional:"true"`
+		ES2       elasticsearch.Interface `autowired:"elasticsearch" optional:"true"`
+		Loader    loader.Interface        `autowired:"elasticsearch.index.loader@span"`
+		Creator   creator.Interface       `autowired:"elasticsearch.index.creator@span" optional:"true"`
+		Retention retention.Interface     `autowired:"storage-retention-strategy@span" optional:"true"`
+		client       *elastic.Client
+		es           elasticsearch.Interface
+		queryTimeout string
+	}
+)
+
+func (p *provider) Init(ctx servicehub.Context) (err error) {
+	if p.ES1 != nil {
+		p.es = p.ES1
+	} else if p.ES2 != nil {
+		p.es = p.ES2
+	} else {
+		return fmt.Errorf("elasticsearch is required")
+	}
+	p.client = p.es.Client()
+	if p.Retention != nil {
+		ctx.AddTask(func(c context.Context) error {
+			p.Retention.Loading(c) // use the task context so loading stops when the task is canceled
+			return nil
+		})
+	}
+	return nil
+}
+
+var _ storage.Storage = (*provider)(nil)
+
+func (p *provider) NewWriter(ctx context.Context) (storekit.BatchWriter, error) {
+	if p.Creator == nil || p.Retention == nil {
+		return nil, fmt.Errorf("elasticsearch.index.creator@span and storage-retention-strategy@span are required for Writer")
+	}
+	w := p.es.NewWriter(&elasticsearch.WriteOptions{
+		Timeout: p.Cfg.WriteTimeout,
+		Enc: func(val interface{}) (index, id, typ string, body interface{}, err error) {
+			data := val.(*trace.Span)
+			var wait <-chan error
+			wait, index = p.Creator.Ensure(data.Tags["org_name"])
+			if wait != nil {
+				select {
+				case <-wait:
+				case <-ctx.Done():
+					return "", "", "", nil, storekit.ErrExitConsume
+				}
+			}
+			return index, data.SpanId, p.Cfg.IndexType, &Document{
+				Span: data,
+				Date: getUnixMillisecond(data.EndTime),
+			}, nil
+		},
+	})
+	return w, nil
+}
+
+// Document .
+type Document struct { + *trace.Span + Date int64 `json:"@timestamp"` +} + +const maxUnixMillisecond int64 = 9999999999999 + +func getUnixMillisecond(ts int64) int64 { + if ts > maxUnixMillisecond { + return ts / int64(time.Millisecond) + } + return ts +} + +func init() { + servicehub.Register("span-storage-elasticsearch", &servicehub.Spec{ + Services: []string{"span-storage-elasticsearch-reader", "span-storage-writer"}, + ConfigFunc: func() interface{} { return &config{} }, + Creator: func() servicehub.Provider { return &provider{} }, + }) +} diff --git a/modules/msp/apm/trace/storage/elasticsearch/statistics.go b/modules/msp/apm/trace/storage/elasticsearch/statistics.go new file mode 100644 index 00000000000..92fe40513f9 --- /dev/null +++ b/modules/msp/apm/trace/storage/elasticsearch/statistics.go @@ -0,0 +1,44 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package elasticsearch + +import ( + "context" + "time" + + "github.com/erda-project/erda/modules/core/monitor/storekit/elasticsearch/index/loader" +) + +func (p *provider) Count(ctx context.Context, traceId string) int64 { + indices := p.Loader.Indices(ctx, time.Now().Add(-time.Hour*24*7).UnixNano(), time.Now().UnixNano(), loader.KeyPath{ + Recursive: true, + }) + + if len(indices) <= 0 { + return 0 + } + + // do query + ctx, cancel := context.WithTimeout(ctx, p.Cfg.QueryTimeout) + defer cancel() + + count, err := p.client.Count(indices...). + IgnoreUnavailable(true).AllowNoIndices(true).Q("trace_id.raw:" + traceId).Do(ctx) + if err != nil { + return 0 + } + + return count +} diff --git a/modules/msp/apm/trace/storage/storage.go b/modules/msp/apm/trace/storage/storage.go new file mode 100644 index 00000000000..cfd91e2fd28 --- /dev/null +++ b/modules/msp/apm/trace/storage/storage.go @@ -0,0 +1,35 @@ +// Copyright (c) 2021 Terminus, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "context" + + "github.com/erda-project/erda/modules/core/monitor/storekit" +) + +type ( + // Selector . + Selector struct { + TraceId string + } + + // Storage . + Storage interface { + NewWriter(ctx context.Context) (storekit.BatchWriter, error) + Iterator(ctx context.Context, sel *Selector) (storekit.Iterator, error) + Count(ctx context.Context, traceId string) int64 + } +)
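Reviewer note, not part of the patch: a minimal sketch (the package choice and the helper name readSpansExample are assumptions for illustration only) of how a caller is expected to drive the span Storage abstraction defined above — Count for the stored total, Iterator with a trace-scoped Selector, then forward Next/Value reads — the same pattern fetchSpanFromES follows in the query service.

package query

import (
	"context"

	"github.com/erda-project/erda/modules/msp/apm/trace"
	"github.com/erda-project/erda/modules/msp/apm/trace/storage"
)

// readSpansExample is a hypothetical helper that reads up to limit spans of
// one trace through storage.Storage and also returns the backend's span count.
func readSpansExample(ctx context.Context, s storage.Storage, traceID string, limit int) ([]*trace.Span, int64, error) {
	total := s.Count(ctx, traceID)

	it, err := s.Iterator(ctx, &storage.Selector{TraceId: traceID})
	if err != nil {
		return nil, total, err
	}
	defer it.Close()

	var spans []*trace.Span
	for it.Next() {
		// the elasticsearch reader yields *trace.Span values; skip anything else
		if span, ok := it.Value().(*trace.Span); ok {
			spans = append(spans, span)
		}
		if len(spans) >= limit {
			break
		}
	}
	return spans, total, it.Error()
}

Against the listStorage test double introduced in the query package, readSpansExample(context.TODO(), &listStorage{span: s1}, "s1TraceId", 10) would return the single seeded span.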