From 2f51c9120e6b200c44f0957ab62a40bc44b6ca7a Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Thu, 9 May 2024 23:33:49 +0530 Subject: [PATCH 1/6] Added Azure OpenAI embeddings provider for Classification feature. --- .../Classifai/Features/Classification.php | 9 +- includes/Classifai/Helpers.php | 2 +- .../Classifai/Providers/Azure/Embeddings.php | 993 ++++++++++++++++++ .../Classifai/Services/LanguageProcessing.php | 1 + 4 files changed, 1002 insertions(+), 3 deletions(-) create mode 100644 includes/Classifai/Providers/Azure/Embeddings.php diff --git a/includes/Classifai/Features/Classification.php b/includes/Classifai/Features/Classification.php index d429d720e..bd82444ee 100644 --- a/includes/Classifai/Features/Classification.php +++ b/includes/Classifai/Features/Classification.php @@ -5,6 +5,7 @@ use Classifai\Services\LanguageProcessing; use Classifai\Providers\Watson\NLU; use Classifai\Providers\OpenAI\Embeddings; +use Classifai\Providers\Azure\Embeddings as AzureEmbeddings; use WP_REST_Server; use WP_REST_Request; use WP_Error; @@ -41,6 +42,7 @@ public function __construct() { $this->supported_providers = [ NLU::ID => __( 'IBM Watson NLU', 'classifai' ), Embeddings::ID => __( 'OpenAI Embeddings', 'classifai' ), + AzureEmbeddings::ID => __( 'Azure OpenAI Embeddings', 'classifai' ), ]; } @@ -249,6 +251,9 @@ public function save( int $post_id, array $results, bool $link = true ) { case Embeddings::ID: $results = $provider_instance->set_terms( $post_id, $results, $link ); break; + case AzureEmbeddings::ID: + $results = $provider_instance->set_terms( $post_id, $results, $link ); + break; } return $results; @@ -779,7 +784,7 @@ public function add_custom_settings_fields() { ); // Embeddings only supports existing terms. - if ( isset( $settings['provider'] ) && Embeddings::ID === $settings['provider'] ) { + if ( isset( $settings['provider'] ) && ( Embeddings::ID === $settings['provider'] || AzureEmbeddings::ID === $settings['provider'] ) ) { unset( $method_options['recommended_terms'] ); $settings['classification_method'] = 'existing_terms'; } @@ -876,7 +881,7 @@ public function sanitize_default_feature_settings( array $new_settings ): array $new_settings['classification_method'] = sanitize_text_field( $new_settings['classification_method'] ?? $settings['classification_method'] ); // Embeddings only supports existing terms. - if ( isset( $new_settings['provider'] ) && Embeddings::ID === $new_settings['provider'] ) { + if ( isset( $new_settings['provider'] ) && ( Embeddings::ID === $new_settings['provider'] || AzureEmbeddings::ID === $new_settings['provider'] ) ) { $new_settings['classification_method'] = 'existing_terms'; } diff --git a/includes/Classifai/Helpers.php b/includes/Classifai/Helpers.php index cd207ba0e..8f62cd785 100644 --- a/includes/Classifai/Helpers.php +++ b/includes/Classifai/Helpers.php @@ -610,7 +610,7 @@ function get_classification_feature_taxonomy( string $classify_by = '' ): string $taxonomy = $settings[ $classify_by . '_taxonomy' ]; } - if ( Embeddings::ID === $settings['provider'] ) { + if ( NLU::ID !== $settings['provider'] ) { $taxonomy = $classify_by; } diff --git a/includes/Classifai/Providers/Azure/Embeddings.php b/includes/Classifai/Providers/Azure/Embeddings.php new file mode 100644 index 000000000..277ebf4c8 --- /dev/null +++ b/includes/Classifai/Providers/Azure/Embeddings.php @@ -0,0 +1,993 @@ +feature_instance = $feature_instance; + + if ( + $this->feature_instance && + method_exists( $this->feature_instance, 'get_supported_taxonomies' ) + ) { + $settings = get_option( $this->feature_instance->get_option_name(), [] ); + $post_types = isset( $settings['post_types'] ) ? $settings['post_types'] : [ 'post' => 1 ]; + + foreach ( $this->feature_instance->get_supported_taxonomies( $post_types ) as $tax => $label ) { + $this->nlu_features[ $tax ] = [ + 'feature' => $label, + 'threshold' => __( 'Threshold (%)', 'classifai' ), + 'threshold_default' => 75, + 'taxonomy' => __( 'Taxonomy', 'classifai' ), + 'taxonomy_default' => $tax, + ]; + } + } + } + + /** + * Get the number of dimensions for the embeddings. + * + * @return int + */ + public function get_dimensions(): int { + /** + * Filter the dimensions we want for each embedding. + * + * Useful if you want to increase or decrease the length + * of each embedding. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_dimensions + * + * @param {int} $dimensions The default dimensions. + * + * @return {int} The dimensions. + */ + return apply_filters( 'classifai_azure_openai_embeddings_dimensions', $this->dimensions ); + } + + /** + * Get the maximum number of tokens. + * + * @return int + */ + public function get_max_tokens(): int { + /** + * Filter the max number of tokens. + * + * Useful if you want to change to a different model + * that uses a different number of tokens, or be more + * strict on the amount of tokens that can be used. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_max_tokens + * + * @param {int} $model The default maximum tokens. + * + * @return {int} The maximum tokens. + */ + return apply_filters( 'classifai_azure_openai_embeddings_max_tokens', $this->max_tokens ); + } + + /** + * Get the maximum number of terms we process. + * + * @return int + */ + public function get_max_terms(): int { + /** + * Filter the max number of terms. + * + * Default for this is 5000 but this filter can be used to change + * this, either decreasing to help with performance or increasing + * to ensure we consider more terms. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_max_terms + * + * @param {int} $terms The default maximum terms. + * + * @return {int} The maximum terms. + */ + return apply_filters( 'classifai_azure_openai_embeddings_max_terms', $this->max_terms ); + } + + /** + * Register what we need for the plugin. + * + * This only fires if can_register returns true. + */ + public function register() { + add_filter( 'classifai_feature_classification_get_default_settings', [ $this, 'modify_default_feature_settings' ], 10, 2 ); + + $feature = new Classification(); + + if ( + ! $feature->is_feature_enabled() || + $feature->get_feature_provider_instance()::ID !== static::ID + ) { + return; + } + + add_action( 'created_term', [ $this, 'generate_embeddings_for_term' ] ); + add_action( 'edited_terms', [ $this, 'generate_embeddings_for_term' ] ); + add_action( 'wp_ajax_get_post_classifier_embeddings_preview_data', array( $this, 'get_post_classifier_embeddings_preview_data' ) ); + } + + /** + * Modify the default settings for the classification feature. + * + * @param array $settings Current settings. + * @param Feature $feature_instance The feature instance. + * @return array + */ + public function modify_default_feature_settings( array $settings, $feature_instance ): array { + remove_filter( 'classifai_feature_classification_get_default_settings', [ $this, 'modify_default_feature_settings' ], 10, 2 ); + + if ( $feature_instance->get_settings( 'provider' ) !== static::ID ) { + return $settings; + } + + add_filter( 'classifai_feature_classification_get_default_settings', [ $this, 'modify_default_feature_settings' ], 10, 2 ); + + $defaults = []; + + foreach ( array_keys( $feature_instance->get_supported_taxonomies() ) as $tax ) { + $enabled = 'category' === $tax ? true : false; + + $defaults[ $tax ] = $enabled; + $defaults[ $tax . '_threshold' ] = 75; + $defaults[ $tax . '_taxonomy' ] = $tax; + } + + return array_merge( $settings, $defaults ); + } + + /** + * Sanitization for the options being saved. + * + * @param array $new_settings Array of settings about to be saved. + * @return array The sanitized settings to be saved. + */ + public function sanitize_settings( array $new_settings ): array { + $new_settings = parent::sanitize_settings( $new_settings ); + + // Trigger embedding generation for all terms in enabled taxonomies if the feature is on. + // TODO: Two issues here we need to address: 1 - for sites with lots of terms, this is likely to lead to timeouts or rate limit issues. Should move this to some sort of queue/cron handler; 2 - this only works on the second save due to checking the value of get_all_feature_taxonomies() which is not updated until the settings are saved. + if ( isset( $new_settings['status'] ) && 1 === (int) $new_settings['status'] ) { + foreach ( array_keys( $this->nlu_features ) as $feature_name ) { + if ( isset( $new_settings[ $feature_name ] ) && 1 === (int) $new_settings[ $feature_name ] ) { + $this->trigger_taxonomy_update( $feature_name ); + } + } + } + + return $new_settings; + } + + /** + * Build and return the API endpoint based on settings. + * + * @param \Classifai\Features\Feature $feature Feature instance + * @return string + */ + protected function prep_api_url( \Classifai\Features\Feature $feature = null ): string { + $settings = $feature->get_settings( static::ID ); + $endpoint = $settings['endpoint_url'] ?? ''; + $deployment = $settings['deployment'] ?? ''; + + if ( ! $endpoint ) { + return ''; + } + + if ( $deployment ) { + $endpoint = trailingslashit( $endpoint ) . str_replace( '{deployment-id}', $deployment, $this->embeddings_url ); + $endpoint = add_query_arg( 'api-version', $this->api_version, $endpoint ); + } + + return $endpoint; + } + + /** + * Authenticates our credentials. + * + * @param string $url Endpoint URL. + * @param string $api_key Api Key. + * @param string $deployment Deployment name. + * @return bool|WP_Error + */ + protected function authenticate_credentials( string $url, string $api_key, string $deployment ) { + $rtn = false; + + // This does basically the same thing that prep_api_url does but when running authentication, + // we don't have settings saved yet, which prep_api_url needs. + $endpoint = trailingslashit( $url ) . str_replace( '{deployment-id}', $deployment, $this->embeddings_url ); + $endpoint = add_query_arg( 'api-version', $this->api_version, $endpoint ); + + $request = wp_remote_post( + $endpoint, + [ + 'headers' => [ + 'api-key' => $api_key, + 'Content-Type' => 'application/json', + ], + 'body' => wp_json_encode( + [ + 'input' => 'This is a test', + 'dimensions' => $this->get_dimensions(), + ] + ), + ] + ); + + if ( ! is_wp_error( $request ) ) { + $response = json_decode( wp_remote_retrieve_body( $request ) ); + if ( ! empty( $response->error ) ) { + $rtn = new WP_Error( 'auth', $response->error->message ); + } else { + $rtn = true; + } + } + + return $rtn; + } + + + /** + * Get the threshold for the similarity calculation. + * + * @since x.x.x + * + * @param string $taxonomy Taxonomy slug. + * @return float + */ + public function get_threshold( string $taxonomy = '' ): float { + $settings = ( new Classification() )->get_settings(); + $threshold = 1; + + if ( ! empty( $taxonomy ) ) { + $threshold = isset( $settings[ $taxonomy . '_threshold' ] ) ? $settings[ $taxonomy . '_threshold' ] : 75; + } + + // Convert $threshold (%) to decimal. + $threshold = 1 - ( (float) $threshold / 100 ); + + /** + * Filter the threshold for the similarity calculation. + * + * @since x.x.x + * @hook classifai_threshold + * + * @param {float} $threshold The threshold to use. + * @param {string} $taxonomy The taxonomy to get the threshold for. + * + * @return {float} The threshold to use. + */ + return apply_filters( 'classifai_threshold', $threshold, $taxonomy ); + } + + /** + * Get the data to preview terms. + * + * @since 2.5.0 + * + * @return array + */ + public function get_post_classifier_embeddings_preview_data(): array { + $nonce = isset( $_POST['nonce'] ) ? sanitize_text_field( wp_unslash( $_POST['nonce'] ) ) : false; + + if ( ! $nonce || ! wp_verify_nonce( $nonce, 'classifai-previewer-action' ) ) { + wp_send_json_error( esc_html__( 'Failed nonce check.', 'classifai' ) ); + } + + $post_id = filter_input( INPUT_POST, 'post_id', FILTER_SANITIZE_NUMBER_INT ); + + $embeddings = $this->generate_embeddings_for_post( $post_id, true ); + $embeddings_terms = []; + + // Add terms to this item based on embedding data. + if ( $embeddings && ! is_wp_error( $embeddings ) ) { + $embeddings_terms = $this->get_terms( $embeddings ); + } + + return wp_send_json_success( $embeddings_terms ); + } + + /** + * Trigger embedding generation for content being saved. + * + * @param int $post_id ID of post being saved. + * @param bool $force Whether to force generation of embeddings even if they already exist. Default false. + * @return array|WP_Error + */ + public function generate_embeddings_for_post( int $post_id, bool $force = false ) { + // Don't run on autosaves. + if ( defined( 'DOING_AUTOSAVE' ) && DOING_AUTOSAVE ) { + return new WP_Error( 'invalid', esc_html__( 'Classification will not work during an autosave.', 'classifai' ) ); + } + + // Ensure the user has permissions to edit. + if ( ! current_user_can( 'edit_post', $post_id ) && ( ! defined( 'WP_CLI' ) || ! WP_CLI ) ) { + return new WP_Error( 'invalid', esc_html__( 'User does not have permission to classify this item.', 'classifai' ) ); + } + + /** + * Filter whether ClassifAI should classify an item. + * + * Default is true, return false to skip classifying. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_should_classify + * + * @param {bool} $should_classify Whether the item should be classified. Default `true`, return `false` to skip. + * @param {int} $id The ID of the item to be considered for classification. + * @param {string} $type The type of item to be considered for classification. + * + * @return {bool} Whether the item should be classified. + */ + if ( ! apply_filters( 'classifai_azure_openai_embeddings_should_classify', true, $post_id, 'post' ) ) { + return new WP_Error( 'invalid', esc_html__( 'Classification is disabled for this item.', 'classifai' ) ); + } + + // Try to use the stored embeddings first. + if ( ! $force ) { + $embeddings = get_post_meta( $post_id, 'classifai_azure_openai_embeddings', true ); + + if ( ! empty( $embeddings ) ) { + return $embeddings; + } + } + + // Chunk the post content down. + $embeddings = []; + $content = $this->get_normalized_content( $post_id, 'post' ); + $content_chunks = $this->chunk_content( $content ); + + // Get the embeddings for each chunk. + if ( ! empty( $content_chunks ) ) { + foreach ( $content_chunks as $chunk ) { + $embedding = $this->generate_embedding( $chunk ); + + if ( $embedding && ! is_wp_error( $embedding ) ) { + $embeddings[] = array_map( 'floatval', $embedding ); + } + } + } + + // Store the embeddings for future use. + if ( ! empty( $embeddings ) ) { + update_post_meta( $post_id, 'classifai_azure_openai_embeddings', $embeddings ); + } + + return $embeddings; + } + + /** + * Add terms to a post based on embeddings. + * + * @param int $post_id ID of post to set terms on. + * @param array $embeddings Embeddings data. + * @param bool $link Whether to link the terms or not. + * @return array|WP_Error + */ + public function set_terms( int $post_id = 0, array $embeddings = [], bool $link = true ) { + if ( ! $post_id || ! get_post( $post_id ) ) { + return new WP_Error( 'post_id_required', esc_html__( 'A valid post ID is required to set terms.', 'classifai' ) ); + } + + if ( empty( $embeddings ) ) { + return new WP_Error( 'data_required', esc_html__( 'Valid embedding data is required to set terms.', 'classifai' ) ); + } + + $embeddings_similarity = []; + + // Iterate through all of our embedding chunks and run our similarity calculations. + foreach ( $embeddings as $embedding ) { + $embeddings_similarity = array_merge( $embeddings_similarity, $this->get_embeddings_similarity( $embedding ) ); + } + + // Ensure we have some results. + if ( empty( $embeddings_similarity ) ) { + return new WP_Error( 'invalid', esc_html__( 'No matching terms found.', 'classifai' ) ); + } + + // Sort the results by similarity. + usort( + $embeddings_similarity, + function ( $a, $b ) { + return $a['similarity'] <=> $b['similarity']; + } + ); + + // Remove duplicates based on the term_id field. + $uniques = array_unique( array_column( $embeddings_similarity, 'term_id' ) ); + $embeddings_similarity = array_intersect_key( $embeddings_similarity, $uniques ); + + $sorted_results = []; + + // Sort the results into taxonomy buckets. + foreach ( $embeddings_similarity as $item ) { + $sorted_results[ $item['taxonomy'] ][] = $item; + } + + $return = []; + + /** + * If $link is true, immediately link all the terms + * to the item. + * + * If it is false, build an array of term data that + * can be used to display the terms in the UI. + */ + foreach ( $sorted_results as $tax => $terms ) { + if ( $link ) { + wp_set_object_terms( $post_id, array_map( 'absint', array_column( $terms, 'term_id' ) ), $tax, false ); + } else { + $terms_to_link = []; + + foreach ( $terms as $term ) { + $found_term = get_term( $term['term_id'] ); + + if ( $found_term && ! is_wp_error( $found_term ) ) { + $terms_to_link[ $found_term->name ] = $term['term_id']; + } + } + + $return[ $tax ] = $terms_to_link; + } + } + + return empty( $return ) ? $embeddings_similarity : $return; + } + + /** + * Determine which terms best match a post based on embeddings. + * + * @param array $embeddings An array of embeddings data. + * @return array|WP_Error + */ + public function get_terms( array $embeddings = [] ) { + if ( empty( $embeddings ) ) { + return new WP_Error( 'data_required', esc_html__( 'Valid embedding data is required to get terms.', 'classifai' ) ); + } + + $embeddings_similarity = []; + + // Iterate through all of our embedding chunks and run our similarity calculations. + foreach ( $embeddings as $embedding ) { + $embeddings_similarity = array_merge( $embeddings_similarity, $this->get_embeddings_similarity( $embedding, false ) ); + } + + // Ensure we have some results. + if ( empty( $embeddings_similarity ) ) { + return new WP_Error( 'invalid', esc_html__( 'No matching terms found.', 'classifai' ) ); + } + + // Sort the results by similarity. + usort( + $embeddings_similarity, + function ( $a, $b ) { + return $a['similarity'] <=> $b['similarity']; + } + ); + + // Remove duplicates based on the term_id field. + $uniques = array_unique( array_column( $embeddings_similarity, 'term_id' ) ); + $embeddings_similarity = array_intersect_key( $embeddings_similarity, $uniques ); + + $sorted_results = []; + + // Sort the results into taxonomy buckets. + foreach ( $embeddings_similarity as $item ) { + $sorted_results[ $item['taxonomy'] ][] = $item; + } + + // Prepare the results. + $index = 0; + $results = []; + + foreach ( $sorted_results as $tax => $terms ) { + // Get the taxonomy name. + $taxonomy = get_taxonomy( $tax ); + $tax_name = $taxonomy->labels->singular_name; + + // Setup our taxonomy object. + $results[] = new \stdClass(); + + $results[ $index ]->{$tax_name} = []; + + foreach ( $terms as $term ) { + // Convert $similarity to percentage. + $similarity = round( ( 1 - $term['similarity'] ), 10 ); + + // Store the results. + $results[ $index ]->{$tax_name}[] = [ // phpcs:ignore Squiz.PHP.DisallowMultipleAssignments.Found + 'label' => get_term( $term['term_id'] )->name, + 'score' => $similarity, + ]; + } + + ++$index; + } + + return $results; + } + + /** + * Get the similarity between an embedding and all terms. + * + * @since x.x.x + * + * @param array $embedding Embedding data. + * @param bool $consider_threshold Whether to consider the threshold setting. + * @return array + */ + private function get_embeddings_similarity( array $embedding, bool $consider_threshold = true ): array { + $feature = new Classification(); + $embedding_similarity = []; + $taxonomies = $feature->get_all_feature_taxonomies(); + $calculations = new EmbeddingCalculations(); + + foreach ( $taxonomies as $tax ) { + $exclude = []; + + if ( is_numeric( $tax ) ) { + continue; + } + + if ( 'tags' === $tax ) { + $tax = 'post_tag'; + } + + if ( 'categories' === $tax ) { + $tax = 'category'; + + // Exclude the uncategorized term. + $uncat_term = get_term_by( 'name', 'Uncategorized', 'category' ); + if ( $uncat_term ) { + $exclude = [ $uncat_term->term_id ]; + } + } + + $terms = get_terms( + [ + 'taxonomy' => $tax, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => 'classifai_azure_openai_embeddings', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'number' => $this->get_max_terms(), + 'exclude' => $exclude, // phpcs:ignore WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude + ] + ); + + if ( is_wp_error( $terms ) || empty( $terms ) ) { + continue; + } + + // Get threshold setting for this taxonomy. + $threshold = $this->get_threshold( $tax ); + + // Get embedding similarity for each term. + foreach ( $terms as $term_id ) { + if ( ! current_user_can( 'assign_term', $term_id ) && ( ! defined( 'WP_CLI' ) || ! WP_CLI ) ) { + continue; + } + + $term_embedding = get_term_meta( $term_id, 'classifai_azure_openai_embeddings', true ); + + if ( ! empty( $term_embedding ) ) { + // Loop through the chunks and run a similarity calculation on each. + foreach ( $term_embedding as $chunk ) { + $similarity = $calculations->similarity( $embedding, $chunk ); + + if ( false !== $similarity && ( ! $consider_threshold || $similarity <= $threshold ) ) { + $embedding_similarity[] = [ + 'taxonomy' => $tax, + 'term_id' => $term_id, + 'similarity' => $similarity, + ]; + } + } + } + } + } + + return $embedding_similarity; + } + + /** + * Generate embedding data for all terms within a taxonomy. + * + * @param string $taxonomy Taxonomy slug. + * @param bool $all Whether to generate embeddings for all terms or just those without embeddings. + */ + private function trigger_taxonomy_update( string $taxonomy = '', bool $all = false ) { + $exclude = []; + + // Exclude the uncategorized term. + if ( 'category' === $taxonomy ) { + $uncat_term = get_term_by( 'name', 'Uncategorized', 'category' ); + if ( $uncat_term ) { + $exclude = [ $uncat_term->term_id ]; + } + } + + $args = [ + 'taxonomy' => $taxonomy, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => 'classifai_azure_openai_embeddings', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'NOT EXISTS', + 'number' => $this->get_max_terms(), + 'exclude' => $exclude, // phpcs:ignore WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude + ]; + + // If we want all terms, remove our meta query. + if ( $all ) { + unset( $args['meta_key'], $args['meta_compare'] ); + } + + $terms = get_terms( $args ); + + if ( is_wp_error( $terms ) || empty( $terms ) ) { + return; + } + + // Generate embedding data for each term. + foreach ( $terms as $term_id ) { + /** @var int $term_id */ + $this->generate_embeddings_for_term( $term_id, $all ); + } + } + + /** + * Trigger embedding generation for term being saved. + * + * @param int $term_id ID of term being saved. + * @param bool $force Whether to force generation of embeddings even if they already exist. Default false. + * @return array|WP_Error + */ + public function generate_embeddings_for_term( int $term_id, bool $force = false ) { + // Ensure the user has permissions to edit. + if ( ! current_user_can( 'edit_term', $term_id ) ) { + return new WP_Error( 'invalid', esc_html__( 'User does not have valid permissions to edit this term.', 'classifai' ) ); + } + + $term = get_term( $term_id ); + + if ( ! is_a( $term, '\WP_Term' ) ) { + return new WP_Error( 'invalid', esc_html__( 'This is not a valid term.', 'classifai' ) ); + } + + $feature = new Classification(); + $taxonomies = $feature->get_all_feature_taxonomies(); + + if ( in_array( 'tags', $taxonomies, true ) ) { + $taxonomies[] = 'post_tag'; + } + + if ( in_array( 'categories', $taxonomies, true ) ) { + $taxonomies[] = 'category'; + } + + // Ensure this term is part of a taxonomy we support. + if ( ! in_array( $term->taxonomy, $taxonomies, true ) ) { + return new WP_Error( 'invalid', esc_html__( 'This taxonomy is not supported.', 'classifai' ) ); + } + + /** + * Filter whether ClassifAI should classify an item. + * + * Default is true, return false to skip classifying. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_should_classify + * + * @param {bool} $should_classify Whether the item should be classified. Default `true`, return `false` to skip. + * @param {int} $id The ID of the item to be considered for classification. + * @param {string} $type The type of item to be considered for classification. + * + * @return {bool} Whether the item should be classified. + */ + if ( ! apply_filters( 'classifai_azure_openai_embeddings_should_classify', true, $term_id, 'term' ) ) { + return new WP_Error( 'invalid', esc_html__( 'Classification is disabled for this item.', 'classifai' ) ); + } + + // Try to use the stored embeddings first. + $embeddings = get_term_meta( $term_id, 'classifai_azure_openai_embeddings', true ); + + if ( ! empty( $embeddings ) && ! $force ) { + return $embeddings; + } + + // Chunk the term content down. + $embeddings = []; + $content = $this->get_normalized_content( $term_id, 'term' ); + $content_chunks = $this->chunk_content( $content ); + + // Get the embeddings for each chunk. + if ( ! empty( $content_chunks ) ) { + foreach ( $content_chunks as $chunk ) { + $embedding = $this->generate_embedding( $chunk ); + + if ( $embedding && ! is_wp_error( $embedding ) ) { + $embeddings[] = array_map( 'floatval', $embedding ); + } + } + } + + // Store the embeddings for future use. + if ( ! empty( $embeddings ) ) { + update_term_meta( $term_id, 'classifai_azure_openai_embeddings', $embeddings ); + } + + return $embeddings; + } + + /** + * Generate an embedding for a particular piece of text. + * + * @param string $text Text to generate the embedding for. + * @return array|boolean|WP_Error + */ + public function generate_embedding( string $text = '' ) { + $feature = new Classification(); + $settings = $feature->get_settings(); + + // Ensure the feature is enabled. + if ( ! $feature->is_feature_enabled() ) { + return new WP_Error( 'not_enabled', esc_html__( 'Classification is disabled or OpenAI authentication failed. Please check your settings.', 'classifai' ) ); + } + + /** + * Filter the request body before sending to OpenAI. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_request_body + * + * @param {array} $body Request body that will be sent to OpenAI. + * @param {string} $text Text we are getting embeddings for. + * + * @return {array} Request body. + */ + $body = apply_filters( + 'classifai_azure_openai_embeddings_request_body', + [ + 'input' => $text, + 'dimensions' => $this->get_dimensions(), + ], + $text + ); + + // Make our API request. + $response = wp_remote_post( + $this->prep_api_url( $feature ), + [ + 'headers' => [ + 'api-key' => $settings[ static::ID ]['api_key'], + 'Content-Type' => 'application/json', + ], + 'body' => wp_json_encode( $body ), + ] + ); + $response = $this->get_result( $response ); + + set_transient( 'classifai_azure_openai_embeddings_latest_response', $response, DAY_IN_SECONDS * 30 ); + + if ( is_wp_error( $response ) ) { + return $response; + } + + if ( empty( $response['data'] ) ) { + return new WP_Error( 'no_data', esc_html__( 'No data returned from Azure OpenAI.', 'classifai' ) ); + } + + $return = []; + + // Parse out the embeddings response. + foreach ( $response['data'] as $data ) { + if ( ! isset( $data['embedding'] ) || ! is_array( $data['embedding'] ) ) { + continue; + } + + $return = $data['embedding']; + + break; + } + + return $return; + } + + /** + * Chunk content into smaller pieces with an overlap. + * + * @param string $content Content to chunk. + * @param int $chunk_size Size of each chunk, in words. + * @param int $overlap_size Overlap size for each chunk, in words. + * @return array + */ + public function chunk_content( string $content = '', int $chunk_size = 150, $overlap_size = 25 ): array { + // Remove multiple whitespaces. + $content = preg_replace( '/\s+/', ' ', $content ); + + // Split text by single whitespace. + $words = explode( ' ', $content ); + + $chunks = []; + $text_count = count( $words ); + + // Iterate through and chunk data with an overlap. + for ( $i = 0; $i < $text_count; $i += $chunk_size ) { + // Join a set of words into a string. + $chunk = implode( + ' ', + array_slice( + $words, + max( $i - $overlap_size, 0 ), + $i + $chunk_size + ) + ); + + array_push( $chunks, $chunk ); + } + + return $chunks; + } + + /** + * Get our content, ensuring it is normalized. + * + * @param int $id ID of item to get content from. + * @param string $type Type of content. Default 'post'. + * @return string + */ + public function get_normalized_content( int $id = 0, string $type = 'post' ): string { + $normalizer = new Normalizer(); + + // Get the content depending on the type. + switch ( $type ) { + case 'post': + // This will include the post_title and post_content. + $content = $normalizer->normalize( $id ); + break; + case 'term': + $content = ''; + $term = get_term( $id ); + + if ( is_a( $term, '\WP_Term' ) ) { + $content = $term->name . ' ' . $term->slug . ' ' . $term->description; + } + + break; + } + + /** + * Filter content that will get sent to OpenAI. + * + * @since x.x.x + * @hook classifai_azure_openai_embeddings_content + * + * @param {string} $content Content that will be sent to OpenAI. + * @param {int} $post_id ID of post we are submitting. + * @param {string} $type Type of content. + * + * @return {string} Content. + */ + return apply_filters( 'classifai_azure_openai_embeddings_content', $content, $id, $type ); + } + + /** + * Common entry point for all REST endpoints for this provider. + * + * @param int $post_id The Post Id we're processing. + * @param string $route_to_call The route we are processing. + * @param array $args Optional arguments to pass to the route. + * @return string|WP_Error + */ + public function rest_endpoint_callback( $post_id = 0, string $route_to_call = '', array $args = [] ) { + if ( ! $post_id || ! get_post( $post_id ) ) { + return new WP_Error( 'post_id_required', esc_html__( 'A valid post ID is required to run classification.', 'classifai' ) ); + } + + $route_to_call = strtolower( $route_to_call ); + $return = ''; + + // Handle all of our routes. + switch ( $route_to_call ) { + case 'classify': + $return = $this->generate_embeddings_for_post( $post_id, true ); + break; + } + + return $return; + } + + /** + * Returns the debug information for the provider settings. + * + * @return array + */ + public function get_debug_information(): array { + $settings = $this->feature_instance->get_settings(); + $debug_info = []; + + if ( $this->feature_instance instanceof Classification ) { + foreach ( array_keys( $this->feature_instance->get_supported_taxonomies() ) as $tax ) { + $debug_info[ "Taxonomy ($tax)" ] = Feature::get_debug_value_text( $settings[ $tax ], 1 ); + $debug_info[ "Taxonomy ($tax threshold)" ] = absint( $settings[ $tax . '_threshold' ] ); + } + + $debug_info[ __( 'Latest response', 'classifai' ) ] = $this->get_formatted_latest_response( get_transient( 'classifai_azure_openai_embeddings_latest_response' ) ); + } + + return apply_filters( + 'classifai_' . self::ID . '_debug_information', + $debug_info, + $settings, + $this->feature_instance + ); + } +} diff --git a/includes/Classifai/Services/LanguageProcessing.php b/includes/Classifai/Services/LanguageProcessing.php index 5566f60e5..436aae0e9 100644 --- a/includes/Classifai/Services/LanguageProcessing.php +++ b/includes/Classifai/Services/LanguageProcessing.php @@ -47,6 +47,7 @@ public static function get_service_providers(): array { 'Classifai\Providers\GoogleAI\GeminiAPI', 'Classifai\Providers\Azure\OpenAI', 'Classifai\Providers\AWS\AmazonPolly', + 'Classifai\Providers\Azure\Embeddings', ] ); } From 29354c9db5302d806b87c1c7ab7fbf7998f790a6 Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Thu, 9 May 2024 23:48:10 +0530 Subject: [PATCH 2/6] CLI Command fixes. --- includes/Classifai/Command/ClassifaiCommand.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/includes/Classifai/Command/ClassifaiCommand.php b/includes/Classifai/Command/ClassifaiCommand.php index ad50655b0..40648fa4e 100644 --- a/includes/Classifai/Command/ClassifaiCommand.php +++ b/includes/Classifai/Command/ClassifaiCommand.php @@ -11,8 +11,10 @@ use Classifai\Providers\Watson\APIRequest; use Classifai\Providers\Watson\Classifier; use Classifai\Normalizer; +use Classifai\Providers\Azure\Embeddings as AzureEmbeddings; use Classifai\Providers\Watson\PostClassifier; use Classifai\Providers\OpenAI\Embeddings; +use Classifai\Providers\Watson\NLU; use function Classifai\Providers\Watson\get_username; use function Classifai\Providers\Watson\get_password; @@ -64,7 +66,7 @@ public function post( $args = [], $opts = [] ) { $feature = new Classification(); $provider = $feature->get_feature_provider_instance(); - if ( Embeddings::ID !== $provider::ID ) { + if ( NLU::ID !== $provider::ID ) { \WP_CLI::error( 'This command is only available for the IBM Watson Provider' ); } @@ -965,8 +967,8 @@ public function embeddings( $args = [], $opts = [] ) { $feature = new Classification(); $provider = $feature->get_feature_provider_instance(); - if ( Embeddings::ID !== $provider::ID ) { - \WP_CLI::error( 'This command is only available for the OpenAI Embeddings feature' ); + if ( Embeddings::ID !== $provider::ID && AzureEmbeddings::ID !== $provider::ID ) { + \WP_CLI::error( 'This command is only available for the OpenAI Embeddings and Azure OpenAI Embeddings providers.' ); } $embeddings = new Embeddings( false ); From 98a25a4aec3f15a51d5cd0925b9d5c3ffdd2c240 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Thu, 9 May 2024 14:57:46 -0600 Subject: [PATCH 3/6] Fix a few things --- .../Classifai/Features/Classification.php | 16 ++++++------- includes/Classifai/Helpers.php | 2 +- .../Classifai/Providers/Azure/Embeddings.php | 24 +++++++------------ 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/includes/Classifai/Features/Classification.php b/includes/Classifai/Features/Classification.php index bd82444ee..f3b8427d7 100644 --- a/includes/Classifai/Features/Classification.php +++ b/includes/Classifai/Features/Classification.php @@ -4,7 +4,7 @@ use Classifai\Services\LanguageProcessing; use Classifai\Providers\Watson\NLU; -use Classifai\Providers\OpenAI\Embeddings; +use Classifai\Providers\OpenAI\Embeddings as OpenAIEmbeddings; use Classifai\Providers\Azure\Embeddings as AzureEmbeddings; use WP_REST_Server; use WP_REST_Request; @@ -40,9 +40,9 @@ public function __construct() { // Contains just the providers this feature supports. $this->supported_providers = [ - NLU::ID => __( 'IBM Watson NLU', 'classifai' ), - Embeddings::ID => __( 'OpenAI Embeddings', 'classifai' ), - AzureEmbeddings::ID => __( 'Azure OpenAI Embeddings', 'classifai' ), + NLU::ID => __( 'IBM Watson NLU', 'classifai' ), + OpenAIEmbeddings::ID => __( 'OpenAI Embeddings', 'classifai' ), + AzureEmbeddings::ID => __( 'Azure OpenAI Embeddings', 'classifai' ), ]; } @@ -248,10 +248,8 @@ public function save( int $post_id, array $results, bool $link = true ) { case NLU::ID: $results = $provider_instance->link( $post_id, $results, $link ); break; - case Embeddings::ID: - $results = $provider_instance->set_terms( $post_id, $results, $link ); - break; case AzureEmbeddings::ID: + case OpenAIEmbeddings::ID: $results = $provider_instance->set_terms( $post_id, $results, $link ); break; } @@ -784,7 +782,7 @@ public function add_custom_settings_fields() { ); // Embeddings only supports existing terms. - if ( isset( $settings['provider'] ) && ( Embeddings::ID === $settings['provider'] || AzureEmbeddings::ID === $settings['provider'] ) ) { + if ( isset( $settings['provider'] ) && ( OpenAIEmbeddings::ID === $settings['provider'] || AzureEmbeddings::ID === $settings['provider'] ) ) { unset( $method_options['recommended_terms'] ); $settings['classification_method'] = 'existing_terms'; } @@ -881,7 +879,7 @@ public function sanitize_default_feature_settings( array $new_settings ): array $new_settings['classification_method'] = sanitize_text_field( $new_settings['classification_method'] ?? $settings['classification_method'] ); // Embeddings only supports existing terms. - if ( isset( $new_settings['provider'] ) && ( Embeddings::ID === $new_settings['provider'] || AzureEmbeddings::ID === $new_settings['provider'] ) ) { + if ( isset( $new_settings['provider'] ) && ( OpenAIEmbeddings::ID === $new_settings['provider'] || AzureEmbeddings::ID === $new_settings['provider'] ) ) { $new_settings['classification_method'] = 'existing_terms'; } diff --git a/includes/Classifai/Helpers.php b/includes/Classifai/Helpers.php index f02e9903d..5968fe69b 100644 --- a/includes/Classifai/Helpers.php +++ b/includes/Classifai/Helpers.php @@ -5,7 +5,7 @@ use Classifai\Features\Classification; use Classifai\Providers\Provider; use Classifai\Admin\UserProfile; -use Classifai\Providers\OpenAI\Embeddings; +use Classifai\Providers\Watson\NLU; use Classifai\Services\Service; use Classifai\Services\ServicesManager; use WP_Error; diff --git a/includes/Classifai/Providers/Azure/Embeddings.php b/includes/Classifai/Providers/Azure/Embeddings.php index 277ebf4c8..9995b1d04 100644 --- a/includes/Classifai/Providers/Azure/Embeddings.php +++ b/includes/Classifai/Providers/Azure/Embeddings.php @@ -95,7 +95,7 @@ public function get_dimensions(): int { * Useful if you want to increase or decrease the length * of each embedding. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_dimensions * * @param {int} $dimensions The default dimensions. @@ -118,7 +118,7 @@ public function get_max_tokens(): int { * that uses a different number of tokens, or be more * strict on the amount of tokens that can be used. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_max_tokens * * @param {int} $model The default maximum tokens. @@ -141,7 +141,7 @@ public function get_max_terms(): int { * this, either decreasing to help with performance or increasing * to ensure we consider more terms. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_max_terms * * @param {int} $terms The default maximum terms. @@ -295,8 +295,6 @@ protected function authenticate_credentials( string $url, string $api_key, strin /** * Get the threshold for the similarity calculation. * - * @since x.x.x - * * @param string $taxonomy Taxonomy slug. * @return float */ @@ -314,7 +312,7 @@ public function get_threshold( string $taxonomy = '' ): float { /** * Filter the threshold for the similarity calculation. * - * @since x.x.x + * @since 2.5.0 * @hook classifai_threshold * * @param {float} $threshold The threshold to use. @@ -328,8 +326,6 @@ public function get_threshold( string $taxonomy = '' ): float { /** * Get the data to preview terms. * - * @since 2.5.0 - * * @return array */ public function get_post_classifier_embeddings_preview_data(): array { @@ -375,7 +371,7 @@ public function generate_embeddings_for_post( int $post_id, bool $force = false * * Default is true, return false to skip classifying. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_should_classify * * @param {bool} $should_classify Whether the item should be classified. Default `true`, return `false` to skip. @@ -575,8 +571,6 @@ function ( $a, $b ) { /** * Get the similarity between an embedding and all terms. * - * @since x.x.x - * * @param array $embedding Embedding data. * @param bool $consider_threshold Whether to consider the threshold setting. * @return array @@ -639,7 +633,7 @@ private function get_embeddings_similarity( array $embedding, bool $consider_thr if ( ! empty( $term_embedding ) ) { // Loop through the chunks and run a similarity calculation on each. foreach ( $term_embedding as $chunk ) { - $similarity = $calculations->similarity( $embedding, $chunk ); + $similarity = $calculations->cosine_similarity( $embedding, $chunk ); if ( false !== $similarity && ( ! $consider_threshold || $similarity <= $threshold ) ) { $embedding_similarity[] = [ @@ -743,7 +737,7 @@ public function generate_embeddings_for_term( int $term_id, bool $force = false * * Default is true, return false to skip classifying. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_should_classify * * @param {bool} $should_classify Whether the item should be classified. Default `true`, return `false` to skip. @@ -805,7 +799,7 @@ public function generate_embedding( string $text = '' ) { /** * Filter the request body before sending to OpenAI. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_request_body * * @param {array} $body Request body that will be sent to OpenAI. @@ -927,7 +921,7 @@ public function get_normalized_content( int $id = 0, string $type = 'post' ): st /** * Filter content that will get sent to OpenAI. * - * @since x.x.x + * @since 3.1.0 * @hook classifai_azure_openai_embeddings_content * * @param {string} $content Content that will be sent to OpenAI. From 6e95a0281696b3d612239b469a2fa15a620d4d97 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Thu, 9 May 2024 15:10:43 -0600 Subject: [PATCH 4/6] Add tests --- ...fy-content-azure-openai-embeddings.test.js | 278 ++++++++++++++++++ ...lassify-content-openai-embeddings.test.js} | 0 ...excerpt-generation-openai-chatgpt.test.js} | 0 ... => resize_content-openai-chatgpt.test.js} | 0 ... => speech-to-text-openai-whisper.test.js} | 0 ...> title-generation-openai-chatgpt.test.js} | 0 tests/test-plugin/e2e-test-plugin.php | 5 +- 7 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 tests/cypress/integration/language-processing/classify-content-azure-openai-embeddings.test.js rename tests/cypress/integration/language-processing/{classify-content-openapi-embeddings.test.js => classify-content-openai-embeddings.test.js} (100%) rename tests/cypress/integration/language-processing/{excerpt-generation-openapi-chatgpt.test.js => excerpt-generation-openai-chatgpt.test.js} (100%) rename tests/cypress/integration/language-processing/{resize_content-openapi-chatgpt.test.js => resize_content-openai-chatgpt.test.js} (100%) rename tests/cypress/integration/language-processing/{speech-to-text-openapi-whisper.test.js => speech-to-text-openai-whisper.test.js} (100%) rename tests/cypress/integration/language-processing/{title-generation-openapi-chatgpt.test.js => title-generation-openai-chatgpt.test.js} (100%) diff --git a/tests/cypress/integration/language-processing/classify-content-azure-openai-embeddings.test.js b/tests/cypress/integration/language-processing/classify-content-azure-openai-embeddings.test.js new file mode 100644 index 000000000..54865773c --- /dev/null +++ b/tests/cypress/integration/language-processing/classify-content-azure-openai-embeddings.test.js @@ -0,0 +1,278 @@ +describe( '[Language processing] Classify Content (Azure OpenAI) Tests', () => { + before( () => { + cy.login(); + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + cy.get( '#status' ).check(); + cy.get( '#provider' ).select( 'azure_openai_embeddings' ); + cy.get( '#submit' ).click(); + cy.optInAllFeatures(); + cy.disableClassicEditor(); + } ); + + beforeEach( () => { + cy.login(); + } ); + + it( 'Can save Azure OpenAI Embeddings "Language Processing" settings', () => { + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + + cy.get( + 'input[name="classifai_feature_classification[azure_openai_embeddings][endpoint_url]"]' + ) + .clear() + .type( 'https://e2e-test-azure-openai-embeddings.test/' ); + cy.get( + 'input[name="classifai_feature_classification[azure_openai_embeddings][api_key]"]' + ) + .clear() + .type( 'password' ); + cy.get( + 'input[name="classifai_feature_classification[azure_openai_embeddings][deployment]"]' + ) + .clear() + .type( 'test' ); + cy.get( '#status' ).check(); + cy.get( '#classifai_feature_classification_post_types_post' ).check(); + cy.get( + '#classifai_feature_classification_post_statuses_publish' + ).check(); + cy.get( '#category' ).check(); + cy.get( '#category_threshold' ).clear().type( 100 ); + cy.get( '#submit' ).click(); + } ); + + it( 'Can create category and post and category will get auto-assigned', () => { + // Create test term. + cy.deleteAllTerms( 'category' ); + cy.createTerm( 'Test', 'category' ); + + // Create test post. + cy.createPost( { + title: 'Test embeddings', + content: 'Test embeddings content', + } ); + + // Close post publish panel. + const closePanelSelector = 'button[aria-label="Close panel"]'; + cy.get( 'body' ).then( ( $body ) => { + if ( $body.find( closePanelSelector ).length > 0 ) { + cy.get( closePanelSelector ).click(); + } + } ); + + // Open post settings sidebar. + cy.openDocumentSettingsSidebar(); + + // Find and open the category panel. + const panelButtonSelector = `.components-panel__body .components-panel__body-title button:contains("Categories")`; + + cy.get( panelButtonSelector ).then( ( $panelButton ) => { + // Find the panel container. + const $panel = $panelButton.parents( '.components-panel__body' ); + + // Open panel. + if ( ! $panel.hasClass( 'is-opened' ) ) { + cy.wrap( $panelButton ).click(); + } + + // Ensure our test category is checked. + cy.wrap( $panel ) + .find( + '.editor-post-taxonomies__hierarchical-terms-list .editor-post-taxonomies__hierarchical-terms-choice:first input' + ) + .should( 'be.checked' ); + cy.wrap( $panel ) + .find( '.editor-post-taxonomies__hierarchical-terms-list' ) + .children() + .contains( 'Test' ); + } ); + } ); + + it( 'Can see the preview on the settings page', () => { + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + + cy.get( '#submit' ).click(); + + // Click the Preview button. + const closePanelSelector = '#get-classifier-preview-data-btn'; + cy.get( closePanelSelector ).click(); + + // Check the term is received and visible. + cy.get( '.tax-row--Category' ).should( 'exist' ); + } ); + + it( 'Can create category and post and category will not get auto-assigned if feature turned off', () => { + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + cy.get( '#status' ).uncheck(); + cy.get( '#submit' ).click(); + + // Create test term. + cy.deleteAllTerms( 'category' ); + cy.createTerm( 'Test', 'category' ); + + // Create test post. + cy.createPost( { + title: 'Test embeddings disabled', + content: 'Test embeddings content', + } ); + + // Close post publish panel. + const closePanelSelector = 'button[aria-label="Close panel"]'; + cy.get( 'body' ).then( ( $body ) => { + if ( $body.find( closePanelSelector ).length > 0 ) { + cy.get( closePanelSelector ).click(); + } + } ); + + // Open post settings sidebar. + cy.openDocumentSettingsSidebar(); + + // Find and open the category panel. + const panelButtonSelector = `.components-panel__body .components-panel__body-title button:contains("Categories")`; + + cy.get( panelButtonSelector ).then( ( $panelButton ) => { + // Find the panel container. + const $panel = $panelButton.parents( '.components-panel__body' ); + + // Open panel. + if ( ! $panel.hasClass( 'is-opened' ) ) { + cy.wrap( $panelButton ).click(); + } + + // Ensure our test category is not checked. + cy.wrap( $panel ) + .find( + '.editor-post-taxonomies__hierarchical-terms-list .editor-post-taxonomies__hierarchical-terms-choice:first input' + ) + .should( 'be.checked' ); + cy.wrap( $panel ) + .find( + '.editor-post-taxonomies__hierarchical-terms-list .editor-post-taxonomies__hierarchical-terms-choice:first label' + ) + .contains( 'Uncategorized' ); + } ); + } ); + + it( 'Can see the enable button in a post (Classic Editor)', () => { + cy.enableClassicEditor(); + + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + + cy.get( '#status' ).check(); + cy.get( '#classifai_feature_classification_post_types_post' ).check(); + cy.get( + '#classifai_feature_classification_post_statuses_publish' + ).check(); + cy.get( '#category' ).check(); + cy.get( '#submit' ).click(); + + cy.classicCreatePost( { + title: 'Embeddings test classic', + content: "This feature uses OpenAI's Embeddings capabilities.", + postType: 'post', + } ); + + cy.get( '#classifai_language_processing_metabox' ).should( 'exist' ); + cy.get( '#classifai-process-content' ).check(); + + cy.disableClassicEditor(); + } ); + + it( 'Can enable/disable content classification feature ', () => { + cy.disableClassicEditor(); + + // Disable feature. + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + cy.get( '#status' ).uncheck(); + cy.get( '#submit' ).click(); + + // Verify that the feature is not available. + cy.verifyClassifyContentEnabled( false ); + + // Enable feature. + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' + ); + cy.get( '#status' ).check(); + cy.get( '#submit' ).click(); + + // Verify that the feature is available. + cy.verifyClassifyContentEnabled( true ); + } ); + + it( 'Can enable/disable content classification feature by role', () => { + // Remove custom taxonomies so those don't interfere with the test. + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing' + ); + + // Disable access for all users. + cy.disableFeatureForUsers(); + + cy.get( '#submit' ).click(); + + // Disable admin role. + cy.disableFeatureForRoles( 'feature_classification', [ + 'administrator', + ] ); + + // Verify that the feature is not available. + cy.verifyClassifyContentEnabled( false ); + + // Enable admin role. + cy.enableFeatureForRoles( 'feature_classification', [ + 'administrator', + ] ); + + // Verify that the feature is available. + cy.verifyClassifyContentEnabled( true ); + } ); + + it( 'Can enable/disable content classification feature by user', () => { + // Disable admin role. + cy.disableFeatureForRoles( 'feature_classification', [ + 'administrator', + ] ); + + // Verify that the feature is not available. + cy.verifyClassifyContentEnabled( false ); + + // Enable feature for admin user. + cy.enableFeatureForUsers( 'feature_classification', [ 'admin' ] ); + + // Verify that the feature is available. + cy.verifyClassifyContentEnabled( true ); + } ); + + it( 'User can opt-out content classification feature', () => { + // Enable user based opt-out. + cy.enableFeatureOptOut( + 'feature_classification', + 'azure_openai_embeddings' + ); + + // opt-out + cy.optOutFeature( 'feature_classification' ); + + // Verify that the feature is not available. + cy.verifyClassifyContentEnabled( false ); + + // opt-in + cy.optInFeature( 'feature_classification' ); + + // Verify that the feature is available. + cy.verifyClassifyContentEnabled( true ); + } ); +} ); diff --git a/tests/cypress/integration/language-processing/classify-content-openapi-embeddings.test.js b/tests/cypress/integration/language-processing/classify-content-openai-embeddings.test.js similarity index 100% rename from tests/cypress/integration/language-processing/classify-content-openapi-embeddings.test.js rename to tests/cypress/integration/language-processing/classify-content-openai-embeddings.test.js diff --git a/tests/cypress/integration/language-processing/excerpt-generation-openapi-chatgpt.test.js b/tests/cypress/integration/language-processing/excerpt-generation-openai-chatgpt.test.js similarity index 100% rename from tests/cypress/integration/language-processing/excerpt-generation-openapi-chatgpt.test.js rename to tests/cypress/integration/language-processing/excerpt-generation-openai-chatgpt.test.js diff --git a/tests/cypress/integration/language-processing/resize_content-openapi-chatgpt.test.js b/tests/cypress/integration/language-processing/resize_content-openai-chatgpt.test.js similarity index 100% rename from tests/cypress/integration/language-processing/resize_content-openapi-chatgpt.test.js rename to tests/cypress/integration/language-processing/resize_content-openai-chatgpt.test.js diff --git a/tests/cypress/integration/language-processing/speech-to-text-openapi-whisper.test.js b/tests/cypress/integration/language-processing/speech-to-text-openai-whisper.test.js similarity index 100% rename from tests/cypress/integration/language-processing/speech-to-text-openapi-whisper.test.js rename to tests/cypress/integration/language-processing/speech-to-text-openai-whisper.test.js diff --git a/tests/cypress/integration/language-processing/title-generation-openapi-chatgpt.test.js b/tests/cypress/integration/language-processing/title-generation-openai-chatgpt.test.js similarity index 100% rename from tests/cypress/integration/language-processing/title-generation-openapi-chatgpt.test.js rename to tests/cypress/integration/language-processing/title-generation-openai-chatgpt.test.js diff --git a/tests/test-plugin/e2e-test-plugin.php b/tests/test-plugin/e2e-test-plugin.php index c9e0a40a1..6753215d3 100644 --- a/tests/test-plugin/e2e-test-plugin.php +++ b/tests/test-plugin/e2e-test-plugin.php @@ -75,7 +75,10 @@ function classifai_test_mock_http_requests( $preempt, $parsed_args, $url ) { ), 'body' => file_get_contents( __DIR__ . '/text-to-speech.txt' ), ); - } elseif ( strpos( $url, 'https://api.openai.com/v1/embeddings' ) !== false ) { + } elseif ( + strpos( $url, 'https://api.openai.com/v1/embeddings' ) !== false || + strpos( $url, 'https://e2e-test-azure-openai-embeddings.test/openai/deployments' ) !== false + ) { $response = file_get_contents( __DIR__ . '/embeddings.json' ); } elseif ( strpos( $url, 'http://e2e-test-image-processing.test/vision/v3.2/analyze' ) !== false ) { $response = file_get_contents( __DIR__ . '/image_analyze.json' ); From d7a92e63a9686047b5d95814658ba6a94834b747 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Thu, 9 May 2024 15:50:34 -0600 Subject: [PATCH 5/6] Ensure the IBM Watson settings are configured properly if not run first --- .../classify-content-ibm-watson.test.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js b/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js index 578b153da..c7545fe93 100644 --- a/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js +++ b/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js @@ -4,6 +4,10 @@ describe( '[Language processing] Classify content (IBM Watson - NLU) Tests', () cy.visit( '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' ); + cy.get( '#status' ).check(); + cy.get( '#provider' ).select( 'ibm_watson_nlu' ); + cy.get( '#submit' ).click(); + cy.get( '#provider' ).select( 'ibm_watson_nlu' ); cy.get( '#endpoint_url' ) .clear() @@ -14,14 +18,15 @@ describe( '[Language processing] Classify content (IBM Watson - NLU) Tests', () cy.get( '#classifai_feature_classification_post_statuses_publish' ).check(); - cy.get( '#status' ).check(); - cy.get( '#submit' ).click(); - cy.get( '#provider' ).select( 'ibm_watson_nlu' ); cy.get( '#classifai_feature_classification_classification_method_recommended_terms' ).check(); cy.wait( 1000 ); cy.get( '#category' ).check(); + cy.get( '#category_threshold' ).clear().type( 70 ); + cy.get( '#keyword_threshold' ).clear().type( 70 ); + cy.get( '#entity_threshold' ).clear().type( 70 ); + cy.get( '#concept_threshold' ).clear().type( 70 ); cy.get( '#submit' ).click(); cy.optInAllFeatures(); cy.disableClassicEditor(); From eea9b617a52776183803acb1dac106b83dda3a01 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Thu, 9 May 2024 16:16:58 -0600 Subject: [PATCH 6/6] Ensure role and user based settings are reset for IBM Watson --- .../classify-content-ibm-watson.test.js | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js b/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js index c7545fe93..01bfff207 100644 --- a/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js +++ b/tests/cypress/integration/language-processing/classify-content-ibm-watson.test.js @@ -473,26 +473,23 @@ describe( '[Language processing] Classify content (IBM Watson - NLU) Tests', () cy.visit( '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_classification' ); - cy.get( - '#classifai_feature_classification_roles_administrator' - ).uncheck(); + // Disable access for all users. + cy.disableFeatureForUsers(); cy.get( '#submit' ).click(); - cy.get( '.notice' ).contains( 'Settings saved.' ); + + // Disable admin role. + cy.disableFeatureForRoles( 'feature_classification', [ + 'administrator', + ] ); // Verify that the feature is not available. cy.verifyClassifyContentEnabled( false ); // Enable access to admin role. - cy.visit( - '/wp-admin/tools.php?page=classifai&tab=language_processing&provider=watson_nlu' - ); - cy.get( - '#classifai_feature_classification_roles_administrator' - ).check(); - - cy.get( '#submit' ).click(); - cy.get( '.notice' ).contains( 'Settings saved.' ); + cy.enableFeatureForRoles( 'feature_classification', [ + 'administrator', + ] ); // Verify that the feature is available. cy.verifyClassifyContentEnabled( true );